Merge branch 'src-refactoring'

pull/13171/head
Joshua Haberman 14 years ago
commit f07cd8ff1d
  1. 4
      LICENSE
  2. 135
      Makefile
  3. 9
      README
  4. 116
      benchmarks/parsestream.upb_table.c
  5. 75
      core/upb.c
  6. 220
      core/upb.h
  7. 27
      core/upb_atomic.h
  8. 1326
      core/upb_def.c
  9. 176
      core/upb_def.h
  10. 101
      core/upb_msg.c
  11. 96
      core/upb_msg.h
  12. 275
      core/upb_stream.h
  13. 307
      core/upb_stream_vtbl.h
  14. 161
      core/upb_string.c
  15. 342
      core/upb_string.h
  16. 22
      core/upb_table.c
  17. 5
      core/upb_table.h
  18. 2644
      descriptor/descriptor.c
  19. 404
      descriptor/descriptor.h
  20. 2
      gen-deps.sh
  21. 336
      lang_ext/lua/upb.c
  22. 49
      src/upb.c
  23. 500
      src/upb_data.c
  24. 552
      src/upb_data.h
  25. 494
      src/upb_decoder.c
  26. 56
      src/upb_decoder.h
  27. 823
      src/upb_def.c
  28. 73
      src/upb_encoder.h
  29. 20
      src/upb_inlinedefs.c
  30. 155
      src/upb_sink.h
  31. 165
      src/upb_string.h
  32. 121
      src/upb_text.c
  33. 36
      src/upb_text.h
  34. 429
      stream/upb_decoder.c
  35. 53
      stream/upb_decoder.h
  36. 3
      stream/upb_encoder.c
  37. 56
      stream/upb_encoder.h
  38. 104
      stream/upb_stdio.c
  39. 42
      stream/upb_stdio.h
  40. 71
      stream/upb_strstream.c
  41. 61
      stream/upb_strstream.h
  42. 143
      stream/upb_textprinter.c
  43. 29
      stream/upb_textprinter.h
  44. 42
      tests/test_decoder.c
  45. 25
      tests/test_def.c
  46. 127
      tests/test_stream.c
  47. 126
      tests/test_string.c
  48. 21
      tests/test_table.cc
  49. 54
      tests/test_vs_proto2.cc
  50. 497
      upb.xcodeproj/project.pbxproj

@ -1,6 +1,6 @@
Copyright (c) 2009, Joshua Haberman
Copyright (c) 2009, Google Inc.
Copyright (c) 2009-2010, Joshua Haberman
Copyright (c) 2009-2010, Google Inc.
All rights reserved.
Redistribution and use in source and binary forms, with or without

@ -27,34 +27,68 @@ rwildcard=$(strip $(foreach d,$(wildcard $1*),$(call rwildcard,$d/,$2)$(filter $
CC=gcc
CXX=g++
CFLAGS=-std=c99
INCLUDE=-Idescriptor -Isrc -Itests -I.
CPPFLAGS=-Wall -Wextra -g $(INCLUDE) $(strip $(shell test -f perf-cppflags && cat perf-cppflags))
LDLIBS=-lpthread
LIBUPB=src/libupb.a
LIBUPB_PIC=src/libupb_pic.a
LIBUPB_SHARED=src/libupb.so
ALL=deps $(OBJ) $(LIBUPB) $(LIBUPB_PIC) tools/upbc
INCLUDE=-Idescriptor -Icore -Itests -Istream -I.
CPPFLAGS=-Wall -Wextra -Wno-missing-field-initializers -g $(INCLUDE) $(strip $(shell test -f perf-cppflags && cat perf-cppflags))
LDLIBS=-lpthread core/libupb.a
ifeq ($(shell uname), Darwin)
CPPFLAGS += -I/usr/include/lua5.1
LDFLAGS += -L/usr/local/lib -llua
else
CFLAGS += $(strip $(shell pkg-config --silence-errors --cflags lua || pkg-config --cflags lua5.1))
LDFLAGS += $(strip $(shell pkg-config --silence-errors --libs lua || pkg-config --libs lua5.1))
endif
LIBUPB=core/libupb.a
LIBUPB_PIC=core/libupb_pic.a
LIBUPB_SHARED=core/libupb.so
ALL=deps $(OBJ) $(LIBUPB) $(LIBUPB_PIC)
all: $(ALL)
clean:
rm -rf $(LIBUPB) $(LIBUPB_PIC)
rm -rf $(call rwildcard,,*.o) $(call rwildcard,,*.lo)
rm -rf $(call rwildcard,,*.o) $(call rwildcard,,*.lo) $(call rwildcard,,*.gc*)
rm -rf benchmark/google_messages.proto.pb benchmark/google_messages.pb.* benchmarks/b.* benchmarks/*.pb*
rm -rf tests/tests tests/t.* tests/test_table
rm -rf descriptor/descriptor.proto.pb
rm -rf $(TESTS) tests/t.*
rm -rf descriptor/descriptor.pb
rm -rf tools/upbc deps
cd lang_ext/python && python setup.py clean --all
# The core library (src/libupb.a)
SRC=src/upb.c src/upb_decoder.c src/upb_table.c src/upb_def.c src/upb_data.c \
src/upb_encoder.c descriptor/descriptor.c src/upb_text.c
-include deps
deps: gen-deps.sh Makefile $(call rwildcard,,*.c) $(call rwildcard,,*.h)
@./gen-deps.sh $(SRC)
# The core library -- the absolute minimum you must compile in to successfully
# bootstrap.
CORE= \
core/upb.c \
core/upb_table.c \
core/upb_string.c \
core/upb_def.c \
descriptor/descriptor.c
# Common encoders/decoders and upb_msg -- you're almost certain to want these.
STREAM= \
stream/upb_decoder.c \
stream/upb_stdio.c \
stream/upb_textprinter.c \
stream/upb_strstream.c \
core/upb_msg.c \
SRC=$(CORE) $(STREAM)
$(SRC): perf-cppflags
# Parts of core that are yet to be converted.
OTHERSRC=src/upb_encoder.c src/upb_text.c
# Override the optimization level for upb_def.o, because it is not in the
# critical path but gets very large when -O3 is used.
src/upb_def.o: src/upb_def.c
core/upb_def.o: core/upb_def.c
$(CC) $(CFLAGS) $(CPPFLAGS) -Os -c -o $@ $<
src/upb_def.lo: src/upb_def.c
core/upb_def.lo: core/upb_def.c
$(CC) $(CFLAGS) $(CPPFLAGS) -Os -c -o $@ $< -fPIC
lang_ext/lua/upb.so: lang_ext/lua/upb.lo
$(CC) $(CFLAGS) $(CPPFLAGS) -shared -o $@ $< core/libupb_pic.a
STATICOBJ=$(patsubst %.c,%.o,$(SRC))
SHAREDOBJ=$(patsubst %.c,%.lo,$(SRC))
# building shared objects is like building static ones, except -fPIC is added.
@ -67,12 +101,12 @@ $(LIBUPB_SHARED): $(SHAREDOBJ)
$(CC) -shared -o $(LIBUPB_SHARED) $(SHAREDOBJ)
# Regenerating the auto-generated files in descriptor/.
descriptor/descriptor.proto.pb: descriptor/descriptor.proto
descriptor/descriptor.pb: descriptor/descriptor.proto
# TODO: replace with upbc
protoc descriptor/descriptor.proto -odescriptor/descriptor.proto.pb
protoc descriptor/descriptor.proto -odescriptor/descriptor.pb
descriptorgen: descriptor/descriptor.proto.pb tools/upbc
./tools/upbc -i upb_file_descriptor_set -o descriptor/descriptor descriptor/descriptor.proto.pb
descriptorgen: descriptor/descriptor.pb
cd descriptor && xxd -i descriptor.pb > descriptor.c
# Language extensions.
python: $(LIBUPB_PIC)
@ -83,25 +117,33 @@ tests/test.proto.pb: tests/test.proto
# TODO: replace with upbc
protoc tests/test.proto -otests/test.proto.pb
tests: tests/tests \
TESTS=tests/test_string \
tests/test_table \
tests/t.test_vs_proto2.googlemessage1 \
tests/t.test_vs_proto2.googlemessage2 \
tests/test.proto.pb
tests/test_def \
tests/test_stream \
tests/test_decoder \
# tests/t.test_vs_proto2.googlemessage1 \
# tests/t.test_vs_proto2.googlemessage2 \
# tests/test.proto.pb
tests: $(LIBUPB) $(TESTS)
OTHER_TESTS=tests/tests \
$(TESTS): $(LIBUPB)
#VALGRIND=valgrind --leak-check=full --error-exitcode=1
VALGRIND=
VALGRIND=valgrind --leak-check=full --error-exitcode=1
#VALGRIND=
test: tests
@echo Running all tests under valgrind.
$(VALGRIND) ./tests/tests
@set -e # Abort on error.
# Needs to be rewritten to separate the benchmark.
# valgrind --error-exitcode=1 ./tests/test_table
@for test in tests/t.* ; do \
if [ -f ./$$test ] ; then \
echo $(VALGRIND) ./$$test: \\c; \
$(VALGRIND) ./$$test; \
@for test in $(TESTS); do \
if [ -x ./$$test ] ; then \
echo !!! $(VALGRIND) ./$$test; \
$(VALGRIND) ./$$test || exit 1; \
fi \
done;
done; \
echo "All tests passed!"
tests/t.test_vs_proto2.googlemessage1 \
tests/t.test_vs_proto2.googlemessage2: \
@ -125,16 +167,16 @@ tests/test_table: tests/test_table.cc
# Includes <hash_set> which is a deprecated header.
$(CXX) $(CXXFLAGS) $(CPPFLAGS) -Wno-deprecated -o $@ $< $(LIBUPB)
tests/tests: src/libupb.a
tests/tests: core/libupb.a
# Tools
tools/upbc: src/libupb.a
tools/upbc: core/libupb.a
# Benchmarks
UPB_BENCHMARKS=benchmarks/b.parsetostruct_googlemessage1.upb_table_byval \
benchmarks/b.parsetostruct_googlemessage1.upb_table_byref \
benchmarks/b.parsetostruct_googlemessage2.upb_table_byval \
benchmarks/b.parsetostruct_googlemessage2.upb_table_byref
#UPB_BENCHMARKS=benchmarks/b.parsetostruct_googlemessage1.upb_table \
# benchmarks/b.parsetostruct_googlemessage2.upb_table
UPB_BENCHMARKS=benchmarks/b.parsestream_googlemessage1.upb_table \
benchmarks/b.parsestream_googlemessage2.upb_table
BENCHMARKS=$(UPB_BENCHMARKS) \
benchmarks/b.parsetostruct_googlemessage1.proto2_table \
@ -181,6 +223,20 @@ benchmarks/b.parsetostruct_googlemessage2.upb_table_byref: \
-DMESSAGE_FILE=\"google_message2.dat\" \
-DBYREF=true $(LIBUPB)
benchmarks/b.parsestream_googlemessage1.upb_table \
benchmarks/b.parsestream_googlemessage2.upb_table: \
benchmarks/parsestream.upb_table.c $(LIBUPB) benchmarks/google_messages.proto.pb
$(CC) $(CFLAGS) $(CPPFLAGS) -o benchmarks/b.parsestream_googlemessage1.upb_table $< \
-DMESSAGE_NAME=\"benchmarks.SpeedMessage1\" \
-DMESSAGE_DESCRIPTOR_FILE=\"google_messages.proto.pb\" \
-DMESSAGE_FILE=\"google_message1.dat\" \
$(LIBUPB)
$(CC) $(CFLAGS) $(CPPFLAGS) -o benchmarks/b.parsestream_googlemessage2.upb_table $< \
-DMESSAGE_NAME=\"benchmarks.SpeedMessage2\" \
-DMESSAGE_DESCRIPTOR_FILE=\"google_messages.proto.pb\" \
-DMESSAGE_FILE=\"google_message2.dat\" \
$(LIBUPB)
benchmarks/b.parsetostruct_googlemessage1.proto2_table \
benchmarks/b.parsetostruct_googlemessage2.proto2_table: \
benchmarks/parsetostruct.proto2_table.cc benchmarks/google_messages.pb.cc
@ -210,6 +266,3 @@ benchmarks/b.parsetostruct_googlemessage2.proto2_compiled: \
-DMESSAGE_HFILE=\"google_messages.pb.h\" \
benchmarks/google_messages.pb.cc -lprotobuf -lpthread
-include deps
deps: gen-deps.sh Makefile $(call rwildcard,,*.c) $(call rwildcard,,*.h)
@./gen-deps.sh $(SRC)

@ -10,6 +10,11 @@ ROADMAP OF THE SOURCE
benchmark/
Benchmarks of upb and other protocol buffer implementations.
core/
The core source directory. Builds into core/libupb.a. Contains only the
very core library, which is capable of loading descriptors given the
appropriate decoder. Does not even contain decoders for the standard
formats like the protobuf text and binary formats.
descriptor/
Files that describe the format of Protocol Buffer "descriptors", which are
protocol buffers that describe the format of other protocol buffers. These
@ -19,8 +24,8 @@ labs/
about alternate ways of implementing things. When possible, these are
benchmarked by the tests in benchmark/. We also test these with the tests
in tests/, to ensure that the alternate implementations are actually correct.
src/
The core source directory. builds into src/libupb.a.
stream/
Implementations of streaming protobuf encoders and decoders.
tests/
Unit tests.
tools/

@ -0,0 +1,116 @@
#include "main.c"
#include "upb_def.h"
#include "upb_decoder.h"
#include "upb_strstream.h"
static upb_stringsrc *stringsrc;
static upb_string *input_str;
static upb_string *tmp_str;
static upb_msgdef *def;
static upb_decoder *decoder;
static bool initialize()
{
// Initialize upb state, decode descriptor.
upb_status status = UPB_STATUS_INIT;
upb_symtab *s = upb_symtab_new();
upb_symtab_add_descriptorproto(s);
upb_def *fds_def = upb_symtab_lookup(
s, UPB_STRLIT("google.protobuf.FileDescriptorSet"));
if (!fds_def) {
fprintf(stderr, "Couldn't load FileDescriptorSet def");
}
upb_string *fds_str = upb_strreadfile(MESSAGE_DESCRIPTOR_FILE);
if(fds_str == NULL) {
fprintf(stderr, "Couldn't read " MESSAGE_DESCRIPTOR_FILE ":"),
upb_printerr(&status);
return false;
}
upb_stringsrc *ssrc = upb_stringsrc_new();
upb_stringsrc_reset(ssrc, fds_str);
upb_decoder *d = upb_decoder_new(upb_downcast_msgdef(fds_def));
upb_decoder_reset(d, upb_stringsrc_bytesrc(ssrc));
upb_symtab_addfds(s, upb_decoder_src(d), &status);
if(!upb_ok(&status)) {
fprintf(stderr, "Error importing " MESSAGE_DESCRIPTOR_FILE ":");
upb_printerr(&status);
return false;
}
upb_string_unref(fds_str);
upb_decoder_free(d);
upb_stringsrc_free(ssrc);
upb_def_unref(fds_def);
def = upb_downcast_msgdef(upb_symtab_lookup(s, UPB_STRLIT(MESSAGE_NAME)));
if(!def) {
fprintf(stderr, "Error finding symbol '" UPB_STRFMT "'.\n",
UPB_STRARG(UPB_STRLIT(MESSAGE_NAME)));
return false;
}
upb_symtab_unref(s);
// Read the message data itself.
input_str = upb_strreadfile(MESSAGE_FILE);
if(input_str == NULL) {
fprintf(stderr, "Error reading " MESSAGE_FILE "\n");
return false;
}
tmp_str = NULL;
decoder = upb_decoder_new(def);
stringsrc = upb_stringsrc_new();
return true;
}
// Releases the file-level objects that initialize() set up and run() used:
// the input string, the scratch string, the message def, the decoder and the
// string source.
// NOTE(review): tmp_str is still NULL if run() never read a string field, so
// this relies on upb_string_unref() accepting NULL -- confirm.
static void cleanup()
{
upb_string_unref(input_str);
upb_string_unref(tmp_str);
upb_def_unref(UPB_UPCAST(def));
upb_decoder_free(decoder);
upb_stringsrc_free(stringsrc);
}
// Runs one benchmark iteration: rewinds the string source to the start of
// input_str, resets the decoder onto it and pulls every field out of the
// resulting upb_src, descending into submessages.
//
// The argument i is unused.  Returns the length of input_str on success, or
// 0 after printing a diagnostic when the stream stops before EOF.
static size_t run(int i)
{
(void)i;
// NOTE(review): status is never passed to any call in this function, so the
// upb_ok() check below cannot fail and the upb_printerr() at "err" has no
// message to show -- confirm whether the src's own status should be used.
upb_status status = UPB_STATUS_INIT;
upb_stringsrc_reset(stringsrc, input_str);
upb_decoder_reset(decoder, upb_stringsrc_bytesrc(stringsrc));
upb_src *src = upb_decoder_src(decoder);
upb_fielddef *f;
// Number of submessages currently open.
int depth = 0;
// Outer loop: one pass per (sub)message level; inner loop: one pass per field.
while(1) {
while(!upb_src_eof(src) && (f = upb_src_getdef(src)) != NULL) {
if(upb_issubmsg(f)) {
// Descend; the fields of the submessage are read by the same inner loop.
// NOTE(review): the results of upb_src_startmsg/getstr/getval are ignored.
upb_src_startmsg(src);
++depth;
} else if(upb_isstring(f)) {
// Reuse the scratch string across fields where possible.
tmp_str = upb_string_tryrecycle(tmp_str);
upb_src_getstr(src, tmp_str);
} else {
// Primitive type.
upb_value val;
upb_src_getval(src, upb_value_addrof(&val));
}
}
// If we're not EOF now, the loop terminated due to an error.
if (!upb_src_eof(src)) goto err;
// EOF at depth 0 ends the top-level message; otherwise close one submessage
// and keep reading its parent.
if (depth == 0) break;
--depth;
upb_src_endmsg(src);
}
if(!upb_ok(&status)) goto err;
return upb_string_len(input_str);
err:
fprintf(stderr, "Decode error");
upb_printerr(&status);
return 0;
}

@ -0,0 +1,75 @@
/*
* upb - a minimalist implementation of protocol buffers.
*
* Copyright (c) 2009 Joshua Haberman. See LICENSE for details.
*
*/
#include <stdarg.h>
#include <stddef.h>
#include <string.h>
#include "upb.h"
#include "upb_string.h"
// Alignment of type t, computed as the offset of a t member placed directly
// after a single char inside an anonymous struct.
#define alignof(t) offsetof(struct { char c; t x; }, x)
// Expands to one upb_type_info initializer (including the trailing comma):
// alignment and size of ctype, the wire type, a bitmask with bit
// (1 << wire_type) set plus the UPB_WIRE_TYPE_DELIMITED bit when
// allows_delimited is 1, and the stringified name of ctype.
#define TYPE_INFO(wire_type, ctype, allows_delimited) \
{alignof(ctype), sizeof(ctype), wire_type, \
(1 << wire_type) | (allows_delimited << UPB_WIRE_TYPE_DELIMITED), \
#ctype},
// Per-type info table.  Entry order matters: each row sits at the index named
// in its trailing comment, starting from 1 (slot 0 is a placeholder).
// NOTE(review): the indices are presumably the descriptor.proto field type
// numbers -- confirm against descriptor_const.h.
const upb_type_info upb_types[] = {
{0, 0, 0, 0, ""}, // There is no type 0.
TYPE_INFO(UPB_WIRE_TYPE_64BIT, double, 1) // DOUBLE
TYPE_INFO(UPB_WIRE_TYPE_32BIT, float, 1) // FLOAT
TYPE_INFO(UPB_WIRE_TYPE_VARINT, int64_t, 1) // INT64
TYPE_INFO(UPB_WIRE_TYPE_VARINT, uint64_t, 1) // UINT64
TYPE_INFO(UPB_WIRE_TYPE_VARINT, int32_t, 1) // INT32
TYPE_INFO(UPB_WIRE_TYPE_64BIT, uint64_t, 1) // FIXED64
TYPE_INFO(UPB_WIRE_TYPE_32BIT, uint32_t, 1) // FIXED32
TYPE_INFO(UPB_WIRE_TYPE_VARINT, bool, 1) // BOOL
TYPE_INFO(UPB_WIRE_TYPE_DELIMITED, void*, 1) // STRING
TYPE_INFO(UPB_WIRE_TYPE_START_GROUP, void*, 0) // GROUP
TYPE_INFO(UPB_WIRE_TYPE_DELIMITED, void*, 1) // MESSAGE
TYPE_INFO(UPB_WIRE_TYPE_DELIMITED, void*, 1) // BYTES
TYPE_INFO(UPB_WIRE_TYPE_VARINT, uint32_t, 1) // UINT32
TYPE_INFO(UPB_WIRE_TYPE_VARINT, uint32_t, 1) // ENUM
TYPE_INFO(UPB_WIRE_TYPE_32BIT, int32_t, 1) // SFIXED32
TYPE_INFO(UPB_WIRE_TYPE_64BIT, int64_t, 1) // SFIXED64
TYPE_INFO(UPB_WIRE_TYPE_VARINT, int32_t, 1) // SINT32
TYPE_INFO(UPB_WIRE_TYPE_VARINT, int64_t, 1) // SINT64
};
// Records an error in *status: stores the given code and replaces the status
// message with the printf-style formatted text built from msg and the
// variadic arguments.
void upb_seterr(upb_status *status, enum upb_status_code code,
                const char *msg, ...) {
  va_list ap;

  // Get a string we are allowed to overwrite before formatting into it.
  upb_string_recycle(&status->str);

  va_start(ap, msg);
  upb_string_vprintf(status->str, msg, ap);
  va_end(ap);

  status->code = code;
}
// Copies the status code and message from *from into *to.
//
// Any message previously held by *to is released, and *to ends up with no
// message when *from has none.  The old code leaked the previous reference
// and could leave a stale message paired with the newly copied code.
//
// *to must be an initialized status (UPB_STATUS_INIT / upb_status_init()).
void upb_copyerr(upb_status *to, upb_status *from)
{
  upb_string *old = to->str;
  to->code = from->code;
  // Take the new reference before dropping the old one so that copying a
  // status onto itself stays safe.
  to->str = from->str ? upb_string_getref(from->str) : NULL;
  if (old) upb_string_unref(old);
}
// Resets *status to the "no error" state: the message string is recycled and
// the code is set back to UPB_OK.
void upb_clearerr(upb_status *status) {
  upb_string_recycle(&status->str);
  status->code = UPB_OK;
}
// Writes a one-line description of *status to stderr: the numeric code, then
// either the attached message or "no msg" when none is attached.
void upb_printerr(upb_status *status) {
  if (!status->str) {
    fprintf(stderr, "code: %d, no msg\n", status->code);
    return;
  }
  fprintf(stderr, "code: %d, msg: " UPB_STRFMT "\n",
          status->code, UPB_STRARG(status->str));
}
// Releases the message string reference held by *status.  The code field is
// left untouched.
// NOTE(review): status->str is NULL for a freshly initialized status, so this
// relies on upb_string_unref() accepting NULL -- confirm.
void upb_status_uninit(upb_status *status) {
upb_string_unref(status->str);
}

@ -12,6 +12,7 @@
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h> // only for size_t.
#include <assert.h>
#include "descriptor_const.h"
#include "upb_atomic.h"
@ -58,6 +59,10 @@ typedef int16_t upb_field_count_t;
// unlimited nesting if we do not limit it.
#define UPB_MAX_TYPE_DEPTH 64
// The biggest possible single value is a 10-byte varint.
#define UPB_MAX_ENCODED_SIZE 10
/* Fundamental types and type constants. **************************************/
// A list of types as they are encoded on-the-wire.
@ -67,39 +72,36 @@ enum upb_wire_type {
UPB_WIRE_TYPE_DELIMITED = 2,
UPB_WIRE_TYPE_START_GROUP = 3,
UPB_WIRE_TYPE_END_GROUP = 4,
UPB_WIRE_TYPE_32BIT = 5
UPB_WIRE_TYPE_32BIT = 5,
// This isn't a real wire type, but we use this constant to describe varints
// that are expected to be a maximum of 32 bits.
UPB_WIRE_TYPE_32BIT_VARINT = 8
};
typedef uint8_t upb_wire_type_t;
// Value type as defined in a .proto file. eg. string, int32, etc. The
// Type of a field as defined in a .proto file. eg. string, int32, etc. The
// integers that represent this are defined by descriptor.proto. Note that
// descriptor.proto reserves "0" for errors, and we use it to represent
// exceptional circumstances.
typedef uint8_t upb_field_type_t;
typedef uint8_t upb_fieldtype_t;
// For referencing the type constants tersely.
#define UPB_TYPE(type) GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_ ## type
#define UPB_LABEL(type) GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_ ## type
INLINE bool upb_issubmsgtype(upb_field_type_t type) {
return type == UPB_TYPE(GROUP) || type == UPB_TYPE(MESSAGE);
}
INLINE bool upb_isstringtype(upb_field_type_t type) {
return type == UPB_TYPE(STRING) || type == UPB_TYPE(BYTES);
}
// Info for a given field type.
typedef struct {
uint8_t align;
uint8_t size;
upb_wire_type_t expected_wire_type;
upb_wire_type_t native_wire_type;
uint8_t allowed_wire_types; // For packable fields, also allows delimited.
char *ctype;
} upb_type_info;
// A static array of info about all of the field types, indexed by type number.
extern upb_type_info upb_types[];
extern const upb_type_info upb_types[];
// The number of a field, eg. "optional string foo = 3".
typedef int32_t upb_field_number_t;
@ -116,59 +118,92 @@ typedef union {
uint32_t _32bit;
} upb_wire_value;
// A tag occurs before each value on-the-wire.
typedef struct {
upb_field_number_t field_number;
upb_wire_type_t wire_type;
} upb_tag;
/* Polymorphic values of .proto types *****************************************/
// INTERNAL-ONLY: never refer to these types with a tag ("union", "struct").
// Always use the typedefs.
struct _upb_string;
typedef struct _upb_string upb_string;
struct _upb_array;
typedef struct _upb_array upb_array;
struct _upb_msg;
typedef struct _upb_msg upb_msg;
struct _upb_bytesrc;
typedef struct _upb_bytesrc upb_bytesrc;
typedef upb_atomic_refcount_t upb_data;
typedef uint32_t upb_strlen_t;
struct upb_norefcount_string;
struct upb_refcounted_string;
typedef union {
// Must be first, for the UPB_STATIC_STRING_PTR_INIT() macro.
struct upb_norefcount_string *norefcount;
struct upb_refcounted_string *refcounted;
upb_data *base;
} upb_strptr;
typedef uint32_t upb_arraylen_t;
typedef int32_t upb_strlen_t;
#define UPB_STRLEN_MAX INT32_MAX
typedef union {
// Must be first, for the UPB_STATIC_ARRAY_PTR_INIT() macro.
struct upb_norefcount_array *norefcount;
struct upb_refcounted_array *refcounted;
upb_data *base;
} upb_arrayptr;
// The type of a upb_value. This is like a upb_fieldtype_t, but adds the
// constant UPB_VALUETYPE_ARRAY to represent an array.
typedef uint8_t upb_valuetype_t;
// Value-type tags that do not correspond to a .proto field type.
// NOTE(review): UPB_VALUETYPE_ARRAY and UPB_VALUETYPE_BYTESRC are both 32, so
// a debug-mode type tag cannot tell an array value from a bytesrc value --
// confirm that sharing the number is intentional.
#define UPB_VALUETYPE_ARRAY 32
#define UPB_VALUETYPE_BYTESRC 32
// Tag written by upb_value_setraw(); the typed getters accept it as well.
#define UPB_VALUETYPE_RAW 33
// A single .proto value. The owner must have an out-of-band way of knowing
// the type, so that it knows which union member to use.
typedef union {
double _double;
float _float;
int32_t int32;
int64_t int64;
uint32_t uint32;
uint64_t uint64;
bool _bool;
upb_strptr str;
upb_arrayptr arr;
upb_msg *msg;
upb_data *data;
typedef struct {
union {
double _double;
float _float;
int32_t int32;
int64_t int64;
uint32_t uint32;
uint64_t uint64;
bool _bool;
upb_string *str;
upb_bytesrc *bytesrc;
upb_msg *msg;
upb_array *arr;
upb_atomic_refcount_t *refcount;
void *_void;
} val;
// In debug mode we carry the value type around also so we can check accesses
// to be sure the right member is being read.
#ifndef NDEBUG
upb_valuetype_t type;
#endif
} upb_value;
#ifdef NDEBUG
#define SET_TYPE(dest, val)
#else
#define SET_TYPE(dest, val) dest = val
#endif
#define UPB_VALUE_ACCESSORS(name, membername, ctype, proto_type) \
INLINE ctype upb_value_get ## name(upb_value val) { \
assert(val.type == proto_type || val.type == UPB_VALUETYPE_RAW); \
return val.val.membername; \
} \
INLINE void upb_value_set ## name(upb_value *val, ctype cval) { \
SET_TYPE(val->type, proto_type); \
val->val.membername = cval; \
}
UPB_VALUE_ACCESSORS(double, _double, double, UPB_TYPE(DOUBLE));
UPB_VALUE_ACCESSORS(float, _float, float, UPB_TYPE(FLOAT));
UPB_VALUE_ACCESSORS(int32, int32, int32_t, UPB_TYPE(INT32));
UPB_VALUE_ACCESSORS(int64, int64, int64_t, UPB_TYPE(INT64));
UPB_VALUE_ACCESSORS(uint32, uint32, uint32_t, UPB_TYPE(UINT32));
UPB_VALUE_ACCESSORS(uint64, uint64, uint64_t, UPB_TYPE(UINT64));
UPB_VALUE_ACCESSORS(bool, _bool, bool, UPB_TYPE(BOOL));
UPB_VALUE_ACCESSORS(str, str, upb_string*, UPB_TYPE(STRING));
UPB_VALUE_ACCESSORS(msg, msg, upb_msg*, UPB_TYPE(MESSAGE));
UPB_VALUE_ACCESSORS(arr, arr, upb_array*, UPB_VALUETYPE_ARRAY);
UPB_VALUE_ACCESSORS(bytesrc, bytesrc, upb_bytesrc*, UPB_VALUETYPE_BYTESRC);
INLINE void upb_value_setraw(upb_value *val, uint64_t cval) {
SET_TYPE(val->type, UPB_VALUETYPE_RAW);
val->val.uint64 = cval;
}
INLINE upb_atomic_refcount_t *upb_value_getrefcount(upb_value val) {
assert(val.type == UPB_TYPE(MESSAGE) ||
val.type == UPB_TYPE(STRING) ||
val.type == UPB_VALUETYPE_ARRAY);
return val.val.refcount;
}
// A pointer to a .proto value. The owner must have an out-of-band way of
// knowing the type, so it knows which union member to use.
typedef union {
@ -180,28 +215,29 @@ typedef union {
uint32_t *uint32;
uint64_t *uint64;
bool *_bool;
upb_strptr *str;
upb_arrayptr *arr;
upb_string **str;
upb_msg **msg;
upb_data **data;
upb_array **arr;
void *_void;
} upb_valueptr;
INLINE upb_valueptr upb_value_addrof(upb_value *val) {
upb_valueptr ptr = {&val->_double};
upb_valueptr ptr = {&val->val._double};
return ptr;
}
/**
* Converts upb_value_ptr -> upb_value by reading from the pointer. We need to
* know the field type to perform this operation, because we need to know how
* much memory to copy.
*/
INLINE upb_value upb_value_read(upb_valueptr ptr, upb_field_type_t ft) {
// Reads or writes a upb_value from an address represented by a upb_value_ptr.
// We need to know the value type to perform this operation, because we need to
// know how much memory to copy (and for big-endian machines, we need to know
// where in the upb_value the data goes).
//
// For little endian-machines where we didn't mind overreading, we could make
// upb_value_read simply use memcpy().
INLINE upb_value upb_value_read(upb_valueptr ptr, upb_fieldtype_t ft) {
upb_value val;
#define CASE(t, member_name) \
case UPB_TYPE(t): val.member_name = *ptr.member_name; break;
case UPB_TYPE(t): val.val.member_name = *ptr.member_name; break;
switch(ft) {
CASE(DOUBLE, _double)
@ -229,15 +265,10 @@ INLINE upb_value upb_value_read(upb_valueptr ptr, upb_field_type_t ft) {
#undef CASE
}
/**
* Writes a upb_value to a upb_value_ptr location. We need to know the field
* type to perform this operation, because we need to know how much memory to
* copy.
*/
INLINE void upb_value_write(upb_valueptr ptr, upb_value val,
upb_field_type_t ft) {
upb_fieldtype_t ft) {
#define CASE(t, member_name) \
case UPB_TYPE(t): *ptr.member_name = val.member_name; break;
case UPB_TYPE(t): *ptr.member_name = val.val.member_name; break;
switch(ft) {
CASE(DOUBLE, _double)
@ -267,40 +298,49 @@ INLINE void upb_value_write(upb_valueptr ptr, upb_value val,
// Status codes used as a return value. Codes >0 are not fatal and can be
// resumed.
enum upb_status_code {
UPB_STATUS_OK = 0,
// The operation completed successfully.
UPB_OK = 0,
// The input byte stream ended in the middle of a record.
UPB_STATUS_NEED_MORE_DATA = 1,
// The bytesrc is at EOF and all data was read successfully.
UPB_EOF = 1,
// An unrecoverable error occurred.
UPB_STATUS_ERROR = -1,
// A read or write from a streaming src/sink could not be completed right now.
UPB_TRYAGAIN = 2,
// A varint went for 10 bytes without terminating.
UPB_ERROR_UNTERMINATED_VARINT = -2,
// An unrecoverable error occurred.
UPB_ERROR = -1,
// The max nesting level (UPB_MAX_NESTING) was exceeded.
UPB_ERROR_MAX_NESTING_EXCEEDED = -3
// A recoverable error occurred (for example, data of the wrong type was
// encountered which we can skip over).
// UPB_STATUS_RECOVERABLE_ERROR = -2
};
#define UPB_ERRORMSG_MAXLEN 256
// TODO: consider adding error space and code, to let ie. errno be stored
// as a proper code, or application-specific error codes.
typedef struct {
enum upb_status_code code;
char msg[UPB_ERRORMSG_MAXLEN];
char code;
upb_string *str;
} upb_status;
#define UPB_STATUS_INIT {UPB_STATUS_OK, ""}
#define UPB_STATUS_INIT {UPB_OK, NULL}
#define UPB_ERRORMSG_MAXLEN 256
INLINE bool upb_ok(upb_status *status) {
return status->code == UPB_STATUS_OK;
return status->code == UPB_OK;
}
INLINE void upb_reset(upb_status *status) {
status->code = UPB_STATUS_OK;
status->msg[0] = '\0';
INLINE void upb_status_init(upb_status *status) {
status->code = UPB_OK;
status->str = NULL;
}
void upb_status_uninit(upb_status *status);
void upb_printerr(upb_status *status);
void upb_clearerr(upb_status *status);
void upb_seterr(upb_status *status, enum upb_status_code code, const char *msg,
...);
void upb_copyerr(upb_status *to, upb_status *from);
#ifdef __cplusplus
} /* extern "C" */

@ -29,7 +29,6 @@ extern "C" {
#define INLINE static inline
#endif
#define UPB_THREAD_UNSAFE
#ifdef UPB_THREAD_UNSAFE
/* Non-thread-safe implementations. ******************************************/
@ -65,15 +64,6 @@ INLINE int upb_atomic_fetch_and_add(upb_atomic_refcount_t *a, int val) {
return ret;
}
typedef struct {
} upb_rwlock_t;
INLINE void upb_rwlock_init(upb_rwlock_t *l) { (void)l; }
INLINE void upb_rwlock_destroy(upb_rwlock_t *l) { (void)l; }
INLINE void upb_rwlock_rdlock(upb_rwlock_t *l) { (void)l; }
INLINE void upb_rwlock_wrlock(upb_rwlock_t *l) { (void)l; }
INLINE void upb_rwlock_unlock(upb_rwlock_t *l) { (void)l; }
#endif
/* Atomic refcount ************************************************************/
@ -111,10 +101,6 @@ INLINE bool upb_atomic_read(upb_atomic_refcount_t *a) {
return __sync_fetch_and_add(&a->v, 0);
}
INLINE bool upb_atomic_write(upb_atomic_refcount_t *a, int val) {
a->v = val;
}
#elif defined(WIN32)
/* Windows defines atomic increment/decrement. */
@ -141,11 +127,22 @@ INLINE bool upb_atomic_unref(upb_atomic_refcount_t *a) {
Implement them or compile with UPB_THREAD_UNSAFE.
#endif
INLINE bool upb_atomic_only(upb_atomic_refcount_t *a) {
return upb_atomic_read(a) == 1;
}
/* Reader/Writer lock. ********************************************************/
#ifdef UPB_THREAD_UNSAFE
/* Already defined. */
typedef struct {
} upb_rwlock_t;
INLINE void upb_rwlock_init(upb_rwlock_t *l) { (void)l; }
INLINE void upb_rwlock_destroy(upb_rwlock_t *l) { (void)l; }
INLINE void upb_rwlock_rdlock(upb_rwlock_t *l) { (void)l; }
INLINE void upb_rwlock_wrlock(upb_rwlock_t *l) { (void)l; }
INLINE void upb_rwlock_unlock(upb_rwlock_t *l) { (void)l; }
#elif defined(UPB_USE_PTHREADS)

File diff suppressed because it is too large Load Diff

@ -1,17 +1,18 @@
/*
* upb - a minimalist implementation of protocol buffers.
*
* Copyright (c) 2009 Joshua Haberman. See LICENSE for details.
* Copyright (c) 2009-2011 Joshua Haberman. See LICENSE for details.
*
* Provides definitions of .proto constructs:
* Provides a mechanism for loading proto definitions from descriptors, and
* data structures to represent those definitions. These form the protobuf
* schema, and are used extensively throughout upb:
* - upb_msgdef: describes a "message" construct.
* - upb_fielddef: describes a message field.
* - upb_enumdef: describes an enum.
* (TODO: definitions of extensions and services).
*
* Defs are obtained from a upb_symtab object. A upb_symtab is empty when
* constructed, and definitions can be added by supplying serialized
* descriptors.
* constructed, and definitions can be added by supplying descriptors.
*
* Defs are immutable and reference-counted. Symbol tables reference any defs
* that are the "current" definitions. If an extension is loaded that adds a
@ -27,6 +28,7 @@
#define UPB_DEF_H_
#include "upb_atomic.h"
#include "upb_stream.h"
#include "upb_table.h"
#ifdef __cplusplus
@ -37,7 +39,7 @@ extern "C" {
// All the different kind of defs we support. These correspond 1:1 with
// declarations in a .proto file.
enum upb_def_type {
typedef enum {
UPB_DEF_MSG = 0,
UPB_DEF_ENUM,
UPB_DEF_SVC,
@ -47,15 +49,15 @@ enum upb_def_type {
// For specifying that defs of any type are requested from getdefs.
UPB_DEF_ANY = -1
};
} upb_deftype;
// This typedef is more space-efficient than declaring an enum var directly.
typedef int8_t upb_def_type_t;
typedef int8_t upb_deftype_t;
typedef struct {
upb_strptr fqname; // Fully qualified.
upb_string *fqname; // Fully qualified.
upb_atomic_refcount_t refcount;
upb_def_type_t type;
upb_deftype_t type;
// The is_cyclic flag could go in upb_msgdef instead of here, because only
// messages can be involved in cycles. However, putting them here is free
@ -76,7 +78,7 @@ INLINE void upb_def_ref(upb_def *def) {
if(upb_atomic_ref(&def->refcount) && def->is_cyclic) _upb_def_cyclic_ref(def);
}
INLINE void upb_def_unref(upb_def *def) {
if(upb_atomic_unref(&def->refcount)) _upb_def_reftozero(def);
if(def && upb_atomic_unref(&def->refcount)) _upb_def_reftozero(def);
}
/* upb_fielddef ***************************************************************/
@ -86,29 +88,35 @@ INLINE void upb_def_unref(upb_def *def) {
// is either a field of a upb_msgdef or contained inside a upb_extensiondef.
// It is also reference-counted.
typedef struct _upb_fielddef {
upb_atomic_refcount_t refcount;
upb_field_type_t type;
upb_label_t label;
upb_field_number_t number;
upb_strptr name;
upb_value default_value;
// These are set only when this fielddef is part of a msgdef.
upb_string *name;
struct _upb_msgdef *msgdef;
// For the case of an enum or a submessage, points to the def for that type.
upb_def *def;
upb_atomic_refcount_t refcount;
uint32_t byte_offset; // Where in a upb_msg to find the data.
// These are set only when this fielddef is part of a msgdef.
upb_field_count_t field_index; // Indicates set bit.
// For the case of an enum or a submessage, points to the def for that type.
// We own a ref on this def.
upb_field_number_t number;
upb_fieldtype_t type;
upb_label_t label;
// True if we own a ref on "def" (above). This is true unless this edge is
// part of a cycle.
bool owned;
upb_def *def;
} upb_fielddef;
// A variety of tests about the type of a field.
INLINE bool upb_issubmsg(upb_fielddef *f) {
return upb_issubmsgtype(f->type);
return f->type == UPB_TYPE(GROUP) || f->type == UPB_TYPE(MESSAGE);
}
INLINE bool upb_isstring(upb_fielddef *f) {
return upb_isstringtype(f->type);
return f->type == UPB_TYPE(STRING) || f->type == UPB_TYPE(BYTES);
}
INLINE bool upb_isarray(upb_fielddef *f) {
return f->label == UPB_LABEL(REPEATED);
@ -118,6 +126,19 @@ INLINE bool upb_hasdef(upb_fielddef *f) {
return upb_issubmsg(f) || f->type == UPB_TYPE(ENUM);
}
// Value type stored for field f as a whole: UPB_VALUETYPE_ARRAY when the
// field is repeated, otherwise the field's own type.
INLINE upb_valuetype_t upb_field_valuetype(upb_fielddef *f) {
  return upb_isarray(f) ? UPB_VALUETYPE_ARRAY : f->type;
}
INLINE upb_valuetype_t upb_elem_valuetype(upb_fielddef *f) {
assert(upb_isarray(f));
return f->type;
}
INLINE bool upb_field_ismm(upb_fielddef *f) {
return upb_isarray(f) || upb_isstring(f) || upb_issubmsg(f);
}
@ -126,28 +147,14 @@ INLINE bool upb_elem_ismm(upb_fielddef *f) {
return upb_isstring(f) || upb_issubmsg(f);
}
// Internal-only interface for the upb compiler.
// Sorts the given fielddefs in-place, according to what we think is an optimal
// ordering of fields. This can change from upb release to upb release.
struct google_protobuf_FieldDescriptorProto;
void upb_fielddef_sortfds(struct google_protobuf_FieldDescriptorProto **fds,
size_t num);
/* upb_msgdef *****************************************************************/
struct google_protobuf_EnumDescriptorProto;
struct google_protobuf_DescriptorProto;
// Structure that describes a single .proto message type.
typedef struct _upb_msgdef {
upb_def base;
upb_atomic_refcount_t cycle_refcount;
upb_msg *default_msg; // Message with all default values set.
size_t size;
upb_field_count_t num_fields;
uint32_t size;
uint32_t set_flags_bytes;
uint32_t num_required_fields; // Required fields have the lowest set bytemasks.
upb_fielddef *fields; // We have exclusive ownership of these.
// Tables for looking up fields by number and name.
upb_inttable itof; // int to field
@ -167,17 +174,37 @@ typedef struct {
// Looks up a field by name or number. While these are written to be as fast
// as possible, it will still be faster to cache the results of this lookup if
// possible. These return NULL if no such field is found.
INLINE upb_fielddef *upb_msg_itof(upb_msgdef *m, uint32_t num) {
INLINE upb_fielddef *upb_msgdef_itof(upb_msgdef *m, uint32_t num) {
upb_itof_ent *e =
(upb_itof_ent*)upb_inttable_fastlookup(&m->itof, num, sizeof(*e));
return e ? e->f : NULL;
}
INLINE upb_fielddef *upb_msg_ntof(upb_msgdef *m, upb_strptr name) {
INLINE upb_fielddef *upb_msgdef_ntof(upb_msgdef *m, upb_string *name) {
upb_ntof_ent *e = (upb_ntof_ent*)upb_strtable_lookup(&m->ntof, name);
return e ? e->f : NULL;
}
// Returns the number of fields in "m", taken from the entry count of the
// name->field table (ntof).
INLINE upb_field_count_t upb_msgdef_numfields(upb_msgdef *m) {
return upb_strtable_count(&m->ntof);
}
// Iteration over fields. The order is undefined.
// upb_msg_iter i;
// for(i = upb_msg_begin(m); !upb_msg_done(i); i = upb_msg_next(m, i)) {
// upb_fielddef *f = upb_msg_iter_field(i);
// // ...
// }
typedef upb_itof_ent *upb_msg_iter;
upb_msg_iter upb_msg_begin(upb_msgdef *m);
upb_msg_iter upb_msg_next(upb_msgdef *m, upb_msg_iter iter);
// True once iteration is finished; a NULL iterator marks the end.
INLINE bool upb_msg_done(upb_msg_iter iter) { return iter == NULL; }
// Returns the fielddef the iterator currently points at. "iter" is
// dereferenced, so it must not be NULL (upb_msg_done(iter) must be false).
INLINE upb_fielddef *upb_msg_iter_field(upb_msg_iter iter) {
return iter->f;
}
/* upb_enumdef ****************************************************************/
typedef struct _upb_enumdef {
@ -186,26 +213,41 @@ typedef struct _upb_enumdef {
upb_inttable iton;
} upb_enumdef;
typedef struct {
upb_strtable_entry e;
uint32_t value;
} upb_ntoi_ent;
typedef struct {
upb_inttable_entry e;
upb_string *string;
} upb_iton_ent;
typedef int32_t upb_enumval_t;
// Lookups from name to integer and vice-versa.
bool upb_enumdef_ntoi(upb_enumdef *e, upb_strptr name, upb_enumval_t *num);
upb_strptr upb_enumdef_iton(upb_enumdef *e, upb_enumval_t num);
bool upb_enumdef_ntoi(upb_enumdef *e, upb_string *name, upb_enumval_t *num);
// Caller does not own a ref on the returned string.
upb_string *upb_enumdef_iton(upb_enumdef *e, upb_enumval_t num);
// Iteration over name/value pairs. The order is undefined.
// upb_enum_iter i;
// for(upb_enum_begin(&i, e); !upb_enum_done(&i); upb_enum_next(&i)) {
// for(i = upb_enum_begin(e); !upb_enum_done(i); i = upb_enum_next(e, i)) {
// // ...
// }
typedef struct {
upb_enumdef *e;
void *state; // Internal iteration state.
upb_strptr name;
upb_enumval_t val;
} upb_enum_iter;
void upb_enum_begin(upb_enum_iter *iter, upb_enumdef *e);
void upb_enum_next(upb_enum_iter *iter);
bool upb_enum_done(upb_enum_iter *iter);
typedef upb_iton_ent *upb_enum_iter;
upb_enum_iter upb_enum_begin(upb_enumdef *e);
upb_enum_iter upb_enum_next(upb_enumdef *e, upb_enum_iter iter);
// True once iteration is finished; a NULL iterator marks the end.
INLINE bool upb_enum_done(upb_enum_iter iter) { return iter == NULL; }
// Returns the name string stored in the current entry. "iter" is
// dereferenced, so it must not be NULL.
INLINE upb_string *upb_enum_iter_name(upb_enum_iter iter) {
return iter->string;
}
// Returns the numeric value of the current entry, which is the key of its
// int-table entry. "iter" must not be NULL.
INLINE int32_t upb_enum_iter_number(upb_enum_iter iter) {
return iter->e.key;
}
/* upb_symtab *****************************************************************/
@ -215,11 +257,7 @@ bool upb_enum_done(upb_enum_iter *iter);
typedef struct {
upb_atomic_refcount_t refcount;
upb_rwlock_t lock; // Protects all members except the refcount.
upb_msgdef *fds_msgdef; // In psymtab, ptr here for convenience.
// Our symbol tables; we own refs to the defs therein.
upb_strtable symtab; // The main symbol table.
upb_strtable psymtab; // Private symbols, for internal use.
upb_strtable symtab; // The symbol table.
} upb_symtab;
// Initializes a upb_symtab. Contexts are not freed explicitly, but unref'd
@ -242,24 +280,32 @@ INLINE void upb_symtab_unref(upb_symtab *s) {
//
// If a def is found, the caller owns one ref on the returned def. Otherwise
// returns NULL.
upb_def *upb_symtab_resolve(upb_symtab *s, upb_strptr base, upb_strptr symbol);
upb_def *upb_symtab_resolve(upb_symtab *s, upb_string *base, upb_string *sym);
// Find an entry in the symbol table with this exact name. If a def is found,
// the caller owns one ref on the returned def. Otherwise returns NULL.
upb_def *upb_symtab_lookup(upb_symtab *s, upb_strptr sym);
upb_def *upb_symtab_lookup(upb_symtab *s, upb_string *sym);
// Gets an array of pointers to all currently active defs in this symtab. The
// caller owns the returned array (which is of length *count) as well as a ref
// to each symbol inside. If type is UPB_DEF_ANY then defs of all types are
// returned, otherwise only defs of the required type are returned.
upb_def **upb_symtab_getdefs(upb_symtab *s, int *count, upb_def_type_t type);
// Adds the definitions in the given serialized descriptor to this symtab. All
// types that are referenced from desc must have previously been defined (or be
// defined in desc). desc may not attempt to define any names that are already
// defined in this symtab. Caller retains ownership of desc. status indicates
// whether the operation was successful or not, and the error message (if any).
void upb_symtab_add_desc(upb_symtab *s, upb_strptr desc, upb_status *status);
upb_def **upb_symtab_getdefs(upb_symtab *s, int *count, upb_deftype_t type);
// "fds" is a upb_src that will yield data from the
// google.protobuf.FileDescriptorSet message type. upb_symtab_addfds() adds
// all the definitions from the given FileDescriptorSet and adds them to the
// symtab. status indicates whether the operation was successful or not, and
// the error message (if any).
//
// TODO: should this allow redefinition? Either is possible, but which is
// more useful? Maybe it should be an option.
void upb_symtab_addfds(upb_symtab *s, upb_src *desc, upb_status *status);
// Adds defs for google.protobuf.FileDescriptorSet and friends to this symtab.
// This is necessary for bootstrapping, since these are the upb_defs that
// specify other defs and allow them to be loaded.
void upb_symtab_add_descriptorproto(upb_symtab *s);
/* upb_def casts **************************************************************/

@ -0,0 +1,101 @@
/*
* upb - a minimalist implementation of protocol buffers.
*
* Copyright (c) 2010 Joshua Haberman. See LICENSE for details.
*
* Data structure for storing a message of protobuf data.
*/
#include "upb_msg.h"
#include "upb_decoder.h"
#include "upb_strstream.h"
// Frees a single (non-array) value of field "f".  Submessages and groups are
// freed as messages, strings and bytes as strings; any other field type is a
// programming error and aborts.
static void upb_elem_free(upb_value v, upb_fielddef *f) {
  const bool is_submsg =
      f->type == UPB_TYPE(MESSAGE) || f->type == UPB_TYPE(GROUP);
  const bool is_string =
      f->type == UPB_TYPE(STRING) || f->type == UPB_TYPE(BYTES);
  if (is_submsg) {
    _upb_msg_free(upb_value_getmsg(v), upb_downcast_msgdef(f->def));
  } else if (is_string) {
    _upb_string_free(upb_value_getstr(v));
  } else {
    abort();
  }
}
// Drops one reference on a single string/submessage value and frees it once
// the count says so.  A value with no refcount pointer is left alone.
static void upb_elem_unref(upb_value v, upb_fielddef *f) {
  assert(upb_elem_ismm(f));
  upb_atomic_refcount_t *rc = upb_value_getrefcount(v);
  if (!rc) return;
  if (upb_atomic_unref(rc)) upb_elem_free(v, f);
}
// Frees the value stored in a message slot for "f": the whole array for
// repeated fields, otherwise the single element.
static void upb_field_free(upb_value v, upb_fielddef *f) {
  if (!upb_isarray(f)) {
    upb_elem_free(v, f);
    return;
  }
  _upb_array_free(upb_value_getarr(v), f);
}
// Drops one reference on the value stored in a message slot for "f" and frees
// it once the count says so.  A value with no refcount pointer is left alone.
static void upb_field_unref(upb_value v, upb_fielddef *f) {
  assert(upb_field_ismm(f));
  upb_atomic_refcount_t *rc = upb_value_getrefcount(v);
  if (!rc) return;
  if (upb_atomic_unref(rc)) upb_field_free(v, f);
}
// Allocates a new message of type "md" with a refcount of 1.  All md->size
// bytes start out zeroed, which clears every set bit and cached pointer.
// Returns NULL if the allocation fails.
upb_msg *upb_msg_new(upb_msgdef *md) {
  upb_msg *msg = malloc(md->size);
  // Passing NULL to memset() is undefined behavior; report OOM instead.
  if (!msg) return NULL;
  // Clear all set bits and cached pointers.
  memset(msg, 0, md->size);
  upb_atomic_refcount_init(&msg->refcount, 1);
  return msg;
}
// Frees "msg" (of type "md") after releasing the reference it holds on every
// array, string and submessage stored in its fields.
void _upb_msg_free(upb_msg *msg, upb_msgdef *md) {
  upb_msg_iter it = upb_msg_begin(md);
  while (!upb_msg_done(it)) {
    upb_fielddef *field = upb_msg_iter_field(it);
    if (upb_field_ismm(field)) {
      upb_valueptr slot = _upb_msg_getptr(msg, field);
      upb_valuetype_t slot_type = upb_field_valuetype(field);
      upb_field_unref(upb_value_read(slot, slot_type), field);
    }
    it = upb_msg_next(md, it);
  }
  free(msg);
}
// Marks field "f" as set in "msg" by turning on its bit in the flag bytes at
// the start of msg->data (bit field_index % 8 of byte field_index / 8).
INLINE void upb_msg_sethas(upb_msg *msg, upb_fielddef *f) {
msg->data[f->field_index/8] |= (1 << (f->field_index % 8));
}
// Allocates a new, empty array with a refcount of 1 and no element storage.
// Returns NULL if the allocation fails.
upb_array *upb_array_new(void) {
  upb_array *arr = malloc(sizeof(*arr));
  // Don't write through a NULL pointer when we are out of memory.
  if (!arr) return NULL;
  upb_atomic_refcount_init(&arr->refcount, 1);
  arr->size = 0;
  arr->len = 0;
  arr->elements._void = NULL;
  return arr;
}
// Frees "arr" and its element storage.  For string/submessage element types
// the reference held by each slot is released first.
void _upb_array_free(upb_array *arr, upb_fielddef *f) {
  if (upb_elem_ismm(f)) {
    // Walks all "size" slots, not just the first "len".
    // NOTE(review): presumably slots past "len" may still hold objects kept
    // around for reuse -- confirm against the code that grows the array.
    const upb_valuetype_t elem_type = upb_elem_valuetype(f);
    upb_arraylen_t idx = 0;
    while (idx < arr->size) {
      upb_valueptr slot = _upb_array_getptr(arr, f, idx);
      upb_elem_unref(upb_value_read(slot, elem_type), f);
      ++idx;
    }
  }
  free(arr->elements._void);  // free(NULL) is a no-op.
  free(arr);
}
// Intended to register a set of handlers on "handlers" that populate "msg"
// (of type "md") from a stream.
//
// TODO: not implemented yet.  The handler table below is an empty placeholder
// and nothing is registered; the function is currently a no-op.
//
// NOTE(review): upb_msg.h declares this function without the "merge"
// parameter; the declaration and this definition need to be reconciled.
void upb_msg_register_handlers(upb_msg *msg, upb_msgdef *md,
                               upb_handlers *handlers, bool merge) {
  static upb_handlerset handlerset = {
    NULL,  // startmsg
    NULL,  // endmsg
    NULL,  // value
    NULL,  // startsubmsg
    NULL,  // endsubmsg
    NULL,  // unknownval
  };
  (void)handlerset;
  (void)msg;
  (void)md;
  (void)handlers;
  (void)merge;
}

@ -0,0 +1,96 @@
/*
* upb - a minimalist implementation of protocol buffers.
*
* Copyright (c) 2010-2011 Joshua Haberman. See LICENSE for details.
*
* Data structure for storing a message of protobuf data. Unlike Google's
* protobuf, upb_msg and upb_array are reference counted instead of having
* exclusive ownership of their fields. This is a better match for dynamic
* languages where statements like a.b = other_b are normal.
*
* upb's parsers and serializers could also be used to populate and serialize
* other kinds of message objects (even one generated by Google's protobuf).
*/
#ifndef UPB_MSG_H
#define UPB_MSG_H
#include "upb.h"
#include "upb_def.h"
#include <stdlib.h>
#ifdef __cplusplus
extern "C" {
#endif
/* upb_array ******************************************************************/
typedef uint32_t upb_arraylen_t;
struct _upb_array {
upb_atomic_refcount_t refcount;
upb_arraylen_t len;
upb_arraylen_t size;
upb_valueptr elements;
};
void _upb_array_free(upb_array *a, upb_fielddef *f);
// Returns a pointer to element number "elem" of "a".  The address is the
// element storage plus elem times the per-type size looked up in upb_types[]
// for f->type.  No bounds check is performed.
INLINE upb_valueptr _upb_array_getptr(upb_array *a, upb_fielddef *f,
uint32_t elem) {
upb_valueptr p;
p._void = &a->elements.uint8[elem * upb_types[f->type].size];
return p;
}
upb_array *upb_array_new(void);
// Drops one reference on "a"; when upb_atomic_unref() reports true the array
// is freed with _upb_array_free().  "a" is dereferenced unconditionally, so
// it must not be NULL.
INLINE void upb_array_unref(upb_array *a, upb_fielddef *f) {
if (upb_atomic_unref(&a->refcount)) _upb_array_free(a, f);
}
// Returns the array's "len" member (as opposed to "size", which is tracked
// separately in struct _upb_array).
INLINE uint32_t upb_array_len(upb_array *a) {
return a->len;
}
/* upb_msg ********************************************************************/
struct _upb_msg {
upb_atomic_refcount_t refcount;
uint8_t data[4]; // We allocate the appropriate amount per message.
};
void _upb_msg_free(upb_msg *msg, upb_msgdef *md);
// Returns a pointer to the storage for field "f" inside "msg", located
// f->byte_offset bytes into msg->data.
INLINE upb_valueptr _upb_msg_getptr(upb_msg *msg, upb_fielddef *f) {
upb_valueptr p;
p._void = &msg->data[f->byte_offset];
return p;
}
// Creates a new msg of the given type.
upb_msg *upb_msg_new(upb_msgdef *md);
// Unrefs the given message.
INLINE void upb_msg_unref(upb_msg *msg, upb_msgdef *md) {
  // A NULL message is accepted and ignored.
  if (!msg) return;
  // Free the message once the refcount reports that it should go.
  if (upb_atomic_unref(&msg->refcount)) _upb_msg_free(msg, md);
}
// Tests whether the given field is explicitly set, or whether it will return a
// default.
INLINE bool upb_msg_has(upb_msg *msg, upb_fielddef *f) {
  // One flag bit per field: bit (field_index % 8) of byte (field_index / 8).
  const int bit = 1 << (f->field_index % 8);
  return (msg->data[f->field_index / 8] & bit) != 0;
}
// Unsets all field values back to their defaults.
// Implementation: zeroes the first md->set_flags_bytes bytes of msg->data,
// i.e. the per-field flag bits that upb_msg_has() tests.  Only those bytes
// are written; the field storage that follows is left untouched.
INLINE void upb_msg_clear(upb_msg *msg, upb_msgdef *md) {
memset(msg->data, 0, md->set_flags_bytes);
}
// Registers a set of handlers that will populate this msgdef.
void upb_msg_register_handlers(upb_msg *msg, upb_msgdef *md,
upb_handlers *handlers);
#ifdef __cplusplus
} /* extern "C" */
#endif
#endif

@ -0,0 +1,275 @@
/*
* upb - a minimalist implementation of protocol buffers.
*
* This file defines four general-purpose streaming data interfaces.
*
* - upb_handlers: represents a set of callbacks, very much like in XML's SAX
* API, that a client can register to do a streaming tree traversal over a
* stream of structured protobuf data, without knowing where that data is
* coming from. There is only one upb_handlers type (it is not a virtual
* base class), but the object lets you register any set of handlers.
*
* The upb_handlers interface supports delegation: when entering a submessage,
* you can delegate to another set of upb_handlers instead of handling the
* submessage yourself. This allows upb_handlers objects to *compose* -- you
* can implement a set of upb_handlers without knowing or caring whether this
* is the top-level message or not.
*
* The other interfaces are the C equivalent of "virtual base classes" that
* anyone can implement:
*
* - upb_src: an interface that represents a source of streaming protobuf data.
* It lets you register a set of upb_handlers, and then call upb_src_run(),
* which pulls the protobuf data from somewhere and then calls the handlers.
*
* - upb_bytesrc: a pull interface for streams of bytes, basically an
* abstraction of read()/fread(), but it avoids copies where possible.
*
* - upb_bytesink: push interface for streams of bytes, basically an
* abstraction of write()/fwrite(), but it avoids copies where possible.
*
* All of the encoders and decoders are based on these generic interfaces,
* which lets you write streaming algorithms that do not depend on a specific
* serialization format; for example, you can write a pretty printer that works
* with input that came from protobuf binary format, protobuf text format, or
* even an in-memory upb_msg -- the pretty printer will not know the
* difference.
*
* Copyright (c) 2010-2011 Joshua Haberman. See LICENSE for details.
*
*/
#ifndef UPB_STREAM_H
#define UPB_STREAM_H
#include "upb.h"
#ifdef __cplusplus
extern "C" {
#endif
// Forward-declare. We can't include upb_def.h; it would be circular.
struct _upb_fielddef;
/* upb_handlers ***************************************************************/
// upb_handlers define the interface by which a upb_src passes data to a
// upb_sink.
// Constants that a handler returns to indicate to its caller whether it should
// continue or not.
typedef enum {
// Caller should continue sending values to the sink.
UPB_CONTINUE,
// Stop processing for now; check status for details. If no status was set,
// a generic error will be returned. If the error is resumable, it is not
// (yet) defined where processing will resume -- waiting for real-world
// examples of resumable decoders and resume-requiring clients. upb_src
// implementations that are not capable of resuming will override the return
// status to be non-resumable if a resumable status was set by the handlers.
UPB_BREAK,
// Skips to the end of the current submessage (or if we are at the top
// level, skips to the end of the entire message).
UPB_SKIPSUBMSG,
// When returned from a startsubmsg handler, indicates that the submessage
// should be handled by a different set of handlers, which have been
// registered on the provided upb_handlers object. This allows upb_handlers
// objects to compose; a set of upb_handlers need not know whether it is the
// top-level message or a sub-message. May not be returned from any other
// callback.
UPB_DELEGATE,
} upb_flow_t;
// upb_handlers
struct _upb_handlers;
typedef struct _upb_handlers upb_handlers;
typedef upb_flow_t (*upb_startmsg_handler_t)(void *closure);
typedef upb_flow_t (*upb_endmsg_handler_t)(void *closure);
typedef upb_flow_t (*upb_value_handler_t)(void *closure,
struct _upb_fielddef *f,
upb_value val);
typedef upb_flow_t (*upb_startsubmsg_handler_t)(void *closure,
struct _upb_fielddef *f,
upb_handlers *delegate_to);
typedef upb_flow_t (*upb_endsubmsg_handler_t)(void *closure);
typedef upb_flow_t (*upb_unknownval_handler_t)(void *closure,
upb_field_number_t fieldnum,
upb_value val);
// An empty set of handlers, for convenient copy/paste:
//
// static upb_flow_t startmsg(void *closure) {
// // Called when the top-level message begins.
// return UPB_CONTINUE;
// }
//
// static upb_flow_t endmsg(void *closure) {
// // Called when the top-level message ends.
// return UPB_CONTINUE;
// }
//
// static upb_flow_t value(void *closure, upb_fielddef *f, upb_value val) {
// // Called for every value in the stream.
// return UPB_CONTINUE;
// }
//
// static upb_flow_t startsubmsg(void *closure, upb_fielddef *f,
// upb_handlers *delegate_to) {
// // Called when a submessage begins; can delegate by returning UPB_DELEGATE.
// return UPB_CONTINUE;
// }
//
// static upb_flow_t endsubmsg(void *closure) {
// // Called when a submessage ends.
// return UPB_CONTINUE;
// }
//
// static upb_flow_t unknownval(void *closure, upb_field_number_t fieldnum,
// upb_value val) {
// // Called when an unknown value is encountered.
// return UPB_CONTINUE;
// }
//
// // Any handlers you don't need can be set to NULL.
// static upb_handlerset handlers = {
// startmsg,
// endmsg,
// value,
// startsubmsg,
// endsubmsg,
// unknownval,
// };
typedef struct {
upb_startmsg_handler_t startmsg;
upb_endmsg_handler_t endmsg;
upb_value_handler_t value;
upb_startsubmsg_handler_t startsubmsg;
upb_endsubmsg_handler_t endsubmsg;
upb_unknownval_handler_t unknownval;
} upb_handlerset;
// Functions to register handlers on a upb_handlers object.
INLINE void upb_handlers_init(upb_handlers *h);
INLINE void upb_handlers_uninit(upb_handlers *h);
INLINE void upb_handlers_reset(upb_handlers *h);
INLINE bool upb_handlers_isempty(upb_handlers *h);
INLINE void upb_register_handlerset(upb_handlers *h, upb_handlerset *set);
// TODO: for clients that want to increase efficiency by preventing bytesrcs
// from automatically being converted to strings in the value callback.
// INLINE void upb_handlers_use_bytesrcs(bool use_bytesrcs);
// The closure will be passed to every handler. The status will be read by the
// upb_src immediately after a handler has returned UPB_BREAK and used as the
// overall upb_src status; it will not be referenced at any other time.
INLINE void upb_set_handler_closure(upb_handlers *h, void *closure,
upb_status *status);
/* upb_src ********************************************************************/
struct _upb_src;
typedef struct _upb_src upb_src;
// upb_src_sethandlers() must be called once and only once before upb_src_run()
// is called. This sets up the callbacks that will handle the parse. A
// upb_src that is fully initialized except for the call to
// upb_src_sethandlers() is called "prepared" -- this is useful for library
// functions that want to consume the output of a generic upb_src.
// Calling sethandlers() multiple times is an error and will trigger an abort().
INLINE void upb_src_sethandlers(upb_src *src, upb_handlers *handlers);
// Runs the src, calling the callbacks that were registered with
// upb_src_sethandlers(), and returning the status of the operation in
// "status." The status might indicate UPB_TRYAGAIN (indicating EAGAIN on a
// non-blocking socket) or a resumable error; in both cases upb_src_run can be
// called again later. TRYAGAIN could come from either the src (input buffers
// are empty) or the handlers (output buffers are full).
INLINE void upb_src_run(upb_src *src, upb_status *status);
// A convenience object that a upb_src can use to invoke handlers. It
// transparently handles delegation so that the upb_src needs only follow the
// protocol as if delegation did not exist.
struct _upb_dispatcher;
typedef struct _upb_dispatcher upb_dispatcher;
INLINE void upb_dispatcher_init(upb_dispatcher *d);
INLINE void upb_dispatcher_reset(upb_dispatcher *d, upb_handlers *h);
INLINE upb_flow_t upb_dispatch_startmsg(upb_dispatcher *d);
INLINE upb_flow_t upb_dispatch_endmsg(upb_dispatcher *d);
INLINE upb_flow_t upb_dispatch_startsubmsg(upb_dispatcher *d,
struct _upb_fielddef *f);
INLINE upb_flow_t upb_dispatch_endsubmsg(upb_dispatcher *d);
INLINE upb_flow_t upb_dispatch_value(upb_dispatcher *d, struct _upb_fielddef *f,
upb_value val);
INLINE upb_flow_t upb_dispatch_unknownval(upb_dispatcher *d,
upb_field_number_t fieldnum,
upb_value val);
/* upb_bytesrc ****************************************************************/
// Reads up to "count" bytes into "buf", returning the total number of bytes
// read. If 0, indicates error and puts details in "status".
INLINE upb_strlen_t upb_bytesrc_read(upb_bytesrc *src, void *buf,
upb_strlen_t count, upb_status *status);
// Like upb_bytesrc_read(), but modifies "str" in-place. Caller must ensure
// that "str" is created or just recycled. Returns "false" if no data was
// returned, either due to error or EOF (check status for details).
//
// In comparison to upb_bytesrc_read(), this call can possibly alias existing
// string data (which avoids a copy). On the other hand, if the data was *not*
// already in an existing string, this copies it into a upb_string, and if the
// data needs to be put in a specific range of memory (because eg. you need to
// put it into a different kind of string object) then upb_bytesrc_read() could
// save you a copy.
INLINE bool upb_bytesrc_getstr(upb_bytesrc *src, upb_string *str,
upb_status *status);
// A convenience function for getting all the remaining data in a upb_bytesrc
// as a upb_string. Returns false and sets "status" if the operation fails.
INLINE bool upb_bytesrc_getfullstr(upb_bytesrc *src, upb_string *str,
upb_status *status);
// Convenience wrapper: extracts the bytesrc held in "val" and reads all of its
// remaining data into "str" via upb_bytesrc_getfullstr().  Returns that
// function's result.
INLINE bool upb_value_getfullstr(upb_value val, upb_string *str,
upb_status *status) {
return upb_bytesrc_getfullstr(upb_value_getbytesrc(val), str, status);
}
/* upb_bytesink ***************************************************************/
struct _upb_bytesink;
typedef struct _upb_bytesink upb_bytesink;
// TODO: Figure out how buffering should be handled. Should the caller buffer
// data and only call these functions when a buffer is full? Seems most
// efficient, but then buffering has to be configured in the caller, which
// could be anything, which makes it hard to have a standard interface for
// controlling buffering.
//
// The downside of having the bytesink buffer is efficiency: the caller is
// making more (virtual) function calls, and the caller can't arrange to have
// a big contiguous buffer. The bytesink can do this, but will have to copy
// to make the data contiguous.
// Returns the number of bytes written.
INLINE upb_strlen_t upb_bytesink_printf(upb_bytesink *sink, upb_status *status,
const char *fmt, ...);
// Puts the given string, returning true if the operation was successful, otherwise
// check "status" for details. Ownership of the string is *not* passed; if
// the callee wants a reference he must call upb_string_getref() on it.
INLINE upb_strlen_t upb_bytesink_putstr(upb_bytesink *sink, upb_string *str,
upb_status *status);
#include "upb_stream_vtbl.h"
#ifdef __cplusplus
} /* extern "C" */
#endif
#endif

@ -0,0 +1,307 @@
/*
* upb - a minimalist implementation of protocol buffers.
*
* vtable declarations for types that are implementing any of the src or sink
* interfaces. Only components that are implementing these interfaces need
* to worry about this file.
*
* Copyright (c) 2010 Joshua Haberman. See LICENSE for details.
*/
#ifndef UPB_SRCSINK_VTBL_H_
#define UPB_SRCSINK_VTBL_H_
#include <assert.h>
#include "upb_stream.h"
#include "upb_string.h"
#ifdef __cplusplus
extern "C" {
#endif
// Typedefs for function pointers to all of the virtual functions.
// upb_src
typedef void (*upb_src_sethandlers_fptr)(upb_src *src, upb_handlers *handlers);
typedef void (*upb_src_run_fptr)(upb_src *src, upb_status *status);
// upb_bytesrc.
typedef upb_strlen_t (*upb_bytesrc_read_fptr)(
upb_bytesrc *src, void *buf, upb_strlen_t count, upb_status *status);
typedef bool (*upb_bytesrc_getstr_fptr)(
upb_bytesrc *src, upb_string *str, upb_status *status);
// upb_bytesink.
typedef upb_strlen_t (*upb_bytesink_write_fptr)(
upb_bytesink *bytesink, void *buf, upb_strlen_t count);
typedef upb_strlen_t (*upb_bytesink_putstr_fptr)(
upb_bytesink *bytesink, upb_string *str, upb_status *status);
typedef upb_strlen_t (*upb_bytesink_vprintf_fptr)(
upb_bytesink *bytesink, upb_status *status, const char *fmt, va_list args);
// Vtables for the above interfaces.
typedef struct {
upb_bytesrc_read_fptr read;
upb_bytesrc_getstr_fptr getstr;
} upb_bytesrc_vtbl;
typedef struct {
upb_bytesink_write_fptr write;
upb_bytesink_putstr_fptr putstr;
upb_bytesink_vprintf_fptr vprintf;
} upb_bytesink_vtbl;
typedef struct {
upb_src_sethandlers_fptr sethandlers;
upb_src_run_fptr run;
} upb_src_vtbl;
// "Base Class" definitions; components that implement these interfaces should
// contain one of these structures.
struct _upb_bytesrc {
upb_bytesrc_vtbl *vtbl;
upb_status status;
bool eof;
};
struct _upb_bytesink {
upb_bytesink_vtbl *vtbl;
upb_status status;
bool eof;
};
struct _upb_src {
upb_src_vtbl *vtbl;
};
// Initializes the base-class part of a bytesrc: sets up the embedded status,
// clears the EOF flag and installs the implementation's vtable.
INLINE void upb_bytesrc_init(upb_bytesrc *s, upb_bytesrc_vtbl *vtbl) {
  upb_status_init(&s->status);
  s->eof = false;
  s->vtbl = vtbl;
}
// Initializes the base-class part of a bytesink: sets up the embedded status,
// clears the EOF flag and installs the implementation's vtable.
INLINE void upb_bytesink_init(upb_bytesink *s, upb_bytesink_vtbl *vtbl) {
  upb_status_init(&s->status);
  s->eof = false;
  s->vtbl = vtbl;
}
// Initializes the base-class part of a upb_src by installing the
// implementation's vtable (the struct has no other members).
INLINE void upb_src_init(upb_src *s, upb_src_vtbl *vtbl) {
s->vtbl = vtbl;
}
// Implementation of virtual function dispatch.
// upb_src
// Forwards to the implementation's "sethandlers" entry in the vtable.
INLINE void upb_src_sethandlers(upb_src *src, upb_handlers *handlers) {
src->vtbl->sethandlers(src, handlers);
}
// Forwards to the implementation's "run" entry in the vtable.
INLINE void upb_src_run(upb_src *src, upb_status *status) {
src->vtbl->run(src, status);
}
// upb_bytesrc
// Forwards to the implementation's "read" entry in the vtable and returns its
// result unchanged.
INLINE upb_strlen_t upb_bytesrc_read(upb_bytesrc *src, void *buf,
upb_strlen_t count, upb_status *status) {
return src->vtbl->read(src, buf, count, status);
}
// Forwards to the implementation's "getstr" entry in the vtable and returns
// its result unchanged.
INLINE bool upb_bytesrc_getstr(upb_bytesrc *src, upb_string *str,
upb_status *status) {
return src->vtbl->getstr(src, str, status);
}
// Reads everything that is left in "src" into "str".
//
// Returns true once the source has reported UPB_EOF in "status".  Returns
// false (leaving the details in "status") if the initial getstr fails, if a
// read reports a negative count, or if a read makes no progress without
// reporting EOF.
INLINE bool upb_bytesrc_getfullstr(upb_bytesrc *src, upb_string *str,
                                   upb_status *status) {
  // We start with a getstr, because that could possibly alias data instead of
  // copying.
  if (!upb_bytesrc_getstr(src, str, status)) return false;
  // Trade-off between number of read calls and amount of overallocation.
  const size_t bufsize = 4096;
  do {
    upb_strlen_t len = upb_string_len(str);
    char *buf = upb_string_getrwbuf(str, len + bufsize);
    upb_strlen_t read = upb_bytesrc_read(src, buf + len, bufsize, status);
    if (read <= 0) {
      // Nothing was appended: shrink back to the data we really have.
      upb_string_getrwbuf(str, len);
      // Zero bytes together with EOF is the normal end of the stream.  Zero
      // bytes without EOF, or a negative count, is an error; bailing out here
      // also keeps us from spinning on a source that makes no progress.
      return read == 0 && status->code == UPB_EOF;
    }
    // Resize to proper size.
    upb_string_getrwbuf(str, len + read);
    // Loop until EOF.  This used to read "!status->code != UPB_EOF", which
    // applied "!" to the status code before comparing it.
  } while (status->code != UPB_EOF);
  return true;
}
// Returns a pointer to the status object embedded in the bytesrc.
INLINE upb_status *upb_bytesrc_status(upb_bytesrc *src) { return &src->status; }
// Returns the bytesrc's "eof" flag.
INLINE bool upb_bytesrc_eof(upb_bytesrc *src) { return src->eof; }
// upb_bytesink
// Forwards to the implementation's "write" entry in the vtable and returns
// its result unchanged.
INLINE upb_strlen_t upb_bytesink_write(upb_bytesink *sink, void *buf,
upb_strlen_t count) {
return sink->vtbl->write(sink, buf, count);
}
// Forwards to the implementation's "putstr" entry in the vtable and returns
// its result unchanged.
INLINE upb_strlen_t upb_bytesink_putstr(upb_bytesink *sink, upb_string *str, upb_status *status) {
return sink->vtbl->putstr(sink, str, status);
}
// Returns a pointer to the status object embedded in the bytesink.
INLINE upb_status *upb_bytesink_status(upb_bytesink *sink) {
return &sink->status;
}
// printf-style front end for a bytesink: packs the variadic arguments into a
// va_list and forwards them to the implementation's "vprintf" vtable entry,
// returning its result unchanged.
INLINE upb_strlen_t upb_bytesink_printf(upb_bytesink *sink, upb_status *status, const char *fmt, ...) {
va_list args;
va_start(args, fmt);
upb_strlen_t ret = sink->vtbl->vprintf(sink, status, fmt, args);
va_end(args);
return ret;
}
// upb_handlers
// A registered set of callbacks plus the state handed to them.
struct _upb_handlers {
// The callback table; installed by upb_register_handlerset().
upb_handlerset *set;
// Opaque pointer passed as the first argument to every callback.
void *closure;
upb_status *status; // We don't own this.
};
// Currently a no-op: it does not touch any member of "h".  Use
// upb_handlers_reset() to put the object into the empty state.
INLINE void upb_handlers_init(upb_handlers *h) {
(void)h;
}
// Currently a no-op; there is nothing to release.
INLINE void upb_handlers_uninit(upb_handlers *h) {
(void)h;
}
// Puts "h" into the empty state (see upb_handlers_isempty()).
INLINE void upb_handlers_reset(upb_handlers *h) {
  h->set = NULL;
  h->closure = NULL;
  // Also clear the status pointer.  upb_handlers_init() does not initialize
  // anything, and upb_dispatch_startsubmsg() copies the whole struct onto the
  // dispatcher stack, so leaving this member unset would propagate an
  // indeterminate pointer.
  h->status = NULL;
}
// True if neither a handler set nor a closure has been installed on "h".
// The status pointer is not considered.
INLINE bool upb_handlers_isempty(upb_handlers *h) {
return !h->set && !h->closure;
}
// Default callbacks that ignore their arguments and return UPB_CONTINUE.
// upb_register_handlerset() installs them in place of NULL entries.

// Default for startmsg, endmsg and endsubmsg.
INLINE upb_flow_t upb_nop(void *closure) {
(void)closure;
return UPB_CONTINUE;
}
// Default for the value callback.
INLINE upb_flow_t upb_value_nop(void *closure, struct _upb_fielddef *f, upb_value val) {
(void)closure;
(void)f;
(void)val;
return UPB_CONTINUE;
}
// Default for startsubmsg; leaves "delegate_to" untouched (no delegation).
INLINE upb_flow_t upb_startsubmsg_nop(void *closure, struct _upb_fielddef *f,
upb_handlers *delegate_to) {
(void)closure;
(void)f;
(void)delegate_to;
return UPB_CONTINUE;
}
// Default for unknownval.
INLINE upb_flow_t upb_unknownval_nop(void *closure, upb_field_number_t fieldnum,
upb_value val) {
(void)closure;
(void)fieldnum;
(void)val;
return UPB_CONTINUE;
}
// Installs "set" as the callback table of "h".
//
// Note that this writes to the caller's "set": every NULL entry is replaced
// with the matching no-op callback, so the dispatch functions can call the
// entries without NULL checks.  "h" keeps the pointer to "set"; the table is
// not copied.
INLINE void upb_register_handlerset(upb_handlers *h, upb_handlerset *set) {
if (!set->startmsg) set->startmsg = &upb_nop;
if (!set->endmsg) set->endmsg = &upb_nop;
if (!set->value) set->value = &upb_value_nop;
if (!set->startsubmsg) set->startsubmsg = &upb_startsubmsg_nop;
if (!set->endsubmsg) set->endsubmsg = &upb_nop;
if (!set->unknownval) set->unknownval = &upb_unknownval_nop;
h->set = set;
}
// Stores the closure and status pointers in "h".  Neither is copied or
// otherwise inspected here.
INLINE void upb_set_handler_closure(upb_handlers *h, void *closure,
upb_status *status) {
h->closure = closure;
h->status = status;
}
// upb_dispatcher
// One entry of the dispatcher's delegation stack.
typedef struct {
// The handlers that are active while this frame is on top.
upb_handlers handlers;
// Submessage nesting count within this frame; upb_dispatch_endsubmsg() pops
// the frame when this drops to zero.
int depth;
} upb_dispatcher_frame;
// Fixed-size stack of frames.  "top" points at the active frame and "limit"
// is set up by upb_dispatcher_init().
struct _upb_dispatcher {
upb_dispatcher_frame stack[UPB_MAX_NESTING], *top, *limit;
};
// Initializes the dispatcher by recording the end of its frame stack.
// upb_dispatcher_reset() must still be called before dispatching.
INLINE void upb_dispatcher_init(upb_dispatcher *d) {
  // Pointer arithmetic is in units of frames, so the one-past-the-end pointer
  // is stack + element count.  Adding sizeof(d->stack), a byte count, would
  // place the limit far beyond the array.
  d->limit = d->stack + UPB_MAX_NESTING;
}
// Rewinds the dispatcher to its bottom frame and installs a copy of "h" there
// as the top-level handlers.
INLINE void upb_dispatcher_reset(upb_dispatcher *d, upb_handlers *h) {
d->top = d->stack;
d->top->depth = 1; // Never want to trigger end-of-delegation.
d->top->handlers = *h;
}
// Calls the top-level startmsg handler.  Only valid while the bottom frame is
// the active one (asserted).
INLINE upb_flow_t upb_dispatch_startmsg(upb_dispatcher *d) {
assert(d->stack == d->top);
return d->top->handlers.set->startmsg(d->top->handlers.closure);
}
// Calls the top-level endmsg handler.  Only valid while the bottom frame is
// the active one (asserted).
INLINE upb_flow_t upb_dispatch_endmsg(upb_dispatcher *d) {
assert(d->stack == d->top);
return d->top->handlers.set->endmsg(d->top->handlers.closure);
}
// TODO: several edge cases to fix:
// - delegated start returns UPB_BREAK, should replay the start on resume.
// - endsubmsg returns UPB_BREAK, should NOT replay the delegated endmsg.
// Tells the active handlers that a submessage for field "f" is starting.
//
// The startsubmsg handler is given an empty upb_handlers object.  If it
// returns UPB_DELEGATE it must have populated that object (asserted); the
// delegate is then pushed as a new frame and its startmsg handler is called,
// whose result becomes the return value.
INLINE upb_flow_t upb_dispatch_startsubmsg(upb_dispatcher *d,
struct _upb_fielddef *f) {
upb_handlers handlers;
upb_handlers_init(&handlers);
upb_handlers_reset(&handlers);
upb_flow_t ret = d->top->handlers.set->startsubmsg(d->top->handlers.closure, f, &handlers);
// Delegation must be requested if and only if handlers were filled in.
assert((ret == UPB_DELEGATE) == !upb_handlers_isempty(&handlers));
if (ret == UPB_DELEGATE) {
// NOTE(review): d->top is advanced without being compared against d->limit,
// so nothing in this function prevents a push past the end of d->stack --
// confirm that callers bound the nesting depth.
++d->top;
d->top->handlers = handlers;
// Starts at 0; the increment below brings it to 1 when startmsg continues.
d->top->depth = 0;
ret = d->top->handlers.set->startmsg(d->top->handlers.closure);
}
// Count the nesting level in whichever frame is now on top, so that
// upb_dispatch_endsubmsg() can tell when a delegated frame is finished.
if (ret == UPB_CONTINUE) ++d->top->depth;
upb_handlers_uninit(&handlers);
return ret;
}
// Tells the active handlers that the current submessage has ended.
//
// If this closes the outermost submessage of a delegated frame (its depth
// drops to 0), the delegate's endmsg handler runs first and the frame is
// popped; the endsubmsg handler of the frame that is then on top is called
// last and supplies the return value.
INLINE upb_flow_t upb_dispatch_endsubmsg(upb_dispatcher *d) {
upb_flow_t ret;
if (--d->top->depth == 0) {
ret = d->top->handlers.set->endmsg(d->top->handlers.closure);
// On anything but UPB_CONTINUE we return with the frame still on the stack
// and its depth already decremented to 0.
if (ret != UPB_CONTINUE) return ret;
--d->top;
assert(d->top >= d->stack);
}
return d->top->handlers.set->endsubmsg(d->top->handlers.closure);
}
// Passes a value for field "f" to the value handler of the active frame and
// returns the handler's result.
INLINE upb_flow_t upb_dispatch_value(upb_dispatcher *d,
struct _upb_fielddef *f,
upb_value val) {
return d->top->handlers.set->value(d->top->handlers.closure, f, val);
}
// Passes a value with an unknown field number to the unknownval handler of
// the active frame and returns the handler's result.
INLINE upb_flow_t upb_dispatch_unknownval(upb_dispatcher *d,
upb_field_number_t fieldnum,
upb_value val) {
return d->top->handlers.set->unknownval(d->top->handlers.closure,
fieldnum, val);
}
#ifdef __cplusplus
} /* extern "C" */
#endif
#endif

@ -0,0 +1,161 @@
/*
* upb - a minimalist implementation of protocol buffers.
*
* Copyright (c) 2010 Joshua Haberman. See LICENSE for details.
*/
#include "upb_string.h"
#include <stdlib.h>
#ifdef __GLIBC__
#include <malloc.h>
#elif defined(__APPLE__)
#include <malloc/malloc.h>
#endif
// Rounds "v" up to the nearest power of two (values that already are a power
// of two are returned unchanged).  0 maps to 0, and anything above 2^31 wraps
// around to 0.
static uint32_t upb_round_up_pow2(uint32_t v) {
  // http://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2
  // Copy the highest set bit of (v - 1) into every lower position, then add 1.
  uint32_t bits = v - 1;
  for (unsigned shift = 1; shift <= 16; shift *= 2) {
    bits |= bits >> shift;
  }
  return bits + 1;
}
// Allocates a new, empty string with a refcount of 1: no data pointer, no
// cached buffer, length 0 and no source string.  Returns NULL if the
// allocation fails.
upb_string *upb_string_new() {
  upb_string *str = malloc(sizeof(*str));
  // Don't write through a NULL pointer when we are out of memory.
  if (!str) return NULL;
  str->ptr = NULL;
  str->cached_mem = NULL;
  str->len = 0;
#ifndef UPB_HAVE_MSIZE
  str->size = 0;
#endif
  str->src = NULL;
  upb_atomic_refcount_init(&str->refcount, 1);
  return str;
}
// Returns the capacity of the string's cached buffer (cached_mem).  On glibc
// and Apple platforms the allocator is asked for the usable size of the
// block; elsewhere the size recorded in the string itself is returned.
uint32_t upb_string_size(upb_string *str) {
#ifdef __GLIBC__
return malloc_usable_size(str->cached_mem);
#elif defined(__APPLE__)
return malloc_size(str->cached_mem);
#else
return str->size;
#endif
}
// Drops the reference this string holds on its source string, if any, and
// forgets the source.
static void upb_string_release(upb_string *str) {
  if (!str->src) return;
  upb_string_unref(str->src);
  str->src = NULL;
}
// Destroys the string object: frees its cached buffer, drops the reference on
// its source string (if any) and frees the object itself.
void _upb_string_free(upb_string *str) {
  free(str->cached_mem);  // free(NULL) is a no-op.
  upb_string_release(str);
  free(str);
}
// Makes *_str ready to be filled again.  If the caller holds the only
// reference, the existing object is reused: its data pointer is cleared and
// its source string released.  Otherwise (including when *_str is NULL) the
// caller's reference is dropped and *_str is replaced by a brand new string.
void upb_string_recycle(upb_string **_str) {
  upb_string *old = *_str;
  const bool sole_owner = old && upb_atomic_read(&old->refcount) == 1;
  if (!sole_owner) {
    upb_string_unref(old);
    *_str = upb_string_new();
    return;
  }
  old->ptr = NULL;
  upb_string_release(old);
}
// Returns a writable buffer of at least "len" bytes owned by "str" and sets
// the string's length to "len".  The cached buffer is grown (to the next
// power of two) when it is too small; growing preserves its old contents.
//
// Returns NULL, leaving the string unchanged, if the buffer cannot be grown.
char *upb_string_getrwbuf(upb_string *str, upb_strlen_t len) {
  // assert(str->ptr == NULL);
  upb_strlen_t size = upb_string_size(str);
  if (size < len) {
    size = upb_round_up_pow2(len);
    // Go through a temporary: assigning realloc()'s result directly would
    // overwrite (and leak) the old buffer when the call fails.
    void *grown = realloc(str->cached_mem, size);
    if (!grown) return NULL;
    str->cached_mem = grown;
#ifndef UPB_HAVE_MSIZE
    str->size = size;
#endif
  }
  str->len = len;
  str->ptr = str->cached_mem;
  return str->cached_mem;
}
// Sets the contents of "str" to the "len" bytes of "target_str" that start at
// offset "start", without copying them.  "str" must be freshly created or
// recycled (no active data), and takes its own reference on the source so the
// bytes stay alive for as long as "str" refers to them.
void upb_string_substr(upb_string *str, upb_string *target_str,
                       upb_strlen_t start, upb_strlen_t len) {
  assert(str->ptr == NULL);
  str->src = upb_string_getref(target_str);
  // Point into the string we actually hold a ref on.  For stack strings
  // upb_string_getref() returns a heap copy rather than target_str itself, so
  // aliasing target_str's buffer here would leave us referencing memory whose
  // lifetime we do not control.
  str->ptr = upb_string_getrobuf(str->src) + start;
  str->len = len;
}
// Replaces the contents of "str" with the result of formatting "format" with
// "args" (printf-style).  The resulting string length excludes the
// terminating NUL that vsnprintf() writes into the buffer.  If formatting
// fails, "str" is left empty.
void upb_string_vprintf(upb_string *str, const char *format, va_list args) {
  // Try once without reallocating.  We have to va_copy because we might have
  // to call vsnprintf again.
  uint32_t size = UPB_MAX(upb_string_size(str), 16);
  char *buf = upb_string_getrwbuf(str, size);
  va_list args_copy;
  va_copy(args_copy, args);
  int written = vsnprintf(buf, size, format, args_copy);
  va_end(args_copy);
  if (written < 0) {
    // vsnprintf() reports output errors with a negative value; treating that
    // as an (enormous) unsigned size would request a bogus buffer.
    str->len = 0;
    return;
  }
  uint32_t true_size = written;
  if (true_size >= size) {
    // Need to reallocate.  We reallocate even if the sizes were equal,
    // because snprintf excludes the terminating NULL from its count.
    // We don't care about the terminating NULL, but snprintf might
    // bail out of printing even other characters if it doesn't have
    // enough space to write the NULL also.
    //
    // Grow the caller's string in place.  Recycling through a pointer to our
    // local "str" could swap in a brand-new string the caller never sees
    // (and drop the caller's ref) whenever the string is shared.
    upb_string_release(str);  // Data now lives in our own buffer.
    buf = upb_string_getrwbuf(str, true_size + 1);
    vsnprintf(buf, true_size + 1, format, args);
  }
  str->len = true_size;
}
// Returns a newly created string holding the printf-style formatting of
// "format" and the trailing arguments.  The caller owns the only reference.
upb_string *upb_string_asprintf(const char *format, ...) {
  va_list ap;
  upb_string *result = upb_string_new();
  va_start(ap, format);
  upb_string_vprintf(result, format, ap);
  va_end(ap);
  return result;
}
// Returns a newly created string whose bytes are a copy of the contents of
// "s".  The caller owns the only reference to the result.
upb_string *upb_strdup(upb_string *s) {
  upb_string *copy = upb_strduplen(upb_string_getrobuf(s), upb_string_len(s));
  upb_string_endread(s);
  return copy;
}
// Appends the contents of "append" to "s" in place, growing the buffer owned
// by "s" if necessary.
//
// If "s" currently aliases memory it does not own (it is a substring of
// another string, or wraps static data), its existing bytes are first copied
// into its own buffer so that the prefix survives; any reference held on a
// source string is dropped afterwards because it is no longer needed.
void upb_strcat(upb_string *s, upb_string *append) {
  uint32_t old_size = upb_string_len(s);
  uint32_t append_size = upb_string_len(append);
  uint32_t new_size = old_size + append_size;
  // Remember where the current bytes live before getrwbuf() redirects
  // s->ptr to the owned buffer.  realloc() only preserves what was already
  // stored in that owned buffer.
  const char *old_data = upb_string_getrobuf(s);
  int aliased = (old_data != s->cached_mem);
  char *buf = upb_string_getrwbuf(s, new_size);
  if (aliased && old_size > 0) memcpy(buf, old_data, old_size);
  if (append_size > 0) {
    memcpy(buf + old_size, upb_string_getrobuf(append), append_size);
  }
  // Release only after both copies, in case either read from the source.
  if (aliased) upb_string_release(s);
}
upb_string *upb_strreadfile(const char *filename) {
FILE *f = fopen(filename, "rb");
if(!f) return NULL;
if(fseek(f, 0, SEEK_END) != 0) goto error;
long size = ftell(f);
if(size < 0) goto error;
if(fseek(f, 0, SEEK_SET) != 0) goto error;
upb_string *s = upb_string_new();
char *buf = upb_string_getrwbuf(s, size);
if(fread(buf, size, 1, f) != 1) goto error;
fclose(f);
return s;
error:
fclose(f);
return NULL;
}

@ -0,0 +1,342 @@
/*
* upb - a minimalist implementation of protocol buffers.
*
* Copyright (c) 2010 Joshua Haberman. See LICENSE for details.
*
* This file defines a simple string type which is length-delimited instead
* of NULL-terminated, and which has useful sharing semantics.
*
* The overriding goal of upb_string is to avoid memcpy(), malloc(), and free()
* wherever possible, while keeping both CPU and memory overhead low.
* Throughout upb there are situations where one wants to reference all or part
* of another string without copying. upb_string provides APIs for doing this,
* and allows the referenced string to be kept alive for as long as anyone is
* referencing it.
*
* Characteristics of upb_string:
* - strings are reference-counted.
* - strings are immutable (can be mutated only when first created or recycled).
* - if a string has no other referents, it can be "recycled" into a new string
* without having to reallocate the upb_string.
* - strings can be substrings of other strings (owning a ref on the source
* string).
*
* Reference-counted strings have recently fallen out of favor because of the
* performance impacts of doing thread-safe reference counting with atomic
* operations. We side-step this issue by not performing atomic operations
* unless the string has been marked thread-safe. Time will tell whether this
* scheme is easy and convenient enough to be practical.
*
* Strings are expected to be 8-bit-clean, but "char*" is such an entrenched
* idiom that we go with it instead of making our pointers uint8_t*.
*
* WARNING: THE GETREF, UNREF, AND RECYCLE OPERATIONS ARE NOT THREAD_SAFE
* UNLESS THE STRING HAS BEEN MARKED SYNCHRONIZED! What this means is that if
* you are logically passing a reference to a upb_string to another thread
* (which implies that the other thread must eventually call unref or recycle),
* you have two options:
*
* - create a copy of the string that will be used in the other thread only.
* - call upb_string_get_synchronized_ref(), which will make getref, unref, and
* recycle thread-safe for this upb_string.
*/
#ifndef UPB_STRING_H
#define UPB_STRING_H
#include <assert.h>
#include <string.h>
#include <stdarg.h>
#include "upb_atomic.h"
#include "upb.h"
#ifdef __cplusplus
extern "C" {
#endif
// All members of this struct are private, and may only be read/written through
// the associated functions.
struct _upb_string {
// The pointer to our currently active data. This may be memory we own
// or a pointer into memory we don't own.
const char *ptr;
// If non-NULL, this is a block of memory we own. We keep this cached even
// if "ptr" is currently aliasing memory we don't own.
char *cached_mem;
// The effective length of the string (the bytes at ptr).
int32_t len;
#ifndef UPB_HAVE_MSIZE
// How many bytes are allocated in cached_mem.
//
// Many platforms have a function that can tell you the size of a block
// that was previously malloc'd. In this case we can avoid storing the
// size explicitly.
uint32_t size;
#endif
// The string's refcount.
upb_atomic_refcount_t refcount;
// Used if this is a slice of another string, NULL otherwise. We own a ref
// on src.
struct _upb_string *src;
};
// Internal-only initializer for upb_string instances.
#ifdef UPB_HAVE_MSIZE
#define _UPB_STRING_INIT(str, len, refcount) {(char*)str, NULL, len, {refcount}, NULL}
#else
#define _UPB_STRING_INIT(str, len, refcount) {(char*)str, NULL, len, 0, {refcount}, NULL}
#endif
// Special pseudo-refcounts for static/stack-allocated strings, respectively.
#define _UPB_STRING_REFCOUNT_STATIC -1
#define _UPB_STRING_REFCOUNT_STACK -2
// Returns a newly-created, empty, non-finalized string. When the string is no
// longer needed, it should be unref'd, never freed directly.
upb_string *upb_string_new();
// Internal-only; clients should call upb_string_unref().
void _upb_string_free(upb_string *str);
// Releases a ref on the given string, which may free the memory. "str"
// can be NULL, in which case this is a no-op. WARNING: NOT THREAD_SAFE
// UNLESS THE STRING IS SYNCHRONIZED.
INLINE void upb_string_unref(upb_string *str) {
  if (str == NULL) return;
  // Static and stack strings carry negative pseudo-refcounts; they are never
  // decremented or freed through this path.
  if (upb_atomic_read(&str->refcount) <= 0) return;
  // Free the string once the last real reference is gone.
  if (upb_atomic_unref(&str->refcount)) _upb_string_free(str);
}
upb_string *upb_strdup(upb_string *s); // Forward-declare.
// Returns a string with the same contents as "str". The caller owns a ref on
// the returned string, which may or may not be the same object as "str".
// WARNING: NOT THREAD-SAFE UNLESS THE STRING IS SYNCHRONIZED!
INLINE upb_string *upb_string_getref(upb_string *str) {
  int refcount = upb_atomic_read(&str->refcount);
  if (refcount > 0) {
    // Ordinary heap string: take another reference on the same object.
    upb_atomic_ref(&str->refcount);
    return str;
  }
  // Stack strings are duplicated onto the heap; other strings with a
  // non-positive refcount (i.e. static strings) are returned as-is, without
  // touching their refcount.
  return (refcount == _UPB_STRING_REFCOUNT_STACK) ? upb_strdup(str) : str;
}
// Returns the length of the string.
INLINE upb_strlen_t upb_string_len(upb_string *str) { return str->len; }
// Use to read the bytes of the string. The caller *must* call
// upb_string_endread() after the data has been read. The window between
// upb_string_getrobuf() and upb_string_endread() should be kept as short as
// possible, because any pending upb_string_detach() may be blocked until
// upb_string_endread() is called. No other functions may be called on the
// string during this window except upb_string_len().
INLINE const char *upb_string_getrobuf(upb_string *str) { return str->ptr; }
INLINE void upb_string_endread(upb_string *str) { (void)str; }
// Convenience method for getting the end of the string. Calls
// upb_string_getrobuf() so inherits the caveats of calling that function.
INLINE const char *upb_string_getbufend(upb_string *str) {
return upb_string_getrobuf(str) + upb_string_len(str);
}
// Attempts to recycle the string "str" so it may be reused and have different
// data written to it. After the function returns, "str" points to a writable
// string, which is either the original string if it had no other references
// or a newly created string if it did have other references.
//
// As a special case, passing a pointer to NULL will allocate a new string.
// This is convenient for the pattern:
//
// upb_string *str = NULL;
// while (x) {
// if (y) {
// upb_string_recycle(&str);
// upb_src_getstr(str);
// }
// }
void upb_string_recycle(upb_string **str);
// The options for setting the contents of a string. These may only be called
// when a string is first created or recycled; once other functions have been
// called on the string, these functions are not allowed until the string is
// recycled.
// Gets a pointer suitable for writing to the string, which is guaranteed to
// have at least "len" bytes of data available. The size of the string will
// become "len".
char *upb_string_getrwbuf(upb_string *str, upb_strlen_t len);
// Replaces the contents of str with the contents of the given printf.
void upb_string_vprintf(upb_string *str, const char *format, va_list args);
INLINE void upb_string_printf(upb_string *str, const char *format, ...) {
va_list args;
va_start(args, format);
upb_string_vprintf(str, format, args);
va_end(args);
}
// Sets the contents of "str" to be the given substring of "target_str", to
// which the caller must own a ref.
void upb_string_substr(upb_string *str, upb_string *target_str,
upb_strlen_t start, upb_strlen_t len);
// Sketch of an API for allowing upb_strings to reference external, unowned
// data. Waiting for a clear use case before actually implementing it.
//
// Makes the string "str" a reference to the given string data. The caller
// guarantees that the given string data will not change or be deleted until a
// matching call to upb_string_detach(), which may block until any concurrent
// readers have finished reading. upb_string_detach() preserves the contents
// of the string by copying the referenced data if there are any other
// referents.
// void upb_string_attach(upb_string *str, char *ptr, upb_strlen_t len);
// void upb_string_detach(upb_string *str);
// Allows using upb_strings in printf, ie:
// upb_string *str = UPB_STRLIT("Hello, World!\n");
// printf("String is: " UPB_STRFMT, UPB_STRARG(str));
#define UPB_STRARG(str) upb_string_len(str), upb_string_getrobuf(str)
#define UPB_STRFMT "%.*s"
// Macros for constructing upb_string objects statically or on the stack. These
// can be used like:
//
// upb_string static_str = UPB_STATIC_STRING("Foo");
//
// int main() {
// upb_string stack_str = UPB_STACK_STRING("Foo");
// // Now:
// // upb_streql(&static_str, &stack_str) == true
// // upb_streql(&static_str, UPB_STRLIT("Foo")) == true
// }
//
// You can also use UPB_STACK_STRING or UPB_STATIC_STRING with character arrays,
// but you must not change the underlying data once you've passed the string on:
//
// void foo() {
// char data[] = "ABC123";
// upb_string stack_str = UPB_STACK_STRING(data);
// bar(&stack_str);
// data[0] = 'B'; // NOT ALLOWED!!
// }
//
// TODO: should the stack business just be like attach/detach? The latter seems
// more flexible, though it does require a stack allocation. Maybe put this off
// until there is a clear use case.
#define UPB_STATIC_STRING(str) \
_UPB_STRING_INIT(str, sizeof(str)-1, _UPB_STRING_REFCOUNT_STATIC)
#define UPB_STATIC_STRING_LEN(str, len) \
_UPB_STRING_INIT(str, len, _UPB_STRING_REFCOUNT_STATIC)
#define UPB_STACK_STRING(str) \
_UPB_STRING_INIT(str, sizeof(str)-1, _UPB_STRING_REFCOUNT_STACK)
#define UPB_STACK_STRING_LEN(str, len) \
_UPB_STRING_INIT(str, len, _UPB_STRING_REFCOUNT_STACK)
// A convenient way of specifying upb_strings as literals, like:
//
// upb_streql(UPB_STRLIT("expected"), other_str);
//
// However, this requires either C99 compound initializers or C++.
// Must ONLY be called with a string literal as its argument!
//#ifdef __cplusplus
//namespace upb {
//class String : public upb_string {
// // This constructor must ONLY be called with a string literal.
// String(const char *str) : upb_string(UPB_STATIC_STRING(str)) {}
//};
//}
//#define UPB_STRLIT(str) upb::String(str)
//#endif
#define UPB_STRLIT(str) &(upb_string)UPB_STATIC_STRING(str)
/* upb_string library functions ***********************************************/
// Named like their <string.h> counterparts, these are all safe against buffer
// overflow. For the most part these only use the public upb_string interface.
// More efficient than upb_strcmp if all you need is to test equality.
INLINE bool upb_streql(upb_string *s1, upb_string *s2) {
  upb_strlen_t len = upb_string_len(s1);
  // Strings of different lengths can never be equal; skip the byte compare.
  if (len != upb_string_len(s2)) return false;
  bool equal =
      memcmp(upb_string_getrobuf(s1), upb_string_getrobuf(s2), len) == 0;
  upb_string_endread(s1);
  upb_string_endread(s2);
  return equal;
}
// Like strcmp().
int upb_strcmp(upb_string *s1, upb_string *s2);
// Compare a upb_string with memory or a NULL-terminated C string.
INLINE bool upb_streqllen(upb_string *str, const void *buf, upb_strlen_t len) {
return len == upb_string_len(str) &&
memcmp(upb_string_getrobuf(str), buf, len) == 0;
}
INLINE bool upb_streqlc(upb_string *str, const void *buf) {
// Could be made one-pass.
return upb_streqllen(str, buf, strlen((const char*)buf));
}
// Like upb_strcpy, but copies from a buffer and length.
INLINE void upb_strcpylen(upb_string *dest, const void *src, upb_strlen_t len) {
memcpy(upb_string_getrwbuf(dest, len), src, len);
}
// Replaces the contents of "dest" with the contents of "src".
INLINE void upb_strcpy(upb_string *dest, upb_string *src) {
upb_strcpylen(dest, upb_string_getrobuf(src), upb_string_len(src));
upb_string_endread(src);
}
// Like upb_strcpy, but copies from a NULL-terminated string.
INLINE void upb_strcpyc(upb_string *dest, const void *src) {
// This does two passes over src, but that is necessary unless we want to
// repeatedly re-allocate dst, which seems worse.
upb_strcpylen(dest, src, strlen((const char*)src));
}
// Returns a new string whose contents are a copy of s.
upb_string *upb_strdup(upb_string *s);
// Like upb_strdup(), but duplicates a given buffer and length.
INLINE upb_string *upb_strduplen(const void *src, upb_strlen_t len) {
upb_string *s = upb_string_new();
upb_strcpylen(s, src, len);
return s;
}
// Like upb_strdup(), but duplicates a C NULL-terminated string.
INLINE upb_string *upb_strdupc(const char *src) {
return upb_strduplen(src, strlen(src));
}
// Appends 'append' to 's' in-place, resizing s if necessary.
void upb_strcat(upb_string *s, upb_string *append);
// Returns a new string that is a substring of the given string.
INLINE upb_string *upb_strslice(upb_string *s, int offset, int len) {
upb_string *str = upb_string_new();
upb_string_substr(str, s, offset, len);
return str;
}
// Reads an entire file into a newly-allocated string.
upb_string *upb_strreadfile(const char *filename);
// Returns a new string with the contents of the given printf.
upb_string *upb_string_asprintf(const char *format, ...);
#ifdef __cplusplus
} /* extern "C" */
#endif
#endif

@ -5,7 +5,7 @@
*/
#include "upb_table.h"
#include "upb_data.h"
#include "upb_string.h"
#include <assert.h>
#include <stdlib.h>
@ -28,7 +28,7 @@ void upb_table_init(upb_table *t, uint32_t size, uint16_t entry_size)
{
t->count = 0;
t->entry_size = entry_size;
t->size_lg2 = 1;
t->size_lg2 = 0;
while(size >>= 1) t->size_lg2++;
size_t bytes = upb_table_size(t) * t->entry_size;
t->mask = upb_table_size(t) - 1;
@ -57,19 +57,19 @@ void upb_strtable_free(upb_strtable *t) {
upb_table_free(&t->t);
}
static uint32_t strtable_bucket(upb_strtable *t, upb_strptr key)
static uint32_t strtable_bucket(upb_strtable *t, upb_string *key)
{
uint32_t hash = MurmurHash2(upb_string_getrobuf(key), upb_strlen(key), 0);
uint32_t hash = MurmurHash2(upb_string_getrobuf(key), upb_string_len(key), 0);
return (hash & (upb_strtable_size(t)-1)) + 1;
}
void *upb_strtable_lookup(upb_strtable *t, upb_strptr key)
void *upb_strtable_lookup(upb_strtable *t, upb_string *key)
{
uint32_t bucket = strtable_bucket(t, key);
upb_strtable_entry *e;
do {
e = strent(t, bucket);
if(!upb_string_isnull(e->key) && upb_streql(e->key, key)) return e;
if(e->key && upb_streql(e->key, key)) return e;
} while((bucket = e->next) != UPB_END_OF_CHAIN);
return NULL;
}
@ -149,7 +149,7 @@ static uint32_t empty_strbucket(upb_strtable *table)
/* TODO: does it matter that this is biased towards the front of the table? */
for(uint32_t i = 1; i <= upb_strtable_size(table); i++) {
upb_strtable_entry *e = strent(table, i);
if(upb_string_isnull(e->key)) return i;
if(!e->key) return i;
}
assert(false);
return 0;
@ -158,11 +158,11 @@ static uint32_t empty_strbucket(upb_strtable *table)
static void strinsert(upb_strtable *t, upb_strtable_entry *e)
{
assert(upb_strtable_lookup(t, e->key) == NULL);
e->key = upb_string_getref(e->key, UPB_REF_FROZEN);
e->key = upb_string_getref(e->key);
t->t.count++;
uint32_t bucket = strtable_bucket(t, e->key);
upb_strtable_entry *table_e = strent(t, bucket);
if(!upb_string_isnull(table_e->key)) { /* Collision. */
if(table_e->key) { /* Collision. */
if(bucket == strtable_bucket(t, table_e->key)) {
/* Existing element is in its main position. Find an empty slot to
* place our new element and append it to this key's chain. */
@ -179,7 +179,7 @@ static void strinsert(upb_strtable *t, upb_strtable_entry *e)
memcpy(strent(t, empty_bucket), table_e, t->t.entry_size); /* copies next */
upb_strtable_entry *evictee_e = strent(t, evictee_bucket);
while(1) {
assert(!upb_string_isnull(evictee_e->key));
assert(evictee_e->key);
assert(evictee_e->next != UPB_END_OF_CHAIN);
if(evictee_e->next == bucket) {
evictee_e->next = empty_bucket;
@ -232,7 +232,7 @@ void *upb_strtable_next(upb_strtable *t, upb_strtable_entry *cur) {
do {
cur = (void*)((char*)cur + t->t.entry_size);
if(cur == end) return NULL;
} while(upb_string_isnull(cur->key));
} while(cur->key == NULL);
return cur;
}

@ -17,6 +17,7 @@
#include <assert.h>
#include "upb.h"
#include "upb_string.h"
#ifdef __cplusplus
extern "C" {
@ -38,7 +39,7 @@ typedef struct {
// performance by letting us compare hashes before comparing lengths or the
// strings themselves.
typedef struct {
upb_strptr key; // We own a frozen ref.
upb_string *key; // We own a ref.
uint32_t next; // Internal chaining.
} upb_strtable_entry;
@ -114,7 +115,7 @@ INLINE void *upb_inttable_lookup(upb_inttable *t, uint32_t key) {
return upb_inttable_fastlookup(t, key, t->t.entry_size);
}
void *upb_strtable_lookup(upb_strtable *t, upb_strptr key);
void *upb_strtable_lookup(upb_strtable *t, upb_string *key);
/* Provides iteration over the table. The order in which the entries are
* returned is undefined. Insertions invalidate iterators. The _next

File diff suppressed because it is too large Load Diff

@ -1,402 +1,26 @@
/* This file was generated by upbc (the upb compiler). Do not edit. */
/*
* upb - a minimalist implementation of protocol buffers.
*
* Copyright (c) 2009 Joshua Haberman. See LICENSE for details.
*
* This file contains declarations for an array that contains the contents
* of descriptor.proto, serialized as a protobuf. xxd is used to create
* the actual definition.
*/
#ifndef DESCRIPTOR_DESCRIPTOR_H
#define DESCRIPTOR_DESCRIPTOR_H
#ifndef UPB_DESCRIPTOR_H_
#define UPB_DESCRIPTOR_H_
#include <upb_data.h>
#include "upb_string.h"
#ifdef __cplusplus
extern "C" {
#endif
struct google_protobuf_FileDescriptorSet;
extern struct google_protobuf_FileDescriptorSet *upb_file_descriptor_set;
/* Forward declarations of all message types.
* So they can refer to each other in possibly-recursive ways. */
struct google_protobuf_UninterpretedOption_NamePart;
typedef struct google_protobuf_UninterpretedOption_NamePart
google_protobuf_UninterpretedOption_NamePart;
UPB_DEFINE_MSG_ARRAY(google_protobuf_UninterpretedOption_NamePart)
struct google_protobuf_DescriptorProto;
typedef struct google_protobuf_DescriptorProto
google_protobuf_DescriptorProto;
UPB_DEFINE_MSG_ARRAY(google_protobuf_DescriptorProto)
struct google_protobuf_EnumDescriptorProto;
typedef struct google_protobuf_EnumDescriptorProto
google_protobuf_EnumDescriptorProto;
UPB_DEFINE_MSG_ARRAY(google_protobuf_EnumDescriptorProto)
struct google_protobuf_UninterpretedOption;
typedef struct google_protobuf_UninterpretedOption
google_protobuf_UninterpretedOption;
UPB_DEFINE_MSG_ARRAY(google_protobuf_UninterpretedOption)
struct google_protobuf_FileDescriptorProto;
typedef struct google_protobuf_FileDescriptorProto
google_protobuf_FileDescriptorProto;
UPB_DEFINE_MSG_ARRAY(google_protobuf_FileDescriptorProto)
struct google_protobuf_MethodDescriptorProto;
typedef struct google_protobuf_MethodDescriptorProto
google_protobuf_MethodDescriptorProto;
UPB_DEFINE_MSG_ARRAY(google_protobuf_MethodDescriptorProto)
struct google_protobuf_EnumValueOptions;
typedef struct google_protobuf_EnumValueOptions
google_protobuf_EnumValueOptions;
UPB_DEFINE_MSG_ARRAY(google_protobuf_EnumValueOptions)
struct google_protobuf_EnumValueDescriptorProto;
typedef struct google_protobuf_EnumValueDescriptorProto
google_protobuf_EnumValueDescriptorProto;
UPB_DEFINE_MSG_ARRAY(google_protobuf_EnumValueDescriptorProto)
struct google_protobuf_ServiceDescriptorProto;
typedef struct google_protobuf_ServiceDescriptorProto
google_protobuf_ServiceDescriptorProto;
UPB_DEFINE_MSG_ARRAY(google_protobuf_ServiceDescriptorProto)
struct google_protobuf_FileDescriptorSet;
typedef struct google_protobuf_FileDescriptorSet
google_protobuf_FileDescriptorSet;
UPB_DEFINE_MSG_ARRAY(google_protobuf_FileDescriptorSet)
struct google_protobuf_DescriptorProto_ExtensionRange;
typedef struct google_protobuf_DescriptorProto_ExtensionRange
google_protobuf_DescriptorProto_ExtensionRange;
UPB_DEFINE_MSG_ARRAY(google_protobuf_DescriptorProto_ExtensionRange)
struct google_protobuf_FieldOptions;
typedef struct google_protobuf_FieldOptions
google_protobuf_FieldOptions;
UPB_DEFINE_MSG_ARRAY(google_protobuf_FieldOptions)
struct google_protobuf_FileOptions;
typedef struct google_protobuf_FileOptions
google_protobuf_FileOptions;
UPB_DEFINE_MSG_ARRAY(google_protobuf_FileOptions)
struct google_protobuf_MessageOptions;
typedef struct google_protobuf_MessageOptions
google_protobuf_MessageOptions;
UPB_DEFINE_MSG_ARRAY(google_protobuf_MessageOptions)
struct google_protobuf_EnumOptions;
typedef struct google_protobuf_EnumOptions
google_protobuf_EnumOptions;
UPB_DEFINE_MSG_ARRAY(google_protobuf_EnumOptions)
struct google_protobuf_FieldDescriptorProto;
typedef struct google_protobuf_FieldDescriptorProto
google_protobuf_FieldDescriptorProto;
UPB_DEFINE_MSG_ARRAY(google_protobuf_FieldDescriptorProto)
struct google_protobuf_ServiceOptions;
typedef struct google_protobuf_ServiceOptions
google_protobuf_ServiceOptions;
UPB_DEFINE_MSG_ARRAY(google_protobuf_ServiceOptions)
struct google_protobuf_MethodOptions;
typedef struct google_protobuf_MethodOptions
google_protobuf_MethodOptions;
UPB_DEFINE_MSG_ARRAY(google_protobuf_MethodOptions)
/* The message definitions themselves. */
struct google_protobuf_UninterpretedOption_NamePart {
upb_data base;
union {
uint8_t bytes[1];
struct {
bool name_part:1; /* = 1, required. */
bool is_extension:1; /* = 2, required. */
} has;
} set_flags;
upb_strptr name_part;
bool is_extension;
};
struct google_protobuf_DescriptorProto {
upb_data base;
union {
uint8_t bytes[1];
struct {
bool name:1; /* = 1, optional. */
bool field:1; /* = 2, repeated. */
bool nested_type:1; /* = 3, repeated. */
bool enum_type:1; /* = 4, repeated. */
bool extension_range:1; /* = 5, repeated. */
bool extension:1; /* = 6, repeated. */
bool options:1; /* = 7, optional. */
} has;
} set_flags;
upb_strptr name;
UPB_MSG_ARRAYPTR(google_protobuf_FieldDescriptorProto) field;
UPB_MSG_ARRAYPTR(google_protobuf_DescriptorProto) nested_type;
UPB_MSG_ARRAYPTR(google_protobuf_EnumDescriptorProto) enum_type;
UPB_MSG_ARRAYPTR(google_protobuf_DescriptorProto_ExtensionRange) extension_range;
UPB_MSG_ARRAYPTR(google_protobuf_FieldDescriptorProto) extension;
google_protobuf_MessageOptions* options;
};
struct google_protobuf_EnumDescriptorProto {
upb_data base;
union {
uint8_t bytes[1];
struct {
bool name:1; /* = 1, optional. */
bool value:1; /* = 2, repeated. */
bool options:1; /* = 3, optional. */
} has;
} set_flags;
upb_strptr name;
UPB_MSG_ARRAYPTR(google_protobuf_EnumValueDescriptorProto) value;
google_protobuf_EnumOptions* options;
};
struct google_protobuf_UninterpretedOption {
upb_data base;
union {
uint8_t bytes[1];
struct {
bool name:1; /* = 2, repeated. */
bool identifier_value:1; /* = 3, optional. */
bool positive_int_value:1; /* = 4, optional. */
bool negative_int_value:1; /* = 5, optional. */
bool double_value:1; /* = 6, optional. */
bool string_value:1; /* = 7, optional. */
} has;
} set_flags;
UPB_MSG_ARRAYPTR(google_protobuf_UninterpretedOption_NamePart) name;
upb_strptr identifier_value;
uint64_t positive_int_value;
int64_t negative_int_value;
double double_value;
upb_strptr string_value;
};
struct google_protobuf_FileDescriptorProto {
upb_data base;
union {
uint8_t bytes[1];
struct {
bool name:1; /* = 1, optional. */
bool package:1; /* = 2, optional. */
bool dependency:1; /* = 3, repeated. */
bool message_type:1; /* = 4, repeated. */
bool enum_type:1; /* = 5, repeated. */
bool service:1; /* = 6, repeated. */
bool extension:1; /* = 7, repeated. */
bool options:1; /* = 8, optional. */
} has;
} set_flags;
upb_strptr name;
upb_strptr package;
upb_arrayptr dependency;
UPB_MSG_ARRAYPTR(google_protobuf_DescriptorProto) message_type;
UPB_MSG_ARRAYPTR(google_protobuf_EnumDescriptorProto) enum_type;
UPB_MSG_ARRAYPTR(google_protobuf_ServiceDescriptorProto) service;
UPB_MSG_ARRAYPTR(google_protobuf_FieldDescriptorProto) extension;
google_protobuf_FileOptions* options;
};
struct google_protobuf_MethodDescriptorProto {
upb_data base;
union {
uint8_t bytes[1];
struct {
bool name:1; /* = 1, optional. */
bool input_type:1; /* = 2, optional. */
bool output_type:1; /* = 3, optional. */
bool options:1; /* = 4, optional. */
} has;
} set_flags;
upb_strptr name;
upb_strptr input_type;
upb_strptr output_type;
google_protobuf_MethodOptions* options;
};
struct google_protobuf_EnumValueOptions {
upb_data base;
union {
uint8_t bytes[1];
struct {
bool uninterpreted_option:1; /* = 999, repeated. */
} has;
} set_flags;
UPB_MSG_ARRAYPTR(google_protobuf_UninterpretedOption) uninterpreted_option;
};
struct google_protobuf_EnumValueDescriptorProto {
upb_data base;
union {
uint8_t bytes[1];
struct {
bool name:1; /* = 1, optional. */
bool number:1; /* = 2, optional. */
bool options:1; /* = 3, optional. */
} has;
} set_flags;
upb_strptr name;
int32_t number;
google_protobuf_EnumValueOptions* options;
};
struct google_protobuf_ServiceDescriptorProto {
upb_data base;
union {
uint8_t bytes[1];
struct {
bool name:1; /* = 1, optional. */
bool method:1; /* = 2, repeated. */
bool options:1; /* = 3, optional. */
} has;
} set_flags;
upb_strptr name;
UPB_MSG_ARRAYPTR(google_protobuf_MethodDescriptorProto) method;
google_protobuf_ServiceOptions* options;
};
struct google_protobuf_FileDescriptorSet {
upb_data base;
union {
uint8_t bytes[1];
struct {
bool file:1; /* = 1, repeated. */
} has;
} set_flags;
UPB_MSG_ARRAYPTR(google_protobuf_FileDescriptorProto) file;
};
struct google_protobuf_DescriptorProto_ExtensionRange {
upb_data base;
union {
uint8_t bytes[1];
struct {
bool start:1; /* = 1, optional. */
bool end:1; /* = 2, optional. */
} has;
} set_flags;
int32_t start;
int32_t end;
};
struct google_protobuf_FieldOptions {
upb_data base;
union {
uint8_t bytes[1];
struct {
bool ctype:1; /* = 1, optional. */
bool packed:1; /* = 2, optional. */
bool deprecated:1; /* = 3, optional. */
bool experimental_map_key:1; /* = 9, optional. */
bool uninterpreted_option:1; /* = 999, repeated. */
} has;
} set_flags;
int32_t ctype;
bool packed;
bool deprecated;
upb_strptr experimental_map_key;
UPB_MSG_ARRAYPTR(google_protobuf_UninterpretedOption) uninterpreted_option;
};
struct google_protobuf_FileOptions {
upb_data base;
union {
uint8_t bytes[1];
struct {
bool java_package:1; /* = 1, optional. */
bool java_outer_classname:1; /* = 8, optional. */
bool optimize_for:1; /* = 9, optional. */
bool java_multiple_files:1; /* = 10, optional. */
bool uninterpreted_option:1; /* = 999, repeated. */
} has;
} set_flags;
upb_strptr java_package;
upb_strptr java_outer_classname;
int32_t optimize_for;
bool java_multiple_files;
UPB_MSG_ARRAYPTR(google_protobuf_UninterpretedOption) uninterpreted_option;
};
struct google_protobuf_MessageOptions {
upb_data base;
union {
uint8_t bytes[1];
struct {
bool message_set_wire_format:1; /* = 1, optional. */
bool uninterpreted_option:1; /* = 999, repeated. */
} has;
} set_flags;
bool message_set_wire_format;
UPB_MSG_ARRAYPTR(google_protobuf_UninterpretedOption) uninterpreted_option;
};
struct google_protobuf_EnumOptions {
upb_data base;
union {
uint8_t bytes[1];
struct {
bool uninterpreted_option:1; /* = 999, repeated. */
} has;
} set_flags;
UPB_MSG_ARRAYPTR(google_protobuf_UninterpretedOption) uninterpreted_option;
};
struct google_protobuf_FieldDescriptorProto {
upb_data base;
union {
uint8_t bytes[1];
struct {
bool name:1; /* = 1, optional. */
bool extendee:1; /* = 2, optional. */
bool number:1; /* = 3, optional. */
bool label:1; /* = 4, optional. */
bool type:1; /* = 5, optional. */
bool type_name:1; /* = 6, optional. */
bool default_value:1; /* = 7, optional. */
bool options:1; /* = 8, optional. */
} has;
} set_flags;
upb_strptr name;
upb_strptr extendee;
int32_t number;
int32_t label;
int32_t type;
upb_strptr type_name;
upb_strptr default_value;
google_protobuf_FieldOptions* options;
};
struct google_protobuf_ServiceOptions {
upb_data base;
union {
uint8_t bytes[1];
struct {
bool uninterpreted_option:1; /* = 999, repeated. */
} has;
} set_flags;
UPB_MSG_ARRAYPTR(google_protobuf_UninterpretedOption) uninterpreted_option;
};
struct google_protobuf_MethodOptions {
upb_data base;
union {
uint8_t bytes[1];
struct {
bool uninterpreted_option:1; /* = 999, repeated. */
} has;
} set_flags;
UPB_MSG_ARRAYPTR(google_protobuf_UninterpretedOption) uninterpreted_option;
};
extern upb_string descriptor_str;
#ifdef __cplusplus
} /* extern "C" */
#endif
#endif /* DESCRIPTOR_DESCRIPTOR_H */
#endif /* UPB_DESCRIPTOR_H_ */

@ -14,5 +14,5 @@
set -e
rm -f deps
for file in $@; do
gcc -MM $file -MT ${file%.*}.o -DUPB_THREAD_UNSAFE -Idescriptor -Isrc -I. >> deps
gcc -MM $file -MT ${file%.*}.o -DUPB_THREAD_UNSAFE -Idescriptor -Icore -Istream -I. >> deps
done

@ -0,0 +1,336 @@
/*
* upb - a minimalist implementation of protocol buffers.
*
* Copyright (c) 2009 Joshua Haberman. See LICENSE for details.
*
* A Lua extension for upb.
*/
#include <stdlib.h>
#include "lauxlib.h"
#include "upb_def.h"
// Pushes the bytes of "str" onto the Lua stack as a Lua string, using the
// string's explicit length rather than relying on NUL termination.
void lupb_pushstring(lua_State *L, upb_string *str) {
  const char *data = upb_string_getrobuf(str);
  size_t len = upb_string_len(str);
  lua_pushlstring(L, data, len);
}
/* object cache ***************************************************************/
// We cache all the lua objects (userdata) we vend in a weak table, indexed by
// the C pointer of the object they are caching.
typedef void (*lupb_cb)(void *cobj);
static void lupb_nop(void *foo) {
(void)foo;
}
// Leaves on top of the Lua stack the userdata that wraps the C object "cobj",
// creating and caching it if no cached userdata exists yet.
//
//   type:  name of the metatable (looked up with luaL_getmetatable) to attach
//          to a newly created userdata.
//   ref:   called with "cobj" when a new userdata is created for it.
//   unref: called with "cobj" when an existing cached userdata is reused.
//
// Net stack effect: exactly one value (the userdata) is pushed.
static void lupb_cache_getorcreate(lua_State *L, void *cobj, const char *type,
lupb_cb ref, lupb_cb unref) {
// Lookup our cache in the registry (we don't put our objects in the registry
// directly because we need our cache to be a weak table).
lua_getfield(L, LUA_REGISTRYINDEX, "upb.objcache");
assert(!lua_isnil(L, -1)); // Should have been created by luaopen_upb.
// The cache is keyed by the C pointer, pushed as a light userdata.
lua_pushlightuserdata(L, cobj);
lua_rawget(L, -2);
// Stack: objcache, cached value.
if (lua_isnil(L, -1)) {
// Remove bad cached value and push new value.
lua_pop(L, 1);
// We take advantage of the fact that all of our objects are currently a
// single pointer, and thus have the same layout.
void **obj = lua_newuserdata(L, sizeof(void*));
*obj = cobj;
luaL_getmetatable(L, type);
assert(!lua_isnil(L, -1)); // Should have been created by luaopen_upb.
lua_setmetatable(L, -2);
// Set it in the cache.
// Stack before rawset: objcache, userdata, key, userdata -- so the cache
// table sits at index -4.
lua_pushlightuserdata(L, cobj);
lua_pushvalue(L, -2);
lua_rawset(L, -4);
ref(cobj);
} else {
unref(cobj);
}
// Stack: objcache, userdata.  Swap the two and pop the cache table so only
// the userdata remains.
lua_insert(L, -2);
lua_pop(L, 1);
}
/* lupb_def *******************************************************************/
// All the def types share the same C layout, even though they are different Lua
// types with different metatables.
// Userdata payload for def wrappers: a single pointer to the wrapped def.
typedef struct {
upb_def *def;
} lupb_def;
static void lupb_def_unref(void *cobj) {
upb_def_unref((upb_def*)cobj);
}
// Pushes the Lua wrapper for "def" onto the stack, creating it if it is not
// already in the object cache.  The Lua type (metatable name) is chosen from
// def->type; an unknown type raises a Lua error.
//
// lupb_nop is passed as the "ref" callback and lupb_def_unref as the "unref"
// callback, so upb_def_unref() is called on def only when a cached wrapper
// already existed.
static void lupb_def_getorcreate(lua_State *L, upb_def *def) {
  const char *type_name;
  switch(def->type) {
    case UPB_DEF_MSG:
      type_name = "upb.msgdef";
      break;
    case UPB_DEF_ENUM:
      type_name = "upb.enumdef";
      break;
    default:
      luaL_error(L, "unknown deftype %d", def->type);
      type_name = NULL;  // Placate the compiler.
  }
  // Plain call, not "return f(...)": ISO C does not allow a return statement
  // with an expression in a function returning void.
  lupb_cache_getorcreate(L, def, type_name, lupb_nop, lupb_def_unref);
}
// msgdef
// Checks that argument "narg" is an "upb.msgdef" userdata (raising a Lua error
// otherwise) and returns the msgdef it wraps.
static upb_msgdef *lupb_msgdef_check(lua_State *L, int narg) {
  lupb_def *wrapper = luaL_checkudata(L, narg, "upb.msgdef");
  upb_def *def = wrapper->def;
  return upb_downcast_msgdef(def);
}
// __gc metamethod for "upb.msgdef": releases one ref on the wrapped def.
// Returns no values to Lua.
static int lupb_msgdef_gc(lua_State *L) {
  lupb_def *wrapper = luaL_checkudata(L, 1, "upb.msgdef");
  upb_def *def = wrapper->def;
  upb_def_unref(def);
  return 0;
}
static void lupb_fielddef_getorcreate(lua_State *L, upb_fielddef *f);
// Lua method msgdef:fieldbyname(name).  Looks up a field of the msgdef by its
// name and returns the fielddef wrapper, or nil if there is no such field.
static int lupb_msgdef_fieldbyname(lua_State *L) {
  upb_msgdef *msgdef = lupb_msgdef_check(L, 1);
  size_t namelen;
  const char *cname = luaL_checklstring(L, 2, &namelen);
  upb_string key = UPB_STACK_STRING_LEN(cname, namelen);
  upb_fielddef *field = upb_msgdef_ntof(msgdef, &key);
  if (!field) {
    lua_pushnil(L);
  } else {
    lupb_fielddef_getorcreate(L, field);
  }
  return 1;
}
// Lua method msgdef:fieldbynum(num).  Looks up a field of the msgdef by its
// number and returns the fielddef wrapper, or nil if there is no such field.
static int lupb_msgdef_fieldbynum(lua_State *L) {
  upb_msgdef *msgdef = lupb_msgdef_check(L, 1);
  int fieldnum = luaL_checkint(L, 2);
  upb_fielddef *field = upb_msgdef_itof(msgdef, fieldnum);
  if (!field) {
    lua_pushnil(L);
  } else {
    lupb_fielddef_getorcreate(L, field);
  }
  return 1;
}
// Metamethods for the "upb.msgdef" type (see luaopen_upb).  The {NULL, NULL}
// entry terminates the list.
static const struct luaL_Reg lupb_msgdef_mm[] = {
{"__gc", lupb_msgdef_gc},
{NULL, NULL}
};
// Methods for the "upb.msgdef" type; lupb_register_type installs them in the
// metatable's __index table.
static const struct luaL_Reg lupb_msgdef_m[] = {
{"fieldbyname", lupb_msgdef_fieldbyname},
{"fieldbynum", lupb_msgdef_fieldbynum},
{NULL, NULL}
};
// enumdef
// Checks that argument "narg" is an "upb.enumdef" userdata (raising a Lua
// error otherwise) and returns the wrapper.
static lupb_def *lupb_enumdef_check(lua_State *L, int narg) {
  lupb_def *wrapper = luaL_checkudata(L, narg, "upb.enumdef");
  return wrapper;
}
// __gc metamethod for "upb.enumdef": releases one ref on the wrapped def.
// Returns no values to Lua.
static int lupb_enumdef_gc(lua_State *L) {
  upb_def *def = lupb_enumdef_check(L, 1)->def;
  upb_def_unref(def);
  return 0;
}
// Metamethods for the "upb.enumdef" type (see luaopen_upb).
static const struct luaL_Reg lupb_enumdef_mm[] = {
{"__gc", lupb_enumdef_gc},
{NULL, NULL}
};
// Methods for the "upb.enumdef" type; currently empty (terminator only).
static const struct luaL_Reg lupb_enumdef_m[] = {
{NULL, NULL}
};
/* lupb_fielddef **************************************************************/
// Userdata payload for "upb.fielddef" wrappers: a single pointer to the
// wrapped fielddef.
typedef struct {
upb_fielddef *field;
} lupb_fielddef;
static void lupb_fielddef_ref(void *cobj) {
upb_def_ref(UPB_UPCAST(((upb_fielddef*)cobj)->msgdef));
}
// Pushes the Lua wrapper for fielddef "f", creating it if it is not cached.
// A newly created wrapper triggers lupb_fielddef_ref; reusing a cached wrapper
// triggers nothing (lupb_nop).
static void lupb_fielddef_getorcreate(lua_State *L, upb_fielddef *f) {
  void *cobj = f;
  lupb_cache_getorcreate(L, cobj, "upb.fielddef", lupb_fielddef_ref, lupb_nop);
}
// Checks that argument "narg" is an "upb.fielddef" userdata (raising a Lua
// error otherwise) and returns the wrapper.
static lupb_fielddef *lupb_fielddef_check(lua_State *L, int narg) {
  lupb_fielddef *wrapper = luaL_checkudata(L, narg, "upb.fielddef");
  return wrapper;
}
// __index metamethod for "upb.fielddef".  Supports the keys "name", "number",
// "type", "label", "def" and "msgdef"; any other key yields nil.  Always
// returns exactly one value.
static int lupb_fielddef_index(lua_State *L) {
  upb_fielddef *field = lupb_fielddef_check(L, 1)->field;
  const char *key = luaL_checkstring(L, 2);
  if (strcmp(key, "name") == 0) {
    lupb_pushstring(L, field->name);
  } else if (strcmp(key, "number") == 0) {
    lua_pushinteger(L, field->number);
  } else if (strcmp(key, "type") == 0) {
    lua_pushinteger(L, field->type);
  } else if (strcmp(key, "label") == 0) {
    lua_pushinteger(L, field->label);
  } else if (strcmp(key, "def") == 0) {
    // Take a ref first; lupb_def_getorcreate releases it again if a cached
    // wrapper already exists.
    upb_def_ref(field->def);
    lupb_def_getorcreate(L, field->def);
  } else if (strcmp(key, "msgdef") == 0) {
    upb_def_ref(UPB_UPCAST(field->msgdef));
    lupb_def_getorcreate(L, UPB_UPCAST(field->msgdef));
  } else {
    lua_pushnil(L);
  }
  return 1;
}
// __gc metamethod for "upb.fielddef": releases one ref on the msgdef that the
// wrapped field points to.  Returns no values to Lua.
static int lupb_fielddef_gc(lua_State *L) {
  upb_fielddef *field = lupb_fielddef_check(L, 1)->field;
  upb_def_unref(UPB_UPCAST(field->msgdef));
  return 0;
}
// Metamethods for the "upb.fielddef" type.  Unlike the other types, __index is
// a function here (lupb_fielddef_index) rather than a method table, and
// luaopen_upb registers this type with no method list.
static const struct luaL_Reg lupb_fielddef_mm[] = {
{"__gc", lupb_fielddef_gc},
{"__index", lupb_fielddef_index},
{NULL, NULL}
};
/* lupb_symtab ****************************************************************/
// Userdata payload for "upb.symtab" wrappers: a single pointer to the wrapped
// symtab.
typedef struct {
upb_symtab *symtab;
} lupb_symtab;
// Checks that argument "narg" is an "upb.symtab" userdata (luaL_checkudata
// raises a Lua error otherwise) and returns the wrapper.
// NOTE(review): an earlier comment here said this inherits a ref and leaves
// the metatable on the stack; the code below does neither.
lupb_symtab *lupb_symtab_check(lua_State *L, int narg) {
return luaL_checkudata(L, narg, "upb.symtab");
}
// __gc metamethod for "upb.symtab": releases one ref on the wrapped symtab.
// Returns no values to Lua.
static int lupb_symtab_gc(lua_State *L) {
  upb_symtab *symtab = lupb_symtab_check(L, 1)->symtab;
  upb_symtab_unref(symtab);
  return 0;
}
static void lupb_symtab_unref(void *cobj) {
upb_symtab_unref((upb_symtab*)cobj);
}
// Lua method symtab:lookup(name).  Returns the def wrapper for the given name,
// or nil if upb_symtab_lookup() finds nothing.
static int lupb_symtab_lookup(lua_State *L) {
  lupb_symtab *wrapper = lupb_symtab_check(L, 1);
  size_t namelen;
  const char *cname = luaL_checklstring(L, 2, &namelen);
  upb_string key = UPB_STACK_STRING_LEN(cname, namelen);
  upb_def *found = upb_symtab_lookup(wrapper->symtab, &key);
  if (!found) {
    lua_pushnil(L);
  } else {
    lupb_def_getorcreate(L, found);
  }
  return 1;
}
// Lua method symtab:getdefs(type).  Returns a table that maps each def's
// fully-qualified name to its def wrapper, for the defs returned by
// upb_symtab_getdefs() for the requested type.
static int lupb_symtab_getdefs(lua_State *L) {
  lupb_symtab *wrapper = lupb_symtab_check(L, 1);
  upb_deftype_t deftype = luaL_checkint(L, 2);
  int ndefs;
  upb_def **deflist = upb_symtab_getdefs(wrapper->symtab, &ndefs, deftype);

  // Result table, pre-sized for ndefs hash entries.
  lua_createtable(L, 0, ndefs);
  for (int i = 0; i < ndefs; i++) {
    lupb_pushstring(L, deflist[i]->fqname);   // key
    lupb_def_getorcreate(L, deflist[i]);      // value
    lua_settable(L, -3);                      // result[key] = value
  }
  // The array itself is released with free() here.
  free(deflist);
  return 1;
}
// Lua method symtab:add_descriptorproto().  Forwards to
// upb_symtab_add_descriptorproto() on the wrapped symtab; returns nothing.
static int lupb_symtab_add_descriptorproto(lua_State *L) {
  upb_symtab *symtab = lupb_symtab_check(L, 1)->symtab;
  upb_symtab_add_descriptorproto(symtab);
  return 0;  // No args to return.
}
// Methods for the "upb.symtab" type.  The commented-out entries ("addfds",
// "resolve") have no implementation in this file.
static const struct luaL_Reg lupb_symtab_m[] = {
{"add_descriptorproto", lupb_symtab_add_descriptorproto},
//{"addfds", lupb_symtab_addfds},
{"getdefs", lupb_symtab_getdefs},
{"lookup", lupb_symtab_lookup},
//{"resolve", lupb_symtab_resolve},
{NULL, NULL}
};
// Metamethods for the "upb.symtab" type.
static const struct luaL_Reg lupb_symtab_mm[] = {
{"__gc", lupb_symtab_gc},
{NULL, NULL}
};
/* lupb toplevel **************************************************************/
// Lua function upb.symtab().  Creates a new symtab with upb_symtab_new() and
// returns its Lua wrapper.
static int lupb_symtab_new(lua_State *L) {
  upb_symtab *symtab = upb_symtab_new();
  lupb_cache_getorcreate(L, symtab, "upb.symtab", lupb_nop, lupb_symtab_unref);
  return 1;
}
// Functions exported in the "upb" package table (registered by luaopen_upb).
static const struct luaL_Reg lupb_toplevel_m[] = {
{"symtab", lupb_symtab_new},
{NULL, NULL}
};
// Register the given type with the given methods and metamethods.
//
// Creates the metatable named "name", registers the metamethods "mm" on it
// and, if a method list "m" is given, installs those methods in a fresh table
// stored as the metatable's __index.  "m" may be NULL for types that have no
// method table (e.g. because __index is itself a metamethod in "mm").
//
// Leaves the Lua stack exactly as it found it.
static void lupb_register_type(lua_State *L, const char *name,
                               const luaL_Reg *m, const luaL_Reg *mm) {
  luaL_newmetatable(L, name);
  luaL_register(L, NULL, mm);
  if (m) {
    // Only create the method table when it will actually be consumed by
    // lua_setfield(); creating it unconditionally left it on the stack when
    // m == NULL, so the final pop removed it instead of the metatable.
    lua_createtable(L, 0, 0);
    luaL_register(L, NULL, m);
    lua_setfield(L, -2, "__index");
  }
  lua_pop(L, 1);  // The mt.
}
// Module entry point, called by Lua when the "upb" module is loaded.
// Registers all userdata types, creates the weak-valued object cache in the
// registry under "upb.objcache", and returns the "upb" package table.
int luaopen_upb(lua_State *L) {
  lupb_register_type(L, "upb.msgdef", lupb_msgdef_m, lupb_msgdef_mm);
  lupb_register_type(L, "upb.enumdef", lupb_enumdef_m, lupb_enumdef_mm);
  lupb_register_type(L, "upb.fielddef", NULL, lupb_fielddef_mm);
  lupb_register_type(L, "upb.symtab", lupb_symtab_m, lupb_symtab_mm);

  // Create our object cache with weak values, so that caching a wrapper does
  // not by itself keep it alive.
  lua_createtable(L, 0, 0);       // The cache.
  lua_createtable(L, 0, 1);       // Cache metatable.
  lua_pushstring(L, "v");         // Values are weak.
  lua_setfield(L, -2, "__mode");
  // Attach the metatable to the cache.  Without this call the metatable
  // itself (not the cache) was on top of the stack and got stored in the
  // registry, and the cache was never made weak.
  lua_setmetatable(L, -2);
  lua_setfield(L, LUA_REGISTRYINDEX, "upb.objcache");

  luaL_register(L, "upb", lupb_toplevel_m);
  return 1;  // Return package table.
}

@ -1,49 +0,0 @@
/*
* upb - a minimalist implementation of protocol buffers.
*
* Copyright (c) 2009 Joshua Haberman. See LICENSE for details.
*
*/
#include <stdarg.h>
#include <stddef.h>
#include "upb.h"
// Computes the alignment of type t as the offset of a t member that follows a
// single char in an anonymous struct.
#define alignof(t) offsetof(struct { char c; t x; }, x)
// Emits one designated-initializer entry of upb_types[], indexed by the
// descriptor.proto field-type constant, holding (in this order) the C type's
// alignment, its size, the wire type, and the C type's name as a string.
#define TYPE_INFO(proto_type, wire_type, ctype) \
[GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_ ## proto_type] = \
{alignof(ctype), sizeof(ctype), wire_type, #ctype},
// Table of per-field-type information, indexed by field type.
upb_type_info upb_types[] = {
TYPE_INFO(DOUBLE, UPB_WIRE_TYPE_64BIT, double)
TYPE_INFO(FLOAT, UPB_WIRE_TYPE_32BIT, float)
TYPE_INFO(INT64, UPB_WIRE_TYPE_VARINT, int64_t)
TYPE_INFO(UINT64, UPB_WIRE_TYPE_VARINT, uint64_t)
TYPE_INFO(INT32, UPB_WIRE_TYPE_VARINT, int32_t)
TYPE_INFO(FIXED64, UPB_WIRE_TYPE_64BIT, uint64_t)
TYPE_INFO(FIXED32, UPB_WIRE_TYPE_32BIT, uint32_t)
TYPE_INFO(BOOL, UPB_WIRE_TYPE_VARINT, bool)
TYPE_INFO(MESSAGE, UPB_WIRE_TYPE_DELIMITED, void*)
TYPE_INFO(GROUP, UPB_WIRE_TYPE_START_GROUP, void*)
TYPE_INFO(UINT32, UPB_WIRE_TYPE_VARINT, uint32_t)
TYPE_INFO(ENUM, UPB_WIRE_TYPE_VARINT, uint32_t)
TYPE_INFO(SFIXED32, UPB_WIRE_TYPE_32BIT, int32_t)
TYPE_INFO(SFIXED64, UPB_WIRE_TYPE_64BIT, int64_t)
TYPE_INFO(SINT32, UPB_WIRE_TYPE_VARINT, int32_t)
TYPE_INFO(SINT64, UPB_WIRE_TYPE_VARINT, int64_t)
TYPE_INFO(STRING, UPB_WIRE_TYPE_DELIMITED, upb_strptr)
TYPE_INFO(BYTES, UPB_WIRE_TYPE_DELIMITED, upb_strptr)
};
// Records an error in "status": stores "code" and formats the printf-style
// message into status->msg (at most UPB_ERRORMSG_MAXLEN bytes).  If status
// already holds an error the call is a no-op, since the first error is the
// most interesting one.
void upb_seterr(upb_status *status, enum upb_status_code code,
                const char *msg, ...)
{
  if(!upb_ok(status)) return;

  va_list ap;
  status->code = code;
  va_start(ap, msg);
  vsnprintf(status->msg, UPB_ERRORMSG_MAXLEN, msg, ap);
  va_end(ap);
}

@ -1,500 +0,0 @@
/*
* upb - a minimalist implementation of protocol buffers.
*
* Copyright (c) 2009 Joshua Haberman. See LICENSE for details.
*/
#include <stdlib.h>
#include "upb_data.h"
#include "upb_decoder.h"
#include "upb_def.h"
// Rounds v up to the next power of two (values that already are a power of
// two are returned unchanged).  Works by smearing the highest set bit of v-1
// into all lower bits and then adding one.  Both 0 and values above 2^31
// yield 0 due to 32-bit wraparound.
static uint32_t round_up_to_pow2(uint32_t v)
{
  /* cf. http://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2 */
  uint32_t bits = v - 1;
  for(unsigned shift = 1; shift <= 16; shift <<= 1) {
    bits |= bits >> shift;
  }
  return bits + 1;
}
/* upb_data *******************************************************************/
// Releases one ref on the single element that p points to, which must be a
// submessage or a string according to f.  Any other field type trips an
// assertion.
static void data_elem_unref(upb_valueptr p, upb_fielddef *f) {
  if(upb_issubmsg(f)) {
    upb_msg_unref(*p.msg, upb_downcast_msgdef(f->def));
    return;
  }
  if(upb_isstring(f)) {
    upb_string_unref(*p.str);
    return;
  }
  assert(false);
}
// Releases one ref on the value that p points to: the whole array when f is
// an array field, otherwise the single element.
static void data_unref(upb_valueptr p, upb_fielddef *f) {
  if(!upb_isarray(f)) {
    data_elem_unref(p, f);
    return;
  }
  upb_array_unref(*p.arr, f);
}
// Initializes the header word of d: a refcount of one combined with the given
// flag bits.
INLINE void data_init(upb_data *d, int flags) {
  d->v = flags | REFCOUNT_ONE;
}
// Aborts the process if d is frozen.
static void check_not_frozen(upb_data *d) {
  // On one hand I am reluctant to put abort() calls in a low-level library
  // that are enabled in a production build.  On the other hand, this is a bug
  // in the client code that we cannot recover from, and it seems better to get
  // the error here than later.
  if(!upb_data_hasflag(d, UPB_DATA_FROZEN)) return;
  abort();
}
/* upb_string *******************************************************************/
// Stores the data pointer in whichever string representation s uses.
void _upb_string_setptr(upb_strptr s, char *ptr) {
  if(!upb_data_hasflag(s.base, UPB_DATA_REFCOUNTED)) {
    s.norefcount->ptr = ptr;
  } else {
    s.refcounted->ptr = ptr;
  }
}
// Stores the string length in whichever string representation s uses.
static void _upb_string_set_bytelen(upb_strptr s, upb_strlen_t newlen) {
  if(!upb_data_hasflag(s.base, UPB_DATA_REFCOUNTED)) {
    s.norefcount->byte_len = newlen;
    return;
  }
  s.refcounted->byte_len = newlen;
}
// Allocates a new, empty, refcounted string on the heap.  It starts with a
// refcount of one, no buffer (ptr == NULL) and zero size and length.
upb_strptr upb_string_new() {
  upb_strptr str;
  str.refcounted = malloc(sizeof(struct upb_refcounted_string));
  str.refcounted->ptr = NULL;
  str.refcounted->byte_len = 0;
  str.refcounted->byte_size = 0;
  data_init(str.base, UPB_DATA_HEAPALLOCATED | UPB_DATA_REFCOUNTED);
  return str;
}
// Returns the allocated size of the string's buffer.  Non-refcounted strings
// keep it in the bits above the low three of byte_size_and_flags.
static upb_strlen_t string_get_bytesize(upb_strptr s) {
  if(!upb_data_hasflag(s.base, UPB_DATA_REFCOUNTED)) {
    return (s.norefcount->byte_size_and_flags & 0xFFFFFFF8) >> 3;
  }
  return s.refcounted->byte_size;
}
// Stores the allocated buffer size.  For non-refcounted strings the low three
// bits of byte_size_and_flags are preserved and the size goes in the bits
// above them.
static void string_set_bytesize(upb_strptr s, upb_strlen_t newsize) {
  if(!upb_data_hasflag(s.base, UPB_DATA_REFCOUNTED)) {
    s.norefcount->byte_size_and_flags &= 0x7;
    s.norefcount->byte_size_and_flags |= (newsize << 3);
    return;
  }
  s.refcounted->byte_size = newsize;
}
// Frees the string object and, if it has a nonzero allocated size, its data
// buffer as well.
void _upb_string_free(upb_strptr s)
{
  if(string_get_bytesize(s) != 0) {
    void *buf = (void*)upb_string_getrobuf(s);
    free(buf);
  }
  free(s.base);
}
// Sets the string's length to byte_len, growing the buffer (to the next power
// of two) if the current allocation is too small.  Aborts if the string is
// frozen.  The buffer is never shrunk.
void upb_string_resize(upb_strptr s, upb_strlen_t byte_len) {
  check_not_frozen(s.base);
  if(string_get_bytesize(s) < byte_len) {
    size_t capacity = round_up_to_pow2(byte_len);
    char *grown = realloc(_upb_string_getptr(s), capacity);
    _upb_string_setptr(s, grown);
    string_set_bytesize(s, capacity);
  }
  _upb_string_set_bytelen(s, byte_len);
}
// Returns a string the caller owns a ref on.  If a ref of the requested kind
// can be taken on s, s itself is returned; otherwise a copy is made, and the
// copy is marked frozen when a frozen ref was requested.
upb_strptr upb_string_getref(upb_strptr s, int ref_flags) {
  if(!_upb_data_incref(s.base, ref_flags)) {
    upb_strptr dup = upb_strdup(s);
    if(ref_flags == UPB_REF_FROZEN) {
      upb_data_setflag(dup.base, UPB_DATA_FROZEN);
    }
    return dup;
  }
  return s;
}
// Reads the entire contents of "filename" into a newly allocated string, on
// which the caller owns one ref.  Returns UPB_STRING_NULL if the file cannot
// be opened, sized, or read.  An empty file yields an empty (non-null) string.
upb_strptr upb_strreadfile(const char *filename) {
  FILE *f = fopen(filename, "rb");
  if(!f) return UPB_STRING_NULL;
  // Starts out null so the error path can tell whether a string was allocated.
  upb_strptr s = UPB_STRING_NULL;
  if(fseek(f, 0, SEEK_END) != 0) goto error;
  long size = ftell(f);
  if(size < 0) goto error;
  if(fseek(f, 0, SEEK_SET) != 0) goto error;
  s = upb_string_new();
  char *buf = upb_string_getrwbuf(s, size);
  // fread() with a zero-sized item always returns 0, so only read (and only
  // treat a short count as failure) when there is something to read.
  if(size > 0 && fread(buf, size, 1, f) != 1) goto error;
  fclose(f);
  return s;

error:
  // Release the partially filled string so a failed read does not leak it.
  if(!upb_string_isnull(s)) upb_string_unref(s);
  fclose(f);
  return UPB_STRING_NULL;
}
// Returns a new string holding a copy of the NUL-terminated C string src (the
// terminator itself is not copied).
upb_strptr upb_strdupc(const char *src) {
  upb_strlen_t nbytes = strlen(src);
  upb_strptr dup = upb_string_new();
  memcpy(upb_string_getrwbuf(dup, nbytes), src, nbytes);
  return dup;
}
// Appends the contents of "append" to s in place, growing s as needed.
void upb_strcat(upb_strptr s, upb_strptr append) {
  upb_strlen_t old_len = upb_strlen(s);
  upb_strlen_t extra = upb_strlen(append);
  upb_strlen_t total = old_len + extra;
  char *dest = upb_string_getrwbuf(s, total) + old_len;
  memcpy(dest, upb_string_getrobuf(append), extra);
}
// Returns a new string containing a copy of up to len bytes of s starting at
// offset.  len is clamped to the number of bytes available after offset.
// NOTE(review): the clamp subtracts offset from the length with no visible
// check that offset <= upb_strlen(s) -- confirm callers guarantee this.
upb_strptr upb_strslice(upb_strptr s, int offset, int len) {
  upb_strptr out = upb_string_new();
  len = UPB_MIN((upb_strlen_t)len, upb_strlen(s) - (upb_strlen_t)offset);
  char *dest = upb_string_getrwbuf(out, len);
  memcpy(dest, upb_string_getrobuf(s) + offset, len);
  return out;
}
// Returns a new string whose contents are a copy of s.
upb_strptr upb_strdup(upb_strptr s) {
  upb_strptr dup = upb_string_new();
  upb_strcpylen(dup, upb_string_getrobuf(s), upb_strlen(s));
  return dup;
}
// Three-way comparison like strcmp(): compares the common prefix bytewise and,
// if it is equal, orders the strings by length.
int upb_strcmp(upb_strptr s1, upb_strptr s2) {
  upb_strlen_t shared = UPB_MIN(upb_strlen(s1), upb_strlen(s2));
  int diff = memcmp(upb_string_getrobuf(s1), upb_string_getrobuf(s2), shared);
  if(diff != 0) return diff;
  return (int)upb_strlen(s1) - (int)upb_strlen(s2);
}
/* upb_array ******************************************************************/
// Stores the element-buffer pointer in whichever array representation a uses.
static void _upb_array_setptr(upb_arrayptr a, void *ptr) {
  if(!upb_data_hasflag(a.base, UPB_DATA_REFCOUNTED)) {
    a.norefcount->elements._void = ptr;
  } else {
    a.refcounted->elements._void = ptr;
  }
}
// Stores the element count in whichever array representation a uses.
static void _upb_array_setlen(upb_arrayptr a, upb_strlen_t newlen) {
  if(!upb_data_hasflag(a.base, UPB_DATA_REFCOUNTED)) {
    a.norefcount->len = newlen;
    return;
  }
  a.refcounted->len = newlen;
}
// Allocates a new, empty, refcounted array on the heap.  It starts with a
// refcount of one, no element buffer and zero size and length.
upb_arrayptr upb_array_new() {
  upb_arrayptr arr;
  arr.refcounted = malloc(sizeof(struct upb_refcounted_array));
  arr.refcounted->elements._void = NULL;
  arr.refcounted->len = 0;
  arr.refcounted->size = 0;
  data_init(arr.base, UPB_DATA_HEAPALLOCATED | UPB_DATA_REFCOUNTED);
  return arr;
}
// ONLY handles refcounted arrays for the moment.
// Frees the array: for element types flagged by upb_elem_ismm(), every
// non-null slot in the allocated range (size, not len) is unref'd first; then
// the element buffer and the array object are freed.
void _upb_array_free(upb_arrayptr a, upb_fielddef *f)
{
  if(upb_elem_ismm(f)) {
    for(upb_arraylen_t idx = 0; idx < a.refcounted->size; idx++) {
      upb_valueptr slot = _upb_array_getptr(a, f, idx);
      if(*slot.data) data_elem_unref(slot, f);
    }
  }
  if(a.refcounted->size != 0) {
    free(a.refcounted->elements._void);
  }
  free(a.refcounted);
}
// Returns the allocated capacity of the array.  Non-refcounted arrays keep it
// in the bits above the low three of base.v.
static upb_arraylen_t array_get_size(upb_arrayptr a) {
  if(!upb_data_hasflag(a.base, UPB_DATA_REFCOUNTED)) {
    return (a.norefcount->base.v & 0xFFFFFFF8) >> 3;
  }
  return a.refcounted->size;
}
// Stores the allocated capacity.  For non-refcounted arrays the low three
// bits of base.v are preserved and the size goes in the bits above them.
static void array_set_size(upb_arrayptr a, upb_arraylen_t newsize) {
  if(!upb_data_hasflag(a.base, UPB_DATA_REFCOUNTED)) {
    a.norefcount->base.v &= 0x7;
    a.norefcount->base.v |= (newsize << 3);
    return;
  }
  a.refcounted->size = newsize;
}
// Sets the array's length to len.  If the current capacity is too small the
// element buffer is grown to the next power of two and the newly added slots
// are zero-filled.  Aborts if the array is frozen.
void upb_array_resize(upb_arrayptr a, upb_fielddef *f, upb_strlen_t len) {
  check_not_frozen(a.base);
  size_t elem_size = upb_types[f->type].size;
  upb_arraylen_t cur_size = array_get_size(a);
  if(cur_size < len) {
    size_t grown = round_up_to_pow2(len);
    void *old_elems = _upb_array_getptr_raw(a, 0, 0)._void;
    _upb_array_setptr(a, realloc(old_elems, grown * elem_size));
    array_set_size(a, grown);
    // Zero only the tail that did not exist before the reallocation.
    void *fresh = _upb_array_getptr_raw(a, cur_size, elem_size)._void;
    memset(fresh, 0, (grown - cur_size) * elem_size);
  }
  _upb_array_setlen(a, len);
}
/* upb_msg ********************************************************************/
// Marks field f as present in msg by setting bit number f->field_index in
// msg->data (bit i lives in byte i/8, at bit position i%8).
static void upb_msg_sethas(upb_msg *msg, upb_fielddef *f) {
msg->data[f->field_index/8] |= (1 << (f->field_index % 8));
}
// Allocates a zero-filled message of md->size bytes with a refcount of one,
// and takes a ref on its msgdef.
upb_msg *upb_msg_new(upb_msgdef *md) {
  upb_msg *m = malloc(md->size);
  memset(m, 0, md->size);
  data_init(&m->base, UPB_DATA_HEAPALLOCATED | UPB_DATA_REFCOUNTED);
  upb_def_ref(UPB_UPCAST(md));
  return m;
}
// ONLY handles refcounted messages for the moment.
// Frees the message: unrefs every non-null field flagged by upb_field_ismm(),
// drops the message's ref on its msgdef, and frees the message memory.
void _upb_msg_free(upb_msg *msg, upb_msgdef *md)
{
  for(int i = 0; i < md->num_fields; i++) {
    upb_fielddef *field = &md->fields[i];
    upb_valueptr slot = _upb_msg_getptr(msg, field);
    if(upb_field_ismm(field) && *slot.data) {
      data_unref(slot, field);
    }
  }
  upb_def_unref(UPB_UPCAST(md));
  free(msg);
}
// Clears msg and then decodes the serialized protobuf in str into it, using a
// temporary decoder and msgsink that are freed before returning.  Errors are
// reported through status.
void upb_msg_decodestr(upb_msg *msg, upb_msgdef *md, upb_strptr str,
                       upb_status *status)
{
  upb_decoder *decoder = upb_decoder_new(md);
  upb_msgsink *sink = upb_msgsink_new(md);

  upb_msgsink_reset(sink, msg);
  upb_decoder_reset(decoder, upb_msgsink_sink(sink));
  upb_msg_clear(msg, md);
  upb_decoder_decode(decoder, str, status);

  upb_decoder_free(decoder);
  upb_msgsink_free(sink);
}
// Disabled sketch of message encoding; it is compiled out by the "#if 0"
// below and is kept only as an outline of the intended two-pass approach
// (size computation first, then encoding).
#if 0
void upb_msg_encodestr(upb_msg *msg, upb_msgdef *md, upb_strptr str,
upb_status *status)
{
upb_sizebuilder *sb = upb_sizebuilder_new(md);
upb_encoder *e = upb_encoder_new(md);
upb_strsink *sink = upb_strsink_new();
// Get sizes. We could avoid performing this step in some cases by having a
// bool in the msgdef indicating whether it or any of its children have
// submessages in the def (groups don't count).
upb_sizebuilder_reset(sb);
upb_msgsrc_produce(msg, md, upb_sizebuilder_sink(sb), true);
upb_strsink_reset();
upb_encoder_reset(e, sb, sink);
upb_msgsrc_produce(msg, md, sink, false);
}
#endif
/* upb_msgsrc ****************************************************************/
// Emits a single value v of field f to the sink: submessages are bracketed by
// onstart/onend with their contents produced recursively, strings go through
// onstr over their full length, and everything else through onvalue.
static void _upb_msgsrc_produceval(upb_value v, upb_fielddef *f, upb_sink *sink,
                                   bool reverse, upb_status *status)
{
  // TODO: We need to check status for failure, but how often?
  if(upb_issubmsg(f)) {
    upb_msgdef *submd = upb_downcast_msgdef(f->def);
    upb_sink_onstart(sink, f, status);
    upb_msgsrc_produce(v.msg, submd, sink, reverse, status);
    upb_sink_onend(sink, f, status);
    return;
  }
  if(upb_isstring(f)) {
    upb_sink_onstr(sink, f, v.str, 0, upb_strlen(v.str), status);
    return;
  }
  upb_sink_onvalue(sink, f, v, status);
}
// Walks every field that is set in msg and emits its value(s) to the sink.
// With reverse == true, both the fields and the elements of each array are
// visited from last to first.
void upb_msgsrc_produce(upb_msg *msg, upb_msgdef *md, upb_sink *sink,
                        bool reverse, upb_status *status)
{
  for(int i = 0; i < md->num_fields; i++) {
    upb_fielddef *field = &md->fields[reverse ? md->num_fields - i - 1 : i];
    if(!upb_msg_has(msg, field)) continue;

    upb_value val = upb_msg_get(msg, field);
    if(!upb_isarray(field)) {
      _upb_msgsrc_produceval(val, field, sink, reverse, status);
      continue;
    }

    upb_arrayptr arr = val.arr;
    upb_arraylen_t count = upb_array_len(arr);
    for(upb_arraylen_t j = 0; j < upb_array_len(arr); j++) {
      upb_value elem = upb_array_get(arr, field, reverse ? count - j - 1 : j);
      _upb_msgsrc_produceval(elem, field, sink, reverse, status);
    }
  }
}
/* upb_msgsink ***************************************************************/
// One level of message nesting while sinking: the message being filled in and
// its msgdef.
typedef struct {
upb_msg *msg;
upb_msgdef *md;
} upb_msgsink_frame;
// A sink that stores incoming values into a upb_msg.  "base" comes first so
// that a upb_sink* can be cast back to upb_msgsink* (as the callbacks do).
// "top" points at the current frame within "stack".
struct upb_msgsink {
upb_sink base;
upb_msgdef *toplevel_msgdef;
upb_msgsink_frame stack[UPB_MAX_NESTING], *top;
};
/* Helper function that returns a pointer to where the next value for field "f"
* should be stored, taking into account whether f is an array that may need to
* be allocated or resized. */
static upb_valueptr get_valueptr(upb_msg *msg, upb_fielddef *f)
{
upb_valueptr p = _upb_msg_getptr(msg, f);
if(upb_isarray(f)) {
if(!upb_msg_has(msg, f)) {
// First element for this field: make sure we have an array we exclusively
// own (allocating a fresh one if the slot is null or shared), empty it,
// and mark the field as present.
if(upb_array_isnull(*p.arr) || !upb_data_only(*p.data)) {
if(!upb_array_isnull(*p.arr))
upb_array_unref(*p.arr, f);
*p.arr = upb_array_new();
}
upb_array_truncate(*p.arr);
upb_msg_sethas(msg, f);
} else {
assert(!upb_array_isnull(*p.arr));
}
// Append one slot and return a pointer to it.
upb_arraylen_t oldlen = upb_array_len(*p.arr);
upb_array_resize(*p.arr, f, oldlen + 1);
p = _upb_array_getptr(*p.arr, f, oldlen);
}
return p;
}
// Callbacks for upb_sink.
// TODO: implement these in terms of public interfaces.
// Sink callback for a non-string, non-submessage value: writes val into the
// slot for field f of the message at the top of the stack and marks f as set.
static upb_sink_status _upb_msgsink_valuecb(upb_sink *s, upb_fielddef *f,
                                            upb_value val, upb_status *status)
{
  (void)status;  // No detectable errors can occur.
  upb_msgsink *self = (upb_msgsink*)s;
  upb_msg *cur = self->top->msg;
  upb_valueptr slot = get_valueptr(cur, f);
  upb_msg_sethas(cur, f);
  upb_value_write(slot, val, f->type);
  return UPB_SINK_CONTINUE;
}
// Sink callback for a string value: copies bytes [start, end) of str into the
// string slot for field f of the current message.  The destination string is
// replaced by a fresh one unless it exists and has a single owner.  Aborts if
// end lies beyond the end of str.
static upb_sink_status _upb_msgsink_strcb(upb_sink *s, upb_fielddef *f,
                                          upb_strptr str,
                                          int32_t start, uint32_t end,
                                          upb_status *status)
{
  (void)status;  // No detectable errors can occur.
  upb_msgsink *self = (upb_msgsink*)s;
  upb_msg *cur = self->top->msg;
  upb_valueptr slot = get_valueptr(cur, f);
  upb_msg_sethas(cur, f);
  if(end > upb_strlen(str)) abort();  /* TODO: support streaming. */
  if(upb_string_isnull(*slot.str)) {
    *slot.str = upb_string_new();
  } else if(!upb_data_only(*slot.data)) {
    // Shared with another owner: drop our ref and write into a new string.
    upb_string_unref(*slot.str);
    *slot.str = upb_string_new();
  }
  upb_strcpylen(*slot.str, upb_string_getrobuf(str) + start, end - start);
  return UPB_SINK_CONTINUE;
}
// Sink callback for the start of a submessage for field f.  Pushes a new
// frame whose message is the submessage stored in the current message's slot
// for f, creating or replacing that submessage when the field is an array
// element or was not previously set.
static upb_sink_status _upb_msgsink_startcb(upb_sink *s, upb_fielddef *f,
upb_status *status)
{
(void)status; // No detectable errors can occur.
upb_msgsink *ms = (upb_msgsink*)s;
upb_msg *oldmsg = ms->top->msg;
upb_valueptr p = get_valueptr(oldmsg, f);
// NOTE(review): no bounds check against UPB_MAX_NESTING is visible here --
// confirm the caller limits nesting depth.
ms->top++;
if(upb_isarray(f) || !upb_msg_has(oldmsg, f)) {
upb_msgdef *md = upb_downcast_msgdef(f->def);
// Reuse the existing submessage only if it exists and we are its sole owner.
if(!*p.msg || !upb_data_only(*p.data)) {
if(*p.msg)
upb_msg_unref(*p.msg, md);
*p.msg = upb_msg_new(md);
}
upb_msg_clear(*p.msg, md);
upb_msg_sethas(oldmsg, f);
}
// NOTE(review): only the frame's msg is set; its md member is left
// unassigned for pushed frames.
ms->top->msg = *p.msg;
return UPB_SINK_CONTINUE;
}
// Sink callback for the end of a submessage: pops the current frame.
static upb_sink_status _upb_msgsink_endcb(upb_sink *s, upb_fielddef *f,
                                          upb_status *status)
{
  (void)f;       // Unused.
  (void)status;  // No detectable errors can occur.
  upb_msgsink *self = (upb_msgsink*)s;
  self->top--;
  return UPB_SINK_CONTINUE;
}
// Callback table installed on every upb_msgsink by upb_msgsink_new(); the
// entries are positional (value, string, start-submessage, end-submessage).
static upb_sink_callbacks _upb_msgsink_vtbl = {
_upb_msgsink_valuecb,
_upb_msgsink_strcb,
_upb_msgsink_startcb,
_upb_msgsink_endcb
};
//
// External upb_msgsink interface.
//
// Allocates a msgsink for messages of type md.  The frame stack is not
// initialized here; upb_msgsink_reset() sets it up.
upb_msgsink *upb_msgsink_new(upb_msgdef *md)
{
  upb_msgsink *sink = malloc(sizeof(*sink));
  upb_sink_init(&sink->base, &_upb_msgsink_vtbl);
  sink->toplevel_msgdef = md;
  return sink;
}
// Frees the memory of a msgsink allocated by upb_msgsink_new().
void upb_msgsink_free(upb_msgsink *sink)
{
  void *mem = sink;
  free(mem);
}
// Returns the generic upb_sink interface embedded in the msgsink.
upb_sink *upb_msgsink_sink(upb_msgsink *sink)
{
  upb_sink *base = &sink->base;
  return base;
}
// Resets the sink so that subsequent callbacks write into msg: the frame stack
// is rewound to its first entry, which is set to msg and the top-level msgdef.
void upb_msgsink_reset(upb_msgsink *ms, upb_msg *msg)
{
  upb_msgsink_frame *bottom = ms->stack;
  ms->top = bottom;
  bottom->msg = msg;
  bottom->md = ms->toplevel_msgdef;
}

@ -1,552 +0,0 @@
/*
* upb - a minimalist implementation of protocol buffers.
*
* Copyright (c) 2009 Joshua Haberman. See LICENSE for details.
*
* This file defines the in-memory format for messages, arrays, and strings
* (which are the three dynamically-allocated structures that make up all
* protobufs).
*
* The members of all structs should be considered private. Access should
* only happen through the provided functions.
*
* Unlike Google's protobuf, messages contain *pointers* to strings and arrays
* instead of including them by value. This makes unused strings and arrays
* use less memory, and lets the strings and arrays have multiple possible
* representations (for example, a string could be a slice). It also gives
* us more flexibility wrt refcounting. The cost is that when a field *is*
* being used, the net memory usage is one pointer more than if we had
* included the thing directly. */
#ifndef UPB_DATA_H
#define UPB_DATA_H
#include <assert.h>
#include <string.h>
#include "upb.h"
#include "upb_atomic.h"
#include "upb_def.h"
#include "upb_sink.h"
#ifdef __cplusplus
extern "C" {
#endif
/* upb_data *******************************************************************/
// The "base class" of strings, arrays, and messages. Contains a few flags and
// possibly a reference count. None of the functions for upb_data are public,
// but some of the constants are.
// typedef upb_atomic_refcount_t upb_data;
// The flags in upb_data.
typedef enum {
// Set if the object itself was allocated with malloc() and should be freed
// with free(). This flag would be false if the object was allocated on the
// stack or is data from the static segment of an object file. Note that this
// flag does not apply to the data being referenced by a string or array.
//
// If this flag is false, UPB_DATA_REFCOUNTED must be false also; there is
// no sense refcounting something that does not need to be freed.
UPB_DATA_HEAPALLOCATED = 1,
// Set if the object is frozen against modification. While an object is
// frozen, it is suitable for concurrent readonly access. Note that this
// flag alone is not a sufficient mechanism for preventing any kind of writes
// to the object's memory, because the object could still have a refcount.
UPB_DATA_FROZEN = (1<<1),
// Set if the object has an embedded refcount.
UPB_DATA_REFCOUNTED = (1<<2)
} upb_data_flag;
// The refcount shares a 32-bit word with the three flag bits above: the flags
// occupy bits 0-2 and the count occupies the remaining upper bits.
// REFCOUNT_ONE is the amount added to the word to raise the count by one.
#define REFCOUNT_MASK 0xFFFFFFF8
#define REFCOUNT_SHIFT 3
#define REFCOUNT_ONE (1<<REFCOUNT_SHIFT)
// Returns whether the given flag bit is set in d's header word.
INLINE bool upb_data_hasflag(upb_data *d, upb_data_flag flag) {
// We read this unsynchronized, because the is_frozen flag (the only flag
// that can change during the life of a upb_data) may not change if the
// data has more than one owner.
return d->v & flag;
}
// INTERNAL-ONLY
// Sets the given flag bit in d's header word (non-atomically).
INLINE void upb_data_setflag(upb_data *d, upb_data_flag flag) {
  d->v = d->v | flag;
}
// Returns the current reference count of d.  The header word is read
// atomically only when the object is frozen; otherwise it is read directly.
INLINE uint32_t upb_data_getrefcount(upb_data *d) {
  int word;
  if(!upb_data_hasflag(d, UPB_DATA_FROZEN)) {
    word = d->v;
  } else {
    word = upb_atomic_read(d);
  }
  return (word & REFCOUNT_MASK) >> REFCOUNT_SHIFT;
}
// Returns true if the given data has only one owner: either it has no
// refcount at all, or its refcount is exactly one.
INLINE bool upb_data_only(upb_data *data) {
  if(!upb_data_hasflag(data, UPB_DATA_REFCOUNTED)) return true;
  return upb_data_getrefcount(data) == 1;
}
// Specifies the type of ref that is requested based on the kind of access the
// caller needs to the object.
typedef enum {
// Use when the client plans to perform read-only access to the object, and
// only in one thread at a time. This imposes the least requirements on the
// object; it can be either frozen or not. As a result, requesting a
// reference of this type never performs a copy unless the object has no
// refcount.
//
// A ref of this type can always be explicitly converted to frozen or
// unfrozen later.
UPB_REF_THREADUNSAFE_READONLY = 0,
// Use when the client plans to perform read-only access, but from multiple
// threads concurrently. This will force the object to eagerly perform any
// parsing that may have been lazily deferred, and will force a copy if the
// object is not currently frozen.
//
// Asking for a reference of this type is equivalent to:
// x = getref(y, UPB_REF_THREADUNSAFE_READONLY);
// x = freeze(x);
// ...except it is more efficient.
UPB_REF_FROZEN = 1,
// Use when the client plans to perform read/write access. As a result, the
// reference will not be thread-safe for concurrent reading *or* writing; the
// object must be externally synchronized if it is being accessed from more
// than one thread. This will force a copy if the object is currently frozen.
//
// Asking for a reference of this type is equivalent to:
// x = getref(y, UPB_REF_THREADUNSAFE_READONLY);
// x = thaw(x);
// ...except it is more efficient.
UPB_REF_MUTABLE = 2
} upb_reftype;
// INTERNAL-ONLY FUNCTION:
// Tries to take a ref of the given type on d.  Returns false, without touching
// d, when that is not possible: a frozen ref on an unfrozen object, a mutable
// ref on a frozen object, or any ref on a heap-allocated object that has no
// refcount.  Otherwise increments the refcount and returns true.
INLINE bool _upb_data_incref(upb_data *d, upb_reftype reftype) {
  bool frozen = upb_data_hasflag(d, UPB_DATA_FROZEN);
  if(reftype == UPB_REF_FROZEN && !frozen) return false;
  if(reftype == UPB_REF_MUTABLE && frozen) return false;
  if(upb_data_hasflag(d, UPB_DATA_HEAPALLOCATED) &&
     !upb_data_hasflag(d, UPB_DATA_REFCOUNTED)) {
    return false;
  }
  // Increment the ref.  Only need to use atomic ops if the ref is frozen.
  if(!upb_data_hasflag(d, UPB_DATA_FROZEN)) {
    d->v += REFCOUNT_ONE;
  } else {
    upb_atomic_add(d, REFCOUNT_ONE);
  }
  return true;
}
// INTERNAL-ONLY FUNCTION:
// Releases a reference on d, returning true if the object should be deleted
// (i.e. the released ref was the last one).
INLINE bool _upb_data_unref(upb_data *d) {
  // Non heap-allocated data never should be deleted.
  if(!upb_data_hasflag(d, UPB_DATA_HEAPALLOCATED)) return false;

  // A heap-allocated object without a refcount should never be decref'd.
  // Its owner owns it exclusively and should free it directly.
  assert(upb_data_hasflag(d, UPB_DATA_REFCOUNTED));

  if(!upb_data_hasflag(d, UPB_DATA_FROZEN)) {
    d->v -= REFCOUNT_ONE;
    return (d->v & REFCOUNT_MASK) == 0;
  }
  // Frozen: decrement atomically and inspect the value seen before the
  // decrement.
  int32_t before = upb_atomic_fetch_and_add(d, -REFCOUNT_ONE);
  return (before & REFCOUNT_MASK) == REFCOUNT_ONE;
}
/* upb_string *****************************************************************/
// We have several different representations for string, depending on whether
// it has a refcount (and likely in the future, depending on whether it is a
// slice of another string). We could just have one representation with
// members that are sometimes unused, but this is wasteful in memory. The
// flags that are always part of the first word tell us which representation
// to use.
//
// In a way, this is like inheritance but instead of using a virtual pointer,
// we do switch/case in every "virtual" method. This may sound expensive but
// in many cases the different cases compile to exactly the same code, so there
// is no branch.
// Representation used for a string without a refcount.
struct upb_norefcount_string {
// Low three bits: the upb_data flags.  Remaining upper bits: the allocated
// size of the buffer (see string_get_bytesize/string_set_bytesize).
uint32_t byte_size_and_flags;
// Current length of the string in bytes.
upb_strlen_t byte_len;
// We expect the data to be 8-bit clean (uint8_t), but char* is such an
// ingrained convention that we follow it.
char *ptr;
};
// Used for a string with a refcount.
struct upb_refcounted_string {
// Flags and reference count.
upb_data base;
// Current length of the string in bytes.
upb_strlen_t byte_len;
// The string data.
char *ptr;
// Allocated size of the buffer at ptr (0 means no buffer to free).
uint32_t byte_size;
};
// Returns a newly constructed, refcounted string which starts out empty.
// Caller owns one ref on it. The returned string will not be frozen.
upb_strptr upb_string_new(void);
// INTERNAL-ONLY:
// Frees the given string, along with any memory the string owned.
void _upb_string_free(upb_strptr s);
// Returns a string to which caller owns a ref, and contains the same contents
// as src. The returned value may be a copy of src, if the requested flags
// were incompatible with src's.
upb_strptr upb_string_getref(upb_strptr s, int ref_flags);
#define UPB_STRING_NULL_INITIALIZER {NULL}
static const upb_strptr UPB_STRING_NULL = UPB_STRING_NULL_INITIALIZER;
// Returns true if s is the null string pointer (its base pointer is NULL).
INLINE bool upb_string_isnull(upb_strptr s) {
  return !s.base;
}
// The caller releases a ref on s, which it must previously have owned a ref
// on.  The string is freed when this was the last ref.
INLINE void upb_string_unref(upb_strptr s) {
  bool was_last = _upb_data_unref(s.base);
  if(was_last) _upb_string_free(s);
}
// The string is resized to byte_len. The string must not be frozen.
void upb_string_resize(upb_strptr s, upb_strlen_t len);
// Returns a buffer to which the caller may write.  The string is first resized
// to byte_len (which may or may not trigger a reallocation).  The string must
// not be frozen.
INLINE char *upb_string_getrwbuf(upb_strptr s, upb_strlen_t byte_len) {
  upb_string_resize(s, byte_len);
  if(!upb_data_hasflag(s.base, UPB_DATA_REFCOUNTED)) {
    return s.norefcount->ptr;
  }
  return s.refcounted->ptr;
}
// Sets the string's length to zero by requesting a zero-length write buffer;
// the returned buffer is not used.
INLINE void upb_string_clear(upb_strptr s) {
  (void)upb_string_getrwbuf(s, 0);
}
// INTERNAL-ONLY:
// Returns the raw data pointer from whichever representation s uses.
INLINE char *_upb_string_getptr(upb_strptr s) {
  if(!upb_data_hasflag(s.base, UPB_DATA_REFCOUNTED)) {
    return s.norefcount->ptr;
  }
  return s.refcounted->ptr;
}
// Returns a buffer that the caller may use to read the current contents of
// the string.  The number of bytes available is upb_strlen(s).
INLINE const char *upb_string_getrobuf(upb_strptr s) {
  const char *data = _upb_string_getptr(s);
  return data;
}
// Returns the current length of the string, read from whichever
// representation s uses.
INLINE upb_strlen_t upb_strlen(upb_strptr s) {
  if(!upb_data_hasflag(s.base, UPB_DATA_REFCOUNTED)) {
    return s.norefcount->byte_len;
  }
  return s.refcounted->byte_len;
}
/* upb_string library functions ***********************************************/
// Named like their <string.h> counterparts, these are all safe against buffer
// overflow. These only use the public upb_string interface.
// More efficient than upb_strcmp if all you need is to test equality: strings
// of different length are rejected without looking at their bytes.
INLINE bool upb_streql(upb_strptr s1, upb_strptr s2) {
  upb_strlen_t nbytes = upb_strlen(s1);
  if(nbytes != upb_strlen(s2)) return false;
  return memcmp(upb_string_getrobuf(s1), upb_string_getrobuf(s2), nbytes) == 0;
}
// Like strcmp().
int upb_strcmp(upb_strptr s1, upb_strptr s2);
// Like upb_strcpy, but copies from a buffer and length.  dest is resized to
// len bytes and then receives the first len bytes of src.
INLINE void upb_strcpylen(upb_strptr dest, const void *src, upb_strlen_t len) {
  // Always resize, even for len == 0, so that dest ends up empty.
  char *buf = upb_string_getrwbuf(dest, len);
  // memcpy() requires valid pointers even for a zero-length copy, so skip it
  // when there is nothing to copy (src may legitimately be NULL then).
  if (len > 0) memcpy(buf, src, len);
}
// Overwrites the contents of "dest" with a copy of the contents of "src".
INLINE void upb_strcpy(upb_strptr dest, upb_strptr src) {
  const char *src_bytes = upb_string_getrobuf(src);
  upb_strlen_t src_len = upb_strlen(src);
  upb_strcpylen(dest, src_bytes, src_len);
}
// Like upb_strcpy, but the source is a NUL-terminated C string.
INLINE void upb_strcpyc(upb_strptr dest, const char *src) {
  // src is walked twice (once by strlen, once by the copy); the alternative
  // would be growing dest repeatedly while copying, which seems worse.
  size_t src_len = strlen(src);
  upb_strcpylen(dest, src, src_len);
}
// Returns a new string whose contents are a copy of s.
upb_strptr upb_strdup(upb_strptr s);
// Like upb_strdup(), but the source is a raw buffer and length.  A fresh
// string is obtained from upb_string_new() and filled with len bytes of src.
INLINE upb_strptr upb_strduplen(const void *src, upb_strlen_t len) {
  upb_strptr copy = upb_string_new();
  upb_strcpylen(copy, src, len);
  return copy;
}
// Like upb_strdup(), but duplicates a C NULL-terminated string.
upb_strptr upb_strdupc(const char *src);
// Appends 'append' to 's' in-place, resizing s if necessary.
void upb_strcat(upb_strptr s, upb_strptr append);
// Returns a string that is a substring of the given string. Currently this
// returns a copy, but in the future this may return an object that references
// the original string data instead of copying it. Both now and in the future,
// the caller owns a ref on whatever is returned.
upb_strptr upb_strslice(upb_strptr s, int offset, int len);
// Reads an entire file into a newly-allocated string (caller owns one ref).
upb_strptr upb_strreadfile(const char *filename);
// Typedef for a read-only string that is allocated statically or on the stack.
// Initialize with the given macro, which must resolve to a const char*. You
// must not dynamically allocate this type. Example usage:
//
// upb_static_string mystr = UPB_STATIC_STRING_INIT("biscuits");
// upb_strptr mystr_ptr = UPB_STATIC_STRING_PTR_INIT(mystr);
//
// If C99 compound literals are available, the much nicer UPB_STRLIT macro is
// available instead:
//
// upb_strptr mystr_ptr = UPB_STRLIT("biscuits");
//
// UPB_STATIC_STRING_INIT computes the length as sizeof(str)-1, so its argument
// must be a string literal or char array (for a pointer, sizeof would yield
// the pointer size); use UPB_STATIC_STRING_INIT_LEN to supply the length
// explicitly.  The initializers set UPB_DATA_FROZEN in the first member.
typedef struct upb_norefcount_string upb_static_string;
#define UPB_STATIC_STRING_INIT_LEN(str, len) {0 | UPB_DATA_FROZEN, len, str}
#define UPB_STATIC_STRING_INIT(str) UPB_STATIC_STRING_INIT_LEN(str, sizeof(str)-1)
#define UPB_STATIC_STRING_PTR_INIT(static_string) {&static_string}
#define UPB_STRLIT(str) (upb_strptr){&(upb_static_string)UPB_STATIC_STRING_INIT(str)}
// Allows using upb_strings in printf, i.e.:
// upb_strptr str = UPB_STRLIT("Hello, World!\n");
// printf("String is: " UPB_STRFMT, UPB_STRARG(str));
//
// UPB_STRARG expands to two printf arguments (length, then data pointer) that
// match the "%.*s" conversion in UPB_STRFMT.
// NOTE(review): "%.*s" takes its precision as an int; confirm that
// upb_strlen_t is int-compatible when passed through varargs.
#define UPB_STRARG(str) upb_strlen(str), upb_string_getrobuf(str)
#define UPB_STRFMT "%.*s"
/* upb_array ******************************************************************/
// The comments attached to upb_string above also apply here.
//
// Array representation used when UPB_DATA_REFCOUNTED is not set on base (see
// upb_array_len and _upb_array_getptr_raw, which pick this view in that case).
struct upb_norefcount_array {
upb_data base; // We co-opt the refcount for the size.
upb_arraylen_t len; // Number of elements; returned by upb_array_len().
upb_valueptr elements; // Element storage; indexed bytewise via .uint8.
};
// Array representation used when UPB_DATA_REFCOUNTED is set on base.  The
// first three members mirror upb_norefcount_array.
struct upb_refcounted_array {
upb_data base;
upb_arraylen_t len; // Number of elements; returned by upb_array_len().
upb_valueptr elements; // Element storage; indexed bytewise via .uint8.
upb_arraylen_t size; // Presumably the allocated capacity -- TODO confirm.
};
// Statically-initialized arrays use the non-refcounted representation.  The
// initializer sets UPB_DATA_FROZEN in base, stores len, and points
// elements._void at arr.
typedef struct upb_norefcount_array upb_static_array;
#define UPB_STATIC_ARRAY_INIT(arr, len) {{0 | UPB_DATA_FROZEN}, len, {._void=arr}}
#define UPB_STATIC_ARRAY_PTR_TYPED_INIT(static_arr) {{&static_arr}}
// A null array handle; upb_array_isnull() tests for it by comparing .base to
// NULL.
#define UPB_ARRAY_NULL_INITIALIZER {NULL}
static const upb_arrayptr UPB_ARRAY_NULL = UPB_ARRAY_NULL_INITIALIZER;
// Reports whether a is a null array handle (its base pointer is NULL).
INLINE bool upb_array_isnull(upb_arrayptr a) {
  return a.base == NULL;
}
// Identity comparison: true when both handles refer to the same underlying
// array object (their base pointers are equal).  Contents are not compared.
INLINE bool upb_array_ptreql(upb_arrayptr a1, upb_arrayptr a2) {
  bool same_object = (a1.base == a2.base);
  return same_object;
}
// UPB_MSG_ARRAYPTR(type) names the typed array wrapper for message type
// "type" (type_array).
//
// UPB_DEFINE_MSG_ARRAY(type) defines that wrapper -- a struct holding a single
// upb_arrayptr -- plus two typed accessors:
//   type_array_len(a)       -> number of elements (via upb_array_len)
//   type_array_get(a, elem) -> the type* stored at index elem; elements are
//                              read as pointer-sized slots (sizeof(void*)).
// No bounds check is performed by type_array_get.
#define UPB_MSG_ARRAYPTR(type) type ## _array
#define UPB_DEFINE_MSG_ARRAY(type) \
typedef struct { upb_arrayptr ptr; } UPB_MSG_ARRAYPTR(type); \
INLINE upb_arraylen_t type ## _array_len(UPB_MSG_ARRAYPTR(type) a) { \
return upb_array_len(a.ptr); \
} \
INLINE type* type ## _array_get(UPB_MSG_ARRAYPTR(type) a, upb_arraylen_t elem) { \
return *(type**)_upb_array_getptr_raw(a.ptr, elem, sizeof(void*))._void; \
}
// Constructs a newly-allocated, reference-counted array which starts out
// empty. Caller owns one ref on it.
upb_arrayptr upb_array_new(void);
// Returns how many elements the array currently holds (its len field).
INLINE size_t upb_array_len(upb_arrayptr a) {
  if (!upb_data_hasflag(a.base, UPB_DATA_REFCOUNTED)) {
    return a.norefcount->len;
  }
  return a.refcounted->len;
}
// INTERNAL-ONLY:
// Frees the given message and releases references on members.
void _upb_array_free(upb_arrayptr a, upb_fielddef *f);
// INTERNAL-ONLY:
// Returns a pointer to element number "elem", where every element occupies
// type_size bytes.  No bounds checking is done here.
INLINE upb_valueptr _upb_array_getptr_raw(upb_arrayptr a, upb_arraylen_t elem,
                                          size_t type_size) {
  size_t byte_offset = elem * type_size;
  upb_valueptr result;
  if (!upb_data_hasflag(a.base, UPB_DATA_REFCOUNTED)) {
    result._void = &a.norefcount->elements.uint8[byte_offset];
  } else {
    result._void = &a.refcounted->elements.uint8[byte_offset];
  }
  return result;
}
// Returns a pointer to element "elem", taking the element size from the
// upb_types entry for the field's type.
INLINE upb_valueptr _upb_array_getptr(upb_arrayptr a, upb_fielddef *f,
                                      upb_arraylen_t elem) {
  size_t elem_size = upb_types[f->type].size;
  return _upb_array_getptr_raw(a, elem, elem_size);
}
// Reads element "elem" of the array as a value of the field's type.  The index
// is checked against the array length only via assert().
INLINE upb_value upb_array_get(upb_arrayptr a, upb_fielddef *f,
                               upb_arraylen_t elem) {
  assert(elem < upb_array_len(a));
  upb_valueptr slot = _upb_array_getptr(a, f, elem);
  return upb_value_read(slot, f->type);
}
// Drops one reference on the array; the caller must have owned it.  If
// _upb_data_unref() reports true for the drop, the array is freed with
// _upb_array_free(), which also receives the field definition f.
INLINE void upb_array_unref(upb_arrayptr a, upb_fielddef *f) {
  bool should_free = _upb_data_unref(a.base);
  if (should_free) _upb_array_free(a, f);
}
#if 0
// Returns an array to which caller owns a ref, and contains the same contents
// as src. The returned value may be a copy of src, if the requested flags
// were incompatible with src's.
INLINE upb_arrayptr upb_array_getref(upb_arrayptr src, int ref_flags);
// Sets the given element in the array to val. The current length of the array
// must be greater than elem. If the field type is dynamic, the array will
// take a ref on val and release a ref on what was previously in the array.
INLINE void upb_array_set(upb_arrayptr a, upb_fielddef *f, int elem,
upb_value val);
// Note that array_append will attempt to take a reference on the given value,
// so to avoid a copy use append_default and get.
INLINE void upb_array_append(upb_arrayptr a, upb_fielddef *f,
upb_value val);
INLINE void upb_array_append_default(upb_arrayptr a, upb_fielddef *f,
upb_value val);
#endif
// Sets the array's length to zero.  Only the len field is written; the element
// storage itself is not touched here.
INLINE void upb_array_truncate(upb_arrayptr a) {
  if (!upb_data_hasflag(a.base, UPB_DATA_REFCOUNTED)) {
    a.norefcount->len = 0;
    return;
  }
  a.refcounted->len = 0;
}
/* upb_msg ********************************************************************/
// Note that some inline functions for upb_msg are defined in upb_def.h since
// they rely on the defs.
//
// Layout of data[], as used by the accessors in this header:
//  - upb_msg_has() reads one "is set" bit per field at bit f->field_index,
//    and upb_msg_clear() zeroes the first md->set_flags_bytes bytes.
//  - _upb_msg_getptr() locates a field's value at data[f->byte_offset].
struct _upb_msg {
upb_data base;
uint8_t data[4]; // We allocate the appropriate amount per message.
};
// Creates a new msg of the given type.
upb_msg *upb_msg_new(upb_msgdef *md);
// INTERNAL-ONLY:
// Frees the given message and releases references on members.
void _upb_msg_free(upb_msg *msg, upb_msgdef *md);
// INTERNAL-ONLY:
// Returns a pointer to the storage of field f inside msg, located
// f->byte_offset bytes into the message's data area.
INLINE upb_valueptr _upb_msg_getptr(upb_msg *msg, upb_fielddef *f) {
  uint8_t *field_storage = &msg->data[f->byte_offset];
  upb_valueptr result;
  result._void = field_storage;
  return result;
}
// Releases one reference on msg.  If _upb_data_unref() reports true for the
// drop, the message is freed with _upb_msg_free().
INLINE void upb_msg_unref(upb_msg *msg, upb_msgdef *md) {
  bool should_free = _upb_data_unref(&msg->base);
  if (should_free) _upb_msg_free(msg, md);
}
// Reports whether field f is explicitly set in msg (as opposed to reading as
// its default).  Each field owns one bit, at position f->field_index, in the
// bit array at the start of msg->data.
INLINE bool upb_msg_has(upb_msg *msg, upb_fielddef *f) {
  uint8_t flag_byte = msg->data[f->field_index/8];
  int flag_bit = 1 << (f->field_index % 8);
  return (flag_byte & flag_bit) != 0;
}
// Returns the value of field f: the stored value when the field is set,
// otherwise the field's default_value.  The caller does *not* own a ref on
// the result.
INLINE upb_value upb_msg_get(upb_msg *msg, upb_fielddef *f) {
  if (!upb_msg_has(msg, f)) return f->default_value;
  return upb_value_read(_upb_msg_getptr(msg, f), f->type);
}
// Sets the given field to the given value. The msg will take a ref on val,
// and will drop a ref on whatever was there before.
void upb_msg_set(upb_msg *msg, upb_fielddef *f, upb_value val);
// Marks every field of msg as unset by zeroing the first md->set_flags_bytes
// bytes of msg->data (the "is set" bits tested by upb_msg_has()).  The field
// storage beyond those bytes is left untouched by this function.
INLINE void upb_msg_clear(upb_msg *msg, upb_msgdef *md) {
  size_t flag_bytes = md->set_flags_bytes;
  memset(msg->data, 0, flag_bytes);
}
// A convenience function for decoding an entire protobuf all at once, without
// having to worry about setting up the appropriate objects.
void upb_msg_decodestr(upb_msg *msg, upb_msgdef *md, upb_strptr str,
upb_status *status);
// A convenience function for encoding an entire protobuf all at once. The
// function itself returns nothing; if an error occurs, the status object
// contains the error. (Presumably the encoded output goes to str -- TODO
// confirm against the implementation.)
void upb_msg_encodestr(upb_msg *msg, upb_msgdef *md, upb_strptr str,
upb_status *status);
/* upb_msgsrc *****************************************************************/
// A nonresumable, non-interruptable (but simple and fast) source for pushing
// the data of a upb_msg to a upb_sink.
void upb_msgsrc_produce(upb_msg *msg, upb_msgdef *md, upb_sink *sink,
bool reverse, upb_status *status);
/* upb_msgsink ****************************************************************/
// A upb_msgsink can accept the data from a source and write it into a message.
struct upb_msgsink;
typedef struct upb_msgsink upb_msgsink;
// Allocate and free a msgsink, respectively.
upb_msgsink *upb_msgsink_new(upb_msgdef *md);
void upb_msgsink_free(upb_msgsink *sink);
// Returns the upb_sink (like an upcast).
upb_sink *upb_msgsink_sink(upb_msgsink *sink);
// Resets the msgsink for the given msg.
void upb_msgsink_reset(upb_msgsink *sink, upb_msg *msg);
#ifdef __cplusplus
} /* extern "C" */
#endif
#endif

@ -1,494 +0,0 @@
/*
* upb - a minimalist implementation of protocol buffers.
*
* Copyright (c) 2008-2009 Joshua Haberman. See LICENSE for details.
*/
#include "upb_decoder.h"
#include <inttypes.h>
#include <stddef.h>
#include <stdlib.h>
#include "upb_def.h"
/* Functions to read wire values. *********************************************/
// These functions are internal to the decode, but might be moved into an
// internal header file if we at some point in the future opt to do code
// generation, because the generated code would want to inline these functions.
// The same applies to the functions to read .proto values below.
const uint8_t *upb_get_v_uint64_t_full(const uint8_t *buf, const uint8_t *end,
uint64_t *val, upb_status *status);
// Gets a varint (wire type: UPB_WIRE_TYPE_VARINT).
//
// [buf, end) is the available input.  On success *val receives the decoded
// value and the return value points at the first unconsumed byte.  Failures
// are reported through status by upb_get_v_uint64_t_full().
INLINE const uint8_t *upb_get_v_uint64_t(const uint8_t *buf, const uint8_t *end,
                                         uint64_t *val, upb_status *status)
{
  // We inline this common case (1-byte varints).  buf must be compared with
  // end before it is dereferenced: callers can arrive here with buf == end
  // (e.g. when a tag was the last byte of the input).
  if(buf < end && (*buf & 0x80) == 0) {
    *val = *buf & 0x7f;
    return buf + 1;
  }
  // Everything else -- multi-byte varints and exhausted input -- is handled by
  // the full (non-inlined) version, which never reads at or beyond end.
  return upb_get_v_uint64_t_full(buf, end, val, status);
}
// Gets a varint of which only the low 32 bits are wanted.  Note that a 32-bit
// varint is not a true wire type: a full 64-bit varint is decoded and then
// truncated.
INLINE const uint8_t *upb_get_v_uint32_t(const uint8_t *buf, const uint8_t *end,
                                         uint32_t *val, upb_status *status)
{
  uint64_t wide;
  const uint8_t *next = upb_get_v_uint64_t(buf, end, &wide, status);
  *val = (uint32_t)wide;  // Discard the high bits.
  return next;
}
// Gets a fixed-length 32-bit integer (wire type: UPB_WIRE_TYPE_32BIT).
//
// If fewer than 4 bytes remain before end, sets status->code to
// UPB_STATUS_NEED_MORE_DATA, leaves *val untouched and returns end.  Otherwise
// stores the value in *val and returns the pointer just past it.
INLINE const uint8_t *upb_get_f_uint32_t(const uint8_t *buf, const uint8_t *end,
uint32_t *val, upb_status *status)
{
const uint8_t *uint32_end = buf + sizeof(uint32_t);
if(uint32_end > end) {
status->code = UPB_STATUS_NEED_MORE_DATA;
return end;
}
// NOTE(review): the direct load below yields host byte order, while the
// byte-wise path assembles a little-endian value; presumably
// UPB_UNALIGNED_READS_OK is only enabled on little-endian hosts -- confirm.
#if UPB_UNALIGNED_READS_OK
*val = *(uint32_t*)buf;
#else
// Portable path: assemble the value from bytes, least significant first.
#define SHL(val, bits) ((uint32_t)val << bits)
*val = SHL(buf[0], 0) | SHL(buf[1], 8) | SHL(buf[2], 16) | SHL(buf[3], 24);
#undef SHL
#endif
return uint32_end;
}
// Gets a fixed-length 64-bit integer (wire type: UPB_WIRE_TYPE_64BIT).
//
// If fewer than 8 bytes remain before end, sets status->code to
// UPB_STATUS_NEED_MORE_DATA, leaves *val untouched and returns end.  Otherwise
// stores the value in *val and returns the pointer just past it.
INLINE const uint8_t *upb_get_f_uint64_t(const uint8_t *buf, const uint8_t *end,
uint64_t *val, upb_status *status)
{
const uint8_t *uint64_end = buf + sizeof(uint64_t);
if(uint64_end > end) {
status->code = UPB_STATUS_NEED_MORE_DATA;
return end;
}
// NOTE(review): the direct load below yields host byte order, while the
// byte-wise path assembles a little-endian value; presumably
// UPB_UNALIGNED_READS_OK is only enabled on little-endian hosts -- confirm.
#if UPB_UNALIGNED_READS_OK
*val = *(uint64_t*)buf;
#else
// Portable path: assemble the value from bytes, least significant first.
#define SHL(val, bits) ((uint64_t)val << bits)
*val = SHL(buf[0], 0) | SHL(buf[1], 8) | SHL(buf[2], 16) | SHL(buf[3], 24) |
SHL(buf[4], 32) | SHL(buf[5], 40) | SHL(buf[6], 48) | SHL(buf[7], 56);
#undef SHL
#endif
return uint64_end;
}
// Skips over a varint without decoding it.  Returns a pointer just past the
// varint on success.  On failure status->code is set and end is returned:
//  - UPB_STATUS_NEED_MORE_DATA when the input ran out (within 10 bytes) while
//    the last byte seen still had its continuation bit set;
//  - UPB_ERROR_UNTERMINATED_VARINT when more than 10 bytes were consumed.
INLINE const uint8_t *upb_skip_v_uint64_t(const uint8_t *buf,
const uint8_t *end,
upb_status *status)
{
const uint8_t *const maxend = buf + 10;
// Starts with the continuation bit set so the loop runs at least once when
// input is available.
uint8_t last = 0x80;
// Advance until a byte without the continuation bit, or until end.
for(; buf < (uint8_t*)end && (last & 0x80); buf++)
last = *buf;
if(buf >= end && buf <= maxend && (last & 0x80)) {
status->code = UPB_STATUS_NEED_MORE_DATA;
buf = end;
} else if(buf > maxend) {
status->code = UPB_ERROR_UNTERMINATED_VARINT;
buf = end;
}
return buf;
}
// Skips over a fixed 32-bit value.  Returns the pointer just past it, or, when
// fewer than 4 bytes remain, sets status->code to UPB_STATUS_NEED_MORE_DATA
// and returns end.
INLINE const uint8_t *upb_skip_f_uint32_t(const uint8_t *buf,
                                          const uint8_t *end,
                                          upb_status *status)
{
  const uint8_t *after = buf + sizeof(uint32_t);
  if(after <= end) return after;
  status->code = UPB_STATUS_NEED_MORE_DATA;
  return end;
}
// Skips over a fixed 64-bit value.  Returns the pointer just past it, or, when
// fewer than 8 bytes remain, sets status->code to UPB_STATUS_NEED_MORE_DATA
// and returns end.
INLINE const uint8_t *upb_skip_f_uint64_t(const uint8_t *buf,
                                          const uint8_t *end,
                                          upb_status *status)
{
  const uint8_t *after = buf + sizeof(uint64_t);
  if(after <= end) return after;
  status->code = UPB_STATUS_NEED_MORE_DATA;
  return end;
}
/* Functions to read .proto values. *******************************************/
// Performs zig-zag decoding, which is used by sint32 and sint64.
// The low bit of n selects the sign; the remaining bits are the magnitude.
INLINE int32_t upb_zzdec_32(uint32_t n) {
  uint32_t shifted = n >> 1;
  int32_t sign_mask = -(int32_t)(n & 1);  // 0, or -1 (all bits set).
  return shifted ^ sign_mask;
}
// 64-bit zig-zag decoding: the low bit of n selects the sign; the remaining
// bits are the magnitude.
INLINE int64_t upb_zzdec_64(uint64_t n) {
  uint64_t shifted = n >> 1;
  int64_t sign_mask = -(int64_t)(n & 1);  // 0, or -1 (all bits set).
  return shifted ^ sign_mask;
}
// Use macros to define a set of two functions for each .proto type:
//
// // Reads and converts a .proto value from buf, placing it in d.
// // "end" indicates the end of the current buffer (if the buffer does
// // not contain the entire value UPB_STATUS_NEED_MORE_DATA is returned).
// // On success, a pointer will be returned to the first byte that was
// // not consumed.
// const uint8_t *upb_get_INT32(const uint8_t *buf, const uint8_t *end,
// int32_t *d, upb_status *status);
//
// // Given an already read wire value s (source), convert it to a .proto
// // value and return it.
// int32_t upb_wvtov_INT32(uint32_t s);
//
// These are the most efficient functions to call if you want to decode a value
// for a known type.
//
// Macro parameters:
//   type        - .proto type suffix used in the generated names (e.g. INT32).
//   v_or_f      - "v" to read a varint, "f" to read a fixed-width value; picks
//                 upb_get_v_<wire_t> or upb_get_f_<wire_t>.
//   wire_t      - C type of the value as read from the wire.
//   val_t       - C type of the converted .proto value.
//   member_name - not referenced by the bodies of these macros.

// WVTOV expands to the signature of upb_wvtov_<type>; the body follows at the
// point of use.
#define WVTOV(type, wire_t, val_t) \
INLINE val_t upb_wvtov_ ## type(wire_t s)
// GET defines upb_get_<type>.  tmp starts at 0 and *d is always written, even
// when the underlying read reports an error through status.
#define GET(type, v_or_f, wire_t, val_t, member_name) \
INLINE const uint8_t *upb_get_ ## type(const uint8_t *buf, const uint8_t *end, \
val_t *d, upb_status *status) { \
wire_t tmp = 0; \
const uint8_t *ret = upb_get_ ## v_or_f ## _ ## wire_t(buf, end, &tmp, status); \
*d = upb_wvtov_ ## type(tmp); \
return ret; \
}
// T emits, in order: a prototype for upb_wvtov_<type>, the definition of
// upb_get_<type>, and the opening signature of upb_wvtov_<type>, whose body is
// the brace block written after each T(...) use.
#define T(type, v_or_f, wire_t, val_t, member_name) \
WVTOV(type, wire_t, val_t); /* prototype for GET below */ \
GET(type, v_or_f, wire_t, val_t, member_name) \
WVTOV(type, wire_t, val_t)
// One T(...) line per .proto scalar type.  The brace block after each use is
// the body of upb_wvtov_<TYPE>(s), converting the raw wire value s to the
// .proto value.  Types read through a uint32_t wire type only see the low 32
// bits of a varint (see upb_get_v_uint32_t).
T(INT32, v, uint32_t, int32_t, int32) { return (int32_t)s; }
T(INT64, v, uint64_t, int64_t, int64) { return (int64_t)s; }
T(UINT32, v, uint32_t, uint32_t, uint32) { return s; }
T(UINT64, v, uint64_t, uint64_t, uint64) { return s; }
T(SINT32, v, uint32_t, int32_t, int32) { return upb_zzdec_32(s); }
T(SINT64, v, uint64_t, int64_t, int64) { return upb_zzdec_64(s); }
T(FIXED32, f, uint32_t, uint32_t, uint32) { return s; }
T(FIXED64, f, uint64_t, uint64_t, uint64) { return s; }
T(SFIXED32, f, uint32_t, int32_t, int32) { return (int32_t)s; }
T(SFIXED64, f, uint64_t, int64_t, int64) { return (int64_t)s; }
T(BOOL, v, uint32_t, bool, _bool) { return (bool)s; }
T(ENUM, v, uint32_t, int32_t, int32) { return (int32_t)s; }
// DOUBLE and FLOAT reinterpret the wire bits through the upb_value union
// rather than converting numerically.
T(DOUBLE, f, uint64_t, double, _double) {
upb_value v;
v.uint64 = s;
return v._double;
}
T(FLOAT, f, uint32_t, float, _float) {
upb_value v;
v.uint32 = s;
return v._float;
}
#undef WVTOV
#undef GET
#undef T
// Parses a tag and stores it in *tag.  The tag is a varint whose low three
// bits are the wire type and whose remaining bits are the field number.
// Returns the pointer produced by the underlying varint read.
INLINE const uint8_t *decode_tag(const uint8_t *buf, const uint8_t *end,
                                 upb_tag *tag, upb_status *status)
{
  uint32_t raw_tag;
  const uint8_t *next = upb_get_v_uint32_t(buf, end, &raw_tag, status);
  tag->field_number = raw_tag >> 3;
  tag->wire_type = (upb_wire_type_t)(raw_tag & 0x07);
  return next;
}
// Parses a 64-bit varint from [buf, end).  The inline wrapper
// upb_get_v_uint64_t() handles complete single-byte varints itself and
// dispatches everything else here.
//
// On success *val holds the decoded value and the return value points at the
// first unconsumed byte.  On failure status is set and end is returned:
//  - UPB_STATUS_NEED_MORE_DATA if the input ended in the middle of the varint;
//  - UPB_ERROR_UNTERMINATED_VARINT if 10 bytes were read without finding a
//    byte whose continuation bit is clear.
const uint8_t *upb_get_v_uint64_t_full(const uint8_t *buf, const uint8_t *end,
                                       uint64_t *val, upb_status *status)
{
  // A 64-bit varint occupies at most 10 bytes, i.e. bit positions 0..63 in
  // steps of 7.  Capping bitpos keeps the shift count below 64 (shifting a
  // uint64_t by 64 or more is undefined) and stops us from consuming an
  // eleventh byte.
  const int max_bitpos = 7 * 10;
  uint8_t last = 0x80;  // Continuation bit set so the loop is entered.
  *val = 0;
  int bitpos;
  for(bitpos = 0; bitpos < max_bitpos && buf < end && (last & 0x80);
      buf++, bitpos += 7)
    *val |= ((uint64_t)((last = *buf) & 0x7F)) << bitpos;
  if(last & 0x80) {
    // The loop stopped without seeing a terminating byte.
    if(buf >= end) {
      upb_seterr(status, UPB_STATUS_NEED_MORE_DATA,
                 "Provided data ended in the middle of a varint.\n");
    } else {
      upb_seterr(status, UPB_ERROR_UNTERMINATED_VARINT,
                 "Varint was unterminated after 10 bytes.\n");
    }
    buf = end;
  }
  return buf;
}
// Reads one wire value of type wt from [buf, end) into the matching member of
// *wv and returns the pointer produced by the underlying reader.  Only varint,
// 64-bit and 32-bit wire types are handled; for any other type status->code is
// set to UPB_STATUS_ERROR and end is returned.
const uint8_t *upb_decode_wire_value(uint8_t *buf, uint8_t *end,
                                     upb_wire_type_t wt, upb_wire_value *wv,
                                     upb_status *status)
{
  if(wt == UPB_WIRE_TYPE_VARINT)
    return upb_get_v_uint64_t(buf, end, &wv->varint, status);
  if(wt == UPB_WIRE_TYPE_64BIT)
    return upb_get_f_uint64_t(buf, end, &wv->_64bit, status);
  if(wt == UPB_WIRE_TYPE_32BIT)
    return upb_get_f_uint32_t(buf, end, &wv->_32bit, status);
  status->code = UPB_STATUS_ERROR;  // Doesn't handle delimited, groups.
  return end;
}
// Advances past the current wire value (of type wt) and returns the new
// position.  Varint, 64-bit and 32-bit values are skipped by the matching
// upb_skip_* helper.  Start-group and end-group tags carry no value of their
// own, so buf is returned unchanged.  Any other wire type sets status->code to
// UPB_STATUS_ERROR and returns end.
static const uint8_t *skip_wire_value(const uint8_t *buf, const uint8_t *end,
upb_wire_type_t wt, upb_status *status)
{
switch(wt) {
case UPB_WIRE_TYPE_VARINT:
return upb_skip_v_uint64_t(buf, end, status);
case UPB_WIRE_TYPE_64BIT:
return upb_skip_f_uint64_t(buf, end, status);
case UPB_WIRE_TYPE_32BIT:
return upb_skip_f_uint32_t(buf, end, status);
case UPB_WIRE_TYPE_START_GROUP:
// TODO: skip to matching end group.
// (Intentional fall-through: both group tags currently return buf.)
case UPB_WIRE_TYPE_END_GROUP:
return buf;
default:
status->code = UPB_STATUS_ERROR;
return end;
}
}
// Decodes one scalar value of .proto type ft from [buf, end) into the storage
// that v points to, dispatching to the matching upb_get_<TYPE> function.
// Returns that function's result (the first unconsumed byte on success).
// Types without a CASE below (e.g. strings, messages, groups) hit the default
// branch, which returns end without setting status.
static const uint8_t *upb_decode_value(const uint8_t *buf, const uint8_t *end,
upb_field_type_t ft, upb_valueptr v,
upb_status *status)
{
// CASE(t, member_name): for type t, call upb_get_t with the v member that has
// the matching pointer type.
#define CASE(t, member_name) \
case UPB_TYPE(t): return upb_get_ ## t(buf, end, v.member_name, status);
switch(ft) {
CASE(DOUBLE, _double)
CASE(FLOAT, _float)
CASE(INT32, int32)
CASE(INT64, int64)
CASE(UINT32, uint32)
CASE(UINT64, uint64)
CASE(SINT32, int32)
CASE(SINT64, int64)
CASE(FIXED32, uint32)
CASE(FIXED64, uint64)
CASE(SFIXED32, int32)
CASE(SFIXED64, int64)
CASE(BOOL, _bool)
CASE(ENUM, int32)
default: return end;
}
#undef CASE
}
// The decoder keeps a stack with one entry per level of recursion.
// upb_decoder_frame is one frame of that stack.
typedef struct {
upb_msgdef *msgdef; // Definition of the message being decoded at this level.
upb_fielddef *field; // Set by push() on the parent frame: the submessage
// field being entered; passed to upb_sink_onend() by pop().
size_t end_offset; // For groups, 0.
} upb_decoder_frame;
// Full decoder state.  toplevel_msgdef and limit are set once by
// upb_decoder_new(); everything else that the visible code uses is
// (re)initialized by upb_decoder_reset().
struct upb_decoder {
// Immutable state of the decoder.
upb_msgdef *toplevel_msgdef;
upb_sink *sink;
// State pertaining to a particular decode (resettable).
// Stack entries store the offset where the submsg ends (for groups, 0).
// top points at the current frame; limit points one past the last frame.
upb_decoder_frame stack[UPB_MAX_NESTING], *top, *limit;
// Total bytes fully decoded by earlier upb_decoder_decode() calls.
size_t completed_offset;
void *udata;
};
// Allocates a decoder for messages described by msgdef.  Returns NULL if the
// allocation fails.  Only the immutable parts of the state are initialized
// here; upb_decoder_reset() must be called before decoding.
upb_decoder *upb_decoder_new(upb_msgdef *msgdef)
{
  upb_decoder *d = malloc(sizeof(*d));
  if(d == NULL) return NULL;  // Out of memory: don't write through NULL.
  d->toplevel_msgdef = msgdef;
  d->limit = &d->stack[UPB_MAX_NESTING];
  return d;
}
// Releases a decoder obtained from upb_decoder_new().  Only the decoder object
// itself is freed; the msgdef and sink it points to are left alone.
void upb_decoder_free(upb_decoder *d) {
  free(d);
}
// Puts the decoder into its initial state: an empty frame stack whose bottom
// frame describes the top-level message, no bytes completed, and output going
// to the given sink.
void upb_decoder_reset(upb_decoder *d, upb_sink *sink)
{
  upb_decoder_frame *bottom = d->stack;
  bottom->msgdef = d->toplevel_msgdef;
  // The top-level message is not delimited (we can keep receiving data for it
  // indefinitely), so we treat it like a group.
  bottom->end_offset = 0;
  d->top = bottom;
  d->sink = sink;
  d->completed_offset = 0;
}
// Returns where the current (top) frame's message ends, expressed as a pointer
// relative to "start", the buffer position that corresponds to
// d->completed_offset.  Frames with end_offset == 0 (groups and the top-level
// message) have no known end; for them the sentinel (void*)UINTPTR_MAX is
// returned, which isgroup() recognizes.
static const void *get_msgend(upb_decoder *d, const uint8_t *start)
{
  if(d->top->end_offset == 0)
    return (void*)UINTPTR_MAX;  // group.
  return start + (d->top->end_offset - d->completed_offset);
}
// Reports whether submsg_end is the "no known end" sentinel that get_msgend()
// returns for groups.
static bool isgroup(const void *submsg_end)
{
  const void *group_sentinel = (void*)UINTPTR_MAX;
  return submsg_end == group_sentinel;
}
extern upb_wire_type_t upb_expected_wire_types[];
// Returns true if wt is the wire type that upb_types records as expected for
// field type ft.
INLINE bool upb_check_type(upb_wire_type_t wt, upb_field_type_t ft) {
  // This doesn't currently support packed arrays.
  upb_wire_type_t expected = upb_types[ft].expected_wire_type;
  return expected == wt;
}
// Pushes a new stack frame for a submessage with the given len (which will
// be zero if the submessage is a group).  f is the field being entered; it is
// recorded on the parent frame and reported to the sink via upb_sink_onstart().
//
// Returns the end pointer of the new frame (see get_msgend).  If the stack is
// already full, sets UPB_ERROR_MAX_NESTING_EXCEEDED on status and returns NULL
// with the stack left unchanged.
static const uint8_t *push(upb_decoder *d, const uint8_t *start,
                           uint32_t submsg_len, upb_fielddef *f,
                           upb_status *status)
{
  // Check for overflow *before* moving d->top, so that d->top always points at
  // a valid frame: callers read d->top->msgdef right after push() returns,
  // before they look at status.
  if(d->top + 1 >= d->limit) {
    upb_seterr(status, UPB_ERROR_MAX_NESTING_EXCEEDED,
               "Nesting exceeded maximum (%d levels)\n",
               UPB_MAX_NESTING);
    return NULL;
  }
  d->top->field = f;
  d->top++;
  upb_decoder_frame *frame = d->top;
  frame->end_offset = d->completed_offset + submsg_len;
  frame->msgdef = upb_downcast_msgdef(f->def);
  upb_sink_onstart(d->sink, f, status);
  return get_msgend(d, start);
}
// Pops a stack frame, returning a pointer for where the next submsg should
// end (or a pointer that is out of range for a group).  The sink is notified
// through upb_sink_onend() with the field that was recorded when the frame was
// pushed.
//
// Popping the bottom (top-level) frame is refused: status is set to
// UPB_STATUS_ERROR and the stack is left unchanged, so d->top never moves
// below d->stack.
static const void *pop(upb_decoder *d, const uint8_t *start, upb_status *status)
{
  if(d->top == d->stack) {
    upb_seterr(status, UPB_STATUS_ERROR,
               "Attempted to end a submessage at the top level.");
    return get_msgend(d, start);
  }
  d->top--;
  upb_sink_onend(d->sink, d->top->field, status);
  return get_msgend(d, start);
}
// Decodes as much of str as possible, delivering fields to the decoder's sink.
// Returns the number of bytes that were fully decoded (always ending on a
// tag/value boundary) and adds that amount to d->completed_offset.  Errors are
// reported through status.
size_t upb_decoder_decode(upb_decoder *d, upb_strptr str, upb_status *status)
{
  // buf is our current offset, moves from start to end.
  const uint8_t *buf = (uint8_t*)upb_string_getrobuf(str);
  const uint8_t *const start = buf;  // ptr equivalent of d->completed_offset
  const uint8_t *const end = buf + upb_strlen(str);

  // When we have fully decoded a tag/value pair, we advance this.
  const uint8_t *completed = buf;

  const uint8_t *submsg_end = get_msgend(d, start);
  upb_msgdef *msgdef = d->top->msgdef;
  upb_sink_status sink_status = UPB_SINK_CONTINUE;

  // We need to check the status of operations that can fail, but we do so as
  // late as possible to avoid introducing branches that have to wait on
  // (status->code) which must be loaded from memory.  We must always check
  // before calling a user callback.
#define CHECK_STATUS() do { if(!upb_ok(status)) goto err; } while(0)

  // Main loop: executed once per tag/field pair.
  while(sink_status == UPB_SINK_CONTINUE && buf < end) {
    // Parse/handle tag.
    upb_tag tag;
    buf = decode_tag(buf, end, &tag, status);
    if(tag.wire_type == UPB_WIRE_TYPE_END_GROUP) {
      CHECK_STATUS();
      // The top-level message is represented like a group (end_offset == 0),
      // so isgroup() alone cannot tell it apart; an END_GROUP there must be
      // rejected or pop() would move d->top below the bottom of the stack.
      if(!isgroup(submsg_end) || d->top == d->stack) {
        upb_seterr(status, UPB_STATUS_ERROR, "End group seen but current "
                   "message is not a group, byte offset: %zd",
                   d->completed_offset + (completed - start));
        goto err;
      }
      submsg_end = pop(d, start, status);
      msgdef = d->top->msgdef;
      completed = buf;
      continue;
    }

    // Look up field by tag number.
    upb_fielddef *f = upb_msg_itof(msgdef, tag.field_number);

    // Parse/handle field.
    if(tag.wire_type == UPB_WIRE_TYPE_DELIMITED) {
      int32_t delim_len;
      buf = upb_get_INT32(buf, end, &delim_len, status);
      CHECK_STATUS();  // Checking decode_tag() and upb_get_INT32().
      // Lengths of 2^31 and above come out negative after the int32 conversion.
      // Accepting one would move buf backwards (possibly before start).
      if(delim_len < 0) {
        upb_seterr(status, UPB_STATUS_ERROR, "Delimited value has an invalid "
                   "(negative) length, byte offset: %zd",
                   d->completed_offset + (completed - start));
        goto err;
      }
      const uint8_t *delim_end = buf + delim_len;
      if(f && f->type == UPB_TYPE(MESSAGE)) {
        submsg_end = push(d, start, delim_end - start, f, status);
        msgdef = d->top->msgdef;
      } else {
        if(f && upb_isstringtype(f->type)) {
          int32_t str_start = buf - start;
          // NOTE(review): despite its name, "len" holds the end offset of the
          // string within str (start + length); confirm this matches what
          // upb_sink_onstr() expects.
          uint32_t len = str_start + delim_len;
          sink_status = upb_sink_onstr(d->sink, f, str, str_start, len, status);
        }  // else { TODO: packed arrays }
        // If field was not found, it is skipped silently.
        buf = delim_end;  // Could be >end.
      }
    } else {
      if(!f || !upb_check_type(tag.wire_type, f->type)) {
        buf = skip_wire_value(buf, end, tag.wire_type, status);
      } else if (f->type == UPB_TYPE(GROUP)) {
        submsg_end = push(d, start, 0, f, status);
        msgdef = d->top->msgdef;
      } else {
        upb_value val;
        buf = upb_decode_value(buf, end, f->type, upb_value_addrof(&val),
                               status);
        CHECK_STATUS();  // Checking upb_decode_value().
        sink_status = upb_sink_onvalue(d->sink, f, val, status);
      }
    }
    CHECK_STATUS();

    // Close every delimited submessage that ends exactly at buf.
    while(buf >= submsg_end) {
      if(buf > submsg_end) {
        upb_seterr(status, UPB_STATUS_ERROR, "Expected submsg end offset "
                   "did not lie on a tag/value boundary.");
        goto err;
      }
      submsg_end = pop(d, start, status);
      msgdef = d->top->msgdef;
    }
    // while(buf < d->packed_end) { TODO: packed arrays }
    completed = buf;
  }

  size_t read;
err:
  // Only bytes up to the last completed tag/value pair count as consumed.
  read = (char*)completed - (char*)start;
  d->completed_offset += read;
  return read;
}

@ -1,56 +0,0 @@
/*
* upb - a minimalist implementation of protocol buffers.
*
* upb_decoder implements a high performance, callback-based, stream-oriented
* decoder (comparable to the SAX model in XML parsers). For parsing protobufs
* into in-memory messages (a more DOM-like model), see the routines in
* upb_msg.h, which are layered on top of this decoder.
*
* TODO: the decoder currently does not support returning unknown values. This
* can easily be added when it is needed.
*
* Copyright (c) 2009 Joshua Haberman. See LICENSE for details.
*/
#ifndef UPB_DECODER_H_
#define UPB_DECODER_H_
#include <stdbool.h>
#include <stdint.h>
#include "upb.h"
#include "descriptor.h"
#ifdef __cplusplus
extern "C" {
#endif
/* upb_decoder *****************************************************************/
// A upb_decoder decodes the binary protocol buffer format, writing the data it
// decodes to a upb_sink.
struct upb_decoder;
typedef struct upb_decoder upb_decoder;
// Allocates and frees a upb_decoder, respectively.
upb_decoder *upb_decoder_new(upb_msgdef *md);
void upb_decoder_free(upb_decoder *p);
// Resets the internal state of an already-allocated decoder. This puts it in a
// state where it has not seen any data, and expects the next data to be from
// the beginning of a new protobuf. Parsers must be reset before they can be
// used. A decoder can be reset multiple times.
void upb_decoder_reset(upb_decoder *p, upb_sink *sink);
// Decodes protobuf data out of str, returning how much data was decoded. The
// next call to upb_decoder_decode should begin with the first byte that was
// not decoded. "status" indicates whether an error occurred.
//
// TODO: provide the following guarantee:
// retval will always be >= len.
size_t upb_decoder_decode(upb_decoder *p, upb_strptr str, upb_status *status);
#ifdef __cplusplus
} /* extern "C" */
#endif
#endif /* UPB_DECODER_H_ */

@ -1,823 +0,0 @@
/*
* upb - a minimalist implementation of protocol buffers.
*
* Copyright (c) 2008-2009 Joshua Haberman. See LICENSE for details.
*/
#include <stdlib.h>
#include <limits.h>
#include "descriptor.h"
#include "upb_def.h"
#include "upb_data.h"
/* Rounds p up to the next multiple of t. */
#define ALIGN_UP(p, t) ((p) % (t) == 0 ? (p) : (p) + ((t) - ((p) % (t))))
/* Integer division that rounds up instead of truncating.  Non-positive
 * numerators yield 0.
 * cf. http://stackoverflow.com/questions/17944/how-to-round-up-the-result-of-integer-division */
static int div_round_up(int numerator, int denominator) {
  if (numerator <= 0) return 0;
  return (numerator - 1) / denominator + 1;
}
/* upb_def ********************************************************************/
// Defs are reference counted, but can have cycles when types are
// self-recursive or mutually recursive, so we need to be capable of collecting
// the cycles. In our situation defs are immutable (so cycles cannot be
// created or destroyed post-initialization). We need to be thread-safe but
// want to avoid locks if at all possible and rely only on atomic operations.
//
// Our scheme is as follows. First we give each def a flag indicating whether
// it is part of a cycle or not. Because defs are immutable, this flag will
// never change. For acyclic defs, we can use a naive algorithm and avoid the
// overhead of dealing with cycles. Most defs will be acyclic, and most cycles
// will be very short.
//
// For defs that participate in cycles we keep two reference counts. One
// tracks references that come from outside the cycle (we call these external
// references), and is incremented and decremented like a regular refcount.
// The other is a cycle refcount, and works as follows. Every cycle is
// considered distinct, even if two cycles share members. For example, this
// graph has two distinct cycles:
//
// A-->B-->C
// ^ | |
// +---+---+
//
// The cycles in this graph are AB and ABC. When A's external refcount
// transitions from 0->1, we say that A takes "cycle references" on both
// cycles. Taking a cycle reference means incrementing the cycle refcount of
// all defs in the cycle. Since A and B are common to both cycles, A and B's
// cycle refcounts will be incremented by two, and C's will be incremented by
// one. Likewise, when A's external refcount transitions from 1->0, we
// decrement A and B's cycle refcounts by two and C's by one. We collect a
// cyclic type when its cycle refcount drops to zero. A precondition for this
// is that the external refcount has dropped to zero also.
//
// This algorithm is relatively cheap, since it only requires extra work when
// the external refcount on a cyclic type transitions from 0->1 or 1->0.
static void msgdef_free(upb_msgdef *m);
static void enumdef_free(upb_enumdef *e);
static void unresolveddef_free(struct _upb_unresolveddef *u);
// Frees a def by dispatching on its type to the matching type-specific free
// function.  Service and extension defs are unimplemented, and any other type
// is unexpected; all of those trip an assert.
static void def_free(upb_def *def)
{
  switch(def->type) {
    case UPB_DEF_MSG:
      msgdef_free(upb_downcast_msgdef(def));
      break;
    case UPB_DEF_ENUM:
      enumdef_free(upb_downcast_enumdef(def));
      break;
    case UPB_DEF_UNRESOLVED:
      unresolveddef_free(upb_downcast_unresolveddef(def));
      break;
    case UPB_DEF_SVC:  /* Unimplemented. */
    case UPB_DEF_EXT:  /* Unimplemented. */
    default:
      assert(false);
      break;
  }
}
// Depth-first search for all cycles that include cycle_base. Returns the
// number of paths from def that lead to cycle_base, which is equivalent to the
// number of cycles def is in that include cycle_base.
//
// open_defs tracks the set of nodes that are currently being visited in the
// search so we can stop the search if we detect a cycle that does not involve
// cycle_base. We can't color the nodes as we go by writing to a member of the
// def, because another thread could be performing the search concurrently.
//
// Parameters:
//   m             - the msgdef currently being visited.
//   cycle_base    - the msgdef whose cycles are being counted; NULL on the
//                   initial call, in which case m becomes the base.
//   open_defs     - caller-provided scratch array with room for
//                   UPB_MAX_TYPE_CYCLE_LEN entries.
//   num_open_defs - number of valid entries in open_defs.
//   ref           - true to add to the cycle refcounts, false to subtract.
static int cycle_ref_or_unref(upb_msgdef *m, upb_msgdef *cycle_base,
upb_msgdef **open_defs, int num_open_defs,
bool ref) {
bool found = false;
for(int i = 0; i < num_open_defs; i++) {
if(open_defs[i] == m) {
// We encountered a cycle that did not involve cycle_base.
found = true;
break;
}
}
// Stop on a foreign cycle or when the search is as deep as open_defs allows.
if(found || num_open_defs == UPB_MAX_TYPE_CYCLE_LEN) {
return 0;
} else if(m == cycle_base) {
// Reached the base again: this path closes exactly one cycle.
return 1;
} else {
int path_count = 0;
if(cycle_base == NULL) {
// Initial call: m is the base and is deliberately not added to open_defs,
// so that reaching it again is counted by the branch above.
cycle_base = m;
} else {
open_defs[num_open_defs++] = m;
}
// Recurse into every submessage field whose def is marked cyclic.
for(int i = 0; i < m->num_fields; i++) {
upb_fielddef *f = &m->fields[i];
upb_def *def = f->def;
if(upb_issubmsg(f) && def->is_cyclic) {
upb_msgdef *sub_m = upb_downcast_msgdef(def);
path_count += cycle_ref_or_unref(sub_m, cycle_base, open_defs,
num_open_defs, ref);
}
}
// Adjust m's cycle refcount by the number of cycles found through it.
if(ref) {
upb_atomic_add(&m->cycle_refcount, path_count);
} else {
// Presumably upb_atomic_add() returns true when the count reaches zero --
// TODO confirm against upb_atomic.h.
if(upb_atomic_add(&m->cycle_refcount, -path_count))
def_free(UPB_UPCAST(m));
}
return path_count;
}
}
// Handles a def whose refcount has reached zero (going by the name; the caller
// is not visible here).  Acyclic defs are freed immediately.  Cyclic defs
// instead release their cycle references, and cycle_ref_or_unref() frees
// members as their counts allow.
void _upb_def_reftozero(upb_def *def) {
  if(!def->is_cyclic) {
    def_free(def);
    return;
  }
  upb_msgdef *visiting[UPB_MAX_TYPE_CYCLE_LEN];
  cycle_ref_or_unref(upb_downcast_msgdef(def), NULL, visiting, 0, false);
}
// Takes cycle references for def: runs the cycle search rooted at def in
// "ref" mode, which increments the cycle refcount of the defs it finds on
// cycles through def.
void _upb_def_cyclic_ref(upb_def *def) {
  upb_msgdef *visiting[UPB_MAX_TYPE_CYCLE_LEN];
  upb_msgdef *m = upb_downcast_msgdef(def);
  cycle_ref_or_unref(m, NULL, visiting, 0, true);
}
// Initializes the fields common to all defs.  The def takes its own frozen
// reference on fqname and starts with a refcount of 1.
static void upb_def_init(upb_def *def, enum upb_def_type type,
                         upb_strptr fqname) {
  upb_atomic_refcount_init(&def->refcount, 1);
  def->fqname = upb_string_getref(fqname, UPB_REF_FROZEN);
  def->type = type;
  def->search_depth = 0;
  def->is_cyclic = 0;  // We detect this later, after resolving refs.
}
// Releases what upb_def_init() acquired: the def's reference on its
// fully-qualified name.
static void upb_def_uninit(upb_def *def)
{
  upb_string_unref(def->fqname);
}
/* upb_unresolveddef **********************************************************/
// Placeholder def created by fielddef_init() for a field that names its type
// (fd->type_name) before that name has been resolved to a real def.
typedef struct _upb_unresolveddef {
upb_def base;
upb_strptr name; // The type name as given; owns one ref on the string.
} upb_unresolveddef;
// Creates an unresolved def that refers to the type named by str.  One ref on
// the name is held in ->name; upb_def_init() takes a further ref for the
// base's fqname.
static upb_unresolveddef *upb_unresolveddef_new(upb_strptr str) {
  upb_strptr name = upb_string_getref(str, UPB_REF_THREADUNSAFE_READONLY);
  upb_unresolveddef *u = malloc(sizeof(*u));
  u->name = name;
  upb_def_init(&u->base, UPB_DEF_UNRESOLVED, name);
  return u;
}
// Destroys an unresolved def: drops the ref on its name, tears down the
// common def header, and frees the allocation.
static void unresolveddef_free(struct _upb_unresolveddef *def) {
upb_string_unref(def->name);
upb_def_uninit(&def->base);
free(def);
}
/* upb_fielddef ***************************************************************/
// Fills in "f" from the field descriptor "fd". If the descriptor names a
// type, the field gets an owned upb_unresolveddef placeholder for it;
// otherwise the field has no def.
static void fielddef_init(upb_fielddef *f,
                          google_protobuf_FieldDescriptorProto *fd)
{
  f->type = fd->type;
  f->label = fd->label;
  f->number = fd->number;
  f->name = upb_string_getref(fd->name, UPB_REF_FROZEN);
  f->def = NULL;
  f->owned = false;
  // A type name must be present exactly when the field type requires a def.
  assert(fd->set_flags.has.type_name == upb_hasdef(f));
  if(!fd->set_flags.has.type_name) return;
  f->def = UPB_UPCAST(upb_unresolveddef_new(fd->type_name));
  f->owned = true;
}
// Heap-allocates a fielddef and initializes it from "fd". Release it with
// fielddef_free().
static upb_fielddef *fielddef_new(google_protobuf_FieldDescriptorProto *fd)
{
  upb_fielddef *field = malloc(sizeof(*field));
  fielddef_init(field, fd);
  return field;
}
// Releases the resources held by "f" (its name, and its def if the field
// owns a ref on one) without freeing "f" itself.
static void fielddef_uninit(upb_fielddef *f)
{
  upb_string_unref(f->name);
  if(!upb_hasdef(f)) return;
  if(f->owned) {
    upb_def_unref(f->def);
  }
}
// Counterpart of fielddef_new(): releases the field's resources and frees
// the heap allocation.
static void fielddef_free(upb_fielddef *f) {
fielddef_uninit(f);
free(f);
}
// Copies "src" into "dst". The copy gets its own ref on the name and, when
// the field has a def, its own owned ref on that def.
static void fielddef_copy(upb_fielddef *dst, upb_fielddef *src)
{
  *dst = *src;
  dst->name = upb_string_getref(src->name, UPB_REF_FROZEN);
  if(!upb_hasdef(src)) return;
  upb_def_ref(dst->def);
  dst->owned = true;
}
// Ordering used when sorting fields: required fields come first; within the
// required and non-required groups, fields are ordered by field number.
// TODO: consider ordering by data size to reduce padding.
static int compare_fields(upb_fielddef *f1, upb_fielddef *f2) {
  bool req1 = f1->label == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_REQUIRED;
  bool req2 = f2->label == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_REQUIRED;
  if(req1 == req2) {
    // Same group: number order.
    return f1->number - f2->number;
  }
  // Different groups: the required one sorts first.
  return req2 - req1;
}
static int compare_fielddefs(const void *e1, const void *e2) {
return compare_fields(*(void**)e1, *(void**)e2);
}
static int compare_fds(const void *e1, const void *e2) {
upb_fielddef f1, f2;
fielddef_init(&f1, *(void**)e1);
fielddef_init(&f2, *(void**)e2);
int ret = compare_fields(&f1, &f2);
fielddef_uninit(&f1);
fielddef_uninit(&f2);
return ret;
}
// Sorts the "num" field descriptors in "fds" in place, using the same order
// that fielddefs are sorted in (see compare_fields()).
void upb_fielddef_sortfds(google_protobuf_FieldDescriptorProto **fds, size_t num)
{
qsort(fds, num, sizeof(*fds), compare_fds);
}
// Sorts the "num" fielddef pointers in "defs" in place (see
// compare_fields() for the order).
static void fielddef_sort(upb_fielddef **defs, size_t num)
{
qsort(defs, num, sizeof(*defs), compare_fielddefs);
}
/* upb_msgdef *****************************************************************/
// Creates a msgdef named "fqname" from the given fields. Each field is
// copied into the msgdef, so the caller keeps ownership of the "fields" array
// and of the fielddefs it points to, and must release them itself whether or
// not this call succeeds.
//
// Also computes the in-memory layout (byte offset of every field and total
// message size) and fills the number->field and name->field tables.
//
// Returns NULL and sets "status" if there are more than UPB_MAX_FIELDS
// fields.
static upb_msgdef *msgdef_new(upb_fielddef **fields, int num_fields,
                              upb_strptr fqname, upb_status *status)
{
  if(num_fields > UPB_MAX_FIELDS) {
    // Report the limit that was exceeded. "fields" is deliberately not freed
    // here: the caller still owns it and frees it after we return.
    upb_seterr(status, UPB_STATUS_ERROR,
               "Tried to create a msgdef with more than %d fields",
               (int)UPB_MAX_FIELDS);
    return NULL;
  }
  upb_msgdef *m = malloc(sizeof(*m));
  upb_def_init(&m->base, UPB_DEF_MSG, fqname);
  upb_atomic_refcount_init(&m->cycle_refcount, 0);
  upb_inttable_init(&m->itof, num_fields, sizeof(upb_itof_ent));
  upb_strtable_init(&m->ntof, num_fields, sizeof(upb_ntof_ent));

  m->num_fields = num_fields;
  m->set_flags_bytes = div_round_up(m->num_fields, 8);
  // These are incremented in the loop.
  m->num_required_fields = 0;
  m->size = m->set_flags_bytes + 4;  // 4 for the refcount.
  m->fields = malloc(sizeof(upb_fielddef) * num_fields);

  size_t max_align = 0;
  for(int i = 0; i < num_fields; i++) {
    upb_fielddef *f = &m->fields[i];
    upb_type_info *type_info = &upb_types[fields[i]->type];
    fielddef_copy(f, fields[i]);

    // General alignment rules are: each member must be at an address that is
    // a multiple of that type's alignment.  Also, the size of the structure
    // as a whole must be a multiple of the greatest alignment of any member.
    f->field_index = i;
    size_t offset = ALIGN_UP(m->size, type_info->align);
    f->byte_offset = offset - 4;  // Offsets are relative to the refcount.
    m->size = offset + type_info->size;
    max_align = UPB_MAX(max_align, type_info->align);
    if(f->label == UPB_LABEL(REQUIRED)) {
      // We currently rely on the fact that required fields are always sorted
      // to occur before non-required fields.
      m->num_required_fields++;
    }

    // Insert into the tables.
    upb_itof_ent itof_ent = {{f->number, 0}, f};
    upb_ntof_ent ntof_ent = {{f->name, 0}, f};
    upb_inttable_insert(&m->itof, &itof_ent.e);
    upb_strtable_insert(&m->ntof, &ntof_ent.e);
  }

  // Pad the total size so arrays of this message stay aligned.
  if(max_align > 0) m->size = ALIGN_UP(m->size, max_align);
  return m;
}
// Destroys a msgdef: releases every field, the field array, both lookup
// tables and the common def header, then frees the msgdef itself.
static void msgdef_free(upb_msgdef *m)
{
  upb_field_count_t i;
  for(i = 0; i < m->num_fields; i++) {
    upb_fielddef *f = &m->fields[i];
    fielddef_uninit(f);
  }
  free(m->fields);
  upb_strtable_free(&m->ntof);
  upb_inttable_free(&m->itof);
  upb_def_uninit(&m->base);
  free(m);
}
// Points field "f" at the resolved def "def". Any def the field previously
// owned is released, and the field takes an owned ref on the new one. (The
// ref may later be made unowned if it turns out to be part of a cycle.)
static void upb_msgdef_resolve(upb_msgdef *m, upb_fielddef *f, upb_def *def) {
  (void)m;
  if(f->owned) {
    upb_def_unref(f->def);
  }
  upb_def_ref(def);
  f->def = def;
  f->owned = true;
}
/* upb_enumdef ****************************************************************/
// Entry of an enumdef's name -> number table (e->ntoi).
typedef struct {
upb_strtable_entry e;
uint32_t value;
} ntoi_ent;
// Entry of an enumdef's number -> name table (e->iton).
typedef struct {
upb_inttable_entry e;
upb_strptr string;
} iton_ent;
// Creates an enumdef named "fqname" from the enum descriptor "ed", entering
// every declared value into both the name->number and number->name tables.
static upb_enumdef *enumdef_new(google_protobuf_EnumDescriptorProto *ed,
                                upb_strptr fqname)
{
  upb_enumdef *e = malloc(sizeof(*e));
  upb_def_init(&e->base, UPB_DEF_ENUM, fqname);

  // An enum descriptor without a value array has zero values.
  int num_values = 0;
  if(ed->set_flags.has.value) {
    num_values = google_protobuf_EnumValueDescriptorProto_array_len(ed->value);
  }
  upb_strtable_init(&e->ntoi, num_values, sizeof(ntoi_ent));
  upb_inttable_init(&e->iton, num_values, sizeof(iton_ent));

  for(int idx = 0; idx < num_values; idx++) {
    google_protobuf_EnumValueDescriptorProto *v =
        google_protobuf_EnumValueDescriptorProto_array_get(ed->value, idx);
    ntoi_ent by_name = {{v->name, 0}, v->number};
    iton_ent by_number = {{v->number, 0}, v->name};
    upb_strtable_insert(&e->ntoi, &by_name.e);
    upb_inttable_insert(&e->iton, &by_number.e);
  }
  return e;
}
// Destroys an enumdef: frees both lookup tables and the common def header,
// then the enumdef itself.
static void enumdef_free(upb_enumdef *e) {
upb_strtable_free(&e->ntoi);
upb_inttable_free(&e->iton);
upb_def_uninit(&e->base);
free(e);
}
// Positions "iter" on table entry "ent", exposing that entry's name and
// numeric value through the iterator.
static void fill_iter(upb_enum_iter *iter, ntoi_ent *ent) {
  iter->name = ent->e.key;
  iter->val = ent->value;
  iter->state = ent;
}
// Starts iterating over the values of enum "e". After this call either
// upb_enum_done(iter) is true (the enum has no values) or "iter" is
// positioned on the first value.
void upb_enum_begin(upb_enum_iter *iter, upb_enumdef *e) {
  // We could iterate over either table here; the choice is arbitrary.
  ntoi_ent *ent = upb_strtable_begin(&e->ntoi);
  iter->e = e;
  iter->state = ent;
  // The table yields no entry for an enum without values; in that case there
  // is nothing to expose and the iterator is immediately done.
  if(ent) fill_iter(iter, ent);
}
// Advances "iter" to the next enum value. Must not be called once the
// iterator is done. After the last value the iterator's state becomes NULL.
void upb_enum_next(upb_enum_iter *iter) {
  ntoi_ent *cur = iter->state;
  assert(cur);
  ntoi_ent *next = upb_strtable_next(&iter->e->ntoi, &cur->e);
  iter->state = next;
  if(next) {
    fill_iter(iter, next);
  }
}
// Returns true once the iterator has moved past the last enum value (its
// state is NULL).
bool upb_enum_done(upb_enum_iter *iter) {
return iter->state == NULL;
}
/* symtab internal ***********************************************************/
// Symbol table entry: maps a fully-qualified name (the table key) to its def.
typedef struct {
upb_strtable_entry e;
upb_def *def;
} symtab_ent;
/* Scans data[0..len-1] backwards for "c" and returns the index of its last
 * occurrence at an index greater than zero.  Returns 0 when no such
 * occurrence exists (index 0 itself is never examined). */
static int my_memrchr(char *data, char c, size_t len)
{
  int pos;
  for(pos = (int)(len - 1); pos > 0; pos--) {
    if(data[pos] == c) break;
  }
  return pos;
}
/* Given a symbol and the base symbol inside which it is defined, find the
 * symbol's definition in t.
 *
 * A symbol starting with UPB_SYMBOL_SEPARATOR is absolute and is looked up
 * as-is (minus the leading separator).  Any other symbol is relative: it is
 * looked up in scope "base", then in each enclosing scope obtained by
 * dropping the last component of "base", and finally in the global scope.
 *
 * Returns the matching table entry, or NULL if the symbol is empty, too long
 * or not found. */
static symtab_ent *resolve(upb_strtable *t, upb_strptr base, upb_strptr symbol)
{
  if(upb_strlen(base) + upb_strlen(symbol) + 1 >= UPB_SYMBOL_MAXLEN ||
     upb_strlen(symbol) == 0) return NULL;

  if(upb_string_getrobuf(symbol)[0] == UPB_SYMBOL_SEPARATOR) {
    // Symbols starting with '.' are absolute, so we do a single lookup.
    // Slice to omit the leading '.'
    upb_strptr sym_str = upb_strslice(symbol, 1, INT_MAX);
    symtab_ent *e = upb_strtable_lookup(t, sym_str);
    upb_string_unref(sym_str);
    return e;
  }

  // Remove components from base until we find an entry or run out.
  upb_strptr sym_str = upb_string_new();
  upb_strlen_t symlen = upb_strlen(symbol);
  int baselen = upb_strlen(base);
  symtab_ent *e = NULL;
  while(1) {
    // sym_str = base[0...baselen] + UPB_SYMBOL_SEPARATOR + symbol, or just
    // symbol once the scope is empty: global names carry no leading
    // separator, so prepending one there could never match.
    upb_strlen_t prefixlen = baselen > 0 ? baselen + 1 : 0;
    char *buf = upb_string_getrwbuf(sym_str, prefixlen + symlen);
    if(baselen > 0) {
      memcpy(buf, upb_string_getrobuf(base), baselen);
      buf[baselen] = UPB_SYMBOL_SEPARATOR;
    }
    memcpy(buf + prefixlen, upb_string_getrobuf(symbol), symlen);
    e = upb_strtable_lookup(t, sym_str);
    if(e || baselen == 0) break;  // Found it, or no more scopes to try.
    baselen = my_memrchr(buf, UPB_SYMBOL_SEPARATOR, baselen);
  }
  // The scratch string is ours alone; release it on every exit path.
  upb_string_unref(sym_str);
  return e;
}
/* Builds a qualified name from a scope and a name:
 *   join("Foo.Bar", "Baz") -> "Foo.Bar.Baz"
 *   join("", "Baz") -> "Baz"
 * The separator is only inserted when the scope is non-empty.
 * Caller owns a ref on the returned string. */
static upb_strptr join(upb_strptr base, upb_strptr name) {
  upb_strptr result = upb_strdup(base);
  upb_strlen_t baselen = upb_strlen(result);
  if(baselen > 0) {
    // Grow by one byte and put the separator in the new last position.
    char *buf = upb_string_getrwbuf(result, baselen + 1);
    buf[baselen] = UPB_SYMBOL_SEPARATOR;
  }
  upb_strcat(result, name);
  return result;
}
// Checks that "name" can be defined inside scope "base" in table "t".
// On success returns the fully-qualified name; the caller owns a ref on it.
// If the name is null or the symbol already exists in "t", sets an error on
// "status" and returns UPB_STRING_NULL.
static upb_strptr try_define(upb_strtable *t, upb_strptr base,
                             upb_strptr name, upb_status *status)
{
  if(upb_string_isnull(name)) {
    upb_seterr(status, UPB_STATUS_ERROR,
               "symbol in context '" UPB_STRFMT "' does not have a name",
               UPB_STRARG(base));
    return UPB_STRING_NULL;
  }

  upb_strptr fqname = join(base, name);
  if(!upb_strtable_lookup(t, fqname)) {
    return fqname;
  }

  // Already present: report it and drop the name we just built.
  upb_seterr(status, UPB_STATUS_ERROR,
             "attempted to redefine symbol '" UPB_STRFMT "'",
             UPB_STRARG(fqname));
  upb_string_unref(fqname);
  return UPB_STRING_NULL;
}
// Defines the enum described by "ed" inside scope "base" and adds it to
// table "t". Does nothing except set "status" if the enum has no name or the
// symbol is already defined.
static void insert_enum(upb_strtable *t,
                        google_protobuf_EnumDescriptorProto *ed,
                        upb_strptr base, upb_status *status)
{
  upb_strptr name = UPB_STRING_NULL;
  if(ed->set_flags.has.name) name = ed->name;

  upb_strptr fqname = try_define(t, base, name, status);
  if(upb_string_isnull(fqname)) return;

  symtab_ent ent;
  ent.e.key = fqname;
  ent.def = UPB_UPCAST(enumdef_new(ed, fqname));
  upb_strtable_insert(t, &ent.e);
  // Drop the ref we got from try_define().
  upb_string_unref(fqname);
}
// Defines the message described by "d" inside scope "base" and adds it to
// table "t", then recursively defines its nested messages and enums using
// this message's fully-qualified name as their scope. If "sort" is true the
// fields are reordered with fielddef_sort() before the msgdef is built.
// Errors are reported through "status". Nothing is inserted for this message
// if try_define() rejects its name or if "status" is not ok after
// msgdef_new().
static void insert_message(upb_strtable *t, google_protobuf_DescriptorProto *d,
upb_strptr base, bool sort, upb_status *status)
{
upb_strptr name = d->set_flags.has.name ? d->name : UPB_STRING_NULL;
upb_strptr fqname = try_define(t, base, name, status);
if(upb_string_isnull(fqname)) return;
int num_fields = d->set_flags.has.field ?
google_protobuf_FieldDescriptorProto_array_len(d->field) : 0;
symtab_ent e;
e.e.key = fqname;
// Gather our list of fields, sorting if necessary.
upb_fielddef **fielddefs = malloc(sizeof(*fielddefs) * num_fields);
for (int i = 0; i < num_fields; i++) {
google_protobuf_FieldDescriptorProto *fd =
google_protobuf_FieldDescriptorProto_array_get(d->field, i);
fielddefs[i] = fielddef_new(fd);
}
if(sort) fielddef_sort(fielddefs, num_fields);
// Create the msgdef with that list of fields.
e.def = UPB_UPCAST(msgdef_new(fielddefs, num_fields, fqname, status));
// Cleanup. The temporary fielddefs and their array are released here
// regardless of whether msgdef_new() succeeded.
for (int i = 0; i < num_fields; i++) fielddef_free(fielddefs[i]);
free(fielddefs);
if(!upb_ok(status)) goto error;
upb_strtable_insert(t, &e.e);
/* Add nested messages and enums. */
if(d->set_flags.has.nested_type)
for(unsigned int i = 0; i < google_protobuf_DescriptorProto_array_len(d->nested_type); i++)
insert_message(t, google_protobuf_DescriptorProto_array_get(d->nested_type, i), fqname, sort, status);
if(d->set_flags.has.enum_type)
for(unsigned int i = 0; i < google_protobuf_EnumDescriptorProto_array_len(d->enum_type); i++)
insert_enum(t, google_protobuf_EnumDescriptorProto_array_get(d->enum_type, i), fqname, status);
error:
// Free the ref we got from try_define().
upb_string_unref(fqname);
}
// Depth-first search of the type graph starting at "m", looking for cycles
// that pass through the base of the search (the def visited at depth 1).
//
// While a def is on the current search path its search_depth is non-zero; it
// is reset to 0 on the way back out. For every submessage field of "m"
// through which such a cycle is found, "m" is marked cyclic and the field's
// owned ref on the sub-def (if any) is dropped and marked unowned.
//
// Returns true if a cycle through the search base was found via "m". Sets
// "status" if the maximum type depth or maximum cycle length is exceeded.
static bool find_cycles(upb_msgdef *m, int search_depth, upb_status *status)
{
  if(search_depth > UPB_MAX_TYPE_DEPTH) {
    // There are many situations in upb where we recurse over the type tree
    // (like for example, right now) and an absurdly deep tree could cause us
    // to stack overflow on systems with very limited stacks.
    upb_seterr(status, UPB_STATUS_ERROR, "Type " UPB_STRFMT " was found at "
               "depth %d in the type graph, which exceeds the maximum type "
               "depth of %d.", UPB_STRARG(UPB_UPCAST(m)->fqname), search_depth,
               UPB_MAX_TYPE_DEPTH);
    return false;
  } else if(UPB_UPCAST(m)->search_depth == 1) {
    // Cycle!
    int cycle_len = search_depth - 1;
    if(cycle_len > UPB_MAX_TYPE_CYCLE_LEN) {
      upb_seterr(status, UPB_STATUS_ERROR, "Type " UPB_STRFMT " was involved "
                 "in a cycle of length %d, which exceeds the maximum type "
                 "cycle length of %d.", UPB_STRARG(UPB_UPCAST(m)->fqname),
                 cycle_len, UPB_MAX_TYPE_CYCLE_LEN);
    }
    return true;
  } else if(UPB_UPCAST(m)->search_depth > 0) {
    // This was a cycle, but did not originate from the base of our search
    // tree.  We'll find it when we call find_cycles() on this node directly.
    return false;
  } else {
    UPB_UPCAST(m)->search_depth = ++search_depth;
    bool cycle_found = false;
    for(upb_field_count_t i = 0; i < m->num_fields; i++) {
      upb_fielddef *f = &m->fields[i];
      if(!upb_issubmsg(f)) continue;
      upb_def *sub_def = f->def;
      upb_msgdef *sub_m = upb_downcast_msgdef(sub_def);
      if(find_cycles(sub_m, search_depth, status)) {
        cycle_found = true;
        UPB_UPCAST(m)->is_cyclic = true;
        // Refs that are part of a cycle must not be owned.
        if(f->owned) {
          upb_atomic_unref(&sub_def->refcount);
          f->owned = false;
        }
      }
    }
    UPB_UPCAST(m)->search_depth = 0;
    return cycle_found;
  }
}
// Adds every message and enum defined by file descriptor "fd" to the table
// "addto", resolves the type references of the messages in "addto", and then
// processes type cycles.
//
// References are resolved against "existingdefs" first and "addto" second,
// using the referencing message's fully-qualified name as the starting scope.
// If "sort" is true, message fields are sorted before each msgdef is built.
//
// On any error "status" is set and the function returns early; defs that
// were already inserted into "addto" are left there.
static void addfd(upb_strtable *addto, upb_strtable *existingdefs,
google_protobuf_FileDescriptorProto *fd, bool sort,
upb_status *status)
{
// The package (or an empty string) is the scope of the top-level defs.
upb_strptr pkg;
if(fd->set_flags.has.package) {
pkg = upb_string_getref(fd->package, UPB_REF_FROZEN);
} else {
pkg = upb_string_new();
}
if(fd->set_flags.has.message_type)
for(unsigned int i = 0; i < google_protobuf_DescriptorProto_array_len(fd->message_type); i++)
insert_message(addto, google_protobuf_DescriptorProto_array_get(fd->message_type, i), pkg, sort, status);
if(fd->set_flags.has.enum_type)
for(unsigned int i = 0; i < google_protobuf_EnumDescriptorProto_array_len(fd->enum_type); i++)
insert_enum(addto, google_protobuf_EnumDescriptorProto_array_get(fd->enum_type, i), pkg, status);
upb_string_unref(pkg);
if(!upb_ok(status)) {
// TODO: make sure we don't leak any memory in this case.
return;
}
/* TODO: handle extensions and services. */
// Attempt to resolve all references.
symtab_ent *e;
for(e = upb_strtable_begin(addto); e; e = upb_strtable_next(addto, &e->e)) {
upb_msgdef *m = upb_dyncast_msgdef(e->def);
if(!m) continue;
upb_strptr base = e->e.key;
for(upb_field_count_t i = 0; i < m->num_fields; i++) {
upb_fielddef *f = &m->fields[i];
if(!upb_hasdef(f)) continue; // No resolving necessary.
upb_strptr name = upb_downcast_unresolveddef(f->def)->name;
symtab_ent *found = resolve(existingdefs, base, name);
if(!found) found = resolve(addto, base, name);
// Submessage fields must resolve to a message, all others to an enum.
upb_field_type_t expected = upb_issubmsg(f) ? UPB_DEF_MSG : UPB_DEF_ENUM;
if(!found) {
upb_seterr(status, UPB_STATUS_ERROR,
"could not resolve symbol '" UPB_STRFMT "'"
" in context '" UPB_STRFMT "'",
UPB_STRARG(name), UPB_STRARG(base));
return;
} else if(found->def->type != expected) {
upb_seterr(status, UPB_STATUS_ERROR, "Unexpected type");
return;
}
upb_msgdef_resolve(m, f, found->def);
}
}
// Deal with type cycles.
for(e = upb_strtable_begin(addto); e; e = upb_strtable_next(addto, &e->e)) {
upb_msgdef *m = upb_dyncast_msgdef(e->def);
if(!m) continue;
// Do an initial pass over the graph to check that there are no cycles
// longer than the maximum length. We also mark all cyclic defs as such,
// and decrement refs on cyclic defs.
find_cycles(m, 0, status);
upb_msgdef *open_defs[UPB_MAX_TYPE_CYCLE_LEN];
cycle_ref_or_unref(m, NULL, open_defs, 0, true);
}
}
/* upb_symtab *****************************************************************/
// Creates a new symbol table with one reference. The public table starts
// empty; the private table is pre-populated with the descriptor.proto types
// so that descriptors can be parsed, and the msgdef for
// google.protobuf.FileDescriptorSet is cached in s->fds_msgdef.
//
// Returns NULL (after asserting in debug builds) if the built-in descriptor
// cannot be loaded.
upb_symtab *upb_symtab_new()
{
upb_symtab *s = malloc(sizeof(*s));
upb_atomic_refcount_init(&s->refcount, 1);
upb_rwlock_init(&s->lock);
upb_strtable_init(&s->symtab, 16, sizeof(symtab_ent));
upb_strtable_init(&s->psymtab, 16, sizeof(symtab_ent));
// Add descriptor.proto types to private symtable so we can parse descriptors.
// We know there is only 1.
google_protobuf_FileDescriptorProto *fd =
google_protobuf_FileDescriptorProto_array_get(upb_file_descriptor_set->file, 0);
upb_status status = UPB_STATUS_INIT;
// Fields are not sorted here (sort=false).
addfd(&s->psymtab, &s->symtab, fd, false, &status);
if(!upb_ok(&status)) {
fprintf(stderr, "Failed to initialize upb: %s.\n", status.msg);
assert(false);
return NULL; // Indicates that upb is buggy or corrupt.
}
upb_static_string name =
UPB_STATIC_STRING_INIT("google.protobuf.FileDescriptorSet");
upb_strptr nameptr = UPB_STATIC_STRING_PTR_INIT(name);
symtab_ent *e = upb_strtable_lookup(&s->psymtab, nameptr);
assert(e);
s->fds_msgdef = upb_downcast_msgdef(e->def);
return s;
}
// Drops the table's ref on every def it contains, then frees the table.
static void free_symtab(upb_strtable *t)
{
  symtab_ent *ent = upb_strtable_begin(t);
  while(ent) {
    upb_def_unref(ent->def);
    ent = upb_strtable_next(t, &ent->e);
  }
  upb_strtable_free(t);
}
// Destroys the symbol table: releases the defs in both the public and the
// private table, destroys the lock and frees the symtab itself.
void _upb_symtab_free(upb_symtab *s)
{
free_symtab(&s->symtab);
free_symtab(&s->psymtab);
upb_rwlock_destroy(&s->lock);
free(s);
}
// Returns a malloc'd array of the defs in the public symbol table whose type
// is "type" (or all of them for UPB_DEF_ANY) and stores the number of
// entries in *count. A ref is taken on every returned def.
upb_def **upb_symtab_getdefs(upb_symtab *s, int *count, upb_def_type_t type)
{
  upb_rwlock_rdlock(&s->lock);
  // Sized for the whole table; we may only use part of it, depending on how
  // many symbols are of the requested type.
  int capacity = upb_strtable_count(&s->symtab);
  upb_def **defs = malloc(sizeof(*defs) * capacity);
  int n = 0;
  symtab_ent *ent;
  for(ent = upb_strtable_begin(&s->symtab); ent;
      ent = upb_strtable_next(&s->symtab, &ent->e)) {
    upb_def *def = ent->def;
    assert(def);
    if(type != UPB_DEF_ANY && def->type != type) continue;
    defs[n++] = def;
  }
  upb_rwlock_unlock(&s->lock);

  *count = n;
  for(int k = 0; k < n; k++) {
    upb_def_ref(defs[k]);
  }
  return defs;
}
// Looks up the exact symbol "sym" in the public table, under the read lock.
// Returns the def with a new ref taken on it, or NULL if there is no such
// symbol.
upb_def *upb_symtab_lookup(upb_symtab *s, upb_strptr sym)
{
  upb_def *found = NULL;
  upb_rwlock_rdlock(&s->lock);
  symtab_ent *ent = upb_strtable_lookup(&s->symtab, sym);
  if(ent != NULL) {
    found = ent->def;
    upb_def_ref(found);
  }
  upb_rwlock_unlock(&s->lock);
  return found;
}
// Resolves "symbol" relative to scope "base" in the public table, under the
// read lock. Returns the def with a new ref taken on it, or NULL if the
// symbol cannot be resolved.
upb_def *upb_symtab_resolve(upb_symtab *s, upb_strptr base, upb_strptr symbol) {
  upb_def *found = NULL;
  upb_rwlock_rdlock(&s->lock);
  symtab_ent *ent = resolve(&s->symtab, base, symbol);
  if(ent != NULL) {
    found = ent->def;
    upb_def_ref(found);
  }
  upb_rwlock_unlock(&s->lock);
  return found;
}
// Adds all the defs from the file descriptor set "fds" to the symbol table.
// The defs are first built in a temporary table while holding only the read
// lock; if any file fails, "status" is set and the symbol table is left
// unchanged. Otherwise the entries are copied into s->symtab under the write
// lock. Does nothing if "fds" contains no files.
void upb_symtab_addfds(upb_symtab *s, google_protobuf_FileDescriptorSet *fds,
upb_status *status)
{
if(fds->set_flags.has.file) {
// Insert new symbols into a temporary table until we have verified that
// the descriptor is valid.
upb_strtable tmp;
upb_strtable_init(&tmp, 0, sizeof(symtab_ent));
{ // Read lock scope
upb_rwlock_rdlock(&s->lock);
for(uint32_t i = 0; i < google_protobuf_FileDescriptorProto_array_len(fds->file); i++) {
addfd(&tmp, &s->symtab, google_protobuf_FileDescriptorProto_array_get(fds->file, i), true, status);
if(!upb_ok(status)) {
free_symtab(&tmp);
upb_rwlock_unlock(&s->lock);
return;
}
}
upb_rwlock_unlock(&s->lock);
}
// Everything was successfully added, copy from the tmp symtable.
{ // Write lock scope
upb_rwlock_wrlock(&s->lock);
symtab_ent *e;
for(e = upb_strtable_begin(&tmp); e; e = upb_strtable_next(&tmp, &e->e)) {
// We checked for duplicates when we had only the read lock, but it is
// theoretically possible that a duplicate symbol was inserted between
// dropping the read lock and acquiring the write lock.
if(upb_strtable_lookup(&s->symtab, e->e.key)) {
upb_seterr(status, UPB_STATUS_ERROR, "Attempted to insert duplicate "
"symbol: " UPB_STRFMT, UPB_STRARG(e->e.key));
// To truly handle this situation we would need to remove any symbols
// from tmp that were successfully inserted into s->symtab. Because
// this case is exceedingly unlikely, and because our hashtable
// doesn't support deletions right now, we leave them in there, which
// means we must not call free_symtab(&s->symtab), so we will leak it.
break;
}
upb_strtable_insert(&s->symtab, &e->e);
}
upb_rwlock_unlock(&s->lock);
}
// Only the table is freed here, not the defs: the entries that were
// copied into s->symtab still refer to them.
upb_strtable_free(&tmp);
}
return;
}
// Parses "desc" as a serialized google.protobuf.FileDescriptorSet and adds
// the defs it describes to the symbol table. Any decoding or definition
// error is reported through "status".
void upb_symtab_add_desc(upb_symtab *s, upb_strptr desc, upb_status *status)
{
  upb_msg *fds = upb_msg_new(s->fds_msgdef);
  upb_msg_decodestr(fds, s->fds_msgdef, desc, status);
  if(upb_ok(status)) {
    upb_symtab_addfds(s, (google_protobuf_FileDescriptorSet*)fds, status);
  }
  // Release the temporary message on every path, including decode failure.
  upb_msg_unref(fds, s->fds_msgdef);
}

@ -1,73 +0,0 @@
/*
* upb - a minimalist implementation of protocol buffers.
*
* Implements a upb_sink that writes protobuf data to the binary wire format.
*
* For messages that have any submessages, the encoder needs a buffer
* containing the submessage sizes, so they can be properly written at the
* front of each message. Note that groups do *not* have this requirement.
*
* Copyright (c) 2009-2010 Joshua Haberman. See LICENSE for details.
*/
#ifndef UPB_ENCODER_H_
#define UPB_ENCODER_H_
#include "upb.h"
#include "upb_sink.h"
#ifdef __cplusplus
extern "C" {
#endif
/* upb_sizebuilder ************************************************************/
// A upb_sizebuilder performs a pre-pass on data to be serialized that gathers
// the sizes of submessages. This size data is required for serialization,
// because we have to know at the beginning of a submessage how many encoded
// bytes the submessage will represent.
struct upb_sizebuilder;
typedef struct upb_sizebuilder upb_sizebuilder;
// Creates, frees, and resets (for reuse) a sizebuilder for messages of type
// "md".
upb_sizebuilder *upb_sizebuilder_new(upb_msgdef *md);
void upb_sizebuilder_free(upb_sizebuilder *sb);
void upb_sizebuilder_reset(upb_sizebuilder *sb);
// Returns a sink that must be used to perform the pre-pass. Note that the
// pre-pass *must* occur in the opposite order from the actual encode that
// follows, and the data *must* be identical both times (except for the
// reversed order).
upb_sink *upb_sizebuilder_sink(upb_sizebuilder *sb);
/* upb_encoder ****************************************************************/
// A upb_encoder is a upb_sink that emits data to a upb_bytesink in the protocol
// buffer binary wire format.
struct upb_encoder;
typedef struct upb_encoder upb_encoder;
// Creates and frees an encoder for messages of type "md".
upb_encoder *upb_encoder_new(upb_msgdef *md);
void upb_encoder_free(upb_encoder *e);
// Resets the given upb_encoder such that it is ready to begin encoding. The
// upb_sizebuilder "sb" is used to determine submessage sizes; it must have
// previously been initialized by feeding it the same data in reverse order.
// "sb" may be null if and only if the data contains no submessages; groups
// are ok and do not require sizes to be precalculated. The upb_bytesink
// "out" is where the encoded output data will be sent.
//
// Both "sb" and "out" must live until the encoder is either reset or freed.
void upb_encoder_reset(upb_encoder *e, upb_sizebuilder *sb, upb_bytesink *out);
// The upb_sink to which data can be sent to be encoded. Note that this data
// must be identical to the data that was previously given to the sizebuilder
// (if any).
upb_sink *upb_encoder_sink(upb_encoder *e);
#ifdef __cplusplus
} /* extern "C" */
#endif
#endif /* UPB_ENCODER_H_ */

@ -1,20 +0,0 @@
/*
* upb - a minimalist implementation of protocol buffers.
*
* This file, if compiled, will contain standalone (non-inlined) versions of
* all inline functions defined in header files. We don't generally use this
* file since we use "static inline" for inline functions (which will put a
* standalone version of the function in any .o file that needs it), but
* compiling this file and dumping the object file will let us inspect how
* inline functions are compiled, so we keep it around.
*
* Copyright (c) 2009 Joshua Haberman. See LICENSE for details.
*/
#define INLINE
#include "upb.h"
#include "upb_data.h"
#include "upb_def.h"
#include "upb_parse.h"
#include "upb_table.h"
#include "upb_text.h"

@ -1,155 +0,0 @@
/*
* upb - a minimalist implementation of protocol buffers.
*
* Copyright (c) 2010 Joshua Haberman. See LICENSE for details.
*
* upb_sink is a general purpose interface for pushing the contents of a
* protobuf from one component to another in a streaming fashion. We call the
* component that calls a upb_sink a "source". By "pushing" we mean that the
* source calls into the sink; the opposite (where a sink calls into the
* source) is known as "pull". In the push model the source gets the main
* loop; in a pull model the sink does.
*
* This interface is used as general-purpose glue in upb. For example, the
* parser interface works by implementing a source. Likewise the serialization
* simply implements a sink. Copying one protobuf to another is just a matter
* of using one message as a source and another as a sink.
*
* In terms of efficiency, we would generally expect "push" to be faster if the
* source had more state to track, and "pull" to be faster if the sink had more
* state. The reason is that whoever has the main loop can keep state on the
* stack (and possibly even in callee-save registers), whereas the
* component that is "called into" always needs to reload its state from
* memory.
*
* In terms of programming complexity, it is easier and simpler to have the
* main loop, because you can store state in local variables.
*
* So the assumption inherent in using the push model is that sources are
* generally more complicated and stateful than consumers. For example, in the
* parser case, it has to deal with malformed input and associated errors; in
* comparison, the serializer deals with known-good input.
*/
#ifndef UPB_SINK_H
#define UPB_SINK_H
#include "upb_def.h"
#ifdef __cplusplus
extern "C" {
#endif
// Each of the upb_sink callbacks returns a status of this type.
typedef enum {
// The normal case, where the consumer wants to continue consuming.
UPB_SINK_CONTINUE,
// The sink did not consume this value, and wants to halt further processing.
// If the source is resumable, it should save the current state so that when
// resumed, the value that was just provided will be replayed.
UPB_SINK_STOP,
// The consumer wants to skip to the end of the current submessage and
// continue consuming. If we are at the top-level, the rest of the
// data is discarded.
UPB_SINK_SKIP
} upb_sink_status;
// A sink instance. It holds only the table of callbacks that sources invoke
// through the upb_sink_on*() macros.
typedef struct {
struct upb_sink_callbacks *vtbl;
} upb_sink;
/* upb_sink callbacks *********************************************************/
// The value callback is called for a regular value (i.e. not a string or
// submessage).
typedef upb_sink_status (*upb_value_cb)(upb_sink *s, upb_fielddef *f,
upb_value val, upb_status *status);
// The string callback is called for string data. "str" is the string in which
// the data lives, but it may contain more data than the effective string.
// "start" and "end" indicate the substring of "str" that is the effective
// string. If "start" is <0, this string is a continuation of the previous
// string for this field. If end > upb_strlen(str) then there is more data to
// follow for this string. "end" can also be used as a hint for how much data
// follows, but this is only a hint and is not guaranteed.
//
// The data is supplied this way to give you the opportunity to reference this
// data instead of copying it (perhaps using upb_strslice), or to minimize
// copying if it is unavoidable.
typedef upb_sink_status (*upb_str_cb)(upb_sink *s, upb_fielddef *f,
upb_strptr str,
int32_t start, uint32_t end,
upb_status *status);
// The start and end callbacks are called when a submessage begins and ends,
// respectively. "f" is the submessage field. The caller is responsible for
// ensuring that the nesting level never exceeds UPB_MAX_NESTING.
typedef upb_sink_status (*upb_start_cb)(upb_sink *s, upb_fielddef *f,
upb_status *status);
typedef upb_sink_status (*upb_end_cb)(upb_sink *s, upb_fielddef *f,
upb_status *status);
/* upb_sink implementation ****************************************************/
// The table of callbacks ("vtbl") that defines a sink's behavior: one entry
// per kind of event a source can push.
typedef struct upb_sink_callbacks {
upb_value_cb value_cb;
upb_str_cb str_cb;
upb_start_cb start_cb;
upb_end_cb end_cb;
} upb_sink_callbacks;
// These macros implement a mini virtual function dispatch for upb_sink
// instances.  This allows functions that call upb_sinks to just write:
//
//   upb_sink_onvalue(sink, field, val, status);
//
// The macro will handle the virtual function lookup and dispatch.  We could
// potentially define these later to also be capable of calling a C++ virtual
// method instead of doing the virtual dispatch manually.  This would make it
// possible to write C++ sinks in a more natural style without loss of
// efficiency.  We could have a flag in upb_sink defining whether it is a C
// sink or a C++ one.
//
// Every argument is parenthesized so that arbitrary expressions can be passed
// safely.  Note that the sink argument "s" is evaluated twice, so it must not
// have side effects.
#define upb_sink_onvalue(s, f, val, status) \
  ((s)->vtbl->value_cb((s), (f), (val), (status)))
#define upb_sink_onstr(s, f, str, start, end, status) \
  ((s)->vtbl->str_cb((s), (f), (str), (start), (end), (status)))
#define upb_sink_onstart(s, f, status) \
  ((s)->vtbl->start_cb((s), (f), (status)))
#define upb_sink_onend(s, f, status) \
  ((s)->vtbl->end_cb((s), (f), (status)))
// Initializes a plain C sink with the given vtbl. The sink must have been
// allocated separately.
INLINE void upb_sink_init(upb_sink *s, upb_sink_callbacks *vtbl) {
s->vtbl = vtbl;
}
/* upb_bytesink ***************************************************************/
// A upb_bytesink is like a upb_sink, but for bytes instead of structured
// protobuf data. Parsers implement upb_bytesink and push to a upb_sink,
// serializers do the opposite (implement upb_sink and push to upb_bytesink).
//
// The two simplest kinds of sinks are "write to string" and "write to FILE*".
// A forward declaration solely for the benefit of declaring upb_byte_cb below.
// Always prefer upb_bytesink (without the "struct" keyword) instead.
struct _upb_bytesink;
// The single bytesink callback; it takes the bytes to be written and returns
// how many were successfully written. If the return value is <0, the caller
// should stop processing.
typedef int32_t (*upb_byte_cb)(struct _upb_bytesink *s, upb_strptr str,
uint32_t start, uint32_t end,
upb_status *status);
typedef struct _upb_bytesink {
// NOTE(review): upb_byte_cb is already a function-pointer type, so this
// member is a pointer to a function pointer -- confirm this extra level of
// indirection is intended.
upb_byte_cb *cb;
} upb_bytesink;
#ifdef __cplusplus
} /* extern "C" */
#endif
#endif

@ -1,165 +0,0 @@
/*
* upb - a minimalist implementation of protocol buffers.
*
* Copyright (c) 2010 Joshua Haberman. See LICENSE for details.
*
* This file defines a simple string type. The overriding goal of upb_string
* is to avoid memcpy(), malloc(), and free() wherever possible, while
* keeping both CPU and memory overhead low. Throughout upb there are
* situations where one wants to reference all or part of another string
* without copying. upb_string provides APIs for doing this.
*
* Characteristics of upb_string:
* - strings are reference-counted.
* - strings are logically immutable.
* - if a string has no other referents, it can be "recycled" into a new string
* without having to reallocate the upb_string.
* - strings can be substrings of other strings (owning a ref on the source
* string).
* - strings can refer to memory that they do not own, in which case we avoid
* copies if possible (the exact strategy for doing this can vary).
* - strings are not thread-safe by default, but can be made so by calling a
* function. This is not the default because it causes extra CPU overhead.
*/
#ifndef UPB_STRING_H
#define UPB_STRING_H
#include <assert.h>
#include <string.h>
#include "upb_atomic.h"
#ifdef __cplusplus
extern "C" {
#endif
// All members of this struct are private, and may only be read/written through
// the associated functions.  Also, strings may *only* be allocated on the heap.
typedef struct _upb_string {
  char *ptr;      // The string data.
  uint32_t len;   // Length of the string, in bytes.
  uint32_t size;  // NOTE(review): presumably the allocated size of ptr -- confirm.
  upb_atomic_refcount_t refcount;
  union {
    // Used if this is a slice of another string.
    struct _upb_string *src;
    // Used if this string is referencing external unowned memory.
    upb_atomic_refcount_t reader_count;
  } extra;
} upb_string;
// Returns a newly-created, empty, non-finalized string. When the string is no
// longer needed, it should be unref'd, never freed directly.
upb_string *upb_string_new();
// Releases a ref on the given string, which may free the memory.
void upb_string_unref(upb_string *str);
// Returns a string with the same contents as "str". The caller owns a ref on
// the returned string, which may or may not be the same object as "str".
upb_string *upb_string_getref(upb_string *str);
// Returns the length of the string.
INLINE upb_strlen_t upb_string_len(upb_string *str) { return str->len; }
// Use to read the bytes of the string. The caller *must* call
// upb_string_endread() after the data has been read. The window between
// upb_string_getrobuf() and upb_string_endread() should be kept as short
// as possible. No other functions may be called on the string during this
// window except upb_string_len().
INLINE const char *upb_string_getrobuf(upb_string *str) { return str->ptr; }
INLINE void upb_string_endread(upb_string *str);
// Attempts to recycle the string "str" so it may be reused and have different
// data written to it. The returned string is either "str" if it could be
// recycled or a newly created string if "str" has other references.
upb_string *upb_string_tryrecycle(upb_string *str);
// The three options for setting the contents of a string. These may only be
// called when a string is first created or recycled; once other functions have
// been called on the string, these functions are not allowed until the string
// is recycled.
// Gets a pointer suitable for writing to the string, which is guaranteed to
// have at least "len" bytes of data available. The size of the string will
// become "len".
char *upb_string_getrwbuf(upb_string *str, upb_strlen_t len);
// Sets the contents of "str" to be the given substring of "target_str", to
// which the caller must own a ref.
void upb_string_substr(upb_string *str, upb_string *target_str,
upb_strlen_t start, upb_strlen_t len);
// Makes the string "str" a reference to the given string data. The caller
// guarantees that the given string data will not change or be deleted until
// a matching call to upb_string_detach().
void upb_string_attach(upb_string *str, char *ptr, upb_strlen_t len);
// Ends the attachment started by upb_string_attach(); after this call the
// caller is free to change or delete the attached data.
void upb_string_detach(upb_string *str);
/* upb_string library functions ***********************************************/
// Named like their <string.h> counterparts, these are all safe against buffer
// overflow. These only use the public upb_string interface.
// Byte-wise equality test.  Cheaper than upb_strcmp() when ordering is not
// needed, because strings of different length are rejected without looking at
// their contents.
INLINE bool upb_streql(upb_string *s1, upb_string *s2) {
  upb_strlen_t nbytes = upb_string_len(s1);
  if (nbytes != upb_string_len(s2)) return false;

  const char *lhs = upb_string_getrobuf(s1);
  const char *rhs = upb_string_getrobuf(s2);
  bool equal = (memcmp(lhs, rhs, nbytes) == 0);
  // Close both read windows before returning.
  upb_string_endread(s1);
  upb_string_endread(s2);
  return equal;
}
// Like strcmp().
int upb_strcmp(upb_string *s1, upb_string *s2);
// Replaces the contents of "dest" with the "len" bytes starting at "src".
INLINE void upb_strcpylen(upb_string *dest, const void *src, upb_strlen_t len) {
  char *out = upb_string_getrwbuf(dest, len);
  memcpy(out, src, len);
}
// Replaces the contents of "dest" with the contents of "src".
INLINE void upb_strcpy(upb_string *dest, upb_string *src) {
  // The length accessor declared by this header is upb_string_len(); the
  // previous call to upb_strlen() referred to a name this header does not
  // provide.
  upb_strcpylen(dest, upb_string_getrobuf(src), upb_string_len(src));
  upb_string_endread(src);
}
// Like upb_strcpy, but the source is a NUL-terminated C string.
INLINE void upb_strcpyc(upb_string *dest, const char *src) {
  // Measuring first costs a second pass over src, but lets dest be sized
  // exactly once instead of being re-allocated as it grows.
  size_t nbytes = strlen(src);
  upb_strcpylen(dest, src, nbytes);
}
// Returns a new string whose contents are a copy of s.
upb_string *upb_strdup(upb_string *s);
// Creates a new string holding a copy of the "len" bytes at "src".  As with
// upb_string_new(), the result should be unref'd when no longer needed.
INLINE upb_string *upb_strduplen(const void *src, upb_strlen_t len) {
  upb_string *copy = upb_string_new();
  upb_strcpylen(copy, src, len);
  return copy;
}
// Like upb_strdup(), but duplicates a C NULL-terminated string.
upb_string *upb_strdupc(const char *src);
// Appends 'append' to 's' in-place, resizing s if necessary.
void upb_strcat(upb_string *s, upb_string *append);
// Returns a new string that is a substring of the given string.
upb_string *upb_strslice(upb_string *s, int offset, int len);
// Reads an entire file into a newly-allocated string.
upb_string *upb_strreadfile(const char *filename);
#ifdef __cplusplus
} /* extern "C" */
#endif
#endif

@ -1,121 +0,0 @@
/*
* upb - a minimalist implementation of protocol buffers.
*
* Copyright (c) 2009 Joshua Haberman. See LICENSE for details.
*/
#include <inttypes.h>
#include "descriptor.h"
#include "upb_text.h"
#include "upb_data.h"
// Writes "val", interpreted according to the field type "type", to "file" in
// text form.  Types not listed in the switch produce no output.
void upb_text_printval(upb_field_type_t type, upb_value val, FILE *file)
{
#define CASE(fmtstr, member) fprintf(file, fmtstr, val.member); break;
  switch(type) {
    // "%0.f" meant zero-pad flag + precision 0, which silently dropped the
    // fractional part (3.75 printed as "4").  Print DBL_DIG / FLT_DIG
    // significant digits instead.
    case UPB_TYPE(DOUBLE):
      CASE("%.15g", _double);
    case UPB_TYPE(FLOAT):
      CASE("%.6g", _float)
    case UPB_TYPE(INT64):
    case UPB_TYPE(SFIXED64):
    case UPB_TYPE(SINT64):
      CASE("%" PRId64, int64)
    case UPB_TYPE(UINT64):
    case UPB_TYPE(FIXED64):
      CASE("%" PRIu64, uint64)
    case UPB_TYPE(INT32):
    case UPB_TYPE(SFIXED32):
    case UPB_TYPE(SINT32):
      CASE("%" PRId32, int32)
    case UPB_TYPE(UINT32):
    case UPB_TYPE(FIXED32):
    case UPB_TYPE(ENUM):
      CASE("%" PRIu32, uint32);
    case UPB_TYPE(BOOL):
      CASE("%hhu", _bool);
    case UPB_TYPE(STRING):
    case UPB_TYPE(BYTES):
      /* TODO: escaping. */
      fprintf(file, "\"" UPB_STRFMT "\"", UPB_STRARG(val.str)); break;
  }
}
// Emits one indent unit per nesting level; a no-op in single-line mode.
static void print_indent(upb_text_printer *p, FILE *stream)
{
  if(p->single_line) return;
  int level = 0;
  while(level < p->indent_depth) {
    fprintf(stream, " ");
    level++;
  }
}
// Prints one "name:value" pair, indented to the printer's current depth and
// terminated by a space (single-line mode) or a newline.
void upb_text_printfield(upb_text_printer *p, upb_strptr name,
                         upb_field_type_t valtype, upb_value val,
                         FILE *stream)
{
  print_indent(p, stream);
  fprintf(stream, UPB_STRFMT ":", UPB_STRARG(name));
  upb_text_printval(valtype, val, stream);
  int terminator = p->single_line ? ' ' : '\n';
  fputc(terminator, stream);
}
// Opens a submessage: prints "<submsg_type> {" and increases the indent depth
// for everything printed until the matching upb_text_pop().
void upb_text_push(upb_text_printer *p, upb_strptr submsg_type, FILE *stream)
{
  print_indent(p, stream);
  fprintf(stream, UPB_STRFMT " {", UPB_STRARG(submsg_type));
  if(p->single_line == false) {
    fputc('\n', stream);
  }
  p->indent_depth += 1;
}
// Closes a submessage opened by upb_text_push(): restores the previous indent
// depth and prints the closing brace followed by a newline.
// NOTE(review): the newline is emitted even in single-line mode, unlike
// upb_text_push() -- confirm whether that is intended.
void upb_text_pop(upb_text_printer *p, FILE *stream)
{
  p->indent_depth -= 1;
  print_indent(p, stream);
  fprintf(stream, "}\n");
}
static void printval(upb_text_printer *printer, upb_value v, upb_fielddef *f,
FILE *stream);
// Prints every field of "msg" that is present, in the order the fields appear
// in "md".  Array fields are printed one element at a time.
static void printmsg(upb_text_printer *printer, upb_msg *msg, upb_msgdef *md,
                     FILE *stream)
{
  for(upb_field_count_t idx = 0; idx < md->num_fields; idx++) {
    upb_fielddef *field = &md->fields[idx];
    if(upb_msg_has(msg, field)) {
      upb_value fieldval = upb_msg_get(msg, field);
      if(!upb_isarray(field)) {
        printval(printer, fieldval, field, stream);
        continue;
      }
      upb_arrayptr elems = fieldval.arr;
      uint32_t pos = 0;
      while(pos < upb_array_len(elems)) {
        upb_value elem = upb_array_get(elems, field, pos);
        printval(printer, elem, field, stream);
        pos++;
      }
    }
  }
}
// Prints a single value of field "f": scalars as "name:value", submessages as
// a braced block whose contents are printed recursively via printmsg().
static void printval(upb_text_printer *printer, upb_value v, upb_fielddef *f,
                     FILE *stream)
{
  if(!upb_issubmsg(f)) {
    upb_text_printfield(printer, f->name, f->type, v, stream);
    return;
  }
  upb_text_push(printer, f->name, stream);
  printmsg(printer, v.msg, upb_downcast_msgdef(f->def), stream);
  upb_text_pop(printer, stream);
}
// Prints "msg" (described by "md") to "stream" in text form, using a fresh
// printer that starts at indent depth zero.
void upb_msg_print(upb_msg *msg, upb_msgdef *md, bool single_line,
                   FILE *stream)
{
  upb_text_printer state;
  upb_text_printer_init(&state, single_line);
  printmsg(&state, msg, md, stream);
}

@ -1,36 +0,0 @@
/*
* upb - a minimalist implementation of protocol buffers.
*
* Copyright (c) 2009 Joshua Haberman. See LICENSE for details.
*/
#ifndef UPB_TEXT_H_
#define UPB_TEXT_H_
#include "upb.h"
#ifdef __cplusplus
extern "C" {
#endif
// Formatting state for the text printing functions declared below.
typedef struct {
// Current nesting level; reset to 0 by upb_text_printer_init().
int indent_depth;
// Copied from the "single_line" argument of upb_text_printer_init().
bool single_line;
} upb_text_printer;
// Prepares "p" for use: records the requested layout mode and starts at
// indent depth zero.
INLINE void upb_text_printer_init(upb_text_printer *p, bool single_line) {
  p->single_line = single_line;
  p->indent_depth = 0;
}
void upb_text_printval(upb_field_type_t type, upb_value p, FILE *file);
void upb_text_printfield(upb_text_printer *p, upb_strptr name,
upb_field_type_t valtype, upb_value val, FILE *stream);
void upb_text_push(upb_text_printer *p, upb_strptr submsg_type,
FILE *stream);
void upb_text_pop(upb_text_printer *p, FILE *stream);
#ifdef __cplusplus
} /* extern "C" */
#endif
#endif /* UPB_TEXT_H_ */

@ -0,0 +1,429 @@
/*
* upb - a minimalist implementation of protocol buffers.
*
* Copyright (c) 2008-2009 Joshua Haberman. See LICENSE for details.
*/
#include "upb_decoder.h"
#include <inttypes.h>
#include <stddef.h>
#include <stdlib.h>
#include "upb_def.h"
/* Pure Decoding **************************************************************/
// The key fast-path varint-decoding routine.  Here we can assume we have at
// least UPB_MAX_VARINT_ENCODED_SIZE bytes available.  There are a lot of
// possibilities for optimization/experimentation here.
//
// On success stores the decoded value in *val, advances *ptr past the varint
// and returns true.  If the continuation bit is still set after ten bytes an
// error is recorded in "status", *ptr is left untouched and false is returned.
//
// Byte k of a varint carries value bits [7k, 7k+6].  The fifth byte (bits
// 28..34) therefore straddles the two 32-bit halves: its low four bits land in
// "low" and its top three bits become bits 0..2 of "high".  Later bytes start
// at bit 35, i.e. bit 3 of "high".
INLINE bool upb_decode_varint_fast(const char **ptr, uint64_t *val,
                                   upb_status *status) {
  const char *p = *ptr;
  uint32_t low, high = 0;
  uint32_t b;
  b = *(p++); low   = (b & 0x7f)      ; if(!(b & 0x80)) goto done;
  b = *(p++); low  |= (b & 0x7f) <<  7; if(!(b & 0x80)) goto done;
  b = *(p++); low  |= (b & 0x7f) << 14; if(!(b & 0x80)) goto done;
  b = *(p++); low  |= (b & 0x7f) << 21; if(!(b & 0x80)) goto done;
  b = *(p++); low  |= (b & 0x7f) << 28;
              high  = (b & 0x7f) >>  4; if(!(b & 0x80)) goto done;
  b = *(p++); high |= (b & 0x7f) <<  3; if(!(b & 0x80)) goto done;
  b = *(p++); high |= (b & 0x7f) << 10; if(!(b & 0x80)) goto done;
  b = *(p++); high |= (b & 0x7f) << 17; if(!(b & 0x80)) goto done;
  b = *(p++); high |= (b & 0x7f) << 24; if(!(b & 0x80)) goto done;
  // Tenth byte: only its lowest bit (value bit 63) fits in 64 bits.
  b = *(p++); high |= (b & 0x7f) << 31; if(!(b & 0x80)) goto done;

  upb_seterr(status, UPB_ERROR, "Unterminated varint");
  return false;

done:
  *ptr = p;
  *val = ((uint64_t)high << 32) | low;
  return true;
}
/* Decoding/Buffering of individual values ************************************/
// Zig-zag decoding, used by sint32 and sint64: the lowest bit of the encoded
// value selects an all-zeros or all-ones mask that is XORed onto the remaining
// bits.
INLINE int32_t upb_zzdec_32(uint32_t n) {
  uint32_t magnitude = n >> 1;
  int32_t sign_mask = -(int32_t)(n & 1);
  return magnitude ^ sign_mask;
}
INLINE int64_t upb_zzdec_64(uint64_t n) {
  uint64_t magnitude = n >> 1;
  int64_t sign_mask = -(int64_t)(n & 1);
  return magnitude ^ sign_mask;
}
// The decoder keeps a stack with one entry per level of recursion.
// upb_decoder_frame is one frame of that stack.
typedef struct {
// Definition of the message being decoded at this level.
upb_msgdef *msgdef;
// Stream offset at which this (sub)message ends, or UPB_GROUP_END_OFFSET (0)
// when the end is not known in advance (groups and the top-level message).
size_t end_offset; // For groups, 0.
} upb_decoder_frame;
// Complete state of one decoder instance.
struct upb_decoder {
// Immutable state of the decoder.
// "src" must remain the first member: upb_decoder_run() and
// upb_decoder_sethandlers() cast the upb_src* they receive straight back to
// upb_decoder*.
upb_src src;
upb_dispatcher dispatcher;
// Source of input bytes; installed by upb_decoder_reset().
upb_bytesrc *bytesrc;
upb_msgdef *toplevel_msgdef;
upb_decoder_frame stack[UPB_MAX_NESTING];
// Mutable state of the decoder.
// Where we will store any errors that occur.
upb_status *status;
// Stack entries store the offset where the submsg ends (for groups, 0).
// "top" is the current frame; "limit" is set once, in upb_decoder_new(), to
// one past the last element of "stack".
upb_decoder_frame *top, *limit;
// Current input buffer.
upb_string *buf;
// The offset within the overall stream represented by the *beginning* of buf.
size_t buf_stream_offset;
};
// Hot decoding state.  upb_decoder_run() keeps one of these in a local
// variable and passes it by pointer to the helpers in this file.
typedef struct {
// Our current position in the data buffer.
const char *ptr;
// Pointer at which the current submessage ends.  It is compared directly
// against ptr by the main loop, so it is an absolute position rather than a
// distance.  Set to (void*)UINTPTR_MAX when the end is unknown (groups and the
// top-level message); see upb_dstate_setmsgend().
const char *submsg_end;
// Number of bytes available at ptr.
size_t len;
// Msgdef for the current level.
upb_msgdef *msgdef;
} upb_dstate;
// Constant used to signal that the submessage is a group and therefore we
// don't know its end offset. This cannot be the offset of a real submessage
// end because it takes at least one byte to begin a submessage.
#define UPB_GROUP_END_OFFSET 0
#define UPB_MAX_VARINT_ENCODED_SIZE 10
// Consumes "len" bytes of the current buffer: moves the read position forward
// and shrinks the count of remaining bytes to match.
INLINE void upb_dstate_advance(upb_dstate *s, size_t len) {
  s->len = s->len - len;
  s->ptr = s->ptr + len;
}
// Recomputes s->submsg_end from the frame on top of the decoder stack.  Frames
// without a known end (groups, top level) get a sentinel no real pointer can
// reach; otherwise the frame's stream offset is translated into a pointer
// relative to the start of the current buffer.
INLINE void upb_dstate_setmsgend(upb_decoder *d, upb_dstate *s) {
  if (d->top->end_offset == UPB_GROUP_END_OFFSET) {
    s->submsg_end = (void*)UINTPTR_MAX;
  } else {
    size_t offset_in_buf = d->top->end_offset - d->buf_stream_offset;
    s->submsg_end = upb_string_getrobuf(d->buf) + offset_in_buf;
  }
}
static upb_flow_t upb_pop(upb_decoder *d, upb_dstate *s);
// Called only from the slow path, this function copies the next "bytes_wanted"
// bytes from the stream to "data", adjusting the dstate appropriately.  When
// the current buffer runs dry a new one is fetched from the bytesrc, so the
// copy may span several buffers.
//
// Returns true once all requested bytes have been copied.  Returns false if
// the bytesrc could not supply more data; the bytesrc reports the reason
// (including EOF) in d->status.
static bool upb_getbuf(upb_decoder *d, void *data, size_t bytes_wanted,
                       upb_dstate *s) {
  // Write cursor into "data".  It has to advance with every partial copy;
  // otherwise a read that spans two buffers would overwrite the bytes already
  // copied from the first one.
  char *dest = (char*)data;
  while (1) {
    size_t to_copy = UPB_MIN(bytes_wanted, s->len);
    if (to_copy > 0) {
      memcpy(dest, s->ptr, to_copy);
      upb_dstate_advance(s, to_copy);
      dest += to_copy;
      bytes_wanted -= to_copy;
    }
    if (bytes_wanted == 0) {
      // The buffer may have changed, so the end-of-submessage pointer has to
      // be re-derived against it.
      upb_dstate_setmsgend(d, s);
      return true;
    }
    // Get next buffer.
    if (d->buf) d->buf_stream_offset += upb_string_len(d->buf);
    upb_string_recycle(&d->buf);
    if (!upb_bytesrc_getstr(d->bytesrc, d->buf, d->status)) return false;
    s->ptr = upb_string_getrobuf(d->buf);
    s->len = upb_string_len(d->buf);
  }
}
// We use this path when we don't have UPB_MAX_VARINT_ENCODED_SIZE contiguous
// bytes available in our current buffer.  We don't inline this because we
// accept that it will be slow and we don't want to pay for two copies of it.
//
// Reads the varint one byte at a time through upb_getbuf().  Returns true and
// stores the raw value in *val on success.  Returns false when:
//   - the input ends before the first byte (d->status is left as the EOF
//     reported by the bytesrc, so callers can detect a clean end of stream),
//   - the input ends in the middle of the varint (reported as UPB_ERROR),
//   - the bytesrc fails for any other reason (its status is left untouched),
//   - the continuation bit is still set after ten bytes.
static bool upb_decode_varint_slow(upb_decoder *d, upb_dstate *s,
                                   upb_value *val) {
  unsigned char byte = 0x80;
  uint64_t val64 = 0;
  int bitpos = 0;
  while (bitpos < 70 && (byte & 0x80)) {
    if (!upb_getbuf(d, &byte, 1, s)) {
      if (d->status->code == UPB_EOF && bitpos > 0) {
        upb_seterr(d->status, UPB_ERROR,
                   "Provided data ended in the middle of a varint.\n");
      }
      // Regular EOF (bitpos == 0) and non-EOF read errors keep the status
      // that the bytesrc reported.
      return false;
    }
    val64 |= ((uint64_t)byte & 0x7F) << bitpos;
    bitpos += 7;
  }
  // Only complain when the tenth byte still asks for more; a varint that
  // terminates exactly on its tenth byte is valid.
  if (byte & 0x80) {
    upb_seterr(d->status, UPB_ERROR,
               "Varint was unterminated after 10 bytes.\n");
    return false;
  }
  // Success.
  upb_value_setraw(val, val64);
  return true;
}
// A decoded field tag, as produced by upb_decode_tag().
typedef struct {
// Low three bits of the tag varint.
upb_wire_type_t wire_type;
// Remaining bits of the tag varint (tag >> 3).
upb_field_number_t field_number;
} upb_tag;
// Decodes the next field tag from the input into *tag and consumes it.
// Returns false if the tag could not be read; in that case the reason (EOF or
// an error) has been recorded in d->status by upb_decode_varint_slow().
//
// Tags of one or two bytes are decoded inline when at least two bytes are
// buffered.  Everything else, including tags split across buffers, is handed
// to the general slow-path varint decoder.
INLINE bool upb_decode_tag(upb_decoder *d, upb_dstate *s, upb_tag *tag) {
const char *p = s->ptr;
uint32_t tag_int;
upb_value val;
// Nearly all tag varints will be either 1 byte (field numbers 1-15) or
// 2 bytes (field numbers 16-2047).
if (s->len < 2) goto slow; // unlikely.
tag_int = *p & 0x7f;
if ((*(p++) & 0x80) == 0) goto done; // predictable if fields are in order
tag_int |= (*p & 0x7f) << 7;
if ((*(p++) & 0x80) == 0) goto done; // likely
slow:
// Decode a full varint starting over from ptr.  s->ptr has not been moved by
// the fast path above (only the local "p" was), so the slow decoder sees the
// tag from its first byte.
if (!upb_decode_varint_slow(d, s, &val)) return false;
tag_int = upb_value_getint64(val);
// The slow decoder already consumed the tag, so make the advance below a
// no-op by pointing p at the current position.
p = s->ptr; // Trick the next line into not overwriting us.
done:
upb_dstate_advance(s, p - s->ptr);
tag->wire_type = (upb_wire_type_t)(tag_int & 0x07);
tag->field_number = tag_int >> 3;
return true;
}
// Decodes one varint into *val, choosing between the unrolled fast decoder
// (when a maximum-length varint is guaranteed to fit in the bytes already
// buffered) and the byte-at-a-time slow decoder.
INLINE bool upb_decode_varint(upb_decoder *d, upb_dstate *s, upb_value *val) {
  if (s->len < UPB_MAX_VARINT_ENCODED_SIZE) {
    // Not enough contiguous data for the fast decoder.
    return upb_decode_varint_slow(d, s, val);
  }
  // Common (fast) case.
  const char *cursor = s->ptr;
  uint64_t decoded;
  if (!upb_decode_varint_fast(&cursor, &decoded, d->status)) return false;
  upb_dstate_advance(s, cursor - s->ptr);
  upb_value_setraw(val, decoded);
  return true;
}
// Decodes a fixed-width value (wire type 32BIT or 64BIT) into *val.  The bytes
// are copied verbatim, i.e. in host byte order, as before.
// Returns false if the input could not supply enough bytes; the reason is left
// in d->status by upb_getbuf().
INLINE bool upb_decode_fixed(upb_decoder *d, upb_wire_type_t wt,
                             upb_dstate *s, upb_value *val) {
  // Value size in bytes, indexed by wire type (1 = 64-bit, 5 = 32-bit).
  static const char table[] = {0, 8, 0, 0, 0, 4};
  size_t bytes = table[wt];
  // Stage the bytes in a zeroed 64-bit scratch value.  The previous code
  // passed "&val" (the address of the local pointer variable) as the copy
  // destination, which clobbered the pointer and never filled in *val.
  uint64_t raw = 0;
  if (s->len >= bytes) {
    // Common (fast) case.
    memcpy(&raw, s->ptr, bytes);
    upb_dstate_advance(s, bytes);
  } else {
    if (!upb_getbuf(d, &raw, bytes, s)) return false;
  }
  upb_value_setraw(val, raw);
  return true;
}
// On entry "val" holds the byte length of a delimited value; on success it is
// replaced by a string with that value's contents.  "*str" is recycled and
// reused as the result string.  When the whole value is already buffered the
// result is a substring of the input buffer; otherwise the bytes are copied
// out across buffer boundaries.
INLINE bool upb_decode_string(upb_decoder *d, upb_value *val, upb_string **str,
                              upb_dstate *s) {
  upb_string_recycle(str);
  uint32_t nbytes = upb_value_getint32(*val);
  if (s->len < nbytes) {
    // Slow case: the value spills past the current buffer.
    char *dest = upb_string_getrwbuf(*str, nbytes);
    if (!upb_getbuf(d, dest, nbytes, s)) return false;
  } else {
    // Common (fast) case.
    upb_string_substr(*str, d->buf, s->ptr - upb_string_getrobuf(d->buf), nbytes);
    upb_dstate_advance(s, nbytes);
  }
  upb_value_setstr(val, *str);
  return true;
}
/* The main decoding loop *****************************************************/
extern upb_wire_type_t upb_expected_wire_types[];
// Returns true if "wt" is the wire type that field type "ft" is natively
// encoded with, according to the upb_types table.
INLINE bool upb_check_type(upb_wire_type_t wt, upb_fieldtype_t ft) {
  // This doesn't currently support packed arrays.
  bool matches = (wt == upb_types[ft].native_wire_type);
  return matches;
}
// Enters the submessage or group described by field "f": pushes a new frame on
// the decoder stack, records where the submessage ends and notifies the
// handlers.  For length-delimited submessages "submsg_len" holds the length
// that was just decoded; for groups it is ignored.
//
// Returns the flow value from the start-submessage handler, or UPB_BREAK (with
// an error recorded in d->status) if the nesting limit would be exceeded.
static upb_flow_t upb_push(upb_decoder *d, upb_dstate *s, upb_fielddef *f,
                           upb_value submsg_len, upb_fieldtype_t type) {
  // Check before moving "top" so a failed push leaves the stack untouched, and
  // report it with a upb_flow_t value rather than the status code UPB_ERROR.
  if(d->top + 1 >= d->limit) {
    upb_seterr(d->status, UPB_ERROR, "Nesting too deep.");
    return UPB_BREAK;
  }
  d->top++;
  d->top->end_offset = (type == UPB_TYPE(GROUP)) ?
      UPB_GROUP_END_OFFSET :
      d->buf_stream_offset + (s->ptr - upb_string_getrobuf(d->buf)) +
          upb_value_getint32(submsg_len);
  d->top->msgdef = upb_downcast_msgdef(f->def);
  upb_dstate_setmsgend(d, s);
  return upb_dispatch_startsubmsg(&d->dispatcher, f);
}
// Leaves the current submessage: drops the top stack frame, restores the
// end-of-message pointer of the enclosing frame and notifies the handlers.
static upb_flow_t upb_pop(upb_decoder *d, upb_dstate *s) {
  --d->top;
  upb_dstate_setmsgend(d, s);
  upb_flow_t flow = upb_dispatch_endsubmsg(&d->dispatcher);
  return flow;
}
// Runs the decoder until the input is exhausted or an error occurs: reads
// tag/value pairs from the bytesrc and dispatches them to the registered
// handlers.  A clean end of input at the top level ends the message and leaves
// "status" cleared; every other exit leaves an error in "status".
void upb_decoder_run(upb_src *src, upb_status *status) {
  upb_decoder *d = (upb_decoder*)src;
  d->status = status;
  // We put our dstate on the stack so the compiler knows they can't be changed
  // by external code (like when we dispatch a callback).  We must be sure not
  // to let its address escape this source file.
  upb_dstate state = {NULL, (void*)0x1, 0, d->top->msgdef};
  upb_string *str = NULL;

  // TODO: handle UPB_SKIPSUBMSG
#define CHECK_FLOW(expr) if ((expr) != UPB_CONTINUE) goto err
#define CHECK(expr) if (!(expr)) goto err;

  CHECK_FLOW(upb_dispatch_startmsg(&d->dispatcher));

  // Main loop: executed once per tag/field pair.
  while(1) {
    // Check for end-of-submessage.
    while (state.ptr >= state.submsg_end) {
      if (state.ptr > state.submsg_end) {
        upb_seterr(d->status, UPB_ERROR, "Bad submessage end.");
        goto err;
      }
      CHECK_FLOW(upb_pop(d, &state));
    }

    // Parse/handle tag.
    upb_tag tag;
    if (!upb_decode_tag(d, &state, &tag)) {
      if (status->code == UPB_EOF && d->top == d->stack) {
        // Normal end-of-file.
        upb_clearerr(status);
        CHECK_FLOW(upb_dispatch_endmsg(&d->dispatcher));
        upb_string_unref(str);
        return;
      } else {
        if (status->code == UPB_EOF) {
          upb_seterr(status, UPB_ERROR,
                     "Input ended in the middle of a submessage.");
        }
        goto err;
      }
    }

    // Decode wire data.  Hopefully this branch will predict pretty well
    // since most types will read a varint here.
    upb_value val;
    switch (tag.wire_type) {
      case UPB_WIRE_TYPE_START_GROUP:
        break;  // Nothing to do now, below we will push appropriately.
      case UPB_WIRE_TYPE_END_GROUP:
        // The top-level frame shares the group sentinel, so it must be
        // excluded explicitly; otherwise a stray END_GROUP at top level would
        // pop below the bottom of the stack.
        if(d->top == d->stack ||
           d->top->end_offset != UPB_GROUP_END_OFFSET) {
          upb_seterr(status, UPB_ERROR, "Unexpected END_GROUP tag.");
          goto err;
        }
        CHECK_FLOW(upb_pop(d, &state));
        continue;  // We have no value to dispatch.
      case UPB_WIRE_TYPE_VARINT:
      case UPB_WIRE_TYPE_DELIMITED:
        // For the delimited case we are parsing the length.
        CHECK(upb_decode_varint(d, &state, &val));
        break;
      case UPB_WIRE_TYPE_32BIT:
      case UPB_WIRE_TYPE_64BIT:
        CHECK(upb_decode_fixed(d, tag.wire_type, &state, &val));
        break;
    }

    // Look up field by tag number.
    upb_fielddef *f = upb_msgdef_itof(d->top->msgdef, tag.field_number);

    if (!f) {
      if (tag.wire_type == UPB_WIRE_TYPE_DELIMITED) {
        CHECK(upb_decode_string(d, &val, &str, &state));
      }
      CHECK_FLOW(upb_dispatch_unknownval(&d->dispatcher, tag.field_number, val));
      // There is no fielddef, so none of the per-type handling below applies
      // (it would dereference the NULL "f").
      // TODO: unknown groups are not skipped; their contents are decoded
      // against the enclosing message.
      continue;
    } else if (!upb_check_type(tag.wire_type, f->type)) {
      // TODO: put more details in this error msg.
      upb_seterr(status, UPB_ERROR, "Field had incorrect type.");
      goto err;
    }

    // Perform any further massaging of the data now that we have the fielddef.
    // Now we can distinguish strings from submessages, and we know about
    // zig-zag-encoded types.
    // TODO: handle packed encoding.
    // TODO: if we were being paranoid, we could check for 32-bit-varint types
    // that the top 32 bits all match the highest bit of the low 32 bits.
    // If this is not true we are losing data.  But the main protobuf library
    // doesn't check this, and it would slow us down, so pass for now.
    switch (f->type) {
      case UPB_TYPE(MESSAGE):
      case UPB_TYPE(GROUP):
        CHECK_FLOW(upb_push(d, &state, f, val, f->type));
        continue;  // We have no value to dispatch.
      case UPB_TYPE(STRING):
      case UPB_TYPE(BYTES):
        CHECK(upb_decode_string(d, &val, &str, &state));
        break;
      case UPB_TYPE(SINT32):
        upb_value_setint32(&val, upb_zzdec_32(upb_value_getint32(val)));
        break;
      case UPB_TYPE(SINT64):
        upb_value_setint64(&val, upb_zzdec_64(upb_value_getint64(val)));
        break;
      default:
        break;  // Other types need no further processing at this point.
    }
    CHECK_FLOW(upb_dispatch_value(&d->dispatcher, f, val));
  }

err:
  upb_string_unref(str);
  // If nothing recorded an error we got here because a handler asked to stop.
  if (upb_ok(status)) {
    upb_seterr(status, UPB_ERROR, "Callback returned UPB_BREAK");
  }
}
// upb_src vtable entry: installs "handlers" on the dispatcher and rewinds the
// decoder to the bottom of its stack, at stream offset zero.
void upb_decoder_sethandlers(upb_src *src, upb_handlers *handlers) {
  upb_decoder *d = (upb_decoder*)src;
  upb_dispatcher_reset(&d->dispatcher, handlers);
  d->buf_stream_offset = 0;
  d->top = &d->stack[0];
  d->top->msgdef = d->toplevel_msgdef;
  // The top-level message is not delimited (we can keep receiving data for it
  // indefinitely), so we treat it like a group.
  d->top->end_offset = UPB_GROUP_END_OFFSET;
}
// Allocates a decoder for top-level messages of type "msgdef".  The decoder
// must be given a bytesrc with upb_decoder_reset() before it is used.
// Returns NULL if memory could not be allocated.
upb_decoder *upb_decoder_new(upb_msgdef *msgdef) {
  static upb_src_vtbl vtbl = {
    &upb_decoder_sethandlers,
    &upb_decoder_run,
  };
  upb_decoder *d = malloc(sizeof(*d));
  if (!d) return NULL;
  upb_src_init(&d->src, &vtbl);
  upb_dispatcher_init(&d->dispatcher);
  d->toplevel_msgdef = msgdef;
  // One past the last usable stack frame; upb_push() compares against it.
  d->limit = &d->stack[UPB_MAX_NESTING];
  d->buf = NULL;
  return d;
}
// Returns the decoder to its initial state, reading from "bytesrc": the stack
// is rewound to the top-level message, any buffered input is released and the
// stream position starts again from zero.
void upb_decoder_reset(upb_decoder *d, upb_bytesrc *bytesrc) {
  d->bytesrc = bytesrc;
  d->top = &d->stack[0];
  d->top->msgdef = d->toplevel_msgdef;
  // Never want to end top-level message, so treat it like a group.
  d->top->end_offset = UPB_GROUP_END_OFFSET;
  upb_string_unref(d->buf);
  d->buf = NULL;
  // The buffer is gone, so the offset of "the beginning of buf" must restart
  // too.  Previously it was left stale, or uninitialized after
  // upb_decoder_new().
  d->buf_stream_offset = 0;
}
// Releases the decoder and the input buffer it holds.  Passing NULL is a
// no-op, mirroring free().
void upb_decoder_free(upb_decoder *d) {
  if (!d) return;
  upb_string_unref(d->buf);
  free(d);
}
// Returns the upb_src interface embedded in the decoder.
upb_src *upb_decoder_src(upb_decoder *d) {
  upb_src *iface = &d->src;
  return iface;
}

@ -0,0 +1,53 @@
/*
* upb - a minimalist implementation of protocol buffers.
*
* upb_decoder implements a high performance, streaming decoder for protobuf
* data that works by implementing upb_src and getting its data from a
* upb_bytesrc.
*
* The decoder does not currently support non-blocking I/O, in the sense that
* if the bytesrc returns UPB_STATUS_TRYAGAIN it is not possible to resume the
* decoder when data becomes available again. Support for this could be added,
* but it would add complexity and perhaps cost efficiency also.
*
* Copyright (c) 2009-2010 Joshua Haberman. See LICENSE for details.
*/
#ifndef UPB_DECODER_H_
#define UPB_DECODER_H_
#include <stdbool.h>
#include <stdint.h>
#include "upb_def.h"
#include "upb_stream.h"
#ifdef __cplusplus
extern "C" {
#endif
/* upb_decoder *****************************************************************/
// A upb_decoder decodes the binary protocol buffer format, writing the data it
// decodes to a upb_sink.
struct upb_decoder;
typedef struct upb_decoder upb_decoder;
// Allocates and frees a upb_decoder, respectively.
upb_decoder *upb_decoder_new(upb_msgdef *md);
void upb_decoder_free(upb_decoder *d);
// Resets the internal state of an already-allocated decoder. This puts it in a
// state where it has not seen any data, and expects the next data to be from
// the beginning of a new protobuf. Parsers must be reset before they can be
// used. A decoder can be reset multiple times.
void upb_decoder_reset(upb_decoder *d, upb_bytesrc *bytesrc);
// Returns a upb_src pointer by which the decoder can be used. The returned
// upb_src is invalidated by upb_decoder_reset() or upb_decoder_free().
upb_src *upb_decoder_src(upb_decoder *d);
#ifdef __cplusplus
} /* extern "C" */
#endif
#endif /* UPB_DECODER_H_ */

@ -38,9 +38,6 @@ static size_t upb_f_uint32_t_size(uint32_t val) {
return sizeof(uint32_t);
}
// The biggest possible single value is a 10-byte varint.
#define UPB_MAX_ENCODED_SIZE 10
/* Functions to write wire values. ********************************************/

@ -0,0 +1,56 @@
/*
* upb - a minimalist implementation of protocol buffers.
*
* Implements a upb_sink that writes protobuf data to the binary wire format.
*
* For messages that have any submessages, the encoder needs a buffer
* containing the submessage sizes, so they can be properly written at the
* front of each message. Note that groups do *not* have this requirement.
*
* Copyright (c) 2009-2010 Joshua Haberman. See LICENSE for details.
*/
#ifndef UPB_ENCODER_H_
#define UPB_ENCODER_H_
#include "upb.h"
#include "upb_srcsink.h"
#ifdef __cplusplus
extern "C" {
#endif
/* upb_encoder ****************************************************************/
// A upb_encoder is a upb_sink that emits data to a upb_bytesink in the protocol
// buffer binary wire format.
struct upb_encoder;
typedef struct upb_encoder upb_encoder;
upb_encoder *upb_encoder_new(upb_msgdef *md);
void upb_encoder_free(upb_encoder *e);
// Resets the given upb_encoder such that it is ready to begin encoding,
// outputting data to "bytesink" (which must live until the encoder is
// reset or destroyed).
void upb_encoder_reset(upb_encoder *e, upb_bytesink *bytesink);
// Returns the upb_sink to which data can be written. The sink is invalidated
// when the encoder is reset or destroyed. Note that if the client wants to
// encode any length-delimited submessages it must first call
// upb_encoder_buildsizes() below.
upb_sink *upb_encoder_sink(upb_encoder *e);
// Call prior to pushing any data with embedded submessages. "src" must yield
// exactly the same data as what will next be encoded, but in reverse order.
// The encoder iterates over this data in order to determine the sizes of the
// submessages. If any errors are returned by the upb_src, the status will
// be saved in *status. If the client is sure that the upb_src will not throw
// any errors, "status" may be NULL.
void upb_encoder_buildsizes(upb_encoder *e, upb_src *src, upb_status *status);
#ifdef __cplusplus
} /* extern "C" */
#endif
#endif /* UPB_ENCODER_H_ */

@ -0,0 +1,104 @@
/*
* upb - a minimalist implementation of protocol buffers.
*
* Copyright (c) 2010 Joshua Haberman. See LICENSE for details.
*/
#include "upb_stdio.h"
#include <stddef.h>
#include <stdlib.h>
#include "upb_string.h"
// We can make this configurable if necessary.
#define BLOCK_SIZE 4096
// Adapts a stdio FILE* to the upb_bytesrc and upb_bytesink interfaces.
struct upb_stdio {
// Must stay the first member: the bytesrc methods cast the upb_bytesrc* they
// receive directly to upb_stdio*.
upb_bytesrc bytesrc;
// The bytesink methods recover the enclosing upb_stdio from this member's
// address using offsetof().
upb_bytesink bytesink;
// Stream used for all reads and writes; set by upb_stdio_reset().
FILE *file;
};
// Points the object at "file"; subsequent bytesrc/bytesink calls operate on
// that stream.
void upb_stdio_reset(upb_stdio *stdio, FILE* file) {
stdio->file = file;
}
/* upb_bytesrc methods ********************************************************/
// bytesrc "read" method: reads up to "count" bytes from the FILE* into "buf".
// Returns the number of bytes read.  A short read caused by end-of-file sets
// UPB_EOF in "status" and still returns the bytes obtained; a read error sets
// UPB_ERROR and returns -1.
static upb_strlen_t upb_stdio_read(upb_bytesrc *src, void *buf,
                                   upb_strlen_t count, upb_status *status) {
  upb_stdio *stdio = (upb_stdio*)src;
  assert(count > 0);
  size_t nread = fread(buf, 1, count, stdio->file);
  if(nread >= (size_t)count) return nread;

  // Short read: error or EOF.
  if(feof(stdio->file)) {
    upb_seterr(status, UPB_EOF, "");
  } else if(ferror(stdio->file)) {
    upb_seterr(status, UPB_ERROR, "Error reading from stdio stream.");
    return -1;
  }
  return nread;
}
// bytesrc "getstr" method: fills "str" with the next block of up to BLOCK_SIZE
// bytes from the FILE*.  Returns false when nothing could be read (EOF or
// error; see "status").
static bool upb_stdio_getstr(upb_bytesrc *src, upb_string *str,
                             upb_status *status) {
  char *block = upb_string_getrwbuf(str, BLOCK_SIZE);
  upb_strlen_t nread = upb_stdio_read(src, block, BLOCK_SIZE, status);
  if (nread <= 0) return false;
  // Second call sets the string's size to the number of bytes actually read.
  upb_string_getrwbuf(str, nread);
  return true;
}
/* upb_bytesink methods *******************************************************/
// bytesink "putstr" method: writes the whole of "str" to the FILE*.  Returns
// the number of bytes written, or -1 (with UPB_ERROR in "status") if fewer
// bytes than requested could be written.
upb_strlen_t upb_stdio_putstr(upb_bytesink *sink, upb_string *str, upb_status *status) {
  // Recover the enclosing object from the address of its bytesink member.
  upb_stdio *stdio = (upb_stdio*)((char*)sink - offsetof(upb_stdio, bytesink));
  upb_strlen_t wanted = upb_string_len(str);
  upb_strlen_t done = fwrite(upb_string_getrobuf(str), 1, wanted, stdio->file);
  if(done >= wanted) return done;
  upb_seterr(status, UPB_ERROR, "Error writing to stdio stream.");
  return -1;
}
// bytesink "vprintf" method: formats directly onto the FILE*.  Returns the
// value reported by vfprintf(), or -1 (with UPB_ERROR in "status") if that
// value is negative.
upb_strlen_t upb_stdio_vprintf(upb_bytesink *sink, upb_status *status,
                               const char *fmt, va_list args) {
  // Recover the enclosing object from the address of its bytesink member.
  upb_stdio *stdio = (upb_stdio*)((char*)sink - offsetof(upb_stdio, bytesink));
  upb_strlen_t nprinted = vfprintf(stdio->file, fmt, args);
  if (nprinted >= 0) return nprinted;
  upb_seterr(status, UPB_ERROR, "Error writing to stdio stream.");
  return -1;
}
upb_stdio *upb_stdio_new() {
static upb_bytesrc_vtbl bytesrc_vtbl = {
upb_stdio_read,
upb_stdio_getstr,
};
static upb_bytesink_vtbl bytesink_vtbl = {
NULL,
upb_stdio_putstr,
upb_stdio_vprintf
};
upb_stdio *stdio = malloc(sizeof(*stdio));
upb_bytesrc_init(&stdio->bytesrc, &bytesrc_vtbl);
upb_bytesink_init(&stdio->bytesink, &bytesink_vtbl);
return stdio;
}
// Frees the upb_stdio object itself.  The FILE* it was reset with is not
// touched here; no fclose() is performed.
void upb_stdio_free(upb_stdio *stdio) {
free(stdio);
}
// Accessors for the two interfaces embedded in a upb_stdio.
upb_bytesrc* upb_stdio_bytesrc(upb_stdio *stdio) {
  upb_bytesrc *iface = &stdio->bytesrc;
  return iface;
}
upb_bytesink* upb_stdio_bytesink(upb_stdio *stdio) {
  upb_bytesink *iface = &stdio->bytesink;
  return iface;
}

@ -0,0 +1,42 @@
/*
* upb - a minimalist implementation of protocol buffers.
*
* This file provides upb_bytesrc and upb_bytesink implementations for
* ANSI C stdio.
*
* Copyright (c) 2010 Joshua Haberman. See LICENSE for details.
*/
#include <stdio.h>
#include "upb_stream.h"
#ifndef UPB_STDIO_H_
#define UPB_STDIO_H_
#ifdef __cplusplus
extern "C" {
#endif
struct upb_stdio;
typedef struct upb_stdio upb_stdio;
// Creation/deletion.
upb_stdio *upb_stdio_new();
void upb_stdio_free(upb_stdio *stdio);
// Reset/initialize the object for use. The src or sink will call
// fread()/fwrite()/etc. on the given FILE*.
void upb_stdio_reset(upb_stdio *stdio, FILE* file);
// Gets a bytesrc or bytesink for the given stdio. The returned pointer is
// invalidated by upb_stdio_reset above. It is perfectly valid to get both
// a bytesrc and a bytesink for the same stdio if the FILE* is open for reading
// and writing.
upb_bytesrc* upb_stdio_bytesrc(upb_stdio *stdio);
upb_bytesink* upb_stdio_bytesink(upb_stdio *stdio);
#ifdef __cplusplus
} /* extern "C" */
#endif
#endif

@ -0,0 +1,71 @@
/*
* upb - a minimalist implementation of protocol buffers.
*
* Copyright (c) 2010 Joshua Haberman. See LICENSE for details.
*/
#include "upb_strstream.h"
#include <stdlib.h>
#include "upb_string.h"
// A upb_bytesrc that vends the contents of a upb_string.
struct upb_stringsrc {
// Must stay the first member: the bytesrc methods cast the upb_bytesrc* they
// receive directly to upb_stringsrc*.
upb_bytesrc bytesrc;
// String being vended (a ref is held), or NULL before the first reset.
upb_string *str;
// Number of bytes of "str" already handed out by upb_stringsrc_read().
upb_strlen_t offset;
};
// Makes the stringsrc vend "str" from its beginning.  A ref is taken on "str"
// and the ref on any previously attached string is released.
void upb_stringsrc_reset(upb_stringsrc *s, upb_string *str) {
  if (str != s->str) {
    if (s->str) upb_string_unref(s->str);
    s->str = upb_string_getref(str);
  }
  // Always rewind, even when the same string is attached again.  "offset" was
  // previously never set anywhere, so reads started from an indeterminate
  // position.
  s->offset = 0;
  s->bytesrc.eof = false;
}
// Releases the ref on the attached string (if any) and frees the stringsrc.
void upb_stringsrc_free(upb_stringsrc *s) {
  upb_string *attached = s->str;
  if (attached != NULL) {
    upb_string_unref(attached);
  }
  free(s);
}
// bytesrc "read" method: copies up to "count" not-yet-consumed bytes of the
// attached string into "buf" and returns how many were copied.  Once the
// string is exhausted, sets UPB_EOF in "status" and returns -1.
static upb_strlen_t upb_stringsrc_read(upb_bytesrc *_src, void *buf,
                                       upb_strlen_t count, upb_status *status) {
  upb_stringsrc *src = (upb_stringsrc*)_src;
  if (src->offset == upb_string_len(src->str)) {
    upb_seterr(status, UPB_EOF, "");
    return -1;
  }
  upb_strlen_t remaining = upb_string_len(src->str) - src->offset;
  upb_strlen_t ncopy = UPB_MIN(count, remaining);
  const char *from = upb_string_getrobuf(src->str) + src->offset;
  memcpy(buf, from, ncopy);
  src->offset += ncopy;
  return ncopy;
}
// bytesrc "getstr" method: makes "str" a substring covering everything in the
// attached string that has not been consumed yet, and marks it as consumed.
// Once nothing is left, sets UPB_EOF in "status" and returns false.
static bool upb_stringsrc_getstr(upb_bytesrc *_src, upb_string *str,
                                 upb_status *status) {
  upb_stringsrc *src = (upb_stringsrc*)_src;
  // Exhaustion is measured against the *source* string.  The previous code
  // compared with the length of the output string "str" and never advanced
  // "offset", so it kept returning the whole string and never reported EOF.
  upb_strlen_t total = upb_string_len(src->str);
  if (src->offset == total) {
    upb_seterr(status, UPB_EOF, "");
    return false;
  }
  upb_string_substr(str, src->str, src->offset, total - src->offset);
  src->offset = total;
  return true;
}
upb_stringsrc *upb_stringsrc_new() {
static upb_bytesrc_vtbl bytesrc_vtbl = {
upb_stringsrc_read,
upb_stringsrc_getstr,
};
upb_stringsrc *s = malloc(sizeof(*s));
s->str = NULL;
upb_bytesrc_init(&s->bytesrc, &bytesrc_vtbl);
return s;
}
// Returns the upb_bytesrc interface embedded in the stringsrc.
upb_bytesrc *upb_stringsrc_bytesrc(upb_stringsrc *s) {
  upb_bytesrc *iface = &s->bytesrc;
  return iface;
}

@ -0,0 +1,61 @@
/*
* upb - a minimalist implementation of protocol buffers.
*
* This file contains upb_bytesrc and upb_bytesink implementations for
* upb_string.
*
* Copyright (c) 2009-2010 Joshua Haberman. See LICENSE for details.
*/
#ifndef UPB_STRSTREAM_H
#define UPB_STRSTREAM_H
#include "upb_stream.h"
#ifdef __cplusplus
extern "C" {
#endif
/* upb_stringsrc **************************************************************/
struct upb_stringsrc;
typedef struct upb_stringsrc upb_stringsrc;
// Create/free a stringsrc.
upb_stringsrc *upb_stringsrc_new();
void upb_stringsrc_free(upb_stringsrc *s);
// Resets the stringsrc to a state where it will vend the given string. The
// stringsrc will take a reference on the string, so the caller need not ensure
// that it outlives the stringsrc. A stringsrc can be reset multiple times.
void upb_stringsrc_reset(upb_stringsrc *s, upb_string *str);
// Returns the upb_bytesrc* for this stringsrc. Invalidated by reset above.
upb_bytesrc *upb_stringsrc_bytesrc(upb_stringsrc *s);
/* upb_stringsink *************************************************************/
struct upb_stringsink;
typedef struct upb_stringsink upb_stringsink;
// Create/free a stringsink.
upb_stringsink *upb_stringsink_new();
void upb_stringsink_free(upb_stringsink *s);
// Gets a string containing the data that has been written to this stringsink.
// The caller does *not* own any references to this string.
upb_string *upb_stringsink_getstring(upb_stringsink *s);
// Clears the internal string of accumulated data, resetting it to empty.
void upb_stringsink_reset(upb_stringsink *s);
// Returns the upb_bytesink* for this stringsink. Invalidated by reset above.
upb_bytesink *upb_stringsrc_bytesink();
#ifdef __cplusplus
} /* extern "C" */
#endif
#endif

@ -0,0 +1,143 @@
/*
* upb - a minimalist implementation of protocol buffers.
*
* Copyright (c) 2009 Joshua Haberman. See LICENSE for details.
*/
#include "upb_textprinter.h"
#include <inttypes.h>
#include <stdlib.h>
#include "upb_def.h"
#include "upb_string.h"
// State of a text printer: where output goes and how it is laid out.
struct _upb_textprinter {
// Destination for all output.
upb_bytesink *bytesink;
// Current nesting level; incremented when a submessage is started.
int indent_depth;
// If true, fields are separated by spaces and no indentation is written;
// otherwise each field goes on its own line.
bool single_line;
// Status object handed to every bytesink call made by the printer.
upb_status status;
};
#define CHECK(x) if ((x) < 0) goto err;
// Writes one indent unit per nesting level (nothing in single-line mode).
// Returns 0 on success, or -1 as soon as a write to the bytesink fails.
static int upb_textprinter_indent(upb_textprinter *p)
{
  if(p->single_line) return 0;
  for(int level = p->indent_depth; level > 0; level--) {
    if(upb_bytesink_putstr(p->bytesink, UPB_STRLIT(" "), &p->status) < 0)
      return -1;
  }
  return 0;
}
// Writes the separator that follows a field: a space in single-line mode, a
// newline otherwise.  Returns 0 on success, -1 if the write failed.
static int upb_textprinter_endfield(upb_textprinter *p) {
  if(!p->single_line) {
    if(upb_bytesink_putstr(p->bytesink, UPB_STRLIT("\n"), &p->status) < 0)
      return -1;
  } else {
    if(upb_bytesink_putstr(p->bytesink, UPB_STRLIT(" "), &p->status) < 0)
      return -1;
  }
  return 0;
}
// Value handler: prints one scalar field as "<name>: <value>", preceded by the
// current indentation and followed by the field terminator.
//
//   _p   the upb_textprinter registered as the handler closure.
//   f    definition of the field being printed (its name and type are used).
//   val  the field's value; the member that is read is selected by f->type.
//
// Returns UPB_CONTINUE on success, or UPB_BREAK as soon as any write to the
// sink (including the indentation and the terminator) reports an error.
static upb_flow_t upb_textprinter_value(void *_p, upb_fielddef *f,
                                        upb_value val) {
  upb_textprinter *p = _p;
  CHECK(upb_textprinter_indent(p));
  CHECK(upb_bytesink_printf(p->bytesink, &p->status, UPB_STRFMT ": ", UPB_STRARG(f->name)));
  // Prints val's <member> with the given printf format, then leaves the switch.
#define CASE(fmtstr, member) \
  CHECK(upb_bytesink_printf(p->bytesink, &p->status, fmtstr, upb_value_get ## member(val))); break;
  switch(f->type) {
    // "%0.f" has a precision of zero and would drop the fractional part of
    // every value; use enough significant digits to round-trip instead.
    case UPB_TYPE(DOUBLE):
      CASE("%.17g", double)
    case UPB_TYPE(FLOAT):
      CASE("%.9g", float)
    case UPB_TYPE(INT64):
    case UPB_TYPE(SFIXED64):
    case UPB_TYPE(SINT64):
      CASE("%" PRId64, int64)
    case UPB_TYPE(UINT64):
    case UPB_TYPE(FIXED64):
      CASE("%" PRIu64, uint64)
    case UPB_TYPE(UINT32):
    case UPB_TYPE(FIXED32):
      CASE("%" PRIu32, uint32)
    case UPB_TYPE(ENUM): {
      upb_enumdef *enum_def = upb_downcast_enumdef(f->def);
      upb_string *enum_label =
          upb_enumdef_iton(enum_def, upb_value_getint32(val));
      if (enum_label) {
        // We found a corresponding string for this enum.  Otherwise we fall
        // through to the int32 code path.
        CHECK(upb_bytesink_putstr(p->bytesink, enum_label, &p->status));
        break;
      }
    }
    // Fall through: enum value without a known label is printed as a number.
    case UPB_TYPE(INT32):
    case UPB_TYPE(SFIXED32):
    case UPB_TYPE(SINT32):
      CASE("%" PRId32, int32)
    case UPB_TYPE(BOOL):
      CASE("%hhu", bool)
    case UPB_TYPE(STRING):
    case UPB_TYPE(BYTES):
      // TODO: escaping.
      CHECK(upb_bytesink_putstr(p->bytesink, UPB_STRLIT("\""), &p->status));
      CHECK(upb_bytesink_putstr(p->bytesink, upb_value_getstr(val), &p->status));
      CHECK(upb_bytesink_putstr(p->bytesink, UPB_STRLIT("\""), &p->status));
      break;
  }
#undef CASE
  CHECK(upb_textprinter_endfield(p));
  return UPB_CONTINUE;
err:
  return UPB_BREAK;
}
// Start-of-submessage handler: prints "<name> {" at the current indentation
// (followed by a newline unless in single-line mode) and increases the nesting
// depth for the fields that follow.
//
//   _p           the upb_textprinter registered as the handler closure.
//   f            definition of the submessage field; only its name is used.
//   delegate_to  unused; the printer handles nested messages itself.
//
// Returns UPB_CONTINUE on success, or UPB_BREAK if any write to the sink
// reports an error (in which case the depth is left unchanged).
static upb_flow_t upb_textprinter_startsubmsg(void *_p, upb_fielddef *f,
                                              upb_handlers *delegate_to) {
  (void)delegate_to;
  upb_textprinter *p = _p;
  CHECK(upb_textprinter_indent(p));
  CHECK(upb_bytesink_printf(p->bytesink, &p->status, UPB_STRFMT " {", UPB_STRARG(f->name)));
  if(!p->single_line) {
    CHECK(upb_bytesink_putstr(p->bytesink, UPB_STRLIT("\n"), &p->status));
  }
  p->indent_depth++;
  return UPB_CONTINUE;
err:
  return UPB_BREAK;
}
static upb_flow_t upb_textprinter_endsubmsg(void *_p)
{
upb_textprinter *p = _p;
p->indent_depth--;
upb_textprinter_indent(p);
upb_bytesink_putstr(p->bytesink, UPB_STRLIT("}"), &p->status);
upb_textprinter_endfield(p);
return UPB_CONTINUE;
}
upb_textprinter *upb_textprinter_new() {
upb_textprinter *p = malloc(sizeof(*p));
return p;
}
void upb_textprinter_free(upb_textprinter *p) {
free(p);
}
// Prepares printer p for a new run and hooks it up to a handlers object.
//
//   p            the printer to reset; its nesting depth returns to zero.
//   handlers     receives this file's handler set, with p as the closure and
//                p's status object as the handler status.
//   sink         destination for all printed text.
//   single_line  true to separate fields with spaces and skip indentation,
//                false to print one field per line.
void upb_textprinter_reset(upb_textprinter *p, upb_handlers *handlers,
                           upb_bytesink *sink, bool single_line) {
  // One table shared by all printers; per-printer state travels in the closure.
  static upb_handlerset handlerset = {
    NULL,  // startmsg
    NULL,  // endmsg
    upb_textprinter_value,
    upb_textprinter_startsubmsg,
    upb_textprinter_endsubmsg,
  };

  p->indent_depth = 0;
  p->single_line = single_line;
  p->bytesink = sink;

  upb_register_handlerset(handlers, &handlerset);
  upb_set_handler_closure(handlers, p, &p->status);
}

@ -0,0 +1,29 @@
/*
* upb - a minimalist implementation of protocol buffers.
*
* Copyright (c) 2009 Joshua Haberman. See LICENSE for details.
*/
#ifndef UPB_TEXT_H_
#define UPB_TEXT_H_
#include "upb_stream.h"
#ifdef __cplusplus
extern "C" {
#endif
// Opaque text-format printer; it writes the values it receives through its
// handlers to a upb_bytesink.
struct _upb_textprinter;
typedef struct _upb_textprinter upb_textprinter;
// Allocates a printer / frees a printer returned by upb_textprinter_new().
upb_textprinter *upb_textprinter_new();
void upb_textprinter_free(upb_textprinter *p);
// Resets the printer's nesting depth, directs its output to "sink", and
// registers its handlers on "handlers" with the printer as their closure.
// If single_line is true, fields are separated by spaces and not indented;
// otherwise each field is printed on its own line.
void upb_textprinter_reset(upb_textprinter *p, upb_handlers *handlers,
upb_bytesink *sink, bool single_line);
// NOTE(review): no definition of this function is visible in
// upb_textprinter.c (upb_textprinter_reset registers the handlers itself) --
// confirm it is implemented somewhere or remove the declaration.
void upb_textprinter_sethandlers(upb_textprinter *p, upb_handlers *h);
#ifdef __cplusplus
} /* extern "C" */
#endif
#endif /* UPB_TEXT_H_ */

@ -0,0 +1,42 @@
#include "upb_decoder.h"
#include "upb_textprinter.h"
#include "upb_stdio.h"
// Decodes a serialized google.protobuf.FileDescriptorSet from stdin and prints
// it in text format to stdout.
//
// Returns 0 if decoding succeeded and 1 otherwise. The result is checked
// explicitly rather than with assert(), so the test still fails when built
// with NDEBUG, and every resource is released on both paths.
int main() {
  upb_symtab *symtab = upb_symtab_new();
  upb_symtab_add_descriptorproto(symtab);
  upb_def *fds = upb_symtab_lookup(
      symtab, UPB_STRLIT("google.protobuf.FileDescriptorSet"));

  // Byte source/sink wrappers around the standard streams.
  upb_stdio *in = upb_stdio_new();
  upb_stdio_reset(in, stdin);
  upb_stdio *out = upb_stdio_new();
  upb_stdio_reset(out, stdout);

  upb_decoder *d = upb_decoder_new(upb_downcast_msgdef(fds));
  upb_decoder_reset(d, upb_stdio_bytesrc(in));

  // Route the decoder's events into the text printer.
  upb_textprinter *p = upb_textprinter_new();
  upb_handlers handlers;
  upb_handlers_init(&handlers);
  upb_textprinter_reset(p, &handlers, upb_stdio_bytesink(out), false);

  upb_src *src = upb_decoder_src(d);
  upb_src_sethandlers(src, &handlers);

  upb_status status = UPB_STATUS_INIT;
  upb_src_run(src, &status);
  int exit_code = 0;
  if (!upb_ok(&status)) {
    fprintf(stderr, "Error decoding FileDescriptorSet from stdin: ");
    upb_printerr(&status);
    exit_code = 1;
  }

  upb_status_uninit(&status);
  upb_stdio_free(in);
  upb_stdio_free(out);
  upb_decoder_free(d);
  upb_textprinter_free(p);
  upb_def_unref(fds);
  upb_symtab_unref(symtab);

  // Prevent C library from holding buffers open, so Valgrind doesn't see
  // memory leaks.
  fclose(stdin);
  fclose(stdout);
  return exit_code;
}

@ -0,0 +1,25 @@
#undef NDEBUG /* ensure tests always assert. */
#include "upb_def.h"
#include <stdlib.h>
// Smoke test for the symbol table: loads the built-in descriptor definitions,
// enumerates and releases every def, then looks up FileDescriptorSet by name
// and checks that it is a message definition.
int main() {
  upb_symtab *symtab = upb_symtab_new();
  upb_symtab_add_descriptorproto(symtab);

  // Fetch every def, then drop the reference held on each one and free the
  // array that upb_symtab_getdefs() returned.
  int num_defs;
  upb_def **all_defs = upb_symtab_getdefs(symtab, &num_defs, UPB_DEF_ANY);
  for (int idx = 0; idx < num_defs; idx++) {
    upb_def_unref(all_defs[idx]);
  }
  free(all_defs);

  // Look up a well-known message by its fully-qualified name.
  upb_string *name = upb_strdupc("google.protobuf.FileDescriptorSet");
  upb_def *found = upb_symtab_lookup(symtab, name);
  assert(found != NULL);
  assert(upb_dyncast_msgdef(found) != NULL);

  upb_def_unref(found);
  upb_string_unref(name);
  upb_symtab_unref(symtab);
  return 0;
}

@ -0,0 +1,127 @@
#undef NDEBUG /* ensure tests always assert. */
#include "upb_stream.h"
#include "upb_string.h"
// Closure shared by all test handlers in this file.
typedef struct {
// Log to which each handler appends a line describing the event it received.
upb_string *str;
// When true, startsubmsg registers test_handlers on the delegate and returns
// UPB_DELEGATE instead of UPB_CONTINUE.
bool should_delegate;
} test_data;
extern upb_handlerset test_handlers;
// printf-style append: formats the arguments into a temporary string and
// concatenates the result onto s.
static void strappendf(upb_string *s, const char *format, ...) {
  upb_string *formatted = upb_string_new();

  va_list ap;
  va_start(ap, format);
  upb_string_vprintf(formatted, format, ap);
  va_end(ap);

  upb_strcat(s, formatted);
  upb_string_unref(formatted);  // Drop the temporary once it has been copied.
}
static upb_flow_t startmsg(void *closure) {
test_data *d = closure;
strappendf(d->str, "startmsg\n");
return UPB_CONTINUE;
}
static upb_flow_t endmsg(void *closure) {
test_data *d = closure;
strappendf(d->str, "endmsg\n");
return UPB_CONTINUE;
}
// Value handler: logs "value, <n>" where <n> is the value read as an int64.
// The field definition is ignored.
static upb_flow_t value(void *closure, struct _upb_fielddef *f, upb_value val) {
  (void)f;
  test_data *d = closure;
  // The 64-bit value is not necessarily a "long long" (on LP64 platforms
  // int64_t is "long"), so convert explicitly to match the "%lld" conversion.
  strappendf(d->str, "value, %lld\n", (long long)upb_value_getint64(val));
  return UPB_CONTINUE;
}
// Start-of-submessage handler: logs "startsubmsg". If the closure asks for
// delegation, installs test_handlers (with the same closure) on delegate_to
// and returns UPB_DELEGATE; otherwise returns UPB_CONTINUE. The field
// definition is ignored.
static upb_flow_t startsubmsg(void *closure, struct _upb_fielddef *f,
                              upb_handlers *delegate_to) {
  (void)f;
  test_data *data = closure;
  strappendf(data->str, "startsubmsg\n");

  if (!data->should_delegate) return UPB_CONTINUE;

  upb_register_handlerset(delegate_to, &test_handlers);
  upb_set_handler_closure(delegate_to, closure, NULL);
  return UPB_DELEGATE;
}
static upb_flow_t endsubmsg(void *closure) {
test_data *d = closure;
strappendf(d->str, "endsubmsg\n");
return UPB_CONTINUE;
}
// Unknown-value handler: logs "unknownval, <field number>". The value itself
// is ignored.
static upb_flow_t unknownval(void *closure, upb_field_number_t fieldnum,
                             upb_value val) {
  (void)val;
  test_data *data = closure;
  upb_string *log = data->str;
  strappendf(log, "unknownval, %d\n", fieldnum);
  return UPB_CONTINUE;
}
// Handler table wiring up the logging handlers defined above. It has external
// linkage (see the extern declaration earlier in this file) so that
// startsubmsg can register it on a delegate before this definition appears.
upb_handlerset test_handlers = {
&startmsg,
&endmsg,
&value,
&startsubmsg,
&endsubmsg,
&unknownval,
};
// Drives a upb_dispatcher through a fixed sequence of events, with the logging
// handlers above registered, and checks that the resulting log matches the
// expected text exactly. The second nested submessage is started with
// should_delegate set, which is what produces the extra startmsg/endmsg pair
// in the expected output.
static void test_dispatcher() {
test_data data;
data.should_delegate = false;
data.str = upb_string_new();
// Register the logging handlers with "data" as their closure.
upb_handlers h;
upb_handlers_init(&h);
upb_handlers_reset(&h);
upb_register_handlerset(&h, &test_handlers);
upb_set_handler_closure(&h, &data, NULL);
upb_dispatcher d;
upb_dispatcher_init(&d);
upb_dispatcher_reset(&d, &h);
// Event sequence: one value, then three nested submessages (the middle one
// delegated), a value inside the innermost, and then everything closed.
upb_dispatch_startmsg(&d);
upb_value val;
upb_value_setint64(&val, 5);
upb_dispatch_value(&d, NULL, val);
upb_dispatch_startsubmsg(&d, NULL);
data.should_delegate = true;
upb_dispatch_startsubmsg(&d, NULL);
data.should_delegate = false;
upb_dispatch_startsubmsg(&d, NULL);
upb_dispatch_value(&d, NULL, val);
upb_dispatch_endsubmsg(&d);
upb_dispatch_endsubmsg(&d);
upb_dispatch_endsubmsg(&d);
upb_dispatch_endmsg(&d);
upb_string expected = UPB_STACK_STRING(
"startmsg\n"
"value, 5\n"
"startsubmsg\n"
"startsubmsg\n"
"startmsg\n" // Because of the delegation.
"startsubmsg\n"
"value, 5\n"
"endsubmsg\n"
"endmsg\n" // Because of the delegation.
"endsubmsg\n"
"endsubmsg\n"
"endmsg\n");
assert(upb_streql(data.str, &expected));
upb_string_unref(data.str);
}
// Test entry point: runs the dispatcher test, which asserts on failure.
int main() {
  test_dispatcher();

  return 0;
}

@ -0,0 +1,126 @@
#undef NDEBUG /* ensure tests always assert. */
#include "upb_string.h"
// Fixtures shared by the tests below: a C string and a statically-initialized
// upb_string that wraps it.
char static_str[] = "Static string.";
upb_string static_upbstr = UPB_STATIC_STRING(static_str);
// Checks the behavior of a statically-initialized string: its contents, that
// ref/unref leave it untouched, and that recycling it produces a different
// string.
static void test_static() {
  // Static string is initialized appropriately.
  assert(upb_streql(&static_upbstr, UPB_STRLIT("Static string.")));

  // Taking a ref on a static string returns the same string, and repeated
  // refs don't get the string in a confused state.
  for (int ref = 0; ref < 3; ref++) {
    assert(upb_string_getref(&static_upbstr) == &static_upbstr);
  }

  // Unreffing a static string does nothing (is not harmful).
  for (int unref = 0; unref < 5; unref++) {
    upb_string_unref(&static_upbstr);
  }

  // Recycling a static string returns a new string (that can be modified).
  upb_string *recycled = &static_upbstr;
  upb_string_recycle(&recycled);
  assert(recycled != &static_upbstr);
  upb_string_unref(recycled);
}
// Exercises dynamically-allocated strings: creation, recycling (with and
// without other referents), buffer reuse, substring aliasing and its effect on
// refcounts, and the printf-style helpers. The steps depend on one another
// (several assertions compare buffer pointers or refcounts left behind by
// earlier steps), so their order must be preserved.
static void test_dynamic() {
upb_string *str = upb_string_new();
assert(str != NULL);
upb_string_unref(str);
// Can also create a string by recycle(NULL).
str = NULL;
upb_string_recycle(&str);
assert(str != NULL);
// Take a ref and recycle; should create a new string and release a ref
// on the old one.
upb_string *strcp = upb_string_getref(str);
assert(strcp == str);
assert(upb_atomic_read(&str->refcount) == 2);
upb_string_recycle(&str);
assert(strcp != str);
assert(upb_atomic_read(&str->refcount) == 1);
assert(upb_atomic_read(&strcp->refcount) == 1);
upb_string_unref(strcp);
upb_strcpyc(str, static_str);
assert(upb_string_len(str) == (sizeof(static_str) - 1));
// robuf is kept only so later steps can compare buffer addresses against it.
const char *robuf = upb_string_getrobuf(str);
assert(robuf != NULL);
assert(upb_streqlc(str, static_str));
upb_string_endread(str);
upb_string *str2 = str;
upb_string_recycle(&str2);
// No other referents, so should return the same string.
assert(str2 == str);
// Write a shorter string, the same memory should be reused.
upb_strcpyc(str, "XX");
const char *robuf2 = upb_string_getrobuf(str);
assert(robuf2 == robuf);
assert(upb_streqlc(str, "XX"));
assert(upb_streql(str, UPB_STRLIT("XX")));
// Make string alias part of another string.
str2 = upb_strdupc("WXYZ");
upb_string_recycle(&str);
upb_string_substr(str, str2, 1, 2);
assert(upb_string_len(str) == 2);
assert(upb_string_len(str2) == 4);
// The two string should be aliasing the same data.
const char *robuf3 = upb_string_getrobuf(str);
const char *robuf4 = upb_string_getrobuf(str2);
assert(robuf3 == robuf4 + 1);
// The aliased string should have an extra ref.
assert(upb_atomic_read(&str2->refcount) == 2);
// Recycling str should eliminate the extra ref.
upb_string_recycle(&str);
assert(upb_atomic_read(&str2->refcount) == 1);
// Resetting str should reuse its old data.
upb_strcpyc(str, "XX");
const char *robuf5 = upb_string_getrobuf(str);
assert(robuf5 == robuf);
// Resetting str to something very long should require new data to be
// allocated.
upb_string_recycle(&str);
const char longstring[] = "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX";
upb_strcpyc(str, longstring);
const char *robuf6 = upb_string_getrobuf(str);
assert(robuf6 != robuf);
assert(upb_streqlc(str, longstring));
// Test printf.
upb_string_recycle(&str);
upb_string_printf(str, "Number: %d, String: %s", 5, "YO!");
assert(upb_streqlc(str, "Number: 5, String: YO!"));
// Test asprintf
upb_string *str3 = upb_string_asprintf("Yo %s: " UPB_STRFMT "\n",
"Josh", UPB_STRARG(str));
const char expected[] = "Yo Josh: Number: 5, String: YO!\n";
assert(upb_streqlc(str3, expected));
// Release the three strings still held by this test.
upb_string_unref(str);
upb_string_unref(str2);
upb_string_unref(str3);
// Unref of NULL is harmless.
upb_string_unref(NULL);
}
// Test entry point: runs the static-string checks and then the dynamic-string
// checks; each asserts on failure.
int main() {
  test_static();

  test_dynamic();
}

@ -1,7 +1,7 @@
#undef NDEBUG /* ensure tests always assert. */
#include "upb_table.h"
#include "upb_data.h"
#include "upb_string.h"
#include "test_util.h"
#include <assert.h>
#include <map>
@ -12,6 +12,8 @@
#include <sys/resource.h>
#include <iostream>
bool benchmark = false;
using std::string;
using std::vector;
@ -45,7 +47,7 @@ void test_strtable(const vector<string>& keys, uint32_t num_to_insert)
all.insert(key);
strtable_entry e;
e.value = key[0];
upb_strptr str = upb_strduplen(key.c_str(), key.size());
upb_string *str = upb_strduplen(key.c_str(), key.size());
e.e.key = str;
upb_strtable_insert(&table, &e.e);
upb_string_unref(str); // The table still owns a ref.
@ -55,7 +57,7 @@ void test_strtable(const vector<string>& keys, uint32_t num_to_insert)
/* Test correctness. */
for(uint32_t i = 0; i < keys.size(); i++) {
const string& key = keys[i];
upb_strptr str = upb_strduplen(key.c_str(), key.size());
upb_string *str = upb_strduplen(key.c_str(), key.size());
strtable_entry *e = (strtable_entry*)upb_strtable_lookup(&table, str);
if(m.find(key) != m.end()) { /* Assume map implementation is correct. */
assert(e);
@ -71,7 +73,7 @@ void test_strtable(const vector<string>& keys, uint32_t num_to_insert)
strtable_entry *e;
for(e = (strtable_entry*)upb_strtable_begin(&table); e;
e = (strtable_entry*)upb_strtable_next(&table, &e->e)) {
string tmp(upb_string_getrobuf(e->e.key), upb_strlen(e->e.key));
string tmp(upb_string_getrobuf(e->e.key), upb_string_len(e->e.key));
std::set<string>::iterator i = all.find(tmp);
assert(i != all.end());
all.erase(i);
@ -116,6 +118,11 @@ void test_inttable(int32_t *keys, size_t num_entries)
}
}
if(!benchmark) {
upb_inttable_free(&table);
return;
}
/* Test performance. We only test lookups for keys that are known to exist. */
uintptr_t x = 0;
const unsigned int iterations = 0xFFFFFF;
@ -219,8 +226,12 @@ int32_t *get_contiguous_keys(int32_t num)
return buf;
}
int main()
int main(int argc, char *argv[])
{
for (int i = 1; i < argc; i++) {
if (strcmp(argv[i], "--benchmark") == 0) benchmark = true;
}
vector<string> keys;
keys.push_back("google.protobuf.FileDescriptorSet");
keys.push_back("google.protobuf.FileDescriptorProto");

@ -4,9 +4,10 @@
#include <stdio.h>
#include <stdlib.h>
#include <google/protobuf/descriptor.h>
#include "upb_data.h"
#include "upb_msg.h"
#include "upb_def.h"
#include "upb_decoder.h"
#include "upb_strstream.h"
int num_assertions = 0;
#define ASSERT(expr) do { \
@ -25,7 +26,7 @@ void compare_arrays(const google::protobuf::Reflection *r,
upb_msg *upb_msg, upb_fielddef *upb_f)
{
ASSERT(upb_msg_has(upb_msg, upb_f));
upb_arrayptr arr = upb_msg_get(upb_msg, upb_f).arr;
upb_array *arr = upb_msg_get(upb_msg, upb_f).arr;
ASSERT(upb_array_len(arr) == (upb_arraylen_t)r->FieldSize(proto2_msg, proto2_f));
for(upb_arraylen_t i = 0; i < upb_array_len(arr); i++) {
upb_value v = upb_array_get(arr, upb_f, i);
@ -63,7 +64,7 @@ void compare_arrays(const google::protobuf::Reflection *r,
case UPB_TYPE(STRING):
case UPB_TYPE(BYTES): {
std::string str = r->GetRepeatedString(proto2_msg, proto2_f, i);
std::string str2(upb_string_getrobuf(v.str), upb_strlen(v.str));
std::string str2(upb_string_getrobuf(v.str), upb_string_len(v.str));
ASSERT(str == str2);
break;
}
@ -116,7 +117,7 @@ void compare_values(const google::protobuf::Reflection *r,
case UPB_TYPE(STRING):
case UPB_TYPE(BYTES): {
std::string str = r->GetString(proto2_msg, proto2_f);
std::string str2(upb_string_getrobuf(v.str), upb_strlen(v.str));
std::string str2(upb_string_getrobuf(v.str), upb_string_len(v.str));
ASSERT(str == str2);
break;
}
@ -133,9 +134,10 @@ void compare(const google::protobuf::Message& proto2_msg,
const google::protobuf::Reflection *r = proto2_msg.GetReflection();
const google::protobuf::Descriptor *d = proto2_msg.GetDescriptor();
ASSERT((upb_field_count_t)d->field_count() == upb_md->num_fields);
for(upb_field_count_t i = 0; i < upb_md->num_fields; i++) {
upb_fielddef *upb_f = &upb_md->fields[i];
ASSERT((upb_field_count_t)d->field_count() == upb_msgdef_numfields(upb_md));
upb_msg_iter i;
for(i = upb_msg_begin(upb_md); !upb_msg_done(i); i = upb_msg_next(upb_md, i)) {
upb_fielddef *upb_f = upb_msg_iter_field(i);
const google::protobuf::FieldDescriptor *proto2_f =
d->FindFieldByNumber(upb_f->number);
// Make sure the definitions are equal.
@ -143,7 +145,7 @@ void compare(const google::protobuf::Message& proto2_msg,
ASSERT(proto2_f);
ASSERT(upb_f->number == proto2_f->number());
ASSERT(std::string(upb_string_getrobuf(upb_f->name),
upb_strlen(upb_f->name)) ==
upb_string_len(upb_f->name)) ==
proto2_f->name());
ASSERT(upb_f->type == proto2_f->type());
ASSERT(upb_isarray(upb_f) == proto2_f->is_repeated());
@ -166,10 +168,10 @@ void compare(const google::protobuf::Message& proto2_msg,
void parse_and_compare(MESSAGE_CIDENT *proto2_msg,
upb_msg *upb_msg, upb_msgdef *upb_md,
upb_strptr str)
upb_string *str)
{
// Parse to both proto2 and upb.
ASSERT(proto2_msg->ParseFromArray(upb_string_getrobuf(str), upb_strlen(str)));
ASSERT(proto2_msg->ParseFromArray(upb_string_getrobuf(str), upb_string_len(str)));
upb_status status = UPB_STATUS_INIT;
upb_msg_decodestr(upb_msg, upb_md, str, &status);
ASSERT(upb_ok(&status));
@ -194,22 +196,32 @@ int main(int argc, char *argv[])
// Initialize upb state, parse descriptor.
upb_status status = UPB_STATUS_INIT;
upb_symtab *c = upb_symtab_new();
upb_strptr fds = upb_strreadfile(MESSAGE_DESCRIPTOR_FILE);
if(upb_string_isnull(fds)) {
upb_symtab *symtab = upb_symtab_new();
upb_string *fds = upb_strreadfile(MESSAGE_DESCRIPTOR_FILE);
if(fds == NULL) {
fprintf(stderr, "Couldn't read " MESSAGE_DESCRIPTOR_FILE ".\n");
return 1;
}
upb_symtab_add_desc(c, fds, &status);
upb_symtab_add_descriptorproto(symtab);
upb_def *fds_msgdef = upb_symtab_lookup(
symtab, UPB_STRLIT("google.protobuf.FileDescriptorSet"));
upb_stringsrc *ssrc = upb_stringsrc_new();
upb_stringsrc_reset(ssrc, fds);
upb_decoder *decoder = upb_decoder_new(upb_downcast_msgdef(fds_msgdef));
upb_decoder_reset(decoder, upb_stringsrc_bytesrc(ssrc));
upb_symtab_addfds(symtab, upb_decoder_src(decoder), &status);
if(!upb_ok(&status)) {
fprintf(stderr, "Error importing " MESSAGE_DESCRIPTOR_FILE ": %s.\n",
status.msg);
fprintf(stderr, "Error importing " MESSAGE_DESCRIPTOR_FILE ": ");
upb_printerr(&status);
return 1;
}
upb_string_unref(fds);
upb_decoder_free(decoder);
upb_stringsrc_free(ssrc);
upb_strptr proto_name = upb_strdupc(MESSAGE_NAME);
upb_msgdef *def = upb_downcast_msgdef(upb_symtab_lookup(c, proto_name));
upb_string *proto_name = upb_strdupc(MESSAGE_NAME);
upb_msgdef *def = upb_downcast_msgdef(upb_symtab_lookup(symtab, proto_name));
if(!def) {
fprintf(stderr, "Error finding symbol '" UPB_STRFMT "'.\n",
UPB_STRARG(proto_name));
@ -218,8 +230,8 @@ int main(int argc, char *argv[])
upb_string_unref(proto_name);
// Read the message data itself.
upb_strptr str = upb_strreadfile(MESSAGE_FILE);
if(upb_string_isnull(str)) {
upb_string *str = upb_strreadfile(MESSAGE_FILE);
if(str == NULL) {
fprintf(stderr, "Error reading " MESSAGE_FILE "\n");
return 1;
}
@ -234,7 +246,7 @@ int main(int argc, char *argv[])
upb_msg_unref(upb_msg, def);
upb_def_unref(UPB_UPCAST(def));
upb_string_unref(str);
upb_symtab_unref(c);
upb_symtab_unref(symtab);
return 0;
}

@ -0,0 +1,497 @@
// !$*UTF8*$!
{
archiveVersion = 1;
classes = {
};
objectVersion = 45;
objects = {
/* Begin PBXBuildFile section */
420E6F1C11F258AE001DA8FE /* test_decoder.c in Sources */ = {isa = PBXBuildFile; fileRef = 42BD1D4F11F24F3E0076AD28 /* test_decoder.c */; };
420E6F3B11F259B3001DA8FE /* liblibupbcore.a in Frameworks */ = {isa = PBXBuildFile; fileRef = 42BD1D5C11F24F920076AD28 /* liblibupbcore.a */; };
420E6F3C11F259B3001DA8FE /* liblibupbstream.a in Frameworks */ = {isa = PBXBuildFile; fileRef = 42BD1D6411F24FBA0076AD28 /* liblibupbstream.a */; };
42BD1D6E11F2500D0076AD28 /* upb.c in Sources */ = {isa = PBXBuildFile; fileRef = 42BD1D3211F24E4C0076AD28 /* upb.c */; };
42BD1D7011F2500D0076AD28 /* upb_def.c in Sources */ = {isa = PBXBuildFile; fileRef = 42BD1D2911F24E4C0076AD28 /* upb_def.c */; };
42BD1D7211F2500D0076AD28 /* upb_stream.c in Sources */ = {isa = PBXBuildFile; fileRef = 42BD1D2C11F24E4C0076AD28 /* upb_stream.c */; };
42BD1D7311F2500D0076AD28 /* upb_string.c in Sources */ = {isa = PBXBuildFile; fileRef = 42BD1D2E11F24E4C0076AD28 /* upb_string.c */; };
42BD1D7411F2500D0076AD28 /* upb_table.c in Sources */ = {isa = PBXBuildFile; fileRef = 42BD1D3011F24E4C0076AD28 /* upb_table.c */; };
42BD1D7611F250B90076AD28 /* upb_decoder.c in Sources */ = {isa = PBXBuildFile; fileRef = 42BD1D3E11F24EA30076AD28 /* upb_decoder.c */; };
42BD1D7711F250B90076AD28 /* upb_stdio.c in Sources */ = {isa = PBXBuildFile; fileRef = 42BD1D4011F24EA30076AD28 /* upb_stdio.c */; };
42BD1D7811F250B90076AD28 /* upb_textprinter.c in Sources */ = {isa = PBXBuildFile; fileRef = 42BD1D4211F24EA30076AD28 /* upb_textprinter.c */; };
42BD1D9011F251820076AD28 /* descriptor_const.h in Headers */ = {isa = PBXBuildFile; fileRef = 42BD1D8D11F251820076AD28 /* descriptor_const.h */; };
42BD1D9111F251820076AD28 /* descriptor.c in Sources */ = {isa = PBXBuildFile; fileRef = 42BD1D8E11F251820076AD28 /* descriptor.c */; };
42BD1D9211F251820076AD28 /* descriptor.h in Headers */ = {isa = PBXBuildFile; fileRef = 42BD1D8F11F251820076AD28 /* descriptor.h */; };
/* End PBXBuildFile section */
/* Begin PBXContainerItemProxy section */
420E6F3311F2598D001DA8FE /* PBXContainerItemProxy */ = {
isa = PBXContainerItemProxy;
containerPortal = 08FB7793FE84155DC02AAC07 /* Project object */;
proxyType = 1;
remoteGlobalIDString = 42BD1D5B11F24F920076AD28 /* upbcore */;
remoteInfo = upbcore;
};
420E6F3511F2598D001DA8FE /* PBXContainerItemProxy */ = {
isa = PBXContainerItemProxy;
containerPortal = 08FB7793FE84155DC02AAC07 /* Project object */;
proxyType = 1;
remoteGlobalIDString = 42BD1D6311F24FBA0076AD28 /* upbstream */;
remoteInfo = upbstream;
};
/* End PBXContainerItemProxy section */
/* Begin PBXFileReference section */
420E6F1811F2589F001DA8FE /* test_decoder */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = test_decoder; sourceTree = BUILT_PRODUCTS_DIR; };
42BD1D2811F24E4C0076AD28 /* upb_atomic.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = upb_atomic.h; path = core/upb_atomic.h; sourceTree = "<group>"; };
42BD1D2911F24E4C0076AD28 /* upb_def.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = upb_def.c; path = core/upb_def.c; sourceTree = "<group>"; };
42BD1D2A11F24E4C0076AD28 /* upb_def.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = upb_def.h; path = core/upb_def.h; sourceTree = "<group>"; };
42BD1D2B11F24E4C0076AD28 /* upb_stream_vtbl.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = upb_stream_vtbl.h; path = core/upb_stream_vtbl.h; sourceTree = "<group>"; };
42BD1D2C11F24E4C0076AD28 /* upb_stream.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = upb_stream.c; path = core/upb_stream.c; sourceTree = "<group>"; };
42BD1D2D11F24E4C0076AD28 /* upb_stream.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = upb_stream.h; path = core/upb_stream.h; sourceTree = "<group>"; };
42BD1D2E11F24E4C0076AD28 /* upb_string.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = upb_string.c; path = core/upb_string.c; sourceTree = "<group>"; };
42BD1D2F11F24E4C0076AD28 /* upb_string.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = upb_string.h; path = core/upb_string.h; sourceTree = "<group>"; };
42BD1D3011F24E4C0076AD28 /* upb_table.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = upb_table.c; path = core/upb_table.c; sourceTree = "<group>"; };
42BD1D3111F24E4C0076AD28 /* upb_table.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = upb_table.h; path = core/upb_table.h; sourceTree = "<group>"; };
42BD1D3211F24E4C0076AD28 /* upb.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = upb.c; path = core/upb.c; sourceTree = "<group>"; };
42BD1D3311F24E4C0076AD28 /* upb.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = upb.h; path = core/upb.h; sourceTree = "<group>"; };
42BD1D3E11F24EA30076AD28 /* upb_decoder.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = upb_decoder.c; path = stream/upb_decoder.c; sourceTree = "<group>"; };
42BD1D3F11F24EA30076AD28 /* upb_decoder.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = upb_decoder.h; path = stream/upb_decoder.h; sourceTree = "<group>"; };
42BD1D4011F24EA30076AD28 /* upb_stdio.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = upb_stdio.c; path = stream/upb_stdio.c; sourceTree = "<group>"; };
42BD1D4111F24EA30076AD28 /* upb_stdio.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = upb_stdio.h; path = stream/upb_stdio.h; sourceTree = "<group>"; };
42BD1D4211F24EA30076AD28 /* upb_textprinter.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = upb_textprinter.c; path = stream/upb_textprinter.c; sourceTree = "<group>"; };
42BD1D4311F24EA30076AD28 /* upb_textprinter.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = upb_textprinter.h; path = stream/upb_textprinter.h; sourceTree = "<group>"; };
42BD1D4F11F24F3E0076AD28 /* test_decoder.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = test_decoder.c; path = tests/test_decoder.c; sourceTree = "<group>"; };
42BD1D5011F24F3E0076AD28 /* test_def.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = test_def.c; path = tests/test_def.c; sourceTree = "<group>"; };
42BD1D5111F24F3E0076AD28 /* test_string.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = test_string.c; path = tests/test_string.c; sourceTree = "<group>"; };
42BD1D5211F24F3E0076AD28 /* test_table.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = test_table.cc; path = tests/test_table.cc; sourceTree = "<group>"; };
42BD1D5311F24F3E0076AD28 /* test_util.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = test_util.h; path = tests/test_util.h; sourceTree = "<group>"; };
42BD1D5C11F24F920076AD28 /* liblibupbcore.a */ = {isa = PBXFileReference; explicitFileType = archive.ar; includeInIndex = 0; path = liblibupbcore.a; sourceTree = BUILT_PRODUCTS_DIR; };
42BD1D6411F24FBA0076AD28 /* liblibupbstream.a */ = {isa = PBXFileReference; explicitFileType = archive.ar; includeInIndex = 0; path = liblibupbstream.a; sourceTree = BUILT_PRODUCTS_DIR; };
42BD1D8D11F251820076AD28 /* descriptor_const.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = descriptor_const.h; path = descriptor/descriptor_const.h; sourceTree = "<group>"; };
42BD1D8E11F251820076AD28 /* descriptor.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = descriptor.c; path = descriptor/descriptor.c; sourceTree = "<group>"; };
42BD1D8F11F251820076AD28 /* descriptor.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = descriptor.h; path = descriptor/descriptor.h; sourceTree = "<group>"; };
C6A0FF2C0290799A04C91782 /* upb.1 */ = {isa = PBXFileReference; lastKnownFileType = text.man; path = upb.1; sourceTree = "<group>"; };
/* End PBXFileReference section */
/* Begin PBXFrameworksBuildPhase section */
420E6F1611F2589F001DA8FE /* Frameworks */ = {
isa = PBXFrameworksBuildPhase;
buildActionMask = 2147483647;
files = (
420E6F3B11F259B3001DA8FE /* liblibupbcore.a in Frameworks */,
420E6F3C11F259B3001DA8FE /* liblibupbstream.a in Frameworks */,
);
runOnlyForDeploymentPostprocessing = 0;
};
42BD1D5A11F24F920076AD28 /* Frameworks */ = {
isa = PBXFrameworksBuildPhase;
buildActionMask = 2147483647;
files = (
);
runOnlyForDeploymentPostprocessing = 0;
};
42BD1D6211F24FBA0076AD28 /* Frameworks */ = {
isa = PBXFrameworksBuildPhase;
buildActionMask = 2147483647;
files = (
);
runOnlyForDeploymentPostprocessing = 0;
};
/* End PBXFrameworksBuildPhase section */
/* Begin PBXGroup section */
08FB7794FE84155DC02AAC07 /* upb */ = {
isa = PBXGroup;
children = (
08FB7795FE84155DC02AAC07 /* Source */,
C6A0FF2B0290797F04C91782 /* Documentation */,
1AB674ADFE9D54B511CA2CBB /* Products */,
);
name = upb;
sourceTree = "<group>";
};
08FB7795FE84155DC02AAC07 /* Source */ = {
isa = PBXGroup;
children = (
42BD1D8A11F251670076AD28 /* descriptor */,
42BD1D4711F24EB20076AD28 /* tests */,
42BD1D3B11F24E810076AD28 /* stream */,
42BD1D3A11F24E5F0076AD28 /* core */,
);
name = Source;
sourceTree = "<group>";
};
1AB674ADFE9D54B511CA2CBB /* Products */ = {
isa = PBXGroup;
children = (
42BD1D5C11F24F920076AD28 /* liblibupbcore.a */,
42BD1D6411F24FBA0076AD28 /* liblibupbstream.a */,
420E6F1811F2589F001DA8FE /* test_decoder */,
);
name = Products;
sourceTree = "<group>";
};
42BD1D3A11F24E5F0076AD28 /* core */ = {
isa = PBXGroup;
children = (
42BD1D2811F24E4C0076AD28 /* upb_atomic.h */,
42BD1D2911F24E4C0076AD28 /* upb_def.c */,
42BD1D2A11F24E4C0076AD28 /* upb_def.h */,
42BD1D2B11F24E4C0076AD28 /* upb_stream_vtbl.h */,
42BD1D2C11F24E4C0076AD28 /* upb_stream.c */,
42BD1D2D11F24E4C0076AD28 /* upb_stream.h */,
42BD1D2E11F24E4C0076AD28 /* upb_string.c */,
42BD1D2F11F24E4C0076AD28 /* upb_string.h */,
42BD1D3011F24E4C0076AD28 /* upb_table.c */,
42BD1D3111F24E4C0076AD28 /* upb_table.h */,
42BD1D3211F24E4C0076AD28 /* upb.c */,
42BD1D3311F24E4C0076AD28 /* upb.h */,
);
name = core;
sourceTree = "<group>";
};
42BD1D3B11F24E810076AD28 /* stream */ = {
isa = PBXGroup;
children = (
42BD1D3E11F24EA30076AD28 /* upb_decoder.c */,
42BD1D3F11F24EA30076AD28 /* upb_decoder.h */,
42BD1D4011F24EA30076AD28 /* upb_stdio.c */,
42BD1D4111F24EA30076AD28 /* upb_stdio.h */,
42BD1D4211F24EA30076AD28 /* upb_textprinter.c */,
42BD1D4311F24EA30076AD28 /* upb_textprinter.h */,
);
name = stream;
sourceTree = "<group>";
};
42BD1D4711F24EB20076AD28 /* tests */ = {
isa = PBXGroup;
children = (
42BD1D4F11F24F3E0076AD28 /* test_decoder.c */,
42BD1D5011F24F3E0076AD28 /* test_def.c */,
42BD1D5111F24F3E0076AD28 /* test_string.c */,
42BD1D5211F24F3E0076AD28 /* test_table.cc */,
42BD1D5311F24F3E0076AD28 /* test_util.h */,
);
name = tests;
sourceTree = "<group>";
};
42BD1D8A11F251670076AD28 /* descriptor */ = {
isa = PBXGroup;
children = (
42BD1D8D11F251820076AD28 /* descriptor_const.h */,
42BD1D8E11F251820076AD28 /* descriptor.c */,
42BD1D8F11F251820076AD28 /* descriptor.h */,
);
name = descriptor;
sourceTree = "<group>";
};
C6A0FF2B0290797F04C91782 /* Documentation */ = {
isa = PBXGroup;
children = (
C6A0FF2C0290799A04C91782 /* upb.1 */,
);
name = Documentation;
sourceTree = "<group>";
};
/* End PBXGroup section */
/* Begin PBXHeadersBuildPhase section */
42BD1D5811F24F920076AD28 /* Headers */ = {
isa = PBXHeadersBuildPhase;
buildActionMask = 2147483647;
files = (
42BD1D9011F251820076AD28 /* descriptor_const.h in Headers */,
42BD1D9211F251820076AD28 /* descriptor.h in Headers */,
);
runOnlyForDeploymentPostprocessing = 0;
};
42BD1D6011F24FBA0076AD28 /* Headers */ = {
isa = PBXHeadersBuildPhase;
buildActionMask = 2147483647;
files = (
);
runOnlyForDeploymentPostprocessing = 0;
};
/* End PBXHeadersBuildPhase section */
/* Begin PBXNativeTarget section */
420E6F1711F2589F001DA8FE /* test_decoder */ = {
isa = PBXNativeTarget;
buildConfigurationList = 420E6F1F11F258CC001DA8FE /* Build configuration list for PBXNativeTarget "test_decoder" */;
buildPhases = (
420E6F1511F2589F001DA8FE /* Sources */,
420E6F1611F2589F001DA8FE /* Frameworks */,
);
buildRules = (
);
dependencies = (
420E6F3411F2598D001DA8FE /* PBXTargetDependency */,
420E6F3611F2598D001DA8FE /* PBXTargetDependency */,
);
name = test_decoder;
productName = test_decoder;
productReference = 420E6F1811F2589F001DA8FE /* test_decoder */;
productType = "com.apple.product-type.tool";
};
42BD1D5B11F24F920076AD28 /* upbcore */ = {
isa = PBXNativeTarget;
buildConfigurationList = 42BD1D5F11F24FB10076AD28 /* Build configuration list for PBXNativeTarget "upbcore" */;
buildPhases = (
42BD1D5811F24F920076AD28 /* Headers */,
42BD1D5911F24F920076AD28 /* Sources */,
42BD1D5A11F24F920076AD28 /* Frameworks */,
);
buildRules = (
);
dependencies = (
);
name = upbcore;
productName = libupbcore;
productReference = 42BD1D5C11F24F920076AD28 /* liblibupbcore.a */;
productType = "com.apple.product-type.library.static";
};
42BD1D6311F24FBA0076AD28 /* upbstream */ = {
isa = PBXNativeTarget;
buildConfigurationList = 42BD1D6911F24FED0076AD28 /* Build configuration list for PBXNativeTarget "upbstream" */;
buildPhases = (
42BD1D6011F24FBA0076AD28 /* Headers */,
42BD1D6111F24FBA0076AD28 /* Sources */,
42BD1D6211F24FBA0076AD28 /* Frameworks */,
);
buildRules = (
);
dependencies = (
);
name = upbstream;
productName = libupbstream;
productReference = 42BD1D6411F24FBA0076AD28 /* liblibupbstream.a */;
productType = "com.apple.product-type.library.static";
};
/* End PBXNativeTarget section */
/* Begin PBXProject section */
08FB7793FE84155DC02AAC07 /* Project object */ = {
isa = PBXProject;
buildConfigurationList = 1DEB928908733DD80010E9CD /* Build configuration list for PBXProject "upb" */;
compatibilityVersion = "Xcode 3.1";
hasScannedForEncodings = 1;
mainGroup = 08FB7794FE84155DC02AAC07 /* upb */;
projectDirPath = "";
projectRoot = "";
targets = (
42BD1D5B11F24F920076AD28 /* upbcore */,
42BD1D6311F24FBA0076AD28 /* upbstream */,
420E6F1711F2589F001DA8FE /* test_decoder */,
);
};
/* End PBXProject section */
/* Begin PBXSourcesBuildPhase section */
420E6F1511F2589F001DA8FE /* Sources */ = {
isa = PBXSourcesBuildPhase;
buildActionMask = 2147483647;
files = (
420E6F1C11F258AE001DA8FE /* test_decoder.c in Sources */,
);
runOnlyForDeploymentPostprocessing = 0;
};
42BD1D5911F24F920076AD28 /* Sources */ = {
isa = PBXSourcesBuildPhase;
buildActionMask = 2147483647;
files = (
42BD1D6E11F2500D0076AD28 /* upb.c in Sources */,
42BD1D7011F2500D0076AD28 /* upb_def.c in Sources */,
42BD1D7211F2500D0076AD28 /* upb_stream.c in Sources */,
42BD1D7311F2500D0076AD28 /* upb_string.c in Sources */,
42BD1D7411F2500D0076AD28 /* upb_table.c in Sources */,
42BD1D9111F251820076AD28 /* descriptor.c in Sources */,
);
runOnlyForDeploymentPostprocessing = 0;
};
42BD1D6111F24FBA0076AD28 /* Sources */ = {
isa = PBXSourcesBuildPhase;
buildActionMask = 2147483647;
files = (
42BD1D7611F250B90076AD28 /* upb_decoder.c in Sources */,
42BD1D7711F250B90076AD28 /* upb_stdio.c in Sources */,
42BD1D7811F250B90076AD28 /* upb_textprinter.c in Sources */,
);
runOnlyForDeploymentPostprocessing = 0;
};
/* End PBXSourcesBuildPhase section */
/* Begin PBXTargetDependency section */
420E6F3411F2598D001DA8FE /* PBXTargetDependency */ = {
isa = PBXTargetDependency;
target = 42BD1D5B11F24F920076AD28 /* upbcore */;
targetProxy = 420E6F3311F2598D001DA8FE /* PBXContainerItemProxy */;
};
420E6F3611F2598D001DA8FE /* PBXTargetDependency */ = {
isa = PBXTargetDependency;
target = 42BD1D6311F24FBA0076AD28 /* upbstream */;
targetProxy = 420E6F3511F2598D001DA8FE /* PBXContainerItemProxy */;
};
/* End PBXTargetDependency section */
/* Begin XCBuildConfiguration section */
1DEB928A08733DD80010E9CD /* Debug */ = {
isa = XCBuildConfiguration;
buildSettings = {
ARCHS = "$(ARCHS_STANDARD_32_64_BIT)";
GCC_C_LANGUAGE_STANDARD = gnu99;
GCC_OPTIMIZATION_LEVEL = 0;
GCC_PREPROCESSOR_DEFINITIONS = UPB_THREAD_UNSAFE;
GCC_WARN_ABOUT_RETURN_TYPE = YES;
GCC_WARN_UNUSED_VARIABLE = YES;
ONLY_ACTIVE_ARCH = YES;
PREBINDING = NO;
SDKROOT = macosx10.6;
};
name = Debug;
};
1DEB928B08733DD80010E9CD /* Release */ = {
isa = XCBuildConfiguration;
buildSettings = {
ARCHS = "$(ARCHS_STANDARD_32_64_BIT)";
GCC_C_LANGUAGE_STANDARD = gnu99;
GCC_PREPROCESSOR_DEFINITIONS = UPB_THREAD_UNSAFE;
GCC_WARN_ABOUT_RETURN_TYPE = YES;
GCC_WARN_UNUSED_VARIABLE = YES;
PREBINDING = NO;
SDKROOT = macosx10.6;
};
name = Release;
};
420E6F1A11F258A0001DA8FE /* Debug */ = {
isa = XCBuildConfiguration;
buildSettings = {
ALWAYS_SEARCH_USER_PATHS = NO;
COPY_PHASE_STRIP = NO;
GCC_DYNAMIC_NO_PIC = NO;
GCC_ENABLE_FIX_AND_CONTINUE = YES;
GCC_MODEL_TUNING = G5;
GCC_OPTIMIZATION_LEVEL = 0;
INSTALL_PATH = /usr/local/bin;
PREBINDING = NO;
PRODUCT_NAME = test_decoder;
};
name = Debug;
};
420E6F1B11F258A0001DA8FE /* Release */ = {
isa = XCBuildConfiguration;
buildSettings = {
ALWAYS_SEARCH_USER_PATHS = NO;
COPY_PHASE_STRIP = YES;
DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym";
GCC_ENABLE_FIX_AND_CONTINUE = NO;
GCC_MODEL_TUNING = G5;
INSTALL_PATH = /usr/local/bin;
PREBINDING = NO;
PRODUCT_NAME = test_decoder;
ZERO_LINK = NO;
};
name = Release;
};
42BD1D5D11F24F930076AD28 /* Debug */ = {
isa = XCBuildConfiguration;
buildSettings = {
ALWAYS_SEARCH_USER_PATHS = NO;
COPY_PHASE_STRIP = NO;
GCC_DYNAMIC_NO_PIC = NO;
GCC_ENABLE_FIX_AND_CONTINUE = YES;
GCC_MODEL_TUNING = G5;
GCC_OPTIMIZATION_LEVEL = 0;
INSTALL_PATH = /usr/local/lib;
PREBINDING = NO;
PRODUCT_NAME = libupbcore;
};
name = Debug;
};
42BD1D5E11F24F930076AD28 /* Release */ = {
isa = XCBuildConfiguration;
buildSettings = {
ALWAYS_SEARCH_USER_PATHS = NO;
COPY_PHASE_STRIP = YES;
DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym";
GCC_ENABLE_FIX_AND_CONTINUE = NO;
GCC_MODEL_TUNING = G5;
INSTALL_PATH = /usr/local/lib;
PREBINDING = NO;
PRODUCT_NAME = libupbcore;
ZERO_LINK = NO;
};
name = Release;
};
42BD1D6511F24FBA0076AD28 /* Debug */ = {
isa = XCBuildConfiguration;
buildSettings = {
ALWAYS_SEARCH_USER_PATHS = NO;
COPY_PHASE_STRIP = NO;
GCC_DYNAMIC_NO_PIC = NO;
GCC_ENABLE_FIX_AND_CONTINUE = YES;
GCC_MODEL_TUNING = G5;
GCC_OPTIMIZATION_LEVEL = 0;
INSTALL_PATH = /usr/local/lib;
PREBINDING = NO;
PRODUCT_NAME = libupbstream;
};
name = Debug;
};
42BD1D6611F24FBA0076AD28 /* Release */ = {
isa = XCBuildConfiguration;
buildSettings = {
ALWAYS_SEARCH_USER_PATHS = NO;
COPY_PHASE_STRIP = YES;
DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym";
GCC_ENABLE_FIX_AND_CONTINUE = NO;
GCC_MODEL_TUNING = G5;
INSTALL_PATH = /usr/local/lib;
PREBINDING = NO;
PRODUCT_NAME = libupbstream;
ZERO_LINK = NO;
};
name = Release;
};
/* End XCBuildConfiguration section */
/* Begin XCConfigurationList section */
1DEB928908733DD80010E9CD /* Build configuration list for PBXProject "upb" */ = {
isa = XCConfigurationList;
buildConfigurations = (
1DEB928A08733DD80010E9CD /* Debug */,
1DEB928B08733DD80010E9CD /* Release */,
);
defaultConfigurationIsVisible = 0;
defaultConfigurationName = Release;
};
420E6F1F11F258CC001DA8FE /* Build configuration list for PBXNativeTarget "test_decoder" */ = {
isa = XCConfigurationList;
buildConfigurations = (
420E6F1A11F258A0001DA8FE /* Debug */,
420E6F1B11F258A0001DA8FE /* Release */,
);
defaultConfigurationIsVisible = 0;
defaultConfigurationName = Release;
};
42BD1D5F11F24FB10076AD28 /* Build configuration list for PBXNativeTarget "upbcore" */ = {
isa = XCConfigurationList;
buildConfigurations = (
42BD1D5D11F24F930076AD28 /* Debug */,
42BD1D5E11F24F930076AD28 /* Release */,
);
defaultConfigurationIsVisible = 0;
defaultConfigurationName = Release;
};
42BD1D6911F24FED0076AD28 /* Build configuration list for PBXNativeTarget "upbstream" */ = {
isa = XCConfigurationList;
buildConfigurations = (
42BD1D6511F24FBA0076AD28 /* Debug */,
42BD1D6611F24FBA0076AD28 /* Release */,
);
defaultConfigurationIsVisible = 0;
defaultConfigurationName = Release;
};
/* End XCConfigurationList section */
};
rootObject = 08FB7793FE84155DC02AAC07 /* Project object */;
}
Loading…
Cancel
Save