Sync with internal Google development.

This breaks the open-source build, will
follow up with a change to fix it.
pull/13171/head
Joshua Haberman 13 years ago
parent b5f5ee867e
commit 1bcab1377d
  1. 16
      Makefile
  2. 3
      benchmarks/parsestream.upb.c
  3. 24
      benchmarks/parsetoproto2.upb.cc
  4. 8
      benchmarks/parsetostruct.upb.c
  5. 39
      bindings/cpp/upb/bytestream.cc
  6. 238
      bindings/cpp/upb/bytestream.hpp
  7. 77
      bindings/cpp/upb/def.hpp
  8. 95
      bindings/cpp/upb/handlers.hpp
  9. 83
      bindings/cpp/upb/pb/decoder.hpp
  10. 23
      bindings/cpp/upb/upb.hpp
  11. 76
      examples/stream_transcode.c
  12. 15
      tests/test_cpp.cc
  13. 700
      tests/test_decoder.c
  14. 40
      tests/test_varint.c
  15. 27
      tests/test_vs_proto2.cc
  16. 10
      tests/tests.c
  17. 95
      upb/bytestream.c
  18. 97
      upb/bytestream.h
  19. 2
      upb/def.c
  20. 140
      upb/handlers.c
  21. 69
      upb/handlers.h
  22. 6
      upb/msg.c
  23. 361
      upb/pb/decoder.c
  24. 98
      upb/pb/decoder.h
  25. 306
      upb/pb/decoder_x64.dasc
  26. 33
      upb/pb/glue.c
  27. 4
      upb/pb/glue.h
  28. 40
      upb/pb/varint.h
  29. 9
      upb/table.h
  30. 97
      upb/upb.c
  31. 37
      upb/upb.h

@ -162,13 +162,9 @@ upb/pb/jit_debug_elf_file.o: upb/pb/jit_debug_elf_file.s
$(E) GAS $<
$(Q) gcc -c upb/pb/jit_debug_elf_file.s -o upb/pb/jit_debug_elf_file.o
upb/pb/jit_debug_elf_file2.o: upb/pb/jit_debug_elf_file.o
$(E) OBJCOPY $<
$(Q) objcopy --change-section-address .text=0x12345678 $< $@
upb/pb/jit_debug_elf_file.h: upb/pb/jit_debug_elf_file2.o
upb/pb/jit_debug_elf_file.h: upb/pb/jit_debug_elf_file.o
$(E) XXD $<
$(Q) xxd -i < upb/pb/jit_debug_elf_file2.o > upb/pb/jit_debug_elf_file.h
$(Q) xxd -i < upb/pb/jit_debug_elf_file.o > upb/pb/jit_debug_elf_file.h
upb/pb/decoder_x64.h: upb/pb/jit_debug_elf_file.h
endif
@ -232,15 +228,13 @@ VALGRIND=valgrind --leak-check=full --error-exitcode=1
test: tests
@echo Running all tests under valgrind.
@set -e # Abort on error.
@for test in $(SIMPLE_TESTS) $(SIMPLE_CXX_TESTS); do \
@for test in $(TESTS); do \
if [ -x ./$$test ] ; then \
echo !!! $(VALGRIND) ./$$test; \
$(VALGRIND) ./$$test tests/test.proto.pb || exit 1; \
$(VALGRIND) ./$$test || exit 1; \
fi \
done; \
$(VALGRIND) ./tests/t.test_vs_proto2.googlemessage1 benchmarks/google_messages.proto.pb benchmarks/google_message1.dat
$(VALGRIND) ./tests/t.test_vs_proto2.googlemessage2 benchmarks/google_messages.proto.pb benchmarks/google_message2.dat
@echo "All tests passed!"
echo "All tests passed!"
tests/t.test_vs_proto2.googlemessage1 \
tests/t.test_vs_proto2.googlemessage2: \

@ -76,7 +76,8 @@ static size_t run(int i)
(void)i;
upb_status status = UPB_STATUS_INIT;
upb_stringsrc_reset(&stringsrc, input_str, input_len);
upb_decoder_reset(&decoder, upb_stringsrc_allbytes(&stringsrc), NULL);
upb_decoder_reset(&decoder, upb_stringsrc_bytesrc(&stringsrc),
0, UPB_NONDELIMITED, NULL);
upb_decoder_decode(&decoder, &status);
if(!upb_ok(&status)) goto err;
return input_len;

@ -24,7 +24,6 @@
#include <google/protobuf/descriptor.h>
#undef private
char *str;
static size_t len;
MESSAGE_CIDENT msg[NUM_MESSAGES];
MESSAGE_CIDENT msg2;
@ -54,13 +53,9 @@ upb_flow_t proto2_setstr(void *m, upb_value fval, upb_value val) {
const upb_fielddef *f = upb_value_getfielddef(fval);
std::string **str = (std::string**)UPB_INDEX(m, f->offset, 1);
if (*str == f->default_ptr) *str = new std::string;
const upb_byteregion *ref = upb_value_getbyteregion(val);
uint32_t len;
(*str)->assign(
upb_byteregion_getptr(ref, upb_byteregion_startofs(ref), &len),
upb_byteregion_len(ref));
assert(len == upb_byteregion_len(ref));
const upb_strref *ref = upb_value_getstrref(val);
// XXX: only supports contiguous strings atm.
(*str)->assign(ref->ptr, ref->len);
return UPB_CONTINUE;
}
@ -69,13 +64,9 @@ upb_flow_t proto2_append_str(void *_r, upb_value fval, upb_value val) {
typedef google::protobuf::RepeatedPtrField<std::string> R;
(void)fval;
R *r = (R*)_r;
const upb_byteregion *ref = upb_value_getbyteregion(val);
const upb_strref *ref = upb_value_getstrref(val);
// XXX: only supports contiguous strings atm.
uint32_t len;
r->Add()->assign(
upb_byteregion_getptr(ref, upb_byteregion_startofs(ref), &len),
upb_byteregion_len(ref));
assert(len == upb_byteregion_len(ref));
r->Add()->assign(ref->ptr, ref->len);
return UPB_CONTINUE;
}
@ -274,7 +265,7 @@ static bool initialize()
upb_symtab_unref(s);
// Read the message data itself.
str = upb_readfile(MESSAGE_FILE, &len);
char *str = upb_readfile(MESSAGE_FILE, &len);
if(str == NULL) {
fprintf(stderr, "Error reading " MESSAGE_FILE "\n");
return false;
@ -284,6 +275,7 @@ static bool initialize()
msg2.ParseFromArray(str, len);
upb_stringsrc_init(&strsrc);
upb_stringsrc_reset(&strsrc, str, len);
upb_handlers *h = upb_handlers_new();
upb_accessors_reghandlers(h, def);
if (!JIT) h->should_jit = false;
@ -304,8 +296,8 @@ static size_t run(int i)
(void)i;
upb_status status = UPB_STATUS_INIT;
msg[i % NUM_MESSAGES].Clear();
upb_stringsrc_reset(&strsrc, str, len);
upb_decoder_reset(&d, upb_stringsrc_allbytes(&strsrc), &msg[i % NUM_MESSAGES]);
upb_decoder_reset(&d, upb_stringsrc_bytesrc(&strsrc),
0, UPB_NONDELIMITED, &msg[i % NUM_MESSAGES]);
upb_decoder_decode(&d, &status);
if(!upb_ok(&status)) goto err;
return len;

@ -8,7 +8,6 @@
#include "upb/pb/glue.h"
static const upb_msgdef *def;
char *str;
static size_t len;
static void *msg[NUM_MESSAGES];
static upb_stringsrc strsrc;
@ -34,7 +33,7 @@ static bool initialize()
upb_symtab_unref(s);
// Read the message data itself.
str = upb_readfile(MESSAGE_FILE, &len);
char *str = upb_readfile(MESSAGE_FILE, &len);
if(str == NULL) {
fprintf(stderr, "Error reading " MESSAGE_FILE "\n");
return false;
@ -44,6 +43,7 @@ static bool initialize()
msg[i] = upb_stdmsg_new(def);
upb_stringsrc_init(&strsrc);
upb_stringsrc_reset(&strsrc, str, len);
upb_handlers *h = upb_handlers_new();
upb_accessors_reghandlers(h, def);
if (!JIT) h->should_jit = false;
@ -70,8 +70,8 @@ static size_t run(int i)
upb_status status = UPB_STATUS_INIT;
i %= NUM_MESSAGES;
upb_msg_clear(msg[i], def);
upb_stringsrc_reset(&strsrc, str, len);
upb_decoder_reset(&d, upb_stringsrc_allbytes(&strsrc), msg[i]);
upb_decoder_reset(&d, upb_stringsrc_bytesrc(&strsrc),
0, UPB_NONDELIMITED, msg[i]);
upb_decoder_decode(&d, &status);
if(!upb_ok(&status)) goto err;
return len;

@ -0,0 +1,39 @@
//
// upb - a minimalist implementation of protocol buffers.
//
// Copyright (c) 2011 Google Inc. See LICENSE for details.
// Author: Josh Haberman <jhaberman@gmail.com>
#include "bytestream.hpp"
namespace upb {
// Returns the address of the one function-local static C vtable that the
// ByteSourceBase constructor registers via upb_bytesrc_init().
upb_bytesrc_vtbl* ByteSourceBase::vtable() {
  // The entries are positional, so their order must match the field order of
  // upb_bytesrc_vtbl (declared in upb/bytestream.h, not visible here).
  static upb_bytesrc_vtbl table = { VFetch, VDiscard, VCopy, VGetPtr };
  return &table;
}
// C-callable trampoline for the vtable's fetch entry: forwards to the virtual
// Fetch() of the ByteSourceBase that contains `src`, returning its result.
//
// `src` is first converted to upb_bytesrc* and only then downcast.
// ByteSourceBase has virtual functions while upb_bytesrc is a plain C struct,
// so the upb_bytesrc subobject is generally not at offset 0 of the object
// (the vptr comes first); casting the void* straight to ByteSourceBase* would
// skip the required pointer adjustment.
// NOTE(review): assumes the C library calls back with the upb_bytesrc* that
// was handed to upb_bytesrc_init() -- confirm against upb/bytestream.h.
upb_bytesuccess_t ByteSourceBase::VFetch(void *src, uint64_t ofs, size_t *len) {
  upb_bytesrc *base = static_cast<upb_bytesrc*>(src);
  return static_cast<ByteSourceBase*>(base)->Fetch(ofs, len);
}
void ByteSourceBase::VCopy(
const void *src, uint64_t ofs, size_t len, char* dest) {
static_cast<const ByteSourceBase*>(src)->Copy(ofs, len, dest);
}
void ByteSourceBase::VDiscard(void *src, uint64_t ofs) {
static_cast<ByteSourceBase*>(src)->Discard(ofs);
}
const char * ByteSourceBase::VGetPtr(
const void *src, uint64_t ofs, size_t* len) {
return static_cast<const ByteSourceBase*>(src)->GetPtr(ofs, len);
}
} // namespace upb

@ -0,0 +1,238 @@
//
// upb - a minimalist implementation of protocol buffers.
//
// Copyright (c) 2011 Google Inc. See LICENSE for details.
// Author: Josh Haberman <jhaberman@gmail.com>
//
// This file defines three core interfaces:
// - upb::ByteSink: for writing streams of data.
// - upb::ByteSource: for reading streams of data.
// - upb::ByteRegion: for reading from a specific region of a ByteSource;
// should be used by decoders instead of using a ByteSource directly.
//
// These interfaces are used by streaming encoders and decoders: for example, a
// protobuf parser gets its input from a upb::ByteRegion. They are virtual
// base classes so concrete implementations can get the data from a fd, a
// FILE*, a string, etc.
//
// A ByteRegion represents a region of data from a ByteSource.
//
// Parsers get data from this interface instead of a bytesrc because we often
// want to parse only a specific region of the input. For example, if we parse
// a string from our input but know that the string represents a protobuf, we
// can pass its ByteRegion to an appropriate protobuf parser.
//
// Since the bytes may be coming from a file or network socket, bytes must be
// fetched before they can be read (though in some cases this fetch may be a
// no-op). "fetch" is the only operation on a byteregion that could fail or
// block, because it is the only operation that actually performs I/O.
//
// Bytes can be discarded when they are no longer needed. Parsers should
// always discard bytes they no longer need, both so the buffers can be freed
// when possible and to give better visibility into what bytes the parser is
// still using.
//
//  start         discard                   read              fetch             end
//  ofs           ofs                       ofs               ofs               ofs
//  |             |--->Discard()            |                 |--->Fetch()      |
//  V             V                         V                 V                 V
//  +-------------+-------------------------+-----------------+-----------------+
//  |  discarded  |                         |                 |    fetchable    |
//  +-------------+-------------------------+-----------------+-----------------+
//                | <------------- loaded ------------------> |
//                                          | <- available -> |
//                                          | <---------- remaining ----------> |
//
// Note that the start offset may be something other than zero! A byteregion
// is a view into an underlying bytesrc stream, and the region may start
// somewhere other than the beginning of that stream.
//
// The region can be either delimited or nondelimited. A non-delimited region
// will keep returning data until the underlying data source returns EOF. A
// delimited region will return EOF at a predetermined offset.
//
//                       end
//                       ofs
//                         |
//                         V
// +-----------------------+
// |  delimited region     |   <-- hard EOF, even if data source has more data.
// +-----------------------+
//
// +------------------------
// | nondelimited region   Z   <-- won't return EOF until data source hits EOF.
// +------------------------
#ifndef UPB_BYTESTREAM_HPP
#define UPB_BYTESTREAM_HPP
#include "upb/bytestream.h"
#include "upb/upb.hpp"
namespace upb {
typedef upb_bytesuccess_t ByteSuccess;
// Implement this interface to vend bytes to ByteRegions which will be used by
// a decoder.
//
// This class is the C++ face of the C upb_bytesrc it derives from: the
// constructor registers a static table of C callbacks (see vtable()) with
// upb_bytesrc_init(), and those callbacks (the private V* functions below)
// are the entry points through which the virtual methods get invoked.
class ByteSourceBase : public upb_bytesrc {
public:
// Initializes the underlying upb_bytesrc with this class's callback table.
ByteSourceBase() { upb_bytesrc_init(this, vtable()); }
// Releases the underlying upb_bytesrc. Virtual so that subclasses can be
// destroyed through a ByteSourceBase pointer.
virtual ~ByteSourceBase() { upb_bytesrc_uninit(this); }
// Fetches at least one byte starting at ofs, setting *len to the actual
// number of bytes fetched (or 0 on EOF or error: see return value for
// details). It is valid for bytes to be fetched multiple times, as long as
// the bytes have not been previously discarded.
virtual ByteSuccess Fetch(uint64_t ofs, size_t* len) = 0;
// Discards all data prior to ofs (except data that is pinned, if pinning
// support is added -- see TODO below).
virtual void Discard(uint64_t ofs) = 0;
// Copies "len" bytes of data from ofs to "dst", which must be at least "len"
// bytes long. The given region must not be discarded.
virtual void Copy(uint64_t ofs, size_t len, char *dst) const = 0;
// Returns a pointer to the bytesrc's internal buffer, storing in *len how
// much data is available. The given offset must not be discarded. The
// returned buffer is valid for as long as its bytes are not discarded (in
// the case that part of the returned buffer is discarded, only the
// non-discarded bytes remain valid).
virtual const char *GetPtr(uint64_t ofs, size_t *len) const = 0;
// TODO: Add if/when there is a demonstrated need:
//
// // When the caller pins a region (which must not be already discarded), it
// // is guaranteed that the region will not be discarded (nor will the
// // bytesrc be destroyed) until the region is unpinned. However, not all
// // bytesrc's support pinning; a false return indicates that a pin was not
// // possible.
// virtual bool Pin(uint64_t ofs, size_t len);
//
// // Releases some number of pinned bytes from the beginning of a pinned
// // region (which may be fewer than the total number of bytes pinned).
// virtual void Unpin(uint64_t ofs, size_t len, size_t bytes_to_release);
//
// Adding pinning support would also involve adding a "pin_ofs" parameter to
// upb_bytesrc_fetch, so that the fetch can extend an already-pinned region.
private:
// Returns the callback table passed to upb_bytesrc_init() by the constructor.
static upb_bytesrc_vtbl* vtable();
// C-compatible trampolines, one per virtual method above. Each receives the
// source as an untyped pointer in its first argument.
static upb_bytesuccess_t VFetch(void*, uint64_t, size_t*);
static void VDiscard(void*, uint64_t);
static void VCopy(const void*, uint64_t, size_t, char*);
static const char *VGetPtr(const void*, uint64_t, size_t*);
};
// C++ wrapper around the C upb_byteregion: every method forwards directly to
// the corresponding upb_byteregion_*() function with "this" as the region.
// Construction/destruction call upb_byteregion_init()/upb_byteregion_uninit().
class ByteRegion : public upb_byteregion {
public:
// Length/remaining value used for regions that are not delimited.
static const uint64_t kNondelimited = UPB_NONDELIMITED;
ByteRegion() { upb_byteregion_init(this); }
~ByteRegion() { upb_byteregion_uninit(this); }
// Accessors for the region's bounds -- the meaning of these is described in
// the diagram above.
uint64_t start_ofs() const { return upb_byteregion_startofs(this); }
uint64_t discard_ofs() const { return upb_byteregion_discardofs(this); }
uint64_t fetch_ofs() const { return upb_byteregion_fetchofs(this); }
uint64_t end_ofs() const { return upb_byteregion_endofs(this); }
// Returns how many bytes are fetched and available for reading starting from
// offset "offset".
uint64_t BytesAvailable(uint64_t offset) const {
return upb_byteregion_available(this, offset);
}
// Returns the total number of bytes remaining after offset "offset", or
// kNondelimited if the byteregion is non-delimited.
uint64_t BytesRemaining(uint64_t offset) const {
return upb_byteregion_remaining(this, offset);
}
// Returns upb_byteregion_len() for this region.
// NOTE(review): presumably kNondelimited for a non-delimited region --
// confirm against upb/bytestream.h.
uint64_t Length() const { return upb_byteregion_len(this); }
// Sets the value of this byteregion to be a subset of the given byteregion's
// data. The caller is responsible for releasing this region before the src
// region is released (unless the region is first pinned, if pinning support
// is added. see below).
void Reset(const upb_byteregion *src, uint64_t ofs, uint64_t len) {
upb_byteregion_reset(this, src, ofs, len);
}
// Releases this region (see Reset() for when this must happen).
void Release() { upb_byteregion_release(this); }
// Attempts to fetch more data, extending the fetched range of this
// byteregion. The ByteSuccess result distinguishes a successful fetch from
// EOF or an error.
// NOTE(review): earlier wording described a bool return and a status
// argument "*s"; neither exists in this signature -- confirm the exact
// result codes against upb/bytestream.h.
ByteSuccess Fetch() { return upb_byteregion_fetch(this); }
// Fetches all remaining data; the ByteSuccess result reports whether the
// operation failed. May only be used on delimited byteregions.
// NOTE(review): as with Fetch(), there is no "*s" status argument here.
ByteSuccess FetchAll() { return upb_byteregion_fetchall(this); }
// Discards bytes from the byteregion up until ofs (which must be greater or
// equal to discard_ofs()). It is valid to discard bytes that have not been
// fetched (such bytes will never be fetched) but it is an error to discard
// past the end of a delimited byteregion.
void Discard(uint64_t ofs) { return upb_byteregion_discard(this, ofs); }
// Copies "len" bytes of data into "dst", starting at ofs. The specified
// region must be available.
void Copy(uint64_t ofs, size_t len, char *dst) const {
upb_byteregion_copy(this, ofs, len, dst);
}
// Copies all bytes from the byteregion into dst. Requires that the entire
// byteregion is fetched and that none has been discarded.
void CopyAll(char *dst) const {
upb_byteregion_copyall(this, dst);
}
// Returns a pointer to the internal buffer for the byteregion starting at
// offset "ofs." Stores the number of bytes available in this buffer in *len.
// The returned buffer is invalidated when the byteregion is reset or
// released, or when the bytes are discarded. If the byteregion is not
// currently pinned, the pointer is only valid for the lifetime of the parent
// byteregion.
const char *GetPtr(uint64_t ofs, size_t *len) const {
return upb_byteregion_getptr(this, ofs, len);
}
// Copies the contents of the byteregion into a newly-allocated,
// NULL-terminated string. Requires that the byteregion is fully fetched.
// NOTE(review): the caller presumably owns the returned buffer -- confirm
// which deallocator pairs with upb_byteregion_strdup().
char *StrDup() const {
return upb_byteregion_strdup(this);
}
// TODO: add if/when there is a demonstrated need.
//
// // Pins this byteregion's bytes in memory, allowing it to outlive its
// // parent byteregion. Normally a byteregion may only be used while its
// // parent is still valid, but a pinned byteregion may continue to be used
// // until it is reset or released. A byteregion must be fully fetched to
// // be pinned (this implies that the byteregion must be delimited).
// //
// // In some cases this operation may cause the input data to be copied.
// //
// // void Pin();
};
// C++ wrapper around the C upb_stringsrc, a byte source backed by an
// in-memory buffer. Construction/destruction call
// upb_stringsrc_init()/upb_stringsrc_uninit().
class StringSource : public upb_stringsrc {
public:
StringSource() : upb_stringsrc() { upb_stringsrc_init(this); }
~StringSource() { upb_stringsrc_uninit(this); }
// Points this source at the "len" bytes starting at "data".
// NOTE(review): presumably the buffer is referenced rather than copied and
// must outlive its use by this source -- confirm in upb/bytestream.h.
void Reset(const char* data, size_t len) {
upb_stringsrc_reset(this, data, len);
}
// Returns the region produced by upb_stringsrc_allbytes(), viewed through
// the C++ ByteRegion wrapper type (a static_cast; nothing is copied).
ByteRegion* AllBytes() {
return static_cast<ByteRegion*>(upb_stringsrc_allbytes(this));
}
// Returns the underlying C byte source for APIs that take a upb_bytesrc*.
upb_bytesrc* ByteSource() { return upb_stringsrc_bytesrc(this); }
};
} // namespace upb
#endif

@ -1,42 +1,41 @@
/*
* upb - a minimalist implementation of protocol buffers.
*
* Copyright (c) 2011 Google Inc. See LICENSE for details.
* Author: Josh Haberman <jhaberman@gmail.com>
*
* The set of upb::*Def classes and upb::SymbolTable allow for defining and
* manipulating schema information (as defined in .proto files).
*
* Defs go through two distinct phases of life:
*
* 1. MUTABLE: when first created, the properties of the def can be set freely
* (for example a message's name, its list of fields, the name/number of
* fields, etc). During this phase the def is *not* thread-safe, and may
* not be used for any purpose except to set its properties (it can't be
* used to parse anything, create any messages in memory, etc).
*
* 2. FINALIZED: after being added to a symtab (which links the defs together)
* the defs become finalized (thread-safe and immutable). Programs may only
* access defs through a CONST POINTER during this stage -- upb_symtab will
* help you out with this requirement by only vending const pointers, but
* you need to make sure not to use any non-const pointers you still have
* sitting around. In practice this means that you may not call any setters
* on the defs (or functions that themselves call the setters). If you want
* to modify an existing immutable def, copy it with upb_*_dup(), modify the
* copy, and add the modified def to the symtab (replacing the existing
* def).
*
* You can test for which stage of life a def is in by calling
* upb::Def::IsMutable(). This is particularly useful for dynamic language
* bindings, which must properly guarantee that the dynamic language cannot
* break the rules laid out above.
*
* It would be possible to make the defs thread-safe during stage 1 by using
* mutexes internally and changing any methods returning pointers to return
* copies instead. This could be important if we are integrating with a VM or
* interpreter that does not naturally serialize access to wrapped objects (for
* example, in the case of Python this is not necessary because of the GIL).
*/
//
// upb - a minimalist implementation of protocol buffers.
//
// Copyright (c) 2011 Google Inc. See LICENSE for details.
// Author: Josh Haberman <jhaberman@gmail.com>
//
// The set of upb::*Def classes and upb::SymbolTable allow for defining and
// manipulating schema information (as defined in .proto files).
//
// Defs go through two distinct phases of life:
//
// 1. MUTABLE: when first created, the properties of the def can be set freely
// (for example a message's name, its list of fields, the name/number of
// fields, etc). During this phase the def is *not* thread-safe, and may
// not be used for any purpose except to set its properties (it can't be
// used to parse anything, create any messages in memory, etc).
//
// 2. FINALIZED: after being added to a symtab (which links the defs together)
// the defs become finalized (thread-safe and immutable). Programs may only
// access defs through a CONST POINTER during this stage -- upb_symtab will
// help you out with this requirement by only vending const pointers, but
// you need to make sure not to use any non-const pointers you still have
// sitting around. In practice this means that you may not call any setters
// on the defs (or functions that themselves call the setters). If you want
// to modify an existing immutable def, copy it with upb_*_dup(), modify the
// copy, and add the modified def to the symtab (replacing the existing
// def).
//
// You can test for which stage of life a def is in by calling
// upb::Def::IsMutable(). This is particularly useful for dynamic language
// bindings, which must properly guarantee that the dynamic language cannot
// break the rules laid out above.
//
// It would be possible to make the defs thread-safe during stage 1 by using
// mutexes internally and changing any methods returning pointers to return
// copies instead. This could be important if we are integrating with a VM or
// interpreter that does not naturally serialize access to wrapped objects (for
// example, in the case of Python this is not necessary because of the GIL).
#ifndef UPB_DEF_HPP
#define UPB_DEF_HPP

@ -1,15 +1,14 @@
/*
* upb - a minimalist implementation of protocol buffers.
*
* Copyright (c) 2011 Google Inc. See LICENSE for details.
* Author: Josh Haberman <jhaberman@gmail.com>
*
* upb::Handlers is a generic visitor-like interface for iterating over a
* stream of protobuf data. You can register function pointers that will be
* called for each message and/or field as the data is being parsed or iterated
* over, without having to know the source format that we are parsing from.
* This decouples the parsing logic from the processing logic.
*/
//
// upb - a minimalist implementation of protocol buffers.
//
// Copyright (c) 2011 Google Inc. See LICENSE for details.
// Author: Josh Haberman <jhaberman@gmail.com>
//
// upb::Handlers is a generic visitor-like interface for iterating over a
// stream of protobuf data. You can register function pointers that will be
// called for each message and/or field as the data is being parsed or iterated
// over, without having to know the source format that we are parsing from.
// This decouples the parsing logic from the processing logic.
#ifndef UPB_HANDLERS_HPP
#define UPB_HANDLERS_HPP
@ -18,6 +17,7 @@
namespace upb {
typedef upb_fieldtype_t FieldType;
typedef upb_flow_t Flow;
class MessageHandlers;
@ -30,8 +30,8 @@ class FieldHandlers : public upb_fhandlers {
// The FieldHandlers will live at least as long as the upb::Handlers to
// which it belongs, but can be Ref'd/Unref'd to make it live longer (which
// will prolong the life of the underlying upb::Handlers also).
void Ref() const { upb_fhandlers_ref(this); }
void Unref() const { upb_fhandlers_unref(this); }
void Ref() { upb_fhandlers_ref(this); }
void Unref() { upb_fhandlers_unref(this); }
// Functions to set this field's handlers.
// These return "this" so they can be conveniently chained, eg.
@ -46,13 +46,13 @@ class FieldHandlers : public upb_fhandlers {
upb_fhandlers_setstartseq(this, h); return this;
}
FieldHandlers* SetEndSequenceHandler(EndFieldHandler* h) {
upb_fhandlers_endseq(this, h); return this;
upb_fhandlers_setendseq(this, h); return this;
}
FieldHandlers* SetStartSubmessageHandler(StartFieldHandler* h) {
upb_fhandlers_setstartsubmsg(this, h); return this;
}
FieldHandlers* SetEndSubmessageHandler(EndFieldHandler* h) {
upb_fhandlers_endsubmsg(this, h); return this;
upb_fhandlers_setendsubmsg(this, h); return this;
}
// Get/Set the field's bound value, which will be passed to its handlers.
@ -62,27 +62,20 @@ class FieldHandlers : public upb_fhandlers {
}
// Returns the MessageHandlers to which we belong.
MessageHandlers* GetMessageHandlers() const {
return upb_fhandlers_msg(this);
}
MessageHandlers* GetMessageHandlers() const;
// Returns the MessageHandlers for this field's submessage (invalid to call
// unless this field's type is UPB_TYPE(MESSAGE) or UPB_TYPE(GROUP)).
MessageHandlers* GetSubMessageHandlers() const {
return upb_fhandlers_submsg(this);
}
MessageHandlers* GetSubMessageHandlers() const;
// If set to >=0, the given hasbit will be set after the value callback is
// called (relative to the current closure).
int32_t GetValueHasbit() const { return upb_fhandler_valuehasbit(this); }
void SetValueHasbit(int32_t bit) { upb_fhandler_setvaluehasbit(this, bit); }
// called (offset relative to the current closure).
int32_t GetValueHasbit() const { return upb_fhandlers_getvaluehasbit(this); }
void SetValueHasbit(int32_t bit) { upb_fhandlers_setvaluehasbit(this, bit); }
private:
FieldHandlers(); // Only created by upb::Handlers.
~FieldHandlers(); // Only destroyed by refcounting.
};
class MessageHandlers : public upb_mhandlers {
public:
typedef upb_startmsg_handler StartMessageHandler;
@ -91,8 +84,8 @@ class MessageHandlers : public upb_mhandlers {
// The MessageHandlers will live at least as long as the upb::Handlers to
// which it belongs, but can be Ref'd/Unref'd to make it live longer (which
// will prolong the life of the underlying upb::Handlers also).
void Ref() const { upb_mhandlers_ref(this); }
void Unref() const { upb_mhandlers_unref(this); }
void Ref() { upb_mhandlers_ref(this); }
void Unref() { upb_mhandlers_unref(this); }
// Functions to set this message's handlers.
// These return "this" so they can be conveniently chained, eg.
@ -107,12 +100,10 @@ class MessageHandlers : public upb_mhandlers {
}
// Functions to create new FieldHandlers for this message.
FieldHandlers* NewFieldHandlers(uint32_t fieldnum, upb_fieldtype_t type,
FieldHandlers* NewFieldHandlers(uint32_t fieldnum, FieldType type,
bool repeated) {
return upb_mhandlers_newfhandlers(this, fieldnum, type, repeated);
}
FieldHandlers* NewFieldHandlers(FieldDef* f) {
return upb_mhandlers_newfhandlers_fordef(f);
return static_cast<FieldHandlers*>(
upb_mhandlers_newfhandlers(this, fieldnum, type, repeated));
}
// Like the previous but for MESSAGE or GROUP fields. For GROUP fields, the
@ -120,15 +111,10 @@ class MessageHandlers : public upb_mhandlers {
FieldHandlers* NewFieldHandlersForSubmessage(uint32_t n, const char *name,
FieldType type, bool repeated,
MessageHandlers* subm) {
return upb_mhandlers_newsubmsgfhandlers(this, n, type, repeated, subm);
}
FieldHandlers* NewFieldHandlersForSubmessage(FieldDef* f,
MessageHandlers* subm) {
return upb_mhandlers_newsubmsgfhandlers_fordef(f);
return static_cast<FieldHandlers*>(
upb_mhandlers_newfhandlers_subm(this, n, type, repeated, subm));
}
private:
MessageHandlers(); // Only created by upb::Handlers.
~MessageHandlers(); // Only destroyed by refcounting.
@ -137,26 +123,31 @@ class MessageHandlers : public upb_mhandlers {
class Handlers : public upb_handlers {
public:
// Creates a new Handlers instance.
Handlers* New() { return static_cast<Handlers*>(upb_handlers_new()); }
static Handlers* New() { return static_cast<Handlers*>(upb_handlers_new()); }
void Ref() { upb_handlers_ref(this); }
void Unref() { upb_handlers_unref(this); }
// Returns a new MessageHandlers object. The first such message that is
// obtained will be the top-level message for this Handlers object.
MessageHandlers* NewMessageHandlers() { return upb_handlers_newmhandlers(this); }
// Freezes the handlers against future modification. Handlers must be
// finalized before they can be passed to a data producer. After Finalize()
// has been called, you may only call const methods on the Handlers and its
// MessageHandlers/FieldHandlers.
void Finalize() { upb_handlers_finalize(this); }
MessageHandlers* NewMessageHandlers() {
return static_cast<MessageHandlers*>(upb_handlers_newmhandlers(this));
}
private:
FieldHandlers(); // Only created by Handlers::New().
~FieldHandlers(); // Only destroyed by refcounting.
Handlers(); // Only created by Handlers::New().
~Handlers(); // Only destroyed by refcounting.
};
// Returns the MessageHandlers to which this FieldHandlers belongs, i.e. the
// result of upb_fhandlers_getmsg() viewed through the C++ wrapper type.
//
// Defined out of class because MessageHandlers must be a complete type for
// the static_cast. Marked `inline` because this definition lives in a header:
// without it, every translation unit that includes the header would emit its
// own external definition and the program would fail to link.
inline MessageHandlers* FieldHandlers::GetMessageHandlers() const {
  return static_cast<MessageHandlers*>(upb_fhandlers_getmsg(this));
}
// Returns the MessageHandlers for this field's submessage, i.e. the result of
// upb_fhandlers_getsubmsg() viewed through the C++ wrapper type.
//
// Defined out of class because MessageHandlers must be a complete type for
// the static_cast. Marked `inline` because this definition lives in a header:
// without it, every translation unit that includes the header would emit its
// own external definition and the program would fail to link.
inline MessageHandlers* FieldHandlers::GetSubMessageHandlers() const {
  return static_cast<MessageHandlers*>(upb_fhandlers_getsubmsg(this));
}
} // namespace upb
#endif

@ -0,0 +1,83 @@
//
// upb - a minimalist implementation of protocol buffers.
//
// Copyright (c) 2011 Google Inc. See LICENSE for details.
// Author: Josh Haberman <jhaberman@gmail.com>
//
// upb::Decoder is a high performance, streaming decoder for protobuf
// data that works by getting its input data from a upb::ByteRegion and calling
// into a upb::Handlers.
//
// A DecoderPlan contains whatever data structures and generated (JIT-ted) code
// are necessary to decode protobuf data of a specific type to a specific set
// of handlers. By generating the plan ahead of time, we avoid having to
// redo this work every time we decode.
//
// A DecoderPlan is threadsafe, meaning that it can be used concurrently by
// different upb::Decoders in different threads. However, the upb::Decoders are
// *not* thread-safe.
#ifndef UPB_PB_DECODER_HPP
#define UPB_PB_DECODER_HPP
#include "upb/pb/decoder.h"
#include "upb/bytestream.hpp"
#include "upb/upb.hpp"
namespace upb {
// C++ wrapper around the C upb_decoderplan. Instances are obtained from New()
// (the constructor is private) and released with Unref().
class DecoderPlan : public upb_decoderplan {
public:
// Creates a plan for decoding into the given handlers by calling
// upb_decoderplan_new(). "allow_jit" is passed through unchanged; see
// HasJitCode() for whether JIT code was actually produced.
static DecoderPlan* New(Handlers* h, bool allow_jit) {
return static_cast<DecoderPlan*>(upb_decoderplan_new(h, allow_jit));
}
// Drops a reference to the plan via upb_decoderplan_unref().
void Unref() { upb_decoderplan_unref(this); }
// Returns true if the plan contains JIT-ted code. This may not be the same
// as the "allow_jit" parameter passed to New() if support for JIT-ting was
// not compiled in.
bool HasJitCode() { return upb_decoderplan_hasjitcode(this); }
private:
DecoderPlan() {} // Only constructed by New
};
// C++ wrapper around the C upb_decoder. Construction/destruction call
// upb_decoder_init()/upb_decoder_uninit(); the remaining methods forward to
// the matching upb_decoder_*() functions.
//
// Expected call order, per the method comments below: ResetPlan(), then
// ResetInput(), then Decode().
class Decoder : public upb_decoder {
public:
Decoder() { upb_decoder_init(this); }
~Decoder() { upb_decoder_uninit(this); }
// Resets the plan that the decoder will parse from. This will also reset the
// decoder's input to be uninitialized -- ResetInput() must be called before
// parsing can occur. The plan must live until the decoder is destroyed or
// reset to a different plan.
//
// Must be called before ResetInput() or Decode().
//
// NOTE(review): "msg_offset" is forwarded to upb_decoder_resetplan()
// unchanged; its meaning is not documented here -- confirm in
// upb/pb/decoder.h.
void ResetPlan(DecoderPlan* plan, int32_t msg_offset) {
upb_decoder_resetplan(this, plan, msg_offset);
}
// Resets the input of the decoder. This puts it in a state where it has not
// seen any data, and expects the next data to be from the beginning of a new
// protobuf.
//
// ResetInput() must be called before Decode() but may be called more than
// once. "byte_region" must live until the decoder is destroyed or
// ResetInput() is called again. "c" is the closure that will be passed to
// the handlers.
void ResetInput(ByteRegion* byte_region, void* c) {
upb_decoder_resetinput(this, byte_region, c);
}
// Decodes serialized data (calling Handlers as the data is parsed) until
// error or EOF (see status() for details).
Success Decode() { return upb_decoder_decode(this); }
// Returns the decoder's status object (from upb_decoder_status()), viewed
// through the C++ upb::Status wrapper type.
const upb::Status& status() {
return static_cast<const upb::Status&>(*upb_decoder_status(this));
}
};
} // namespace upb
#endif

@ -1,23 +1,34 @@
/*
* upb - a minimalist implementation of protocol buffers.
*
* Copyright (c) 2011 Google Inc. See LICENSE for details.
* Author: Josh Haberman <jhaberman@gmail.com>
*/
//
// upb - a minimalist implementation of protocol buffers.
//
// Copyright (c) 2011 Google Inc. See LICENSE for details.
// Author: Josh Haberman <jhaberman@gmail.com>
#ifndef UPB_HPP
#define UPB_HPP
#include "upb/upb.h"
#include <iostream>
namespace upb {
typedef upb_success_t Success;
// C++ wrapper around the C upb_status. Construction/destruction call
// upb_status_init()/upb_status_uninit(); every other method forwards to the
// corresponding upb_status_*() / upb_ok() / upb_eof() function.
class Status : public upb_status {
public:
Status() { upb_status_init(this); }
~Status() { upb_status_uninit(this); }
// Result of upb_ok() for this status.
bool ok() const { return upb_ok(this); }
// Result of upb_eof() for this status.
bool eof() const { return upb_eof(this); }
// Returns the status message as produced by upb_status_getstr().
// NOTE(review): lifetime of the returned string is not visible here;
// presumably owned by the status object -- confirm in upb/upb.h.
const char *GetString() const { return upb_status_getstr(this); }
// Marks this status as EOF.
void SetEof() { upb_status_seteof(this); }
// Sets an error using "msg", which is handed to upb_status_seterrliteral().
// NOTE(review): the "literal" naming suggests the string is not copied and
// must have static lifetime -- confirm in upb/upb.h.
void SetErrorLiteral(const char* msg) {
upb_status_seterrliteral(this, msg);
}
// Resets the status via upb_status_clear().
void Clear() { upb_status_clear(this); }
};
class Value : public upb_value {

@ -0,0 +1,76 @@
#include <stdlib.h>
#include "upb/bytestream.h"
#include "upb/pb/decoder.h"
#include "upb/pb/glue.h"
#include "upb/pb/textprinter.h"
// Reads a serialized protobuf from stdin and prints it to stdout in text
// format.
//
// Usage: stream_transcode <descfile> <msgname>
//   descfile: file containing a serialized descriptor that defines msgname.
//   msgname:  name of the message type to decode stdin as.
//
// Returns 1 if the arguments or the descriptor are unusable, 0 otherwise.
int main(int argc, char *argv[]) {
  if (argc < 3) {
    fprintf(stderr, "Usage: stream_transcode <descfile> <msgname>\n");
    return 1;
  }

  // Load the descriptor file into a fresh symbol table.
  upb_symtab *symtab = upb_symtab_new();
  size_t desc_len;
  const char *desc = upb_readfile(argv[1], &desc_len);
  if (!desc) {
    fprintf(stderr, "Couldn't open descriptor file: %s\n", argv[1]);
    return 1;
  }
  upb_status status = UPB_STATUS_INIT;
  upb_load_descriptor_into_symtab(symtab, desc, desc_len, &status);
  if (!upb_ok(&status)) {
    fprintf(stderr, "Error parsing descriptor: %s", upb_status_getstr(&status));
    return 1;
  }
  free((void*)desc);

  // Look up the requested message type.
  const upb_def *md = upb_symtab_lookup(symtab, argv[2]);
  if (!md) {
    fprintf(stderr, "Descriptor did not contain message: %s\n", argv[2]);
    return 1;
  }
  const upb_msgdef *m = upb_dyncast_msgdef_const(md);
  if (!m) {
    fprintf(stderr, "Def was not a msgdef.\n");
    return 1;
  }

  // Wire stdin -> decoder -> text printer -> stdout.
  upb_stdio in, out;
  upb_stdio_init(&in);
  upb_stdio_init(&out);
  upb_stdio_reset(&in, stdin);
  upb_stdio_reset(&out, stdout);
  upb_handlers *handlers = upb_handlers_new();
  upb_textprinter *p = upb_textprinter_new();
  upb_textprinter_reset(p, upb_stdio_bytesink(&out), false);
  upb_textprinter_reghandlers(handlers, m);
  upb_decoder d;
  upb_decoder_init(&d, handlers);
  upb_decoder_reset(&d, upb_stdio_bytesrc(&in), 0, UPB_NONDELIMITED, p);
  upb_status_clear(&status);
  upb_decoder_decode(&d, &status);
  if (!upb_ok(&status)) {
    fprintf(stderr, "Error parsing input: %s", upb_status_getstr(&status));
  }

  // Release everything we created so Valgrind reports a clean exit.
  upb_status_uninit(&status);
  upb_stdio_uninit(&in);
  upb_stdio_uninit(&out);
  upb_decoder_uninit(&d);
  upb_textprinter_free(p);
  // Drop our own reference to the handlers; it was previously leaked.
  upb_handlers_unref(handlers);
  upb_def_unref(UPB_UPCAST(m));
  upb_symtab_unref(symtab);
  // Prevent C library from holding buffers open, so Valgrind doesn't see
  // memory leaks.
  fclose(stdin);
  fclose(stdout);
  return 0;
}

@ -9,7 +9,11 @@
#include <stdio.h>
#include <iostream>
#include "upb/bytestream.hpp"
#include "upb/def.hpp"
#include "upb/handlers.hpp"
#include "upb/upb.hpp"
#include "upb/pb/decoder.hpp"
#include "upb/pb/glue.hpp"
static void TestSymbolTable(const char *descriptor_file) {
@ -26,11 +30,22 @@ static void TestSymbolTable(const char *descriptor_file) {
md->Unref();
}
// Checks that a StringSource exposes its contents through its top-level
// ByteRegion: fetches all bytes and compares a duplicated copy to the input.
static void TestByteStream() {
  upb::StringSource stringsrc;
  stringsrc.Reset("testing", 7);
  upb::ByteRegion* byteregion = stringsrc.AllBytes();
  // FetchAll() has side effects, so it must not live inside assert(): with
  // NDEBUG defined the call would be compiled out and StrDup() would run on a
  // region that was never fetched.
  const bool fetched_ok = (byteregion->FetchAll() == UPB_BYTE_OK);
  assert(fetched_ok);
  (void)fetched_ok;  // Silence unused-variable warnings under NDEBUG.
  char* str = byteregion->StrDup();
  assert(strcmp(str, "testing") == 0);
  free(str);
}
// Entry point: runs the C++ binding tests.  The single required argument is
// the path to a descriptor file, which is handed to TestSymbolTable().
int main(int argc, char *argv[]) {
  if (argc >= 2) {
    TestSymbolTable(argv[1]);
    TestByteStream();
    return 0;
  }
  fprintf(stderr, "Usage: test_cpp <descriptor file>\n");
  return 1;
}

@ -1,76 +1,666 @@
/*
* upb - a minimalist implementation of protocol buffers.
*
* Copyright (c) 2011 Google Inc. See LICENSE for details.
*
* An exhaustive set of tests for parsing both valid and invalid protobuf
* input, with buffer breaks in arbitrary places.
*
* Tests to add:
* - unknown field handler called appropriately
* - unknown fields can be inserted in random places
* - fuzzing of valid input
* - resource limits (max stack depth, max string len)
* - testing of groups
* - more thorough testing of sequences
* - test skipping of submessages
* - test suspending the decoder
* - buffers that are close enough to the end of the address space that
* pointers overflow (this might be difficult).
* - a few "kitchen sink" examples (one proto that uses all types, lots
* of submsg/sequences, etc.)
*/
#include <inttypes.h>
#include <stdarg.h>
#include <stdint.h>
#include <stdlib.h>
#include "upb/bytestream.h"
#include <string.h>
#include "upb/handlers.h"
#include "upb/pb/decoder.h"
#include "upb/pb/glue.h"
#include "upb/pb/textprinter.h"
#include "upb/pb/varint.h"
#include "upb/upb.h"
#include "upb_test.h"
int main(int argc, char *argv[]) {
if (argc < 3) {
fprintf(stderr, "Usage: test_decoder <descfile> <msgname>\n");
return 1;
// A heap-allocated byte buffer with an explicit length.  Used both to build
// test protos and to accumulate handler output.
typedef struct {
  char *buf;   // Owned storage.
  size_t len;  // Number of meaningful bytes in "buf".
} buffer;

// Returns a new buffer of "len" bytes, all initialized to zero.
//
// At least one byte is always allocated so that buf->buf is never NULL, even
// for len == 0 (malloc(0) is allowed to return NULL).  Aborts if memory cannot
// be allocated, rather than crashing later on a NULL dereference.
buffer *buffer_new(size_t len) {
  buffer *buf = malloc(sizeof(*buf));
  if (!buf) abort();
  buf->buf = calloc(len ? len : 1, 1);
  if (!buf->buf) abort();
  buf->len = len;
  return buf;
}
// Returns a new buffer holding a copy of the "len" bytes at "data".
buffer *buffer_new2(const void *data, size_t len) {
  buffer *copy = buffer_new(len);
  memcpy(copy->buf, data, len);
  return copy;
}
// Returns a new buffer holding the bytes of the C string "data", without its
// terminating NUL.
buffer *buffer_new3(const char *data) {
  const size_t nbytes = strlen(data);
  return buffer_new2(data, nbytes);
}
// Returns a new buffer with a copy of the contents of "buf".
buffer *buffer_dup(buffer *buf) {
  const char *bytes = buf->buf;
  return buffer_new2(bytes, buf->len);
}
// Releases the buffer's storage and then the buffer object itself.
void buffer_free(buffer *buf) {
  char *storage = buf->buf;
  free(storage);
  free(buf);
}
// printf-style append: formats "fmt" with the variadic arguments through
// upb_vrprintf() and advances buf->len by the value that call returns.
void buffer_appendf(buffer *buf, const char *fmt, ...) {
va_list args;
va_start(args, fmt);
// buf->len is passed both as the current size and as the write offset.
// NOTE(review): presumably upb_vrprintf() reallocates buf->buf when the
// output does not fit -- confirm against its definition.
size_t size = buf->len;
buf->len += upb_vrprintf(&buf->buf, &size, buf->len, fmt, args);
va_end(args);
}
// Appends the contents of "buf2" to "buf", then frees "buf2" (this function
// takes ownership of it).  Aborts if the storage cannot be grown.
void buffer_cat(buffer *buf, buffer *buf2) {
  if (buf2->len > 0) {
    size_t newlen = buf->len + buf2->len;
    // Grow through a temporary: on failure realloc() returns NULL and leaves
    // the old block allocated, so assigning straight to buf->buf would leak
    // it and then crash in memcpy().
    char *grown = realloc(buf->buf, newlen);
    if (!grown) abort();
    memcpy(grown + buf->len, buf2->buf, buf2->len);
    buf->buf = grown;
    buf->len = newlen;
  }
  // Nothing to append when buf2 is empty.  Skipping the resize also avoids
  // realloc(ptr, 0), whose behavior is implementation-defined, when both
  // buffers are empty.
  buffer_free(buf2);
}
// Returns true iff the two buffers have the same length and the same bytes.
bool buffer_eql(buffer *buf, buffer *buf2) {
  if (buf->len != buf2->len) return false;
  return memcmp(buf->buf, buf2->buf, buf->len) == 0;
}
/* Routines for building arbitrary protos *************************************/
// Concatenates every following buffer onto "arg1" and returns "arg1".  The
// argument list must be terminated with NULL.  Each appended buffer is handed
// to buffer_cat().
buffer *cat(buffer *arg1, ...) {
  va_list rest;
  va_start(rest, arg1);
  for (buffer *next = va_arg(rest, buffer*); next != NULL;
       next = va_arg(rest, buffer*)) {
    buffer_cat(arg1, next);
  }
  va_end(rest);
  return arg1;
}
// Returns a new buffer holding the varint encoding of "x".
buffer *varint(uint64_t x) {
// Allocate room for the longest varint (plus one spare byte), then set len to
// the value upb_vencode64() returns.
buffer *buf = buffer_new(UPB_PB_VARINT_MAX_LEN + 1);
buf->len = upb_vencode64(x, buf->buf);
return buf;
}
// Builders for individual encoded values.  Each returns a new buffer.
//
// TODO: proper byte-swapping for big-endian machines.
// fixed32()/fixed64() copy 4/8 raw bytes from "data" as they sit in memory.
buffer *fixed32(void *data) { return buffer_new2(data, 4); }
buffer *fixed64(void *data) { return buffer_new2(data, 8); }
// Length-prefixes "buf": a varint of buf->len followed by buf's bytes.  "buf"
// is passed on to cat(), so the caller must not use it afterwards.
buffer *delim(buffer *buf) { return cat( varint(buf->len), buf, NULL ); }
// Typed conveniences over fixed32()/fixed64().
buffer *uint32(uint32_t u32) { return fixed32(&u32); }
buffer *uint64(uint64_t u64) { return fixed64(&u64); }
buffer *flt(float f) { return fixed32(&f); }
buffer *dbl(double d) { return fixed64(&d); }
// Zig-zag encode with upb_zzenc_32()/upb_zzenc_64(), then varint-encode.
buffer *zz32(int32_t x) { return varint(upb_zzenc_32(x)); }
buffer *zz64(int64_t x) { return varint(upb_zzenc_64(x)); }
// Returns a new buffer holding the varint-encoded tag for the given field
// number and wire type, i.e. (fieldnum << 3) | wire_type.
buffer *tag(uint32_t fieldnum, char wire_type) {
  // Widen before shifting: in 32 bits, fieldnum << 3 silently wraps for any
  // fieldnum >= 2^29, so a deliberately oversized field number would be
  // encoded as a small, different one.
  // NOTE(review): the "field number is too large" test passes
  // UPB_MAX_FIELDNUMBER + 1, presumably 2^29 -- confirm.
  // The wire type goes through unsigned char so a negative char cannot
  // sign-extend into the field-number bits.
  uint64_t key = ((uint64_t)fieldnum << 3) | (uint64_t)(unsigned char)wire_type;
  return varint(key);
}
// Wraps "buf" as a length-delimited field numbered "fn": a delimited-type tag
// followed by the length-prefixed payload.
buffer *submsg(uint32_t fn, buffer *buf) {
  buffer *key = tag(fn, UPB_WIRE_TYPE_DELIMITED);
  buffer *payload = delim(buf);
  return cat(key, payload, NULL);
}
upb_symtab *symtab = upb_symtab_new();
size_t desc_len;
const char *desc = upb_readfile(argv[1], &desc_len);
if (!desc) {
fprintf(stderr, "Couldn't open descriptor file: %s\n", argv[1]);
return 1;
/* A set of handlers that covers all .proto types *****************************/
// The handlers simply append to a string indicating what handlers were called.
// This string is similar to protobuf text format but fields are referred to by
// number instead of name and sequences are explicitly delimited.
// Defines value_<member>(), a value handler that appends
// "<fieldnum>:<value>; " to the buffer passed as the closure.  The field
// number is read from "fval" with upb_value_getuint32() and the value from
// "val" with upb_value_get<member>(); "fmt" is the printf conversion used for
// the value.
#define VALUE_HANDLER(member, fmt) \
upb_flow_t value_ ## member(void *closure, upb_value fval, upb_value val) { \
buffer_appendf(closure, "%" PRIu32 ":%" fmt "; ", \
upb_value_getuint32(fval), upb_value_get ## member(val)); \
return UPB_CONTINUE; \
}
upb_status status = UPB_STATUS_INIT;
upb_load_descriptor_into_symtab(symtab, desc, desc_len, &status);
if (!upb_ok(&status)) {
fprintf(stderr, "Error parsing descriptor: %s", upb_status_getstr(&status));
return 1;
VALUE_HANDLER(uint32, PRIu32)
VALUE_HANDLER(uint64, PRIu64)
VALUE_HANDLER(int32, PRId32)
VALUE_HANDLER(int64, PRId64)
VALUE_HANDLER(float, "g")
VALUE_HANDLER(double, "g")
// Value handler for bool fields: appends "<fieldnum>:true; " or
// "<fieldnum>:false; " to the closure buffer.
upb_flow_t value_bool(void *closure, upb_value fval, upb_value val) {
  const char *text = upb_value_getbool(val) ? "true" : "false";
  buffer_appendf(closure, "%" PRIu32 ":%s; ", upb_value_getuint32(fval), text);
  return UPB_CONTINUE;
}
// Value handler for string/bytes fields: appends "<fieldnum>:<text>; " to the
// closure buffer.
upb_flow_t value_string(void *closure, upb_value fval, upb_value val) {
// Note: won't work with strings that contain embedded NUL bytes, because the
// duplicated data is printed with "%s".
char *str = upb_byteregion_strdup(upb_value_getbyteregion(val));
buffer_appendf(closure, "%" PRIu32 ":%s; ", upb_value_getuint32(fval), str);
free(str);
return UPB_CONTINUE;
}
// Start-of-submessage handler: appends "<fieldnum>:{ " and continues with the
// same closure for the submessage.
upb_sflow_t startsubmsg(void *closure, upb_value fval) {
  const uint32_t fieldnum = upb_value_getuint32(fval);
  buffer_appendf(closure, "%" PRIu32 ":{ ", fieldnum);
  return UPB_CONTINUE_WITH(closure);
}
// End-of-submessage handler: appends the closing "} ".
upb_flow_t endsubmsg(void *closure, upb_value fval) {
  (void)fval;  // Unused; marked the same way upb_seamsrc_discard() does.
  buffer_appendf(closure, "} ");
  return UPB_CONTINUE;
}
// Start-of-sequence handler: appends "<fieldnum>:[ " and continues with the
// same closure for the sequence's elements.
upb_sflow_t startseq(void *closure, upb_value fval) {
  const uint32_t fieldnum = upb_value_getuint32(fval);
  buffer_appendf(closure, "%" PRIu32 ":[ ", fieldnum);
  return UPB_CONTINUE_WITH(closure);
}
// End-of-sequence handler: appends the closing "] ".
upb_flow_t endseq(void *closure, upb_value fval) {
  (void)fval;  // Unused; marked the same way upb_seamsrc_discard() does.
  buffer_appendf(closure, "] ");
  return UPB_CONTINUE;
}
// Registers one field on "m" with number "num" and the given type and
// repeatedness.  "handler" becomes its value handler, startseq/endseq its
// sequence handlers, and the field number is stored as the handlers' fval so
// the callbacks can print it.
void doreg(upb_mhandlers *m, uint32_t num, upb_fieldtype_t type, bool repeated,
upb_value_handler *handler) {
upb_fhandlers *f = upb_mhandlers_newfhandlers(m, num, type, repeated);
ASSERT(f);
upb_fhandlers_setvalue(f, handler);
upb_fhandlers_setstartseq(f, &startseq);
upb_fhandlers_setendseq(f, &endseq);
upb_fhandlers_setfval(f, upb_value_uint32(num));
}
// The repeated field number to correspond to the given non-repeated field
// number.
uint32_t rep_fn(uint32_t fn) {
  // Repeated fields live in the top 1000 field numbers so they cannot collide
  // with the non-repeated ones, which use the type value directly.
  const uint32_t base = UPB_MAX_FIELDNUMBER - 1000;
  return base + fn;
}
#define NOP_FIELD 40
#define UNKNOWN_FIELD 666
// Registers "handler" for "type" twice on "m": once as a non-repeated field
// whose number equals the type, and once as a repeated field whose number is
// rep_fn(type).
void reg(upb_mhandlers *m, upb_fieldtype_t type, upb_value_handler *handler) {
  const uint32_t single_fn = type;
  const uint32_t repeated_fn = rep_fn(type);
  doreg(m, single_fn, type, false, handler);
  doreg(m, repeated_fn, type, true, handler);
}
// Registers a submessage (or group) field numbered "num" on "m".  "m" is also
// passed as the submessage's handlers, so the field is self-recursive.  The
// sequence and submessage start/end handlers are installed and the field
// number is stored as the fval.
void reg_subm(upb_mhandlers *m, uint32_t num, upb_fieldtype_t type,
bool repeated) {
upb_fhandlers *f =
upb_mhandlers_newfhandlers_subm(m, num, type, repeated, m);
ASSERT(f);
upb_fhandlers_setstartseq(f, &startseq);
upb_fhandlers_setendseq(f, &endseq);
upb_fhandlers_setstartsubmsg(f, &startsubmsg);
upb_fhandlers_setendsubmsg(f, &endsubmsg);
upb_fhandlers_setfval(f, upb_value_uint32(num));
}
// Populates "m" with the full set of test fields: a repeated and a
// non-repeated field for every scalar type (see reg()), self-recursive
// message/group fields (see reg_subm()), and one handler-less string field
// (NOP_FIELD) used as padding.
void reghandlers(upb_mhandlers *m) {
// Register handlers for each type.
reg(m, UPB_TYPE(DOUBLE), &value_double);
reg(m, UPB_TYPE(FLOAT), &value_float);
reg(m, UPB_TYPE(INT64), &value_int64);
reg(m, UPB_TYPE(UINT64), &value_uint64);
reg(m, UPB_TYPE(INT32) , &value_int32);
reg(m, UPB_TYPE(FIXED64), &value_uint64);
reg(m, UPB_TYPE(FIXED32), &value_uint32);
reg(m, UPB_TYPE(BOOL), &value_bool);
reg(m, UPB_TYPE(STRING), &value_string);
reg(m, UPB_TYPE(BYTES), &value_string);
reg(m, UPB_TYPE(UINT32), &value_uint32);
reg(m, UPB_TYPE(ENUM), &value_int32);
reg(m, UPB_TYPE(SFIXED32), &value_int32);
reg(m, UPB_TYPE(SFIXED64), &value_int64);
reg(m, UPB_TYPE(SINT32), &value_int32);
reg(m, UPB_TYPE(SINT64), &value_int64);
// Register submessage/group handlers that are self-recursive
// to this type, eg: message M { optional M m = 1; }
reg_subm(m, UPB_TYPE(MESSAGE), UPB_TYPE(MESSAGE), false);
reg_subm(m, UPB_TYPE(GROUP), UPB_TYPE(GROUP), false);
reg_subm(m, rep_fn(UPB_TYPE(MESSAGE)), UPB_TYPE(MESSAGE), true);
reg_subm(m, rep_fn(UPB_TYPE(GROUP)), UPB_TYPE(GROUP), true);
// Register a no-op string field so we can pad the proto wherever we want.
upb_mhandlers_newfhandlers(m, NOP_FIELD, UPB_TYPE(STRING), false);
}
/* Custom bytesrc that can insert buffer seams in arbitrary places ************/
// A bytesrc over an in-memory string that reports its data in up to three
// pieces, split at the offsets seam1 and seam2.
typedef struct {
upb_bytesrc bytesrc;
// The underlying data; not copied by upb_seamsrc_init().
const char *str;
// len is the total data length; seam1 <= seam2 are the split offsets (see
// upb_seamsrc_resetseams()).
size_t len, seam1, seam2;
// Top-level region covering [0, len), returned by upb_seamsrc_allbytes().
upb_byteregion byteregion;
} upb_seamsrc;
// Returns the number of bytes between "ofs" and the next boundary after it:
// seam1, then seam2, then the end of the data.
size_t upb_seamsrc_avail(const upb_seamsrc *src, size_t ofs) {
  size_t boundary = src->len;
  if (ofs < src->seam1) {
    boundary = src->seam1;
  } else if (ofs < src->seam2) {
    boundary = src->seam2;
  }
  return boundary - ofs;
}
// Bytesrc "fetch" callback.  Stores in *read the number of bytes available at
// "ofs" before the next seam (or the end of the data) and returns
// UPB_BYTE_OK, or sets EOF on the bytesrc status and returns UPB_BYTE_EOF when
// "ofs" is exactly at the end of the data.
upb_bytesuccess_t upb_seamsrc_fetch(void *_src, uint64_t ofs, size_t *read) {
upb_seamsrc *src = _src;
// ofs == len is the legitimate EOF case handled just below, so only offsets
// strictly past the end indicate a caller bug.  (The previous "ofs < len"
// made the EOF branch unreachable in builds with assertions enabled.)
assert(ofs <= src->len);
if (ofs == src->len) {
upb_status_seteof(&src->bytesrc.status);
return UPB_BYTE_EOF;
}
free((void*)desc);
*read = upb_seamsrc_avail(src, ofs);
return UPB_BYTE_OK;
}
const upb_def *md = upb_symtab_lookup(symtab, argv[2]);
if (!md) {
fprintf(stderr, "Descriptor did not contain message: %s\n", argv[2]);
return 1;
// Bytesrc "copy" callback: copies "len" bytes starting at offset "ofs" into
// "dst".  The requested range must lie within the data.
void upb_seamsrc_copy(const void *_src, uint64_t ofs,
                      size_t len, char *dst) {
  const upb_seamsrc *src = _src;
  assert(ofs + len <= src->len);
  const char *from = src->str + ofs;
  memcpy(dst, from, len);
}
// Bytesrc "discard" callback.  Intentionally a no-op: both arguments are
// ignored.
void upb_seamsrc_discard(void *src, uint64_t ofs) {
(void)src;
(void)ofs;
}
// Bytesrc "getptr" callback: returns a pointer to the data at "ofs" and stores
// in *len the number of bytes available there before the next seam (or the
// end of the data).
const char *upb_seamsrc_getptr(const void *_s, uint64_t ofs, size_t *len) {
  const upb_seamsrc *src = _s;
  const char *ptr = src->str + ofs;
  *len = upb_seamsrc_avail(src, ofs);
  return ptr;
}
// Initializes "s" as a bytesrc over the "len" bytes at "str" (the pointer is
// stored, not copied), with both seams at 0 and a top-level byteregion
// spanning [0, len).
void upb_seamsrc_init(upb_seamsrc *s, const char *str, size_t len) {
// Callback table shared by every seamsrc.
static upb_bytesrc_vtbl vtbl = {
&upb_seamsrc_fetch,
&upb_seamsrc_discard,
&upb_seamsrc_copy,
&upb_seamsrc_getptr,
};
upb_bytesrc_init(&s->bytesrc, &vtbl);
s->seam1 = 0;
s->seam2 = 0;
s->str = str;
s->len = len;
s->byteregion.bytesrc = &s->bytesrc;
s->byteregion.toplevel = true;
s->byteregion.start = 0;
s->byteregion.end = len;
}
// Moves the seams to "seam1" and "seam2" (seam1 must not exceed seam2) and
// sets the byteregion's discard and fetch offsets back to 0, so the same data
// can be read again from the start.
void upb_seamsrc_resetseams(upb_seamsrc *s, size_t seam1, size_t seam2) {
ASSERT(seam1 <= seam2);
s->seam1 = seam1;
s->seam2 = seam2;
s->byteregion.discard = 0;
s->byteregion.fetch = 0;
}
void upb_seamsrc_uninit(upb_seamsrc *s) { (void)s; }
upb_bytesrc *upb_seamsrc_bytesrc(upb_seamsrc *s) {
return &s->bytesrc;
}
// Returns the top-level upb_byteregion* for this seamsrc. Invalidated when
// the seamsrc is reset.
upb_byteregion *upb_seamsrc_allbytes(upb_seamsrc *s) {
return &s->byteregion;
}
/* Running of test cases ******************************************************/
upb_decoderplan *plan;
// Decodes "proto" with the global "plan" once for every seam pair (i, j) with
// i <= j < proto->len, checking each run's outcome.
//
// If "expected_output" is non-NULL, every run must succeed, consume all input
// and produce exactly that handler output.  If it is NULL, every run must end
// in UPB_ERROR.  "proto" is freed before returning; "expected_output" is not.
void run_decoder(buffer *proto, buffer *expected_output) {
upb_seamsrc src;
upb_seamsrc_init(&src, proto->buf, proto->len);
upb_decoder d;
upb_decoder_init(&d);
upb_decoder_resetplan(&d, plan, 0);
for (size_t i = 0; i < proto->len; i++) {
for (size_t j = i; j < proto->len; j++) {
upb_seamsrc_resetseams(&src, i, j);
upb_byteregion *input = upb_seamsrc_allbytes(&src);
// Fresh output buffer per run; it is the closure the handlers append to.
buffer *output = buffer_new(0);
upb_decoder_resetinput(&d, input, output);
// Keep resuming for as long as the decoder reports it was suspended.
upb_success_t success = UPB_SUSPENDED;
while (success == UPB_SUSPENDED)
success = upb_decoder_decode(&d);
// The return value and the decoder's status must agree.
ASSERT(upb_ok(upb_decoder_status(&d)) == (success == UPB_OK));
if (expected_output) {
ASSERT(success == UPB_OK);
// The input should be fully consumed.
ASSERT(upb_byteregion_fetchofs(input) == upb_byteregion_endofs(input));
ASSERT(upb_byteregion_discardofs(input) ==
upb_byteregion_endofs(input));
// Print both texts before the assertion below fails, to aid debugging.
if (!buffer_eql(output, expected_output)) {
fprintf(stderr, "Text mismatch: '%s' vs '%s'\n",
output->buf, expected_output->buf);
}
ASSERT(strcmp(output->buf, expected_output->buf) == 0);
} else {
ASSERT(success == UPB_ERROR);
}
buffer_free(output);
}
}
upb_seamsrc_uninit(&src);
upb_decoder_uninit(&d);
buffer_free(proto);
}
// Formats the expected handler output from "expected_fmt"/"args" and asserts
// that "proto" decodes to exactly that text.  "proto" is handed to
// run_decoder(), which frees it.
void assert_successful_parse_at_eof(buffer *proto, const char *expected_fmt,
va_list args) {
buffer *expected_text = buffer_new(0);
size_t size = expected_text->len;
expected_text->len += upb_vrprintf(&expected_text->buf, &size,
expected_text->len, expected_fmt, args);
run_decoder(proto, expected_text);
buffer_free(expected_text);
}
void assert_does_not_parse_at_eof(buffer *proto) {
run_decoder(proto, NULL);
}
// Asserts that "proto" decodes to the text described by "expected_fmt" and the
// variadic arguments, both as given and with padding appended.  "proto" is
// consumed: the first run uses a copy and the second run frees the original.
void assert_successful_parse(buffer *proto, const char *expected_fmt, ...) {
// The JIT is only used for data >=20 bytes from end-of-buffer, so
// repeat once with no-op padding data at the end of buffer.
va_list args, args2;
va_start(args, expected_fmt);
// Each run consumes a va_list, so the second run gets its own copy.
va_copy(args2, args);
assert_successful_parse_at_eof(buffer_dup(proto), expected_fmt, args);
assert_successful_parse_at_eof(
cat( proto,
tag(NOP_FIELD, UPB_WIRE_TYPE_DELIMITED), delim(buffer_new(30)),
NULL ),
expected_fmt, args2);
va_end(args);
va_end(args2);
}
// Asserts that decoding "proto" fails, both as given and with padding
// appended.  "proto" is consumed: the first run uses a copy and the second run
// frees the original.
void assert_does_not_parse(buffer *proto) {
// The JIT is only used for data >=20 bytes from end-of-buffer, so
// repeat once with no-op padding data at the end of buffer.
assert_does_not_parse_at_eof(buffer_dup(proto));
assert_does_not_parse_at_eof(
cat( proto,
tag(NOP_FIELD, UPB_WIRE_TYPE_DELIMITED), delim( buffer_new(30)),
NULL ));
}
/* The actual tests ***********************************************************/
void test_premature_eof_for_type(upb_fieldtype_t type) {
// Incomplete values for each wire type.
static const char *incompletes[] = {
"\x80", // UPB_WIRE_TYPE_VARINT
"abcdefg", // UPB_WIRE_TYPE_64BIT
"\x80", // UPB_WIRE_TYPE_DELIMITED (partial length)
NULL, // UPB_WIRE_TYPE_START_GROUP (no value required)
NULL, // UPB_WIRE_TYPE_END_GROUP (no value required)
"abc" // UPB_WIRE_TYPE_32BIT
};
uint32_t fieldnum = type;
uint32_t rep_fieldnum = rep_fn(type);
int wire_type = upb_types[type].native_wire_type;
const char *incomplete = incompletes[wire_type];
// EOF before a known non-repeated value.
assert_does_not_parse_at_eof(tag(fieldnum, wire_type));
// EOF before a known repeated value.
assert_does_not_parse_at_eof(tag(rep_fieldnum, wire_type));
// EOF before an unknown value.
assert_does_not_parse_at_eof(tag(UNKNOWN_FIELD, wire_type));
// EOF inside a known non-repeated value.
assert_does_not_parse_at_eof(
cat( tag(fieldnum, wire_type), buffer_new3(incomplete), NULL ));
// EOF inside a known repeated value.
assert_does_not_parse_at_eof(
cat( tag(rep_fieldnum, wire_type), buffer_new3(incomplete), NULL ));
// EOF inside an unknown value.
assert_does_not_parse_at_eof(
cat( tag(UNKNOWN_FIELD, wire_type), buffer_new3(incomplete), NULL ));
if (wire_type == UPB_WIRE_TYPE_DELIMITED) {
// EOF in the middle of delimited data for known non-repeated value.
assert_does_not_parse_at_eof(
cat( tag(fieldnum, wire_type), varint(1), NULL ));
// EOF in the middle of delimited data for known repeated value.
assert_does_not_parse_at_eof(
cat( tag(rep_fieldnum, wire_type), varint(1), NULL ));
// EOF in the middle of delimited data for unknown value.
assert_does_not_parse_at_eof(
cat( tag(UNKNOWN_FIELD, wire_type), varint(1), NULL ));
if (type == UPB_TYPE(MESSAGE)) {
// Submessage ends in the middle of a value.
buffer *incomplete_submsg =
cat ( tag(UPB_TYPE(INT32), UPB_WIRE_TYPE_VARINT),
buffer_new3(incompletes[UPB_WIRE_TYPE_VARINT]), NULL );
assert_does_not_parse(
cat( tag(fieldnum, UPB_WIRE_TYPE_DELIMITED),
varint(incomplete_submsg->len),
incomplete_submsg, NULL ));
}
} else {
// Packed region ends in the middle of a value.
assert_does_not_parse(
cat( tag(rep_fieldnum, UPB_WIRE_TYPE_DELIMITED),
varint(strlen(incomplete)),
buffer_new3(incomplete), NULL ));
const upb_msgdef *m = upb_dyncast_msgdef_const(md);
if (!m) {
fprintf(stderr, "Def was not a msgdef.\n");
return 1;
// EOF in the middle of packed region.
assert_does_not_parse_at_eof(
cat( tag(rep_fieldnum, UPB_WIRE_TYPE_DELIMITED), varint(1), NULL ));
}
}
upb_stdio in, out;
upb_stdio_init(&in);
upb_stdio_init(&out);
upb_stdio_reset(&in, stdin);
upb_stdio_reset(&out, stdout);
// "33" and "66" are just two random values that all numeric types can
// represent.
void test_valid_data_for_type(upb_fieldtype_t type,
buffer *enc33, buffer *enc66) {
uint32_t fieldnum = type;
uint32_t rep_fieldnum = rep_fn(type);
int wire_type = upb_types[type].native_wire_type;
upb_handlers *handlers = upb_handlers_new();
upb_textprinter *p = upb_textprinter_new();
upb_textprinter_reset(p, upb_stdio_bytesink(&out), false);
upb_textprinter_reghandlers(handlers, m);
// Non-repeated
assert_successful_parse(
cat( tag(fieldnum, wire_type), buffer_dup(enc33),
tag(fieldnum, wire_type), buffer_dup(enc66), NULL ),
"%u:33; %u:66; ", fieldnum, fieldnum);
upb_decoder d;
upb_decoder_init(&d, handlers);
upb_decoder_reset(&d, upb_stdio_allbytes(&in), p);
// Non-packed repeated.
assert_successful_parse(
cat( tag(rep_fieldnum, wire_type), buffer_dup(enc33),
tag(rep_fieldnum, wire_type), buffer_dup(enc66), NULL ),
"%u:[ %u:33; %u:66; ] ", rep_fieldnum, rep_fieldnum, rep_fieldnum);
// Packed repeated.
assert_successful_parse(
cat( tag(rep_fieldnum, UPB_WIRE_TYPE_DELIMITED),
delim(cat( buffer_dup(enc33), buffer_dup(enc66), NULL )), NULL ),
"%u:[ %u:33; %u:66; ] ", rep_fieldnum, rep_fieldnum, rep_fieldnum);
buffer_free(enc33);
buffer_free(enc66);
}
// Checks that the values 33 and -66, supplied already encoded for "type" in
// "enc33" and "enc66", decode correctly as a non-repeated field, as a
// non-packed repeated field and as a packed repeated field.  Both encoded
// buffers are freed before returning.
void test_valid_data_for_signed_type(upb_fieldtype_t type,
buffer *enc33, buffer *enc66) {
uint32_t fieldnum = type;
uint32_t rep_fieldnum = rep_fn(type);
int wire_type = upb_types[type].native_wire_type;
// Non-repeated
assert_successful_parse(
cat( tag(fieldnum, wire_type), buffer_dup(enc33),
tag(fieldnum, wire_type), buffer_dup(enc66), NULL ),
"%u:33; %u:-66; ", fieldnum, fieldnum);
// Non-packed repeated.
assert_successful_parse(
cat( tag(rep_fieldnum, wire_type), buffer_dup(enc33),
tag(rep_fieldnum, wire_type), buffer_dup(enc66), NULL ),
"%u:[ %u:33; %u:-66; ] ", rep_fieldnum, rep_fieldnum, rep_fieldnum);
// Packed repeated.
assert_successful_parse(
cat( tag(rep_fieldnum, UPB_WIRE_TYPE_DELIMITED),
delim(cat( buffer_dup(enc33), buffer_dup(enc66), NULL )), NULL ),
"%u:[ %u:33; %u:-66; ] ", rep_fieldnum, rep_fieldnum, rep_fieldnum);
buffer_free(enc33);
buffer_free(enc66);
}
// Test that invalid protobufs are properly detected (without crashing) and
// have an error reported. Field numbers match registered handlers above.
void test_invalid() {
test_premature_eof_for_type(UPB_TYPE(DOUBLE));
test_premature_eof_for_type(UPB_TYPE(FLOAT));
test_premature_eof_for_type(UPB_TYPE(INT64));
test_premature_eof_for_type(UPB_TYPE(UINT64));
test_premature_eof_for_type(UPB_TYPE(INT32));
test_premature_eof_for_type(UPB_TYPE(FIXED64));
test_premature_eof_for_type(UPB_TYPE(FIXED32));
test_premature_eof_for_type(UPB_TYPE(BOOL));
test_premature_eof_for_type(UPB_TYPE(STRING));
test_premature_eof_for_type(UPB_TYPE(BYTES));
test_premature_eof_for_type(UPB_TYPE(UINT32));
test_premature_eof_for_type(UPB_TYPE(ENUM));
test_premature_eof_for_type(UPB_TYPE(SFIXED32));
test_premature_eof_for_type(UPB_TYPE(SFIXED64));
test_premature_eof_for_type(UPB_TYPE(SINT32));
test_premature_eof_for_type(UPB_TYPE(SINT64));
// EOF inside a tag's varint.
assert_does_not_parse_at_eof( buffer_new3("\x80") );
// EOF inside a known group.
assert_does_not_parse_at_eof( tag(4, UPB_WIRE_TYPE_START_GROUP) );
// EOF inside an unknown group.
assert_does_not_parse_at_eof( tag(UNKNOWN_FIELD, UPB_WIRE_TYPE_START_GROUP) );
upb_status_clear(&status);
upb_decoder_decode(&d, &status);
// End group that we are not currently in.
assert_does_not_parse( tag(4, UPB_WIRE_TYPE_END_GROUP) );
if (!upb_ok(&status)) {
fprintf(stderr, "Error parsing input: %s", upb_status_getstr(&status));
// Field number is 0.
assert_does_not_parse(
cat( tag(0, UPB_WIRE_TYPE_DELIMITED), varint(0), NULL ));
// Field number is too large.
assert_does_not_parse(
cat( tag(UPB_MAX_FIELDNUMBER + 1, UPB_WIRE_TYPE_DELIMITED),
varint(0), NULL ));
// Test exceeding the resource limit of stack depth.
buffer *buf = buffer_new3("");
for (int i = 0; i < UPB_MAX_NESTING; i++) {
buf = submsg(UPB_TYPE(MESSAGE), buf);
}
assert_does_not_parse(buf);
upb_status_uninit(&status);
upb_stdio_uninit(&in);
upb_stdio_uninit(&out);
upb_decoder_uninit(&d);
upb_textprinter_free(p);
upb_def_unref(UPB_UPCAST(m));
upb_symtab_unref(symtab);
// Prevent C library from holding buffers open, so Valgrind doesn't see
// memory leaks.
fclose(stdin);
fclose(stdout);
// Staying within the stack limit should work properly.
buf = buffer_new3("");
buffer *textbuf = buffer_new3("");
int total = UPB_MAX_NESTING - 1;
for (int i = 0; i < total; i++) {
buf = submsg(UPB_TYPE(MESSAGE), buf);
buffer_appendf(textbuf, "%u:{ ", UPB_TYPE(MESSAGE));
}
for (int i = 0; i < total; i++) {
buffer_appendf(textbuf, "} ");
}
assert_successful_parse(buf, "%s", textbuf->buf);
buffer_free(textbuf);
}
// Exercises well-formed input: every numeric type with a pair of sample
// values (33/-66 for the signed variants, 33/66 for the unsigned ones), then
// nested non-repeated and repeated submessages.
void test_valid() {
test_valid_data_for_signed_type(UPB_TYPE(DOUBLE), dbl(33), dbl(-66));
test_valid_data_for_signed_type(UPB_TYPE(FLOAT), flt(33), flt(-66));
test_valid_data_for_signed_type(UPB_TYPE(INT64), varint(33), varint(-66));
test_valid_data_for_signed_type(UPB_TYPE(INT32), varint(33), varint(-66));
test_valid_data_for_signed_type(UPB_TYPE(ENUM), varint(33), varint(-66));
test_valid_data_for_signed_type(UPB_TYPE(SFIXED32), uint32(33), uint32(-66));
test_valid_data_for_signed_type(UPB_TYPE(SFIXED64), uint64(33), uint64(-66));
test_valid_data_for_signed_type(UPB_TYPE(SINT32), zz32(33), zz32(-66));
test_valid_data_for_signed_type(UPB_TYPE(SINT64), zz64(33), zz64(-66));
test_valid_data_for_type(UPB_TYPE(UINT64), varint(33), varint(66));
test_valid_data_for_type(UPB_TYPE(UINT32), varint(33), varint(66));
test_valid_data_for_type(UPB_TYPE(FIXED64), uint64(33), uint64(66));
test_valid_data_for_type(UPB_TYPE(FIXED32), uint32(33), uint32(66));
// Submessage tests.
uint32_t msg_fn = UPB_TYPE(MESSAGE);
assert_successful_parse(
submsg(msg_fn, submsg(msg_fn, submsg(msg_fn, buffer_new3("")))),
"%u:{ %u:{ %u:{ } } } ", msg_fn, msg_fn, msg_fn);
uint32_t repm_fn = rep_fn(UPB_TYPE(MESSAGE));
assert_successful_parse(
submsg(repm_fn, submsg(repm_fn, buffer_new3(""))),
"%u:[ %u:{ %u:[ %u:{ } ] } ] ", repm_fn, repm_fn, repm_fn, repm_fn);
}
void run_tests() {
test_invalid();
test_valid();
}
// Runs the whole suite twice against the same handlers: once with a plan
// created with the JIT flag off and once with it on.
int main() {
// Construct decoder plan.
upb_handlers *h = upb_handlers_new();
reghandlers(upb_handlers_newmhandlers(h));
// Test without JIT.
plan = upb_decoderplan_new(h, false);
run_tests();
upb_decoderplan_unref(plan);
// Test JIT.
plan = upb_decoderplan_new(h, true);
run_tests();
upb_decoderplan_unref(plan);
// Clear the global so nothing can use the released plan.
plan = NULL;
printf("All tests passed, %d assertions.\n", num_assertions);
upb_handlers_unref(h);
return 0;
}

@ -8,12 +8,39 @@
#include "upb/pb/varint.h"
#include "upb_test.h"
// Test that we can round-trip from int->varint->int.
// Round-trips "num" through the varint encoder and "decoder", and through the
// zig-zag decode/encode pair.  Values that fit in 32 bits are additionally
// checked against upb_vencode32().
static void test_varint_for_num(upb_decoderet (*decoder)(const char*),
uint64_t num) {
char buf[16];
// Pre-fill with 0xff so every byte after the encoding has its high bit set.
memset(buf, 0xff, sizeof(buf));
size_t bytes = upb_vencode64(num, buf);
if (num <= UINT32_MAX) {
char buf2[16];
memset(buf2, 0, sizeof(buf2));
// NOTE(review): upb_vencode32() appears to return the encoded bytes packed
// into a uint64_t, with upb_value_size() giving their count -- confirm.
uint64_t encoded = upb_vencode32(num);
memcpy(&buf2, &encoded, 8);
upb_decoderet r = decoder(buf2);
ASSERT(r.val == num);
ASSERT(r.p == buf2 + upb_value_size(encoded));
ASSERT(upb_zzenc_32(upb_zzdec_32(num)) == num);
}
upb_decoderet r = decoder(buf);
ASSERT(r.val == num);
ASSERT(r.p == buf + bytes);
ASSERT(upb_zzenc_64(upb_zzdec_64(num)) == num);
}
static void test_varint_decoder(upb_decoderet (*decoder)(const char*)) {
#define TEST(bytes, expected_val) {\
const char buf[] = bytes "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" ; \
size_t n = sizeof(bytes) - 1; /* for NULL */ \
char buf[UPB_PB_VARINT_MAX_LEN]; \
memset(buf, 0xff, sizeof(buf)); \
memcpy(buf, bytes, n); \
upb_decoderet r = decoder(buf); \
ASSERT(r.val == expected_val); \
ASSERT(r.p == buf + sizeof(buf) - 16); /* - 1 for NULL */ \
ASSERT(r.p == buf + n); \
}
TEST("\x00", 0ULL);
@ -30,12 +57,19 @@ static void test_varint_decoder(upb_decoderet (*decoder)(const char*)) {
TEST("\x81\x83\x87\x8f\x9f\xbf\xff\x81\x83\x07", 0x8303fdf9f1e1c181ULL);
#undef TEST
char twelvebyte[16] = {0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x01, 0x01};
char twelvebyte[16] = {0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x01, 0x01};
const char *twelvebyte_buf = twelvebyte;
// A varint that terminates before hitting the end of the provided buffer,
// but in too many bytes (11 instead of 10).
upb_decoderet r = decoder(twelvebyte_buf);
ASSERT(r.p == NULL);
for (uint64_t num = 5; num * 1.5 > num; num *= 1.5) {
test_varint_for_num(decoder, num);
}
test_varint_for_num(decoder, 0);
}

@ -7,15 +7,19 @@
* given proto type and input protobuf.
*/
#define __STDC_LIMIT_MACROS // So we get UINT32_MAX
#include <assert.h>
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <google/protobuf/descriptor.h>
#include "benchmarks/google_messages.pb.h"
#include <google/protobuf/wire_format_lite.h>
#include "upb/benchmarks/google_messages.pb.h"
#include "upb/def.h"
#include "upb/msg.h"
#include "upb/pb/glue.h"
#include "upb/pb/varint.h"
#include "upb_test.h"
size_t string_size;
@ -179,13 +183,13 @@ void compare(const google::protobuf::Message& proto2_msg,
void parse_and_compare(MESSAGE_CIDENT *proto2_msg,
void *upb_msg, const upb_msgdef *upb_md,
const char *str, size_t len)
const char *str, size_t len, bool allow_jit)
{
// Parse to both proto2 and upb.
ASSERT(proto2_msg->ParseFromArray(str, len));
upb_status status = UPB_STATUS_INIT;
upb_msg_clear(upb_msg, upb_md);
upb_strtomsg(str, len, upb_msg, upb_md, &status);
upb_strtomsg(str, len, upb_msg, upb_md, allow_jit, &status);
if (!upb_ok(&status)) {
fprintf(stderr, "Error parsing protobuf: %s", upb_status_getstr(&status));
exit(1);
@ -241,8 +245,10 @@ int main(int argc, char *argv[])
// Run twice to test proper object reuse.
MESSAGE_CIDENT proto2_msg;
void *upb_msg = upb_stdmsg_new(msgdef);
parse_and_compare(&proto2_msg, upb_msg, msgdef, str, len);
parse_and_compare(&proto2_msg, upb_msg, msgdef, str, len);
parse_and_compare(&proto2_msg, upb_msg, msgdef, str, len, true);
parse_and_compare(&proto2_msg, upb_msg, msgdef, str, len, false);
parse_and_compare(&proto2_msg, upb_msg, msgdef, str, len, true);
parse_and_compare(&proto2_msg, upb_msg, msgdef, str, len, false);
printf("All tests passed, %d assertions.\n", num_assertions);
upb_stdmsg_free(upb_msg, msgdef);
@ -250,6 +256,17 @@ int main(int argc, char *argv[])
free((void*)str);
upb_symtab_unref(symtab);
upb_status_uninit(&status);
// Test Zig-Zag encoding/decoding.
for (uint64_t num = 5; num * 1.5 > num; num *= 1.5) {
ASSERT(upb_zzenc_64(num) ==
google::protobuf::internal::WireFormatLite::ZigZagEncode64(num));
if (num < UINT32_MAX) {
ASSERT(upb_zzenc_32(num) ==
google::protobuf::internal::WireFormatLite::ZigZagEncode32(num));
}
}
google::protobuf::ShutdownProtobufLibrary();
return 0;

@ -39,9 +39,13 @@ static void test_upb_jit() {
upb_handlers *h = upb_handlers_new();
upb_handlerset hset = {NULL, NULL, &upb_test_onvalue, NULL, NULL, NULL, NULL};
upb_handlers_reghandlerset(h, upb_downcast_msgdef_const(def), &hset);
upb_decoder d;
upb_decoder_init(&d, h);
upb_decoder_uninit(&d);
upb_decoderplan *p = upb_decoderplan_new(h, true);
#ifdef UPB_USE_JIT_X64
ASSERT(upb_decoderplan_hasjitcode(p));
#else
ASSERT(!upb_decoderplan_hasjitcode(p));
#endif
upb_decoderplan_unref(p);
upb_symtab_unref(s);
upb_def_unref(def);
upb_handlers_unref(h);

@ -25,7 +25,7 @@ upb_byteregion *upb_byteregion_new(const void *str) {
return upb_byteregion_newl(str, strlen(str));
}
upb_byteregion *upb_byteregion_newl(const void *str, uint32_t len) {
upb_byteregion *upb_byteregion_newl(const void *str, size_t len) {
upb_stringsrc *src = malloc(sizeof(*src));
upb_stringsrc_init(src);
char *ptr = malloc(len + 1);
@ -37,7 +37,7 @@ upb_byteregion *upb_byteregion_newl(const void *str, uint32_t len) {
void upb_byteregion_free(upb_byteregion *r) {
if (!r) return;
uint32_t len;
size_t len;
free((char*)upb_byteregion_getptr(r, 0, &len));
upb_stringsrc_uninit((upb_stringsrc*)r->bytesrc);
free(r->bytesrc);
@ -64,16 +64,14 @@ void upb_byteregion_reset(upb_byteregion *r, const upb_byteregion *src,
r->fetch = UPB_MIN(src->fetch, r->end);
}
bool upb_byteregion_fetch(upb_byteregion *r, upb_status *s) {
upb_bytesuccess_t upb_byteregion_fetch(upb_byteregion *r) {
uint64_t fetchable = upb_byteregion_remaining(r, r->fetch);
if (fetchable == 0) {
upb_status_seteof(s);
return false;
}
uint64_t num = upb_bytesrc_fetch(r->bytesrc, r->fetch, s);
if (num == 0) return false;
r->fetch += UPB_MIN(num, fetchable);
return true;
if (fetchable == 0) return UPB_BYTE_EOF;
size_t fetched;
upb_bytesuccess_t ret = upb_bytesrc_fetch(r->bytesrc, r->fetch, &fetched);
// Propagate the bytesrc's own result (EOF, error, would-block, ...).  This
// function returns upb_bytesuccess_t, so returning "false" here would
// discard the actual failure code.
if (ret != UPB_BYTE_OK) return ret;
r->fetch += UPB_MIN(fetched, fetchable);
return UPB_BYTE_OK;
}
@ -93,10 +91,10 @@ static upb_stdio_buf *upb_stdio_findbuf(const upb_stdio *s, uint64_t ofs) {
static upb_stdio_buf *upb_stdio_rotatebufs(upb_stdio *s) {
upb_stdio_buf **reuse = NULL; // XXX
uint32_t num_reused = 0, num_inuse = 0;
int num_reused = 0, num_inuse = 0;
// Could sweep only a subset of bufs if this was a hotspot.
for (uint32_t i = 0; i < s->nbuf; i++) {
for (int i = 0; i < s->nbuf; i++) {
upb_stdio_buf *buf = s->bufs[i];
if (buf->refcount > 0) {
s->bufs[num_inuse++] = buf;
@ -120,28 +118,37 @@ void upb_stdio_discard(void *src, uint64_t ofs) {
(void)ofs;
}
uint32_t upb_stdio_fetch(void *src, uint64_t ofs, upb_status *s) {
upb_bytesuccess_t upb_stdio_fetch(void *src, uint64_t ofs, size_t *bytes_read) {
(void)ofs;
upb_stdio *stdio = (upb_stdio*)src;
upb_stdio_buf *buf = upb_stdio_rotatebufs(stdio);
uint32_t read = fread(&buf->data, 1, BUF_SIZE, stdio->file);
buf->len = read;
if(read < (uint32_t)BUF_SIZE) {
retry:
*bytes_read = fread(&buf->data, 1, BUF_SIZE, stdio->file);
buf->len = *bytes_read;
if (*bytes_read < (size_t)BUF_SIZE) {
// Error or EOF.
if(feof(stdio->file)) {
upb_status_seteof(s);
return read;
if (feof(stdio->file)) {
upb_status_seteof(&stdio->src.status);
return UPB_BYTE_EOF;
}
if(ferror(stdio->file)) {
upb_status_fromerrno(s);
return 0;
if (ferror(stdio->file)) {
#ifdef EINTR
// If we encounter a client who doesn't want to retry EINTR, we can easily
// add a boolean property of the stdio that controls this behavior.
if (errno == EINTR) {
clearerr(stdio->file);
goto retry;
}
#endif
upb_status_fromerrno(&stdio->src.status);
return upb_errno_is_wouldblock() ? UPB_BYTE_WOULDBLOCK : UPB_BYTE_ERROR;
}
assert(false);
}
return buf->ofs + buf->len;
return UPB_BYTE_OK;
}
void upb_stdio_read(const void *src, uint64_t ofs, uint32_t len, char *dst) {
void upb_stdio_copy(const void *src, uint64_t ofs, size_t len, char *dst) {
upb_stdio_buf *buf = upb_stdio_findbuf(src, ofs);
ofs -= buf->ofs;
memcpy(dst, buf->data + ofs, BUF_SIZE - ofs);
@ -149,14 +156,14 @@ void upb_stdio_read(const void *src, uint64_t ofs, uint32_t len, char *dst) {
dst += (BUF_SIZE - ofs);
while (len > 0) {
++buf;
uint32_t bytes = UPB_MIN(len, BUF_SIZE);
size_t bytes = UPB_MIN(len, BUF_SIZE);
memcpy(dst, buf->data, bytes);
len -= bytes;
dst += bytes;
}
}
const char *upb_stdio_getptr(const void *src, uint64_t ofs, uint32_t *len) {
const char *upb_stdio_getptr(const void *src, uint64_t ofs, size_t *len) {
upb_stdio_buf *buf = upb_stdio_findbuf(src, ofs);
ofs -= buf->ofs;
*len = BUF_SIZE - ofs;
@ -168,7 +175,7 @@ upb_strlen_t upb_stdio_putstr(upb_bytesink *sink, upb_string *str, upb_status *s
upb_stdio *stdio = (upb_stdio*)((char*)sink - offsetof(upb_stdio, sink));
upb_strlen_t len = upb_string_len(str);
upb_strlen_t written = fwrite(upb_string_getrobuf(str), 1, len, stdio->file);
if(written < len) {
if (written < len) {
upb_status_setf(status, UPB_ERROR, "Error writing to stdio stream.");
return -1;
}
@ -191,7 +198,7 @@ void upb_stdio_init(upb_stdio *stdio) {
static upb_bytesrc_vtbl bytesrc_vtbl = {
&upb_stdio_fetch,
&upb_stdio_discard,
&upb_stdio_read,
&upb_stdio_copy,
&upb_stdio_getptr,
};
upb_bytesrc_init(&stdio->src, &bytesrc_vtbl);
@ -226,20 +233,25 @@ void upb_stdio_uninit(upb_stdio *stdio) {
stdio->file = NULL;
}
upb_byteregion* upb_stdio_allbytes(upb_stdio *stdio) { return &stdio->byteregion; }
upb_bytesrc* upb_stdio_bytesrc(upb_stdio *stdio) { return &stdio->src; }
upb_bytesink* upb_stdio_bytesink(upb_stdio *stdio) { return &stdio->sink; }
/* upb_stringsrc **************************************************************/
uint32_t upb_stringsrc_fetch(void *_src, uint64_t ofs, upb_status *s) {
upb_bytesuccess_t upb_stringsrc_fetch(void *_src, uint64_t ofs, size_t *read) {
upb_stringsrc *src = _src;
upb_status_seteof(s);
return src->len - ofs;
assert(ofs < src->len);
if (ofs == src->len) {
upb_status_seteof(&src->bytesrc.status);
return UPB_BYTE_EOF;
}
*read = src->len - ofs;
return UPB_BYTE_OK;
}
void upb_stringsrc_read(const void *_src, uint64_t ofs,
uint32_t len, char *dst) {
void upb_stringsrc_copy(const void *_src, uint64_t ofs,
size_t len, char *dst) {
const upb_stringsrc *src = _src;
assert(ofs + len <= src->len);
memcpy(dst, src->str + ofs, len);
@ -250,7 +262,7 @@ void upb_stringsrc_discard(void *src, uint64_t ofs) {
(void)ofs;
}
const char *upb_stringsrc_getptr(const void *_s, uint64_t ofs, uint32_t *len) {
const char *upb_stringsrc_getptr(const void *_s, uint64_t ofs, size_t *len) {
const upb_stringsrc *src = _s;
*len = src->len - ofs;
return src->str + ofs;
@ -260,7 +272,7 @@ void upb_stringsrc_init(upb_stringsrc *s) {
static upb_bytesrc_vtbl vtbl = {
&upb_stringsrc_fetch,
&upb_stringsrc_discard,
&upb_stringsrc_read,
&upb_stringsrc_copy,
&upb_stringsrc_getptr,
};
upb_bytesrc_init(&s->bytesrc, &vtbl);
@ -269,7 +281,7 @@ void upb_stringsrc_init(upb_stringsrc *s) {
s->byteregion.toplevel = true;
}
void upb_stringsrc_reset(upb_stringsrc *s, const char *str, uint32_t len) {
void upb_stringsrc_reset(upb_stringsrc *s, const char *str, size_t len) {
s->str = str;
s->len = len;
s->byteregion.start = 0;
@ -280,18 +292,13 @@ void upb_stringsrc_reset(upb_stringsrc *s, const char *str, uint32_t len) {
void upb_stringsrc_uninit(upb_stringsrc *s) { (void)s; }
upb_bytesrc *upb_stringsrc_bytesrc(upb_stringsrc *s) {
return &s->bytesrc;
}
/* upb_stringsink *************************************************************/
// Frees the string buffer currently held in s->str.  Does not free "s" itself.
void upb_stringsink_uninit(upb_stringsink *s) {
free(s->str);
}
void upb_stringsink_reset(upb_stringsink *s, char *str, uint32_t size) {
void upb_stringsink_reset(upb_stringsink *s, char *str, size_t size) {
free(s->str);
s->str = str;
s->len = 0;

@ -63,11 +63,17 @@
// +------------------------
// | nondelimited region Z <-- won't return EOF until data source hits EOF.
// +------------------------
//
// TODO: if 64-bit math for stream offsets is a performance issue on
// non-64-bit machines, we could introduce a upb_off_t typedef that can be
// defined as a 32-bit type for applications that don't need to handle
// streams longer than 4GB.
#ifndef UPB_BYTESTREAM_H
#define UPB_BYTESTREAM_H
#include <errno.h>
#include <stdarg.h>
#include <stdint.h>
#include <stdio.h>
@ -79,6 +85,12 @@
extern "C" {
#endif
// Result code returned by the bytesrc/byteregion fetch operations.  The first
// three values alias the general status codes they are initialized from.
typedef enum {
UPB_BYTE_OK = UPB_OK,
UPB_BYTE_WOULDBLOCK = UPB_SUSPENDED,
UPB_BYTE_ERROR = UPB_ERROR,
// No explicit initializer: takes the value UPB_BYTE_ERROR + 1.
UPB_BYTE_EOF
} upb_bytesuccess_t;
/* upb_bytesrc ****************************************************************/
@ -90,10 +102,10 @@ extern "C" {
// upb_bytesrc is a virtual base class with implementations that get data from
// eg. a string, a cord, a file descriptor, a FILE*, etc.
typedef uint32_t upb_bytesrc_fetch_func(void*, uint64_t, upb_status*);
typedef upb_bytesuccess_t upb_bytesrc_fetch_func(void*, uint64_t, size_t*);
typedef void upb_bytesrc_discard_func(void*, uint64_t);
typedef void upb_bytesrc_copy_func(const void*, uint64_t, uint32_t, char*);
typedef const char *upb_bytesrc_getptr_func(const void*, uint64_t, uint32_t*);
typedef void upb_bytesrc_copy_func(const void*, uint64_t, size_t, char*);
typedef const char *upb_bytesrc_getptr_func(const void*, uint64_t, size_t*);
typedef struct _upb_bytesrc_vtbl {
upb_bytesrc_fetch_func *fetch;
upb_bytesrc_discard_func *discard;
@ -102,21 +114,27 @@ typedef struct _upb_bytesrc_vtbl {
} upb_bytesrc_vtbl;
typedef struct {
upb_bytesrc_vtbl *vtbl;
const upb_bytesrc_vtbl *vtbl;
upb_status status;
} upb_bytesrc;
INLINE void upb_bytesrc_init(upb_bytesrc *src, upb_bytesrc_vtbl *vtbl) {
INLINE void upb_bytesrc_init(upb_bytesrc *src, const upb_bytesrc_vtbl *vtbl) {
src->vtbl = vtbl;
upb_status_init(&src->status);
}
// Uninitializes the status object embedded in the bytesrc.  Pairs with
// upb_bytesrc_init(), which initializes that status.
INLINE void upb_bytesrc_uninit(upb_bytesrc *src) {
upb_status_uninit(&src->status);
}
// Fetches at least one byte starting at ofs, returning the actual number of
// bytes fetched (or 0 on EOF or error: see *s for details). Some bytesrc's
// may set EOF on *s after a successful read if no further data is available,
// but not all bytesrc's support this. It is valid for bytes to be fetched
// multiple times, as long as the bytes have not been previously discarded.
INLINE uint32_t upb_bytesrc_fetch(upb_bytesrc *src, uint64_t ofs,
upb_status *s) {
return src->vtbl->fetch(src, ofs, s);
// Fetches at least one byte starting at ofs, returning the success or failure
// of the operation. If UPB_BYTE_OK is returned, *read indicates the number of
// bytes successfully fetched; any error or EOF status will be reflected in
// upb_bytesrc_status(). It is valid for bytes to be fetched multiple times,
// as long as the bytes have not been previously discarded.
INLINE upb_bytesuccess_t upb_bytesrc_fetch(upb_bytesrc *src, uint64_t ofs,
size_t *read) {
return src->vtbl->fetch(src, ofs, read);
}
// Discards all data prior to ofs (except data that is pinned, if pinning
@ -127,7 +145,7 @@ INLINE void upb_bytesrc_discard(upb_bytesrc *src, uint64_t ofs) {
// Copies "len" bytes of data from ofs to "dst", which must be at least "len"
// bytes long. The given region must not be discarded.
INLINE void upb_bytesrc_copy(const upb_bytesrc *src, uint64_t ofs, uint32_t len,
INLINE void upb_bytesrc_copy(const upb_bytesrc *src, uint64_t ofs, size_t len,
char *dst) {
src->vtbl->copy(src, ofs, len, dst);
}
@ -138,7 +156,7 @@ INLINE void upb_bytesrc_copy(const upb_bytesrc *src, uint64_t ofs, uint32_t len,
// part of the returned buffer is discarded, only the non-discarded bytes
// remain valid).
INLINE const char *upb_bytesrc_getptr(const upb_bytesrc *src, uint64_t ofs,
uint32_t *len) {
size_t *len) {
return src->vtbl->getptr(src, ofs, len);
}
@ -148,14 +166,14 @@ INLINE const char *upb_bytesrc_getptr(const upb_bytesrc *src, uint64_t ofs,
// // is guaranteed that the region will not be discarded (nor will the bytesrc
// // be destroyed) until the region is unpinned. However, not all bytesrc's
// // support pinning; a false return indicates that a pin was not possible.
// INLINE bool upb_bytesrc_pin(upb_bytesrc *src, uint64_t ofs, uint32_t len) {
// INLINE bool upb_bytesrc_pin(upb_bytesrc *src, uint64_t ofs, size_t len) {
// return src->vtbl->refregion(src, ofs, len);
// }
//
// // Releases some number of pinned bytes from the beginning of a pinned
// // region (which may be fewer than the total number of bytes pinned).
// INLINE void upb_bytesrc_unpin(upb_bytesrc *src, uint64_t ofs, uint32_t len,
// uint32_t bytes_to_release) {
// INLINE void upb_bytesrc_unpin(upb_bytesrc *src, uint64_t ofs, size_t len,
// size_t bytes_to_release) {
// src->vtbl->unpin(src, ofs, len);
// }
//
@ -173,7 +191,7 @@ typedef struct _upb_byteregion {
uint64_t fetch;
uint64_t end; // UPB_NONDELIMITED if nondelimited.
upb_bytesrc *bytesrc;
bool toplevel; // If true, discards hit the underlying byteregion.
bool toplevel; // If true, discards hit the underlying bytesrc.
} upb_byteregion;
// Initializes a byteregion. Its initial value will be empty. No methods may
@ -225,14 +243,17 @@ void upb_byteregion_release(upb_byteregion *r);
// Attempts to fetch more data, extending the fetched range of this byteregion.
// Returns UPB_BYTE_OK if the fetched region was extended by at least one byte;
// any other value indicates EOF, would-block, or an error.
bool upb_byteregion_fetch(upb_byteregion *r, upb_status *s);
upb_bytesuccess_t upb_byteregion_fetch(upb_byteregion *r);
// Fetches all remaining data for "r", returning false if the operation failed
// (see "*s" for details). May only be used on delimited byteregions.
INLINE bool upb_byteregion_fetchall(upb_byteregion *r, upb_status *s) {
// Fetches all remaining data for "r", returning the success of the operation
// (reaching EOF counts as success). May only be used on delimited byteregions.
INLINE upb_bytesuccess_t upb_byteregion_fetchall(upb_byteregion *r) {
assert(upb_byteregion_len(r) != UPB_NONDELIMITED);
while (upb_byteregion_fetch(r, s)) ; // Empty body.
return upb_eof(s);
upb_bytesuccess_t ret;
do {
ret = upb_byteregion_fetch(r);
} while (ret == UPB_BYTE_OK);
return ret == UPB_BYTE_EOF ? UPB_BYTE_OK : ret;
}
// Discards bytes from the byteregion up until ofs (which must be greater or
@ -243,13 +264,14 @@ INLINE void upb_byteregion_discard(upb_byteregion *r, uint64_t ofs) {
assert(ofs >= upb_byteregion_discardofs(r));
assert(ofs <= upb_byteregion_endofs(r));
r->discard = ofs;
if (ofs > r->fetch) r->fetch = ofs;
if (r->toplevel) upb_bytesrc_discard(r->bytesrc, ofs);
}
// Copies "len" bytes of data into "dst", starting at ofs. The specified
// region must be available.
INLINE void upb_byteregion_copy(const upb_byteregion *r, uint64_t ofs,
uint32_t len, char *dst) {
size_t len, char *dst) {
assert(ofs >= upb_byteregion_discardofs(r));
assert(len <= upb_byteregion_available(r, ofs));
upb_bytesrc_copy(r->bytesrc, ofs, len, dst);
@ -268,7 +290,7 @@ INLINE void upb_byteregion_copyall(const upb_byteregion *r, char *dst) {
// or when the bytes are discarded. If the byteregion is not currently pinned,
// the pointer is only valid for the lifetime of the parent byteregion.
INLINE const char *upb_byteregion_getptr(const upb_byteregion *r,
uint64_t ofs, uint32_t *len) {
uint64_t ofs, size_t *len) {
assert(ofs >= upb_byteregion_discardofs(r));
const char *ret = upb_bytesrc_getptr(r->bytesrc, ofs, len);
*len = UPB_MIN(*len, upb_byteregion_available(r, ofs));
@ -295,7 +317,7 @@ INLINE const char *upb_byteregion_getptr(const upb_byteregion *r,
// The string data in the returned region is guaranteed to be contiguous and
// NULL-terminated.
upb_byteregion *upb_byteregion_new(const void *str);
upb_byteregion *upb_byteregion_newl(const void *str, uint32_t len);
upb_byteregion *upb_byteregion_newl(const void *str, size_t len);
// May *only* be called on a byteregion created with upb_byteregion_new[l]()!
void upb_byteregion_free(upb_byteregion *r);
@ -399,7 +421,7 @@ INLINE void upb_bytesink_rewind(upb_bytesink *sink, uint64_t offset) {
typedef struct {
uint64_t ofs;
uint32_t len;
size_t len;
uint32_t refcount;
char data[];
} upb_stdio_buf;
@ -414,7 +436,6 @@ typedef struct {
bool should_close;
upb_stdio_buf **bufs;
uint32_t nbuf, szbuf;
upb_byteregion byteregion;
} upb_stdio;
void upb_stdio_init(upb_stdio *stdio);
@ -433,7 +454,7 @@ void upb_stdio_reset(upb_stdio *stdio, FILE *file);
void upb_stdio_open(upb_stdio *stdio, const char *filename, const char *mode,
upb_status *s);
upb_byteregion *upb_stdio_allbytes(upb_stdio *stdio);
upb_bytesrc *upb_stdio_bytesrc(upb_stdio *stdio);
upb_bytesink *upb_stdio_bytesink(upb_stdio *stdio);
@ -444,7 +465,7 @@ upb_bytesink *upb_stdio_bytesink(upb_stdio *stdio);
typedef struct {
upb_bytesrc bytesrc;
const char *str;
uint32_t len;
size_t len;
upb_byteregion byteregion;
} upb_stringsrc;
@ -454,7 +475,11 @@ void upb_stringsrc_uninit(upb_stringsrc *s);
// Resets the stringsrc to a state where it will vend the given string. The
// string data must be valid until the stringsrc is reset again or destroyed.
void upb_stringsrc_reset(upb_stringsrc *s, const char *str, uint32_t len);
void upb_stringsrc_reset(upb_stringsrc *s, const char *str, size_t len);
// Returns a pointer to the upb_bytesrc embedded in this stringsrc.
INLINE upb_bytesrc *upb_stringsrc_bytesrc(upb_stringsrc *s) {
return &s->bytesrc;
}
// Returns the top-level upb_byteregion* for this stringsrc. Invalidated when
// the stringsrc is reset.
@ -468,7 +493,7 @@ INLINE upb_byteregion *upb_stringsrc_allbytes(upb_stringsrc *s) {
struct _upb_stringsink {
upb_bytesink bytesink;
char *str;
uint32_t len, size;
size_t len, size;
};
typedef struct _upb_stringsink upb_stringsink;
@ -478,12 +503,12 @@ void upb_stringsink_uninit(upb_stringsink *s);
// Resets the sink's string to "str", which the sink takes ownership of.
// "str" may be NULL, which will make the sink allocate a new string.
void upb_stringsink_reset(upb_stringsink *s, char *str, uint32_t len);
void upb_stringsink_reset(upb_stringsink *s, char *str, size_t len);
// Releases ownership of the returned string (which is "len" bytes long) and
// resets the internal string to be empty again (as if reset were called with
// NULL).
const char *upb_stringsink_release(upb_stringsink *s, uint32_t *len);
const char *upb_stringsink_release(upb_stringsink *s, size_t *len);
// Returns the upb_bytesink* for this stringsink. Invalidated by reset above.
upb_bytesink *upb_stringsink_bytesink(upb_stringsink *s);

@ -334,7 +334,7 @@ static bool upb_fielddef_resolve(upb_fielddef *f, upb_def *def, upb_status *s) {
if (upb_byteregion_len(bytes) == 0) {
upb_value_setint32(&f->defaultval, e->defaultval);
} else {
uint32_t len;
size_t len;
// ptr is guaranteed to be NULL-terminated because the byteregion was
// created with upb_byteregion_newl().
const char *ptr = upb_byteregion_getptr(bytes, 0, &len);

@ -13,7 +13,7 @@
static upb_mhandlers *upb_mhandlers_new() {
upb_mhandlers *m = malloc(sizeof(*m));
upb_inttable_init(&m->fieldtab, 8, sizeof(upb_fhandlers));
upb_inttable_init(&m->fieldtab, 8, sizeof(upb_itofhandlers_ent));
m->startmsg = NULL;
m->endmsg = NULL;
m->is_group = false;
@ -26,21 +26,21 @@ static upb_mhandlers *upb_mhandlers_new() {
static upb_fhandlers *_upb_mhandlers_newfhandlers(upb_mhandlers *m, uint32_t n,
upb_fieldtype_t type,
bool repeated) {
uint32_t tag = n << 3 | upb_types[type].native_wire_type;
upb_fhandlers *f = upb_inttable_lookup(&m->fieldtab, tag);
if (f) abort();
upb_fhandlers new_f = {false, type, repeated,
repeated && upb_isprimitivetype(type), UPB_ATOMIC_INIT(0),
upb_itofhandlers_ent *e = upb_inttable_lookup(&m->fieldtab, n);
// TODO: design/refine the API for changing the set of fields or modifying
// existing handlers.
if (e) return NULL;
upb_fhandlers new_f = {type, repeated, UPB_ATOMIC_INIT(0),
n, -1, m, NULL, UPB_NO_VALUE, NULL, NULL, NULL, NULL, NULL,
#ifdef UPB_USE_JIT_X64
0, 0, 0,
#endif
NULL};
upb_inttable_insert(&m->fieldtab, tag, &new_f);
f = upb_inttable_lookup(&m->fieldtab, tag);
assert(f);
assert(f->type == type);
return f;
upb_fhandlers *ptr = malloc(sizeof(*ptr));
memcpy(ptr, &new_f, sizeof(upb_fhandlers));
upb_itofhandlers_ent ent = {false, ptr};
upb_inttable_insert(&m->fieldtab, n, &ent);
return ptr;
}
upb_fhandlers *upb_mhandlers_newfhandlers(upb_mhandlers *m, uint32_t n,
@ -57,6 +57,7 @@ upb_fhandlers *upb_mhandlers_newfhandlers_subm(upb_mhandlers *m, uint32_t n,
assert(type == UPB_TYPE(MESSAGE) || type == UPB_TYPE(GROUP));
assert(subm);
upb_fhandlers *f = _upb_mhandlers_newfhandlers(m, n, type, repeated);
if (!f) return NULL;
f->submsg = subm;
if (type == UPB_TYPE(GROUP))
_upb_mhandlers_newfhandlers(subm, n, UPB_TYPE_ENDGROUP, false);
@ -82,6 +83,12 @@ void upb_handlers_unref(upb_handlers *h) {
if (upb_atomic_unref(&h->refcount)) {
for (int i = 0; i < h->msgs_len; i++) {
upb_mhandlers *mh = h->msgs[i];
for(upb_inttable_iter j = upb_inttable_begin(&mh->fieldtab);
!upb_inttable_done(j);
j = upb_inttable_next(&mh->fieldtab, j)) {
upb_itofhandlers_ent *e = upb_inttable_iter_value(j);
free(e->f);
}
upb_inttable_free(&mh->fieldtab);
#ifdef UPB_USE_JIT_X64
free(mh->tablearray);
@ -154,41 +161,24 @@ upb_mhandlers *upb_handlers_regmsgdef(upb_handlers *h, const upb_msgdef *m,
/* upb_dispatcher *************************************************************/
static upb_fhandlers toplevel_f = {
false, UPB_TYPE(GROUP), false, false, UPB_ATOMIC_INIT(0), 0,
-1, NULL, NULL, // submsg
#ifdef NDEBUG
{{0}},
#else
{{0}, -1},
#endif
NULL, NULL, NULL, NULL, NULL,
#ifdef UPB_USE_JIT_X64
0, 0, 0,
#endif
NULL};
void upb_dispatcher_init(upb_dispatcher *d, upb_handlers *h,
upb_skip_handler *skip, upb_exit_handler *exit,
void upb_dispatcher_init(upb_dispatcher *d, upb_status *status,
upb_exit_handler UPB_NORETURN *exit,
void *srcclosure) {
d->handlers = h;
upb_handlers_ref(h);
for (int i = 0; i < h->msgs_len; i++) {
upb_mhandlers *m = h->msgs[i];
upb_inttable_compact(&m->fieldtab);
}
d->stack[0].f = &toplevel_f;
d->stack[0].f = NULL; // Should never be read.
d->limit = &d->stack[UPB_MAX_NESTING];
d->skip = skip;
d->exit = exit;
d->exitjmp = exit;
d->srcclosure = srcclosure;
d->top_is_implicit = false;
upb_status_init(&d->status);
d->msgent = NULL;
d->top = NULL;
d->toplevel_msgent = NULL;
d->status = status;
}
upb_dispatcher_frame *upb_dispatcher_reset(upb_dispatcher *d, void *closure) {
d->msgent = d->handlers->msgs[0];
d->dispatch_table = &d->msgent->fieldtab;
upb_dispatcher_frame *upb_dispatcher_reset(upb_dispatcher *d, void *closure,
upb_mhandlers *top) {
d->msgent = top;
d->toplevel_msgent = top;
d->top = d->stack;
d->top->closure = closure;
d->top->is_sequence = false;
@ -197,46 +187,32 @@ upb_dispatcher_frame *upb_dispatcher_reset(upb_dispatcher *d, void *closure) {
}
void upb_dispatcher_uninit(upb_dispatcher *d) {
upb_handlers_unref(d->handlers);
upb_status_uninit(&d->status);
}
void upb_dispatch_startmsg(upb_dispatcher *d) {
upb_flow_t flow = UPB_CONTINUE;
if (d->msgent->startmsg) d->msgent->startmsg(d->top->closure);
if (flow != UPB_CONTINUE) _upb_dispatcher_unwind(d, flow);
if (flow != UPB_CONTINUE) _upb_dispatcher_abortjmp(d);
}
void upb_dispatch_endmsg(upb_dispatcher *d, upb_status *status) {
assert(d->top == d->stack);
if (d->msgent->endmsg) d->msgent->endmsg(d->top->closure, &d->status);
if (d->msgent->endmsg) d->msgent->endmsg(d->top->closure, d->status);
// TODO: should we avoid this copy by passing client's status obj to cbs?
upb_status_copy(status, &d->status);
}
void indent(upb_dispatcher *d) {
for (int i = 0; i < (d->top - d->stack); i++) fprintf(stderr, " ");
}
void indentm1(upb_dispatcher *d) {
for (int i = 0; i < (d->top - d->stack - 1); i++) fprintf(stderr, " ");
upb_status_copy(status, d->status);
}
upb_dispatcher_frame *upb_dispatch_startseq(upb_dispatcher *d,
upb_fhandlers *f) {
//indent(d);
//fprintf(stderr, "START SEQ: %d\n", f->number);
if((d->top+1) >= d->limit) {
upb_status_seterrliteral(&d->status, "Nesting too deep.");
_upb_dispatcher_unwind(d, UPB_BREAK);
return d->top; // Dummy.
if (d->top + 1 >= d->limit) {
upb_status_seterrliteral(d->status, "Nesting too deep.");
_upb_dispatcher_abortjmp(d);
}
upb_sflow_t sflow = UPB_CONTINUE_WITH(d->top->closure);
if (f->startseq) sflow = f->startseq(d->top->closure, f->fval);
if (sflow.flow != UPB_CONTINUE) {
_upb_dispatcher_unwind(d, sflow.flow);
return d->top; // Dummy.
_upb_dispatcher_abortjmp(d);
}
++d->top;
@ -248,8 +224,6 @@ upb_dispatcher_frame *upb_dispatch_startseq(upb_dispatcher *d,
}
upb_dispatcher_frame *upb_dispatch_endseq(upb_dispatcher *d) {
//indentm1(d);
//fprintf(stderr, "END SEQ\n");
assert(d->top > d->stack);
assert(d->top->is_sequence);
upb_fhandlers *f = d->top->f;
@ -257,30 +231,23 @@ upb_dispatcher_frame *upb_dispatch_endseq(upb_dispatcher *d) {
upb_flow_t flow = UPB_CONTINUE;
if (f->endseq) flow = f->endseq(d->top->closure, f->fval);
if (flow != UPB_CONTINUE) {
printf("YO, UNWINDING!\n");
_upb_dispatcher_unwind(d, flow);
return d->top; // Dummy.
_upb_dispatcher_abortjmp(d);
}
d->msgent = d->top->f->submsg ? d->top->f->submsg : d->handlers->msgs[0];
d->dispatch_table = &d->msgent->fieldtab;
d->msgent = d->top->f ? d->top->f->submsg : d->toplevel_msgent;
return d->top;
}
upb_dispatcher_frame *upb_dispatch_startsubmsg(upb_dispatcher *d,
upb_fhandlers *f) {
//indent(d);
//fprintf(stderr, "START SUBMSG: %d\n", f->number);
if((d->top+1) >= d->limit) {
upb_status_seterrliteral(&d->status, "Nesting too deep.");
_upb_dispatcher_unwind(d, UPB_BREAK);
return d->top; // Dummy.
if (d->top + 1 >= d->limit) {
upb_status_seterrliteral(d->status, "Nesting too deep.");
_upb_dispatcher_abortjmp(d);
}
upb_sflow_t sflow = UPB_CONTINUE_WITH(d->top->closure);
if (f->startsubmsg) sflow = f->startsubmsg(d->top->closure, f->fval);
if (sflow.flow != UPB_CONTINUE) {
_upb_dispatcher_unwind(d, sflow.flow);
return d->top; // Dummy.
_upb_dispatcher_abortjmp(d);
}
++d->top;
@ -289,24 +256,20 @@ upb_dispatcher_frame *upb_dispatch_startsubmsg(upb_dispatcher *d,
d->top->is_packed = false;
d->top->closure = sflow.closure;
d->msgent = f->submsg;
d->dispatch_table = &d->msgent->fieldtab;
upb_dispatch_startmsg(d);
return d->top;
}
upb_dispatcher_frame *upb_dispatch_endsubmsg(upb_dispatcher *d) {
//indentm1(d);
//fprintf(stderr, "END SUBMSG\n");
assert(d->top > d->stack);
assert(!d->top->is_sequence);
upb_fhandlers *f = d->top->f;
if (d->msgent->endmsg) d->msgent->endmsg(d->top->closure, &d->status);
if (d->msgent->endmsg) d->msgent->endmsg(d->top->closure, d->status);
d->msgent = d->top->f->msg;
d->dispatch_table = &d->msgent->fieldtab;
--d->top;
upb_flow_t flow = UPB_CONTINUE;
if (f->endsubmsg) f->endsubmsg(d->top->closure, f->fval);
if (flow != UPB_CONTINUE) _upb_dispatcher_unwind(d, flow);
if (flow != UPB_CONTINUE) _upb_dispatcher_abortjmp(d);
return d->top;
}
@ -320,14 +283,7 @@ bool upb_dispatcher_islegalend(upb_dispatcher *d) {
return false;
}
void _upb_dispatcher_unwind(upb_dispatcher *d, upb_flow_t flow) {
upb_dispatcher_frame *frame = d->top;
while (1) {
frame->f->submsg->endmsg(frame->closure, &d->status);
frame->f->endsubmsg(frame->closure, frame->f->fval);
--frame;
if (frame < d->stack) { d->exit(d->srcclosure); return; }
d->top = frame;
if (flow == UPB_SKIPSUBMSG) return;
}
// Aborts the current dispatch by invoking the exit handler that was passed to
// upb_dispatcher_init(), handing it the source closure registered there.  The
// handler is expected not to return.
void _upb_dispatcher_abortjmp(upb_dispatcher *d) {
d->exitjmp(d->srcclosure);
assert(false); // Never returns.
}

@ -132,13 +132,15 @@ typedef upb_flow_t (upb_endfield_handler)(void *closure, upb_value fval);
// A upb_fhandlers object represents the set of handlers associated with one
// specific message field.
//
// TODO: remove upb_decoder-specific fields from this, and instead have
// upb_decoderplan make a deep copy of the whole graph with its own fields
// added.
struct _upb_decoder;
struct _upb_mhandlers;
typedef struct _upb_fieldent {
bool junk;
upb_fieldtype_t type;
bool repeated;
bool is_repeated_primitive;
upb_atomic_t refcount;
uint32_t number;
int32_t valuehasbit;
@ -158,6 +160,11 @@ typedef struct _upb_fieldent {
void (*decode)(struct _upb_decoder *d, struct _upb_fieldent *f);
} upb_fhandlers;
// Entry type stored in upb_mhandlers.fieldtab.  The table holds a pointer to
// the field's upb_fhandlers rather than the upb_fhandlers object itself.
typedef struct {
bool junk; // Stolen by table impl; see table.h for details.
upb_fhandlers *f; // The handlers for the field this entry is keyed on.
} upb_itofhandlers_ent;
// fhandlers are created as part of a upb_handlers instance, but can be ref'd
// and unref'd to prolong the life of the handlers.
void upb_fhandlers_ref(upb_fhandlers *m);
@ -194,16 +201,18 @@ typedef struct _upb_mhandlers {
upb_inttable fieldtab; // Maps field number -> upb_fhandlers.
bool is_group;
#ifdef UPB_USE_JIT_X64
uint32_t jit_startmsg_pclabel;
uint32_t jit_endofbuf_pclabel;
uint32_t jit_endofmsg_pclabel;
uint32_t jit_dyndispatch_pclabel;
uint32_t jit_unknownfield_pclabel;
int32_t jit_parent_field_done_pclabel;
// Used inside the JIT to track labels (jmp targets) in the generated code.
uint32_t jit_startmsg_pclabel; // Starting a parse of this (sub-)message.
uint32_t jit_endofbuf_pclabel; // ptr hitend, but delim_end or jit_end?
uint32_t jit_endofmsg_pclabel; // Done parsing this (sub-)message.
uint32_t jit_dyndispatch_pclabel; // Dispatch by table lookup.
uint32_t jit_unknownfield_pclabel; // Parsed an unknown field.
uint32_t max_field_number;
// Currently keyed on field number. Could also try keying it
// on encoded or decoded tag, or on encoded field number.
void **tablearray;
// Pointer to the JIT code for parsing this message.
void *jit_func;
#endif
} upb_mhandlers;
@ -316,62 +325,47 @@ INLINE upb_mhandlers *upb_handlers_reghandlerset(upb_handlers *h, const upb_msgd
typedef struct {
upb_fhandlers *f;
void *closure;
// Members to use as the data source requires.
void *srcclosure;
uint64_t end_ofs;
uint16_t msgindex;
uint16_t fieldindex;
bool is_sequence; // frame represents seq or submsg? (f might be both).
bool is_packed; // !upb_issubmsg(f) && end_ofs != UINT64_MAX
// (strings aren't pushed).
} upb_dispatcher_frame;
// Called when some of the input needs to be skipped. All frames from the
// current top to "bottom", inclusive, should be skipped.
typedef void upb_skip_handler(void *, upb_dispatcher_frame *bottom);
typedef void upb_exit_handler(void *);
typedef struct {
upb_dispatcher_frame *top, *limit;
upb_handlers *handlers;
// Msg and dispatch table for the current level.
upb_mhandlers *msgent;
upb_inttable *dispatch_table;
upb_skip_handler *skip;
upb_exit_handler *exit;
upb_mhandlers *toplevel_msgent;
upb_exit_handler UPB_NORETURN *exitjmp;
void *srcclosure;
bool top_is_implicit;
// Stack.
upb_status status;
upb_status *status;
upb_dispatcher_frame stack[UPB_MAX_NESTING];
} upb_dispatcher;
void upb_dispatcher_init(upb_dispatcher *d, upb_handlers *h,
upb_skip_handler *skip, upb_exit_handler *exit,
void *closure);
upb_dispatcher_frame *upb_dispatcher_reset(upb_dispatcher *d, void *topclosure);
// Caller retains ownership of the status object.
void upb_dispatcher_init(upb_dispatcher *d, upb_status *status,
upb_exit_handler UPB_NORETURN *exit, void *closure);
upb_dispatcher_frame *upb_dispatcher_reset(upb_dispatcher *d, void *topclosure,
upb_mhandlers *top_msg);
void upb_dispatcher_uninit(upb_dispatcher *d);
// Tests whether the message could legally end here (either the stack is empty
// or the only open stack frame is implicit).
bool upb_dispatcher_islegalend(upb_dispatcher *d);
// Looks up a field by number for the current message.
INLINE upb_fhandlers *upb_dispatcher_lookup(upb_dispatcher *d, uint32_t n) {
return (upb_fhandlers*)upb_inttable_fastlookup(
d->dispatch_table, n, sizeof(upb_fhandlers));
}
void _upb_dispatcher_unwind(upb_dispatcher *d, upb_flow_t flow);
// Unwinds one or more stack frames based on the given flow constant that was
// just returned from a handler. Calls end handlers as appropriate.
void _upb_dispatcher_abortjmp(upb_dispatcher *d) UPB_NORETURN;
INLINE void _upb_dispatcher_sethas(void *_p, int32_t hasbit) {
char *p = (char*)_p;
if (hasbit >= 0) p[hasbit / 8] |= (1 << (hasbit % 8));
if (hasbit >= 0) p[(uint32_t)hasbit / 8] |= (1 << ((uint32_t)hasbit % 8));
}
// Dispatch functions -- call the user handler and handle errors.
@ -380,11 +374,12 @@ INLINE void upb_dispatch_value(upb_dispatcher *d, upb_fhandlers *f,
upb_flow_t flow = UPB_CONTINUE;
if (f->value) flow = f->value(d->top->closure, f->fval, val);
_upb_dispatcher_sethas(d->top->closure, f->valuehasbit);
if (flow != UPB_CONTINUE) _upb_dispatcher_unwind(d, flow);
if (flow != UPB_CONTINUE) _upb_dispatcher_abortjmp(d);
}
void upb_dispatch_startmsg(upb_dispatcher *d);
void upb_dispatch_endmsg(upb_dispatcher *d, upb_status *status);
upb_dispatcher_frame *upb_dispatch_startsubmsg(upb_dispatcher *d, upb_fhandlers *f);
upb_dispatcher_frame *upb_dispatch_startsubmsg(upb_dispatcher *d,
upb_fhandlers *f);
upb_dispatcher_frame *upb_dispatch_endsubmsg(upb_dispatcher *d);
upb_dispatcher_frame *upb_dispatch_startseq(upb_dispatcher *d, upb_fhandlers *f);
upb_dispatcher_frame *upb_dispatch_endseq(upb_dispatcher *d);

@ -86,14 +86,16 @@ void upb_stdmsg_sethas(void *_m, upb_value fval) {
assert(_m != NULL);
char *m = _m;
const upb_fielddef *f = upb_value_getfielddef(fval);
if (f->hasbit >= 0) m[f->hasbit / 8] |= (1 << (f->hasbit % 8));
if (f->hasbit >= 0)
m[(uint32_t)f->hasbit / 8] |= (1 << ((uint32_t)f->hasbit % 8));
}
bool upb_stdmsg_has(const void *_m, upb_value fval) {
assert(_m != NULL);
const char *m = _m;
const upb_fielddef *f = upb_value_getfielddef(fval);
return f->hasbit < 0 || (m[f->hasbit / 8] & (1 << (f->hasbit % 8)));
return f->hasbit < 0 ||
(m[(uint32_t)f->hasbit / 8] & (1 << ((uint32_t)f->hasbit % 8)));
}
#define UPB_ACCESSORS(type, ctype) \

@ -13,14 +13,95 @@
#include "upb/pb/decoder.h"
#include "upb/pb/varint.h"
/* upb_decoderplan ************************************************************/
#ifdef UPB_USE_JIT_X64
#define Dst_DECL upb_decoder *d
#define Dst_REF (d->dynasm)
#define Dst (d)
// These defines are necessary for DynASM codegen.
// See dynasm/dasm_proto.h for more info.
#define Dst_DECL upb_decoderplan *plan
#define Dst_REF (plan->dynasm)
#define Dst (plan)
// In debug mode, make DynASM do internal checks (must be defined before any
// dasm header is included).
#ifndef NDEBUG
#define DASM_CHECKS
#endif
#include "dynasm/dasm_proto.h"
#include "upb/pb/decoder_x64.h"
#endif
// A upb_fhandlers ("base") extended with decoder-specific members: the decode
// function for the field and, in JIT builds, the field's pclabels.
// NOTE(review): nothing visible in this part of the file uses this type yet;
// it looks like groundwork for moving decoder-specific fields out of
// upb_fhandlers -- confirm.
typedef struct {
upb_fhandlers base;
void (*decode)(struct _upb_decoder *d, struct _upb_fieldent *f);
#ifdef UPB_USE_JIT_X64
uint32_t jit_pclabel;
uint32_t jit_pclabel_notypecheck;
#endif
} upb_dplanfield;
// A upb_mhandlers ("base") extended, in JIT builds only, with the per-message
// pclabels and dispatch table array used by the JIT.
// NOTE(review): nothing visible in this part of the file uses this type yet;
// it looks like groundwork for moving JIT-specific fields out of
// upb_mhandlers -- confirm.
typedef struct {
upb_mhandlers base;
#ifdef UPB_USE_JIT_X64
uint32_t jit_startmsg_pclabel;
uint32_t jit_endofbuf_pclabel;
uint32_t jit_endofmsg_pclabel;
uint32_t jit_dyndispatch_pclabel;
uint32_t jit_unknownfield_pclabel;
int32_t jit_parent_field_done_pclabel;
uint32_t max_field_number;
// Currently keyed on field number. Could also try keying it
// on encoded or decoded tag, or on encoded field number.
void **tablearray;
#endif
} upb_dplanmsg;
// Table of decode functions, indexed by field type.  Only declared here;
// presumably defined later in this file -- confirm.
static void *upb_decoderplan_fptrs[];

// Sets f->decode to the table entry for the field's type.
void upb_decoderplan_initfhandlers(upb_fhandlers *f) {
f->decode = upb_decoderplan_fptrs[f->type];
}
// Creates a decoder plan for the handlers "h".
//
// The plan takes its own ref on "h" and records "allowjit" in h->should_jit.
// When built with UPB_USE_JIT_X64 and "allowjit" is true, JIT code is
// generated via upb_decoderplan_makejit().  Finally, every field registered in
// any of h's messages has its decode function pointer assigned.
//
// The returned plan is destroyed with upb_decoderplan_unref().
upb_decoderplan *upb_decoderplan_new(upb_handlers *h, bool allowjit) {
  upb_decoderplan *p = malloc(sizeof(*p));
  p->handlers = h;
  upb_handlers_ref(h);
  h->should_jit = allowjit;
#ifdef UPB_USE_JIT_X64
  p->jit_code = NULL;
  if (allowjit) upb_decoderplan_makejit(p);
#endif
  // Set function pointers for each field's decode function.
  for (int i = 0; i < h->msgs_len; i++) {
    upb_mhandlers *m = h->msgs[i];
    // The table iterator gets its own name so that it does not shadow the
    // message index "i" of the enclosing loop.
    for (upb_inttable_iter iter = upb_inttable_begin(&m->fieldtab);
         !upb_inttable_done(iter);
         iter = upb_inttable_next(&m->fieldtab, iter)) {
      upb_itofhandlers_ent *e = upb_inttable_iter_value(iter);
      upb_decoderplan_initfhandlers(e->f);
    }
  }
  return p;
}
// Releases a decoder plan: drops the plan's ref on its handlers, frees any
// JIT-generated code, and frees the plan itself.  Passing NULL is a no-op,
// mirroring free().
void upb_decoderplan_unref(upb_decoderplan *p) {
  // TODO: make truly refcounted.
  if (p == NULL) return;
  upb_handlers_unref(p->handlers);
#ifdef UPB_USE_JIT_X64
  if (p->jit_code) upb_decoderplan_freejit(p);
#endif
  free(p);
}
// Returns true if the plan contains JIT-generated code.  The jit_code member
// only exists when UPB_USE_JIT_X64 is defined, so builds without JIT support
// always report false.
bool upb_decoderplan_hasjitcode(upb_decoderplan *p) {
#ifdef UPB_USE_JIT_X64
  return p->jit_code != NULL;
#else
  (void)p;
  return false;
#endif
}
/* upb_decoder ****************************************************************/
// It's unfortunate that we have to micro-manage the compiler this way,
// especially since this tuning is necessarily specific to one hardware
// configuration. But empirically on a Core i7, performance increases 30-50%
@ -29,18 +110,17 @@
#define FORCEINLINE static __attribute__((always_inline))
#define NOINLINE static __attribute__((noinline))
static void upb_decoder_exit(upb_decoder *d) {
UPB_NORETURN static void upb_decoder_exitjmp(upb_decoder *d) {
// Resumable decoder would back out to completed_ptr (and possibly get a
// previous buffer).
siglongjmp(d->exitjmp, 1);
}
static void upb_decoder_exit2(void *_d) {
upb_decoder *d = _d;
upb_decoder_exit(d);
UPB_NORETURN static void upb_decoder_exitjmp2(void *d) {
upb_decoder_exitjmp(d);
}
static void upb_decoder_abort(upb_decoder *d, const char *msg) {
upb_status_seterrliteral(d->status, msg);
upb_decoder_exit(d);
UPB_NORETURN static void upb_decoder_abortjmp(upb_decoder *d, const char *msg) {
upb_status_seterrliteral(&d->status, msg);
upb_decoder_exitjmp(d);
}
/* Buffering ******************************************************************/
@ -50,8 +130,12 @@ static void upb_decoder_abort(upb_decoder *d, const char *msg) {
// the next one. When we've committed our progress we discard any previous
// buffers' regions.
static uint32_t upb_decoder_bufleft(upb_decoder *d) { return d->end - d->ptr; }
static void upb_decoder_advance(upb_decoder *d, uint32_t len) {
static size_t upb_decoder_bufleft(upb_decoder *d) {
assert(d->end >= d->ptr);
return d->end - d->ptr;
}
static void upb_decoder_advance(upb_decoder *d, size_t len) {
assert(upb_decoder_bufleft(d) >= len);
d->ptr += len;
}
@ -66,29 +150,49 @@ uint64_t upb_decoder_bufendofs(upb_decoder *d) {
static void upb_decoder_setmsgend(upb_decoder *d) {
upb_dispatcher_frame *f = d->dispatcher.top;
uint32_t delimlen = f->end_ofs - d->bufstart_ofs;
uint32_t buflen = d->end - d->buf;
size_t delimlen = f->end_ofs - d->bufstart_ofs;
size_t buflen = d->end - d->buf;
d->delim_end = (f->end_ofs != UPB_NONDELIMITED && delimlen <= buflen) ?
d->buf + delimlen : NULL; // NULL if not in this buf.
d->top_is_packed = f->is_packed;
d->dispatch_table = &d->dispatcher.msgent->fieldtab;
}
static bool upb_trypullbuf(upb_decoder *d) {
assert(upb_decoder_bufleft(d) == 0);
d->bufstart_ofs = upb_decoder_offset(d);
static void upb_decoder_skiptonewbuf(upb_decoder *d, uint64_t ofs) {
assert(ofs >= upb_decoder_offset(d));
if (ofs > upb_byteregion_endofs(d->input))
upb_decoder_abortjmp(d, "Unexpected EOF");
d->buf = NULL;
d->ptr = NULL;
d->end = NULL;
if (upb_byteregion_available(d->input, upb_decoder_offset(d)) == 0 &&
!upb_byteregion_fetch(d->input, d->status)) {
if (upb_eof(d->status)) return false;
upb_decoder_exit(d); // Non-EOF error.
d->delim_end = NULL;
#ifdef UPB_USE_JIT_X64
d->jit_end = NULL;
#endif
d->bufstart_ofs = ofs;
}
static bool upb_trypullbuf(upb_decoder *d) {
assert(upb_decoder_bufleft(d) == 0);
upb_decoder_skiptonewbuf(d, upb_decoder_offset(d));
if (upb_byteregion_available(d->input, d->bufstart_ofs) == 0) {
switch (upb_byteregion_fetch(d->input)) {
case UPB_BYTE_OK:
assert(upb_byteregion_available(d->input, d->bufstart_ofs) > 0);
break;
case UPB_BYTE_EOF: return false;
case UPB_BYTE_ERROR: upb_decoder_abortjmp(d, "I/O error in input");
// Decoder resuming is not yet supported.
case UPB_BYTE_WOULDBLOCK:
upb_decoder_abortjmp(d, "Input returned WOULDBLOCK");
}
}
uint32_t len;
size_t len;
d->buf = upb_byteregion_getptr(d->input, d->bufstart_ofs, &len);
assert(len > 0);
d->ptr = d->buf;
d->end = d->buf + len;
upb_decoder_setmsgend(d);
#ifdef UPB_USE_JIT_X64
// If we start parsing a value, we can parse up to 20 bytes without
// having to bounds-check anything (2 10-byte varints). Since the
@ -96,27 +200,29 @@ static bool upb_trypullbuf(upb_decoder *d) {
// JIT bails if there are not 20 bytes available.
d->jit_end = d->end - 20;
#endif
upb_decoder_setmsgend(d);
assert(upb_decoder_bufleft(d) > 0);
return true;
}
static void upb_pullbuf(upb_decoder *d) {
if (!upb_trypullbuf(d)) upb_decoder_abort(d, "Unexpected EOF");
if (!upb_trypullbuf(d)) upb_decoder_abortjmp(d, "Unexpected EOF");
}
void upb_decoder_checkpoint(upb_decoder *d) {
upb_byteregion_discard(d->input, upb_decoder_offset(d));
}
void upb_decoder_skipto(upb_decoder *d, uint64_t ofs) {
if (ofs < upb_decoder_bufendofs(d)) {
void upb_decoder_discardto(upb_decoder *d, uint64_t ofs) {
if (ofs <= upb_decoder_bufendofs(d)) {
upb_decoder_advance(d, ofs - upb_decoder_offset(d));
} else {
d->buf = NULL;
d->ptr = NULL;
d->end = NULL;
d->bufstart_ofs = ofs;
upb_decoder_skiptonewbuf(d, ofs);
}
upb_decoder_checkpoint(d);
}
void upb_decoder_checkpoint(upb_decoder *d) {
upb_byteregion_discard(d->input, upb_decoder_offset(d));
void upb_decoder_discard(upb_decoder *d, size_t bytes) {
upb_decoder_discardto(d, upb_decoder_offset(d) + bytes);
}
@ -126,15 +232,13 @@ NOINLINE uint64_t upb_decode_varint_slow(upb_decoder *d) {
uint8_t byte = 0x80;
uint64_t u64 = 0;
int bitpos;
const char *ptr = d->ptr;
for(bitpos = 0; bitpos < 70 && (byte & 0x80); bitpos += 7) {
if (upb_decoder_bufleft(d) == 0) {
upb_pullbuf(d);
ptr = d->ptr;
}
u64 |= ((uint64_t)(byte = *ptr++) & 0x7F) << bitpos;
if (upb_decoder_bufleft(d) == 0) upb_pullbuf(d);
u64 |= ((uint64_t)(byte = *d->ptr) & 0x7F) << bitpos;
upb_decoder_advance(d, 1);
}
if(bitpos == 70 && (byte & 0x80)) upb_decoder_abort(d, "Unterminated varint");
if(bitpos == 70 && (byte & 0x80))
upb_decoder_abortjmp(d, "Unterminated varint");
return u64;
}
@ -151,7 +255,7 @@ FORCEINLINE uint32_t upb_decode_varint32(upb_decoder *d) {
if ((*(p++) & 0x80) == 0) goto done; // likely
slow:
u64 = upb_decode_varint_slow(d);
if (u64 > 0xffffffff) upb_decoder_abort(d, "Unterminated 32-bit varint");
if (u64 > UINT32_MAX) upb_decoder_abortjmp(d, "Unterminated 32-bit varint");
ret = (uint32_t)u64;
p = d->ptr; // Turn the next line into a nop.
done:
@ -174,7 +278,7 @@ FORCEINLINE uint64_t upb_decode_varint(upb_decoder *d) {
if (upb_decoder_bufleft(d) >= 10) {
// Fast case.
upb_decoderet r = upb_vdecode_fast(d->ptr);
if (r.p == NULL) upb_decoder_abort(d, "Unterminated varint");
if (r.p == NULL) upb_decoder_abortjmp(d, "Unterminated varint");
upb_decoder_advance(d, r.p - d->ptr);
return r.val;
} else if (upb_decoder_bufleft(d) > 0) {
@ -200,11 +304,12 @@ FORCEINLINE void upb_decode_fixed(upb_decoder *d, char *buf, size_t bytes) {
} else {
// Slow case.
size_t read = 0;
while (read < bytes) {
size_t avail = upb_decoder_bufleft(d);
while (1) {
size_t avail = UPB_MIN(upb_decoder_bufleft(d), bytes - read);
memcpy(buf + read, d->ptr, avail);
upb_decoder_advance(d, avail);
read += avail;
if (read == bytes) break;
upb_pullbuf(d);
}
}
@ -213,26 +318,28 @@ FORCEINLINE void upb_decode_fixed(upb_decoder *d, char *buf, size_t bytes) {
FORCEINLINE uint32_t upb_decode_fixed32(upb_decoder *d) {
uint32_t u32;
upb_decode_fixed(d, (char*)&u32, sizeof(uint32_t));
return u32; // TODO: proper byte swapping
return u32; // TODO: proper byte swapping for big-endian machines.
}
FORCEINLINE uint64_t upb_decode_fixed64(upb_decoder *d) {
uint64_t u64;
upb_decode_fixed(d, (char*)&u64, sizeof(uint64_t));
return u64; // TODO: proper byte swapping
return u64; // TODO: proper byte swapping for big-endian machines.
}
INLINE upb_byteregion *upb_decode_string(upb_decoder *d) {
uint32_t strlen = upb_decode_varint32(d);
uint64_t offset = upb_decoder_offset(d);
if (offset + strlen > upb_byteregion_endofs(d->input))
upb_decoder_abortjmp(d, "Unexpected EOF");
upb_byteregion_reset(&d->str_byteregion, d->input, offset, strlen);
// Could make it an option on the callback whether we fetchall() first or not.
upb_byteregion_fetchall(&d->str_byteregion, d->status);
if (!upb_ok(d->status)) upb_decoder_exit(d);
upb_decoder_skipto(d, offset + strlen);
if (upb_byteregion_fetchall(&d->str_byteregion) != UPB_BYTE_OK)
upb_decoder_abortjmp(d, "Couldn't fetchall() on string.");
upb_decoder_discardto(d, offset + strlen);
return &d->str_byteregion;
}
INLINE void upb_push(upb_decoder *d, upb_fhandlers *f, uint64_t end) {
INLINE void upb_push_msg(upb_decoder *d, upb_fhandlers *f, uint64_t end) {
upb_dispatch_startsubmsg(&d->dispatcher, f)->end_ofs = end;
upb_decoder_setmsgend(d);
}
@ -253,8 +360,6 @@ INLINE void upb_push(upb_decoder *d, upb_fhandlers *f, uint64_t end) {
// Reinterprets the bits of a 64-bit integer as a double.  The copy goes
// through memcpy rather than a pointer cast.
static double upb_asdouble(uint64_t n) {
  double result;
  memcpy(&result, &n, 8);
  return result;
}
// Reinterprets the bits of a 32-bit integer as a float.  The copy goes
// through memcpy rather than a pointer cast.
static float upb_asfloat(uint32_t n) {
  float result;
  memcpy(&result, &n, 4);
  return result;
}
static int32_t upb_zzdec_32(uint32_t n) { return (n >> 1) ^ -(int32_t)(n & 1); }
static int64_t upb_zzdec_64(uint64_t n) { return (n >> 1) ^ -(int64_t)(n & 1); }
T(INT32, varint, int32, int32_t)
T(INT64, varint, int64, int64_t)
@ -271,9 +376,10 @@ T(FLOAT, fixed32, float, upb_asfloat)
T(SINT32, varint, int32, upb_zzdec_32)
T(SINT64, varint, int64, upb_zzdec_64)
T(STRING, string, byteregion, upb_byteregion*)
#undef T
static void upb_decode_GROUP(upb_decoder *d, upb_fhandlers *f) {
upb_push(d, f, UPB_NONDELIMITED);
upb_push_msg(d, f, UPB_NONDELIMITED);
}
static void upb_endgroup(upb_decoder *d, upb_fhandlers *f) {
(void)f;
@ -281,15 +387,30 @@ static void upb_endgroup(upb_decoder *d, upb_fhandlers *f) {
upb_decoder_setmsgend(d);
}
static void upb_decode_MESSAGE(upb_decoder *d, upb_fhandlers *f) {
upb_push(d, f, upb_decode_varint32(d) + upb_decoder_offset(d));
uint32_t len = upb_decode_varint32(d);
upb_push_msg(d, f, upb_decoder_offset(d) + len);
}
// F(type) expands to the address of the decode function for that field type.
#define F(type) &upb_decode_ ## type
// Decode functions indexed by field type; upb_decoderplan_initfhandlers()
// selects an entry with upb_decoderplan_fptrs[f->type].  Slot 0 holds
// upb_endgroup.
// NOTE(review): the order presumably mirrors the field type numbering, with
// the second F(STRING) entry serving the bytes type -- confirm against the
// type enum.
static void *upb_decoderplan_fptrs[] = {
&upb_endgroup, F(DOUBLE), F(FLOAT), F(INT64),
F(UINT64), F(INT32), F(FIXED64), F(FIXED32), F(BOOL), F(STRING),
F(GROUP), F(MESSAGE), F(STRING), F(UINT32), F(ENUM), F(SFIXED32),
F(SFIXED64), F(SINT32), F(SINT64)};
#undef F
/* The main decoding loop *****************************************************/
static void upb_decoder_checkdelim(upb_decoder *d) {
// TODO: This doesn't work for the case that no buffer is currently loaded
// (ie. d->buf == NULL) because delim_end is NULL even if we are at
// end-of-delim. Need to add a test that exercises this by putting a buffer
// seam in the middle of the final delimited value in a proto that we skip
// for some reason (like because it's unknown and we have no unknown field
// handler).
while (d->delim_end != NULL && d->ptr >= d->delim_end) {
if (d->ptr > d->delim_end) upb_decoder_abort(d, "Bad submessage end");
if (d->ptr > d->delim_end) upb_decoder_abortjmp(d, "Bad submessage end");
if (d->dispatcher.top->is_sequence) {
upb_dispatch_endseq(&d->dispatcher);
} else {
@ -299,33 +420,36 @@ static void upb_decoder_checkdelim(upb_decoder *d) {
}
}
static void upb_decoder_enterjit(upb_decoder *d) {
(void)d;
#ifdef UPB_USE_JIT_X64
if (d->jit_code && d->dispatcher.top == d->dispatcher.stack && d->ptr < d->jit_end) {
// Decodes as many fields as possible, updating d->ptr appropriately,
// before falling through to the slow(er) path.
void (*upb_jit_decode)(upb_decoder *d) = (void*)d->jit_code;
upb_jit_decode(d);
}
#endif
}
INLINE upb_fhandlers *upb_decode_tag(upb_decoder *d) {
while (1) {
uint32_t tag;
if (!upb_trydecode_varint32(d, &tag)) return NULL;
uint8_t wire_type = tag & 0x7;
upb_fhandlers *f = upb_dispatcher_lookup(&d->dispatcher, tag);
uint32_t fieldnum = tag >> 3;
upb_itofhandlers_ent *e = upb_inttable_fastlookup(
d->dispatch_table, fieldnum, sizeof(upb_itofhandlers_ent));
upb_fhandlers *f = e ? e->f : NULL;
if (f) {
// Wire type check.
if (wire_type == upb_types[f->type].native_wire_type ||
(wire_type == UPB_WIRE_TYPE_DELIMITED &&
upb_types[f->type].is_numeric)) {
// Wire type is ok.
} else {
f = NULL;
}
}
// There are no explicit "startseq" or "endseq" markers in protobuf
// streams, so we have to infer them by noticing when a repeated field
// starts or ends.
if (d->dispatcher.top->is_sequence && d->dispatcher.top->f != f) {
upb_dispatcher_frame *fr = d->dispatcher.top;
if (fr->is_sequence && fr->f != f) {
upb_dispatch_endseq(&d->dispatcher);
upb_decoder_setmsgend(d);
}
if (f && f->repeated && d->dispatcher.top->f != f) {
if (f && f->repeated && (!fr->is_sequence || fr->f != f)) {
uint64_t old_end = d->dispatcher.top->end_ofs;
upb_dispatcher_frame *fr = upb_dispatch_startseq(&d->dispatcher, f);
if (wire_type != UPB_WIRE_TYPE_DELIMITED ||
@ -334,7 +458,8 @@ INLINE upb_fhandlers *upb_decode_tag(upb_decoder *d) {
fr->end_ofs = old_end;
} else {
// Packed primitive field.
fr->end_ofs = upb_decoder_offset(d) + upb_decode_varint(d);
uint32_t len = upb_decode_varint32(d);
fr->end_ofs = upb_decoder_offset(d) + len;
fr->is_packed = true;
}
upb_decoder_setmsgend(d);
@ -343,14 +468,20 @@ INLINE upb_fhandlers *upb_decode_tag(upb_decoder *d) {
if (f) return f;
// Unknown field.
if (fieldnum == 0 || fieldnum > UPB_MAX_FIELDNUMBER)
upb_decoder_abortjmp(d, "Invalid field number");
switch (wire_type) {
case UPB_WIRE_TYPE_VARINT: upb_decode_varint(d); break;
case UPB_WIRE_TYPE_32BIT: upb_decoder_advance(d, 4); break;
case UPB_WIRE_TYPE_64BIT: upb_decoder_advance(d, 8); break;
case UPB_WIRE_TYPE_32BIT: upb_decoder_discard(d, 4); break;
case UPB_WIRE_TYPE_64BIT: upb_decoder_discard(d, 8); break;
case UPB_WIRE_TYPE_DELIMITED:
upb_decoder_advance(d, upb_decode_varint32(d)); break;
upb_decoder_discard(d, upb_decode_varint32(d)); break;
case UPB_WIRE_TYPE_START_GROUP:
upb_decoder_abortjmp(d, "Can't handle unknown groups yet");
case UPB_WIRE_TYPE_END_GROUP:
upb_decoder_abortjmp(d, "Unmatched ENDGROUP tag");
default:
upb_decoder_abort(d, "Invalid wire type");
upb_decoder_abortjmp(d, "Invalid wire type");
}
// TODO: deliver to unknown field callback.
upb_decoder_checkpoint(d);
@ -358,16 +489,22 @@ INLINE upb_fhandlers *upb_decode_tag(upb_decoder *d) {
}
}
void upb_decoder_decode(upb_decoder *d, upb_status *status) {
if (sigsetjmp(d->exitjmp, 0)) { assert(!upb_ok(status)); return; }
d->status = status;
upb_success_t upb_decoder_decode(upb_decoder *d) {
assert(d->input);
if (sigsetjmp(d->exitjmp, 0)) {
assert(!upb_ok(&d->status));
return UPB_ERROR;
}
upb_dispatch_startmsg(&d->dispatcher);
// Prime the buf so we can hit the JIT immediately.
upb_trypullbuf(d);
upb_fhandlers *f = d->dispatcher.top->f;
while(1) { // Main loop: executed once per tag/field pair.
while(1) {
upb_decoder_checkdelim(d);
#ifdef UPB_USE_JIT_X64
upb_decoder_enterjit(d);
upb_decoder_checkpoint(d);
#endif
if (!d->top_is_packed) f = upb_decode_tag(d);
if (!f) {
// Successful EOF. We may need to dispatch a top-level implicit frame.
@ -375,64 +512,46 @@ void upb_decoder_decode(upb_decoder *d, upb_status *status) {
assert(d->dispatcher.top->is_sequence);
upb_dispatch_endseq(&d->dispatcher);
}
return;
return UPB_OK;
}
f->decode(d, f);
upb_decoder_checkpoint(d);
}
}
static void upb_decoder_skip(void *_d, upb_dispatcher_frame *f) {
upb_decoder *d = _d;
if (f->end_ofs != UPB_NONDELIMITED) {
upb_decoder_skipto(d, d->dispatcher.top->end_ofs);
} else {
// TODO: how to support skipping groups? Dispatcher could drop callbacks,
// or it could be special-cased inside the decoder.
}
void upb_decoder_init(upb_decoder *d) {
upb_status_init(&d->status);
upb_dispatcher_init(&d->dispatcher, &d->status, &upb_decoder_exitjmp2, d);
d->plan = NULL;
d->input = NULL;
}
void upb_decoder_init(upb_decoder *d, upb_handlers *handlers) {
upb_dispatcher_init(
&d->dispatcher, handlers, upb_decoder_skip, upb_decoder_exit2, d);
#ifdef UPB_USE_JIT_X64
d->jit_code = NULL;
if (d->dispatcher.handlers->should_jit) upb_decoder_makejit(d);
#endif
// Set function pointers for each field's decode function.
for (int i = 0; i < handlers->msgs_len; i++) {
upb_mhandlers *m = handlers->msgs[i];
for(upb_inttable_iter i = upb_inttable_begin(&m->fieldtab); !upb_inttable_done(i);
i = upb_inttable_next(&m->fieldtab, i)) {
upb_fhandlers *f = upb_inttable_iter_value(i);
#define F(type) &upb_decode_ ## type
static void *fptrs[] = {&upb_endgroup, F(DOUBLE), F(FLOAT), F(INT64),
F(UINT64), F(INT32), F(FIXED64), F(FIXED32), F(BOOL), F(STRING),
F(GROUP), F(MESSAGE), F(STRING), F(UINT32), F(ENUM), F(SFIXED32),
F(SFIXED64), F(SINT32), F(SINT64)};
f->decode = fptrs[f->type];
}
}
// Points the decoder at a new plan and records which of the plan's messages
// is the top-level one.  The input is cleared, so a new input must be
// supplied before decoding.
void upb_decoder_resetplan(upb_decoder *d, upb_decoderplan *p, int msg_offset) {
  // msg_offset must index one of the plan's messages.
  assert(msg_offset >= 0);
  assert(msg_offset < p->handlers->msgs_len);
  d->input = NULL;
  d->msg_offset = msg_offset;
  d->plan = p;
}
void upb_decoder_reset(upb_decoder *d, upb_byteregion *input, void *closure) {
upb_dispatcher_frame *f = upb_dispatcher_reset(&d->dispatcher, closure);
void upb_decoder_resetinput(upb_decoder *d, upb_byteregion *input,
void *closure) {
assert(d->plan);
upb_dispatcher_frame *f =
upb_dispatcher_reset(&d->dispatcher, closure, d->plan->handlers->msgs[0]);
upb_status_clear(&d->status);
f->end_ofs = UPB_NONDELIMITED;
d->input = input;
d->bufstart_ofs = upb_byteregion_startofs(input);
d->buf = NULL;
d->ptr = NULL;
d->end = NULL; // Force a buffer pull.
d->delim_end = NULL; // But don't let end-of-message get triggered.
d->str_byteregion.bytesrc = input->bytesrc;
#ifdef UPB_USE_JIT_X64
d->jit_end = NULL;
#endif
// Protect against assert in skiptonewbuf().
d->bufstart_ofs = 0;
d->ptr = NULL;
d->buf = NULL;
upb_decoder_skiptonewbuf(d, upb_byteregion_startofs(input));
}
void upb_decoder_uninit(upb_decoder *d) {
#ifdef UPB_USE_JIT_X64
if (d->dispatcher.handlers->should_jit) upb_decoder_freejit(d);
#endif
upb_dispatcher_uninit(&d->dispatcher);
upb_status_uninit(&d->status);
}

@ -21,56 +21,108 @@
extern "C" {
#endif
/* upb_decoder *****************************************************************/
/* upb_decoderplan ************************************************************/
// A decoderplan contains whatever data structures and generated (JIT-ted) code
// are necessary to decode protobuf data of a specific type to a specific set
// of handlers. By generating the plan ahead of time, we avoid having to
// redo this work every time we decode.
//
// A decoderplan is threadsafe, meaning that it can be used concurrently by
// different upb_decoders in different threads. However, the upb_decoders are
// *not* thread-safe.
struct _upb_decoderplan;
typedef struct _upb_decoderplan upb_decoderplan;
// TODO: add parameter for a list of other decoder plans that we can share
// generated code with.
upb_decoderplan *upb_decoderplan_new(upb_handlers *h, bool allowjit);
void upb_decoderplan_unref(upb_decoderplan *p);
// Returns true if the plan contains JIT-ted code. This may not be the same as
// the "allowjit" parameter to the constructor if support for JIT-ting was not
// compiled in.
bool upb_decoderplan_hasjitcode(upb_decoderplan *p);
/* upb_decoder ****************************************************************/
struct dasm_State;
typedef struct _upb_decoder {
upb_byteregion *input; // Input data (serialized).
upb_decoderplan *plan;
int msg_offset; // Which message from the plan is top-level.
upb_byteregion *input; // Input data (serialized), not owned.
upb_dispatcher dispatcher; // Dispatcher to which we push parsed data.
upb_status *status; // Where we will store any errors that occur.
upb_status status; // Where we store errors that occur.
upb_byteregion str_byteregion; // For passing string data to callbacks.
upb_inttable *dispatch_table;
// Current input buffer and its stream offset.
const char *buf, *ptr, *end;
uint64_t bufstart_ofs;
// End of the delimited region, relative to ptr, or NULL if not in this buf.
const char *delim_end;
// True if the top stack frame represents a packed field.
bool top_is_packed;
#ifdef UPB_USE_JIT_X64
// For JIT, which doesn't do bounds checks in the middle of parsing a field.
const char *jit_end, *effective_end; // == MIN(jit_end, submsg_end)
// JIT-generated machine code (else NULL).
char *jit_code;
size_t jit_size;
char *debug_info;
struct dasm_State *dynasm;
#endif
// For exiting the decoder on error.
sigjmp_buf exitjmp;
} upb_decoder;
// Initializes/uninitializes a decoder for calling into the given handlers
// or to write into the given msgdef, given its accessors). Takes a ref
// on the handlers.
void upb_decoder_init(upb_decoder *d, upb_handlers *h);
void upb_decoder_init(upb_decoder *d);
void upb_decoder_uninit(upb_decoder *d);
// Resets the internal state of an already-allocated decoder. This puts it in a
// state where it has not seen any data, and expects the next data to be from
// the beginning of a new protobuf. Decoders must be reset before they can be
// used. A decoder can be reset multiple times. "input" must live until the
// decoder is reset again (or destroyed).
void upb_decoder_reset(upb_decoder *d, upb_byteregion *input, void *closure);
// Resets the plan that the decoder will parse from. "msg_offset" indicates
// which message from the plan will be used as the top-level message.
//
// This will also reset the decoder's input to be uninitialized --
// upb_decoder_resetinput() must be called before parsing can occur. The plan
// must live until the decoder is destroyed or reset to a different plan.
//
// Must be called before upb_decoder_resetinput() or upb_decoder_decode().
void upb_decoder_resetplan(upb_decoder *d, upb_decoderplan *p, int msg_offset);
// Resets the input of an already-allocated decoder. This puts it in a state
// where it has not seen any data, and expects the next data to be from the
// beginning of a new protobuf. Decoders must have their input reset before
// they can be used. A decoder can have its input reset multiple times.
// "input" must live until the decoder is destroyed or has its input reset
// again. "c" is the closure that will be passed to the handlers.
//
// Must be called before upb_decoder_decode().
void upb_decoder_resetinput(upb_decoder *d, upb_byteregion *input, void *c);
// Decodes serialized data (calling handlers as the data is parsed), returning
// the success of the operation (call upb_decoder_status() for details).
upb_success_t upb_decoder_decode(upb_decoder *d);
// Read-only access to the status object embedded in the decoder.
INLINE const upb_status *upb_decoder_status(upb_decoder *d) {
  const upb_status *status = &d->status;
  return status;
}
// Implementation details
struct _upb_decoderplan {
upb_handlers *handlers; // owns reference.
#ifdef UPB_USE_JIT_X64
// JIT-generated machine code (else NULL).
char *jit_code;
size_t jit_size;
char *debug_info;
// Decodes serialized data (calling handlers as the data is parsed) until error
// or EOF (see *status for details).
void upb_decoder_decode(upb_decoder *d, upb_status *status);
// This pointer is allocated by dasm_init() and freed by dasm_free().
struct dasm_State *dynasm;
#endif
};
#ifdef __cplusplus
} /* extern "C" */

@ -4,20 +4,15 @@
|// Copyright (c) 2011 Google Inc. See LICENSE for details.
|// Author: Josh Haberman <jhaberman@gmail.com>
|//
|// JIT compiler for upb_decoder on x86. Given a upb_handlers object,
|// generates code specialized to parsing the specific message and
|// calling specific handlers.
|// JIT compiler for upb_decoder on x86. Given a upb_decoderplan object (which
|// contains an embedded set of upb_handlers), generates code specialized to
|// parsing the specific message and calling specific handlers.
|//
|// Since the JIT can call other functions (the JIT'ted code is not a leaf
|// function) we must respect alignment rules. On OS X, this means aligning
|// the stack to 16 bytes.
#define UPB_NONE -1
#define UPB_MULTIPLE -2
#define UPB_TOPLEVEL_ONE -3
#include <sys/mman.h>
#include "dynasm/dasm_proto.h"
#include "dynasm/dasm_x86.h"
#ifndef MAP_ANONYMOUS
@ -73,15 +68,15 @@ gdb_jit_descriptor __jit_debug_descriptor = {1, GDB_JIT_NOACTION, NULL, NULL};
void __attribute__((noinline)) __jit_debug_register_code() { __asm__ __volatile__(""); }
void upb_reg_jit_gdb(upb_decoder *d) {
void upb_reg_jit_gdb(upb_decoderplan *plan) {
// Create debug info.
size_t elf_len = sizeof(upb_jit_debug_elf_file);
d->debug_info = malloc(elf_len);
memcpy(d->debug_info, upb_jit_debug_elf_file, elf_len);
uint64_t *p = (void*)d->debug_info;
for (; (void*)(p+1) <= (void*)d->debug_info + elf_len; ++p) {
if (*p == 0x12345678) { *p = (uintptr_t)d->jit_code; }
if (*p == 0x321) { *p = d->jit_size; }
plan->debug_info = malloc(elf_len);
memcpy(plan->debug_info, upb_jit_debug_elf_file, elf_len);
uint64_t *p = (void*)plan->debug_info;
for (; (void*)(p+1) <= (void*)plan->debug_info + elf_len; ++p) {
if (*p == 0x12345678) { *p = (uintptr_t)plan->jit_code; }
if (*p == 0x321) { *p = plan->jit_size; }
}
// Register the JIT-ted code with GDB.
@ -89,7 +84,7 @@ void upb_reg_jit_gdb(upb_decoder *d) {
e->next_entry = __jit_debug_descriptor.first_entry;
e->prev_entry = NULL;
if (e->next_entry) e->next_entry->prev_entry = e;
e->symfile_addr = d->debug_info;
e->symfile_addr = plan->debug_info;
e->symfile_size = elf_len;
__jit_debug_descriptor.first_entry = e;
__jit_debug_descriptor.relevant_entry = e;
@ -99,12 +94,17 @@ void upb_reg_jit_gdb(upb_decoder *d) {
#else
void upb_reg_jit_gdb(upb_decoder *d) {
(void)d;
void upb_reg_jit_gdb(upb_decoderplan *plan) {
(void)plan;
}
#endif
// Has to be a separate function, otherwise GCC will complain about
// expressions like (&foo != NULL) because they will never evaluate
// to false.
static void upb_assert_notnull(void *addr) {
  // The cast keeps "addr" referenced when NDEBUG compiles the assert away,
  // avoiding an unused-parameter warning in release builds.
  (void)addr;
  assert(addr != NULL);
}
|.arch x64
|.actionlist upb_jit_actionlist
|.globals UPB_JIT_GLOBAL_
@ -126,7 +126,7 @@ void upb_reg_jit_gdb(upb_decoder *d) {
|// ALL of the code in this file uses these register allocations.
|// When we "call" within this file, we do not use regular calling
|// conventions, but of course when calling to user callbacks we must.
|.define PTR, rbx
|.define PTR, rbx // Writing this to DECODER->ptr commits our progress.
|.define CLOSURE, r12
|.type FRAME, upb_dispatcher_frame, r13
|.type BYTEREGION,upb_byteregion, r14
@ -134,6 +134,7 @@ void upb_reg_jit_gdb(upb_decoder *d) {
|.type STDARRAY, upb_stdarray
|
|.macro callp, addr
|| upb_assert_notnull(addr);
|| if ((uintptr_t)addr < 0xffffffff) {
| call &addr
|| } else {
@ -191,11 +192,12 @@ void upb_reg_jit_gdb(upb_decoder *d) {
| decode_loaded_varint, 0
| mov ecx, edx
| shr ecx, 3
| and edx, 0x7
| and edx, 0x7 // For the type check that will happen later.
| cmp ecx, m->max_field_number // Bounds-check the field.
| ja ->exit_jit // In the future; could be unknown label
|| if ((uintptr_t)m->tablearray < 0xffffffff) {
| mov rax, qword [rcx*8 + m->tablearray] // TODO: support hybrid array/hash tables.
| // TODO: support hybrid array/hash tables.
| mov rax, qword [rcx*8 + m->tablearray]
|| } else {
| mov64 rax, (uintptr_t)m->tablearray
| mov rax, qword [rax + rcx*8]
@ -217,8 +219,9 @@ void upb_reg_jit_gdb(upb_decoder *d) {
| lea rax, [FRAME + sizeof(upb_dispatcher_frame)] // rax for shorter addressing.
| cmp rax, qword DECODER->dispatcher.limit
| jae ->exit_jit // Frame stack overflow.
| mov qword FRAME:rax->f, f
| mov dword FRAME:rax->end_ofs, end_offset_
| mov64 r8, (uintptr_t)f
| mov qword FRAME:rax->f, r8
| mov qword FRAME:rax->end_ofs, end_offset_
| mov byte FRAME:rax->is_sequence, is_sequence_
| mov DECODER->dispatcher.top, rax
| mov FRAME, rax
@ -294,7 +297,7 @@ void upb_reg_jit_gdb(upb_decoder *d) {
|
|.macro sethas, reg, hasbit
|| if (hasbit >= 0) {
| or byte [reg + (hasbit / 8)], (1 << (hasbit % 8))
| or byte [reg + ((uint32_t)hasbit / 8)], (1 << ((uint32_t)hasbit % 8))
|| }
|.endmacro
@ -304,7 +307,8 @@ void upb_reg_jit_gdb(upb_decoder *d) {
#include "upb/msg.h"
// Decodes the next val into ARG3, advances PTR.
static void upb_decoder_jit_decodefield(upb_decoder *d, upb_mhandlers *m,
static void upb_decoderplan_jit_decodefield(upb_decoderplan *plan,
upb_mhandlers *m,
uint8_t type, size_t tag_size) {
// Decode the value into arg 3 for the callback.
switch (type) {
@ -365,9 +369,9 @@ static void upb_decoder_jit_decodefield(upb_decoder *d, upb_mhandlers *m,
// robust checks.
| mov ecx, dword [PTR + tag_size]
| decode_loaded_varint tag_size
| mov rdi, DECODER->effective_end
| mov rdi, DECODER->end
| sub rdi, rax
| cmp ARG3_64, rdi // if (len > d->effective_end - str)
| cmp ARG3_64, rdi // if (len > d->end - str)
| ja ->exit_jit // Can't deliver, whole string not in buf.
// Update PTR to point past end of string.
@ -401,7 +405,7 @@ static void upb_decoder_jit_decodefield(upb_decoder *d, upb_mhandlers *m,
#if 0
// These appear not to speed things up, but keeping around for
// further experimentation.
static void upb_decoder_jit_doappend(upb_decoder *d, uint8_t size,
static void upb_decoderplan_jit_doappend(upb_decoderplan *plan, uint8_t size,
upb_fhandlers *f) {
| mov eax, STDARRAY:ARG1_64->len
| cmp eax, STDARRAY:ARG1_64->size
@ -434,18 +438,19 @@ static void upb_decoder_jit_doappend(upb_decoder *d, uint8_t size,
}
#endif
static void upb_decoder_jit_callcb(upb_decoder *d, upb_fhandlers *f) {
static void upb_decoderplan_jit_callcb(upb_decoderplan *plan,
upb_fhandlers *f) {
// Call callbacks.
if (upb_issubmsgtype(f->type)) {
if (f->type == UPB_TYPE(MESSAGE)) {
| mov rsi, PTR
| sub rsi, DECODER->buf
| add esi, ARG3_32 // = (d->ptr - d->buf) + delim_len
| add rsi, ARG3_64 // = (d->ptr - d->buf) + delim_len
} else {
assert(f->type == UPB_TYPE(GROUP));
| mov esi, UPB_NONDELIMITED
| mov rsi, UPB_NONDELIMITED
}
| pushframe f, esi, false
| pushframe f, rsi, false
// Call startsubmsg handler (if any).
if (f->startsubmsg) {
@ -456,15 +461,11 @@ static void upb_decoder_jit_callcb(upb_decoder *d, upb_fhandlers *f) {
| mov CLOSURE, rdx
}
| mov qword FRAME->closure, CLOSURE
// TODO: Handle UPB_SKIPSUBMSG, UPB_BREAK
| mov DECODER->ptr, PTR
const upb_mhandlers *sub_m = upb_fhandlers_getsubmsg(f);
if (sub_m->jit_parent_field_done_pclabel != UPB_MULTIPLE) {
| jmp =>sub_m->jit_startmsg_pclabel;
} else {
| call =>sub_m->jit_startmsg_pclabel;
}
|=>f->jit_submsg_done_pclabel:
// Call endsubmsg handler (if any).
if (f->endsubmsg) {
@ -474,6 +475,8 @@ static void upb_decoder_jit_callcb(upb_decoder *d, upb_fhandlers *f) {
| callp f->endsubmsg
}
| popframe upb_fhandlers_getmsg(f)
// TODO: Handle UPB_SKIPSUBMSG, UPB_BREAK
| mov DECODER->ptr, PTR
} else {
| mov ARG1_64, CLOSURE
// Test for callbacks we can specialize.
@ -499,15 +502,15 @@ static void upb_decoder_jit_callcb(upb_decoder *d, upb_fhandlers *f) {
f->value == &upb_stdmsg_setuint64_r ||
f->value == &upb_stdmsg_setptr_r ||
f->value == &upb_stdmsg_setdouble_r) {
upb_decoder_jit_doappend(d, 8, f);
upb_decoderplan_jit_doappend(plan, 8, f);
} else if (f->value == &upb_stdmsg_setint32_r ||
f->value == &upb_stdmsg_setuint32_r ||
f->value == &upb_stdmsg_setfloat_r) {
upb_decoder_jit_doappend(d, 4, f);
upb_decoderplan_jit_doappend(plan, 4, f);
} else if (f->value == &upb_stdmsg_setbool_r) {
upb_decoder_jit_doappend(d, 1, f);
upb_decoderplan_jit_doappend(plan, 1, f);
#endif
} else {
} else if (f->value) {
// Load closure and fval into arg registers.
||#ifndef NDEBUG
||// Since upb_value carries type information in debug mode
@ -519,13 +522,14 @@ static void upb_decoder_jit_callcb(upb_decoder *d, upb_fhandlers *f) {
| callp f->value
}
| sethas CLOSURE, f->valuehasbit
}
// TODO: Handle UPB_SKIPSUBMSG, UPB_BREAK
| mov DECODER->ptr, PTR
}
}
// PTR should point to the beginning of the tag.
static void upb_decoder_jit_field(upb_decoder *d, uint32_t tag,
uint32_t next_tag, upb_mhandlers *m,
static void upb_decoderplan_jit_field(upb_decoderplan *plan, uint64_t tag,
uint64_t next_tag, upb_mhandlers *m,
upb_fhandlers *f, upb_fhandlers *next_f) {
// PC-label for the dispatch table.
// We check the wire type (which must be loaded in edx) because the
@ -535,8 +539,8 @@ static void upb_decoder_jit_field(upb_decoder *d, uint32_t tag,
| jne ->exit_jit // In the future: could be an unknown field or packed.
|=>f->jit_pclabel_notypecheck:
if (f->repeated) {
| mov esi, FRAME->end_ofs
| pushframe f, esi, true
| mov rsi, FRAME->end_ofs
| pushframe f, rsi, true
if (f->startseq) {
| mov ARG1_64, CLOSURE
| loadfval f
@ -555,8 +559,8 @@ static void upb_decoder_jit_field(upb_decoder *d, uint32_t tag,
return;
}
upb_decoder_jit_decodefield(d, m, f->type, tag_size);
upb_decoder_jit_callcb(d, f);
upb_decoderplan_jit_decodefield(plan, m, f->type, tag_size);
upb_decoderplan_jit_callcb(plan, f);
// Epilogue: load next tag, check for repeated field.
| check_eob m
@ -586,13 +590,11 @@ static int upb_compare_uint32(const void *a, const void *b) {
return *(uint32_t*)a - *(uint32_t*)b;
}
static void upb_decoder_jit_msg(upb_decoder *d, upb_mhandlers *m) {
static void upb_decoderplan_jit_msg(upb_decoderplan *plan, upb_mhandlers *m) {
|=>m->jit_startmsg_pclabel:
if (m->jit_parent_field_done_pclabel == UPB_MULTIPLE) {
// There was a call to get here, so we need to align the stack.
| sub rsp, 8
}
// Call startmsg handler (if any):
if (m->startmsg) {
// upb_flow_t startmsg(void *closure);
@ -615,23 +617,30 @@ static void upb_decoder_jit_msg(upb_decoder *d, upb_mhandlers *m) {
int num_keys = upb_inttable_count(&m->fieldtab);
uint32_t *keys = malloc(num_keys * sizeof(*keys));
int idx = 0;
for(upb_inttable_iter i = upb_inttable_begin(&m->fieldtab); !upb_inttable_done(i);
for(upb_inttable_iter i = upb_inttable_begin(&m->fieldtab);
!upb_inttable_done(i);
i = upb_inttable_next(&m->fieldtab, i)) {
keys[idx++] = upb_inttable_iter_key(i);
}
qsort(keys, num_keys, sizeof(uint32_t), &upb_compare_uint32);
upb_fhandlers *last_f = NULL;
uint32_t last_tag = 0;
uint64_t last_encoded_tag = 0;
for(int i = 0; i < num_keys; i++) {
uint32_t key = keys[i];
upb_fhandlers *f = upb_inttable_lookup(&m->fieldtab, key);
uint32_t tag = upb_vencode32(key);
if (last_f) upb_decoder_jit_field(d, last_tag, tag, m, last_f, f);
last_tag = tag;
uint32_t fieldnum = keys[i];
upb_itofhandlers_ent *e = upb_inttable_lookup(&m->fieldtab, fieldnum);
upb_fhandlers *f = e->f;
assert(f->number == fieldnum);
uint32_t tag = (f->number << 3) | upb_types[f->type].native_wire_type;
uint64_t encoded_tag = upb_vencode32(tag);
// No tag should be greater than 5 bytes.
assert(encoded_tag <= 0xffffffffff);
if (last_f) upb_decoderplan_jit_field(
plan, last_encoded_tag, encoded_tag, m, last_f, f);
last_encoded_tag = encoded_tag;
last_f = f;
}
upb_decoder_jit_field(d, last_tag, 0, m, last_f, NULL);
upb_decoderplan_jit_field(plan, last_encoded_tag, 0, m, last_f, NULL);
free(keys);
@ -655,22 +664,29 @@ static void upb_decoder_jit_msg(upb_decoder *d, upb_mhandlers *m) {
| callp m->endmsg
}
if (m->jit_parent_field_done_pclabel == UPB_MULTIPLE) {
if (m->is_group) {
// Advance past the "end group" tag.
// TODO: Handle UPB_BREAK
| mov DECODER->ptr, PTR
}
// Counter previous alignment.
| add rsp, 8
| ret
} else if (m->jit_parent_field_done_pclabel == UPB_TOPLEVEL_ONE) {
| jmp ->exit_jit
} else {
| jmp =>m->jit_parent_field_done_pclabel
}
}
static const char *dbgfmt =
"JIT encountered unknown field! wt=%d, fn=%d\n";
static void upb_decoder_jit(upb_decoder *d) {
static void upb_decoderplan_jit(upb_decoderplan *plan) {
// The JIT prologue/epilogue trampoline that is generated in this function
// does not depend on the handlers, so it will never vary. Ideally we would
// put it in an object file and just link it into upb so we could have only a
// single copy of it instead of one copy for each decoderplan. But our
// options for doing that are undesirable: GCC inline assembly is
// complicated, not portable to other compilers, and comes with subtle
// caveats about incorrect things that the optimizer might do if you, e.g.,
// execute non-local jumps. Putting this code in a .s file would force us to
// calculate the structure offsets ourselves instead of symbolically
// (ie. [r15 + 0xcd] instead of DECODER->ptr). So we tolerate a bit of
// unnecessary duplication/redundancy.
| push rbp
| mov rbp, rsp
| push r15
@ -686,18 +702,14 @@ static void upb_decoder_jit(upb_decoder *d) {
| mov CLOSURE, FRAME->closure
| mov PTR, DECODER->ptr
upb_handlers *h = d->dispatcher.handlers;
if (h->msgs[0]->jit_parent_field_done_pclabel == UPB_MULTIPLE) {
| call =>h->msgs[0]->jit_startmsg_pclabel
| jmp ->exit_jit
}
// TODO: push return addresses for re-entry (will be necessary for multiple
// buffer support).
for (int i = 0; i < h->msgs_len; i++) upb_decoder_jit_msg(d, h->msgs[i]);
| call ARG2_64
|->exit_jit:
| mov DECODER->ptr, PTR
// Restore stack pointer to where it was before any "call" instructions
// inside our generated code.
| lea rsp, [rbp - 48]
// Counter previous alignment.
| add rsp, 8
| pop rbx
@ -707,122 +719,128 @@ static void upb_decoder_jit(upb_decoder *d) {
| pop r15
| leave
| ret
|=>0:
| mov rdi, stderr
| mov rsi, dbgfmt
| callp fprintf
| callp abort
upb_handlers *h = plan->handlers;
for (int i = 0; i < h->msgs_len; i++)
upb_decoderplan_jit_msg(plan, h->msgs[i]);
}
void upb_decoder_jit_assignfieldlabs(upb_fhandlers *f,
static void upb_decoderplan_jit_assignfieldlabs(upb_fhandlers *f,
uint32_t *pclabel_count) {
f->jit_pclabel = (*pclabel_count)++;
f->jit_pclabel_notypecheck = (*pclabel_count)++;
f->jit_submsg_done_pclabel = (*pclabel_count)++;
}
void upb_decoder_jit_assignmsglabs(upb_mhandlers *m, uint32_t *pclabel_count) {
static void upb_decoderplan_jit_assignmsglabs(upb_mhandlers *m,
uint32_t *pclabel_count) {
m->jit_startmsg_pclabel = (*pclabel_count)++;
m->jit_endofbuf_pclabel = (*pclabel_count)++;
m->jit_endofmsg_pclabel = (*pclabel_count)++;
m->jit_dyndispatch_pclabel = (*pclabel_count)++;
m->jit_unknownfield_pclabel = (*pclabel_count)++;
m->jit_parent_field_done_pclabel = UPB_NONE;
m->max_field_number = 0;
upb_inttable_iter i;
for(i = upb_inttable_begin(&m->fieldtab); !upb_inttable_done(i);
i = upb_inttable_next(&m->fieldtab, i)) {
uint32_t key = upb_inttable_iter_key(i);
m->max_field_number = UPB_MAX(m->max_field_number, key);
upb_fhandlers *f = upb_inttable_iter_value(i);
upb_decoder_jit_assignfieldlabs(f, pclabel_count);
upb_itofhandlers_ent *e = upb_inttable_iter_value(i);
upb_decoderplan_jit_assignfieldlabs(e->f, pclabel_count);
}
// XXX: Won't work for large field numbers; will need to use a upb_table.
// TODO: support large field numbers by either using a hash table or
// generating code for a binary search. For now large field numbers
// will just fall back to the table decoder.
m->max_field_number = UPB_MIN(m->max_field_number, 16000);
m->tablearray = malloc((m->max_field_number + 1) * sizeof(void*));
}
// Second pass: for messages that have only one parent, link them to the field
// from which they are called.
void upb_decoder_jit_assignmsglabs2(upb_mhandlers *m) {
upb_inttable_iter i;
for(i = upb_inttable_begin(&m->fieldtab); !upb_inttable_done(i);
i = upb_inttable_next(&m->fieldtab, i)) {
upb_fhandlers *f = upb_inttable_iter_value(i);
if (upb_issubmsgtype(f->type)) {
upb_mhandlers *sub_m = upb_fhandlers_getsubmsg(f);
if (sub_m->jit_parent_field_done_pclabel == UPB_NONE) {
sub_m->jit_parent_field_done_pclabel = f->jit_submsg_done_pclabel;
} else {
sub_m->jit_parent_field_done_pclabel = UPB_MULTIPLE;
}
}
}
}
void upb_decoder_makejit(upb_decoder *d) {
d->debug_info = NULL;
static void upb_decoderplan_makejit(upb_decoderplan *plan) {
plan->debug_info = NULL;
// Assign pclabels.
uint32_t pclabel_count = 1;
upb_handlers *h = d->dispatcher.handlers;
for (int i = 0; i < h->msgs_len; i++)
upb_decoder_jit_assignmsglabs(h->msgs[i], &pclabel_count);
uint32_t pclabel_count = 0;
upb_handlers *h = plan->handlers;
for (int i = 0; i < h->msgs_len; i++)
upb_decoder_jit_assignmsglabs2(h->msgs[i]);
if (h->msgs[0]->jit_parent_field_done_pclabel == UPB_NONE) {
h->msgs[0]->jit_parent_field_done_pclabel = UPB_TOPLEVEL_ONE;
}
upb_decoderplan_jit_assignmsglabs(h->msgs[i], &pclabel_count);
void **globals = malloc(UPB_JIT_GLOBAL__MAX * sizeof(*globals));
dasm_init(d, 1);
dasm_setupglobal(d, globals, UPB_JIT_GLOBAL__MAX);
dasm_growpc(d, pclabel_count);
dasm_setup(d, upb_jit_actionlist);
dasm_init(plan, 1);
dasm_setupglobal(plan, globals, UPB_JIT_GLOBAL__MAX);
dasm_growpc(plan, pclabel_count);
dasm_setup(plan, upb_jit_actionlist);
upb_decoder_jit(d);
upb_decoderplan_jit(plan);
dasm_link(d, &d->jit_size);
int dasm_status = dasm_link(plan, &plan->jit_size);
(void)dasm_status;
assert(dasm_status == DASM_S_OK);
d->jit_code = mmap(NULL, d->jit_size, PROT_READ | PROT_WRITE,
plan->jit_code = mmap(NULL, plan->jit_size, PROT_READ | PROT_WRITE,
MAP_32BIT | MAP_ANONYMOUS | MAP_PRIVATE, 0, 0);
upb_reg_jit_gdb(d);
upb_reg_jit_gdb(plan);
dasm_encode(d, d->jit_code);
dasm_encode(plan, plan->jit_code);
// Create dispatch tables.
for (int i = 0; i < h->msgs_len; i++) {
upb_mhandlers *m = h->msgs[i];
m->jit_func =
plan->jit_code + dasm_getpclabel(plan, m->jit_startmsg_pclabel);
for (uint32_t j = 0; j <= m->max_field_number; j++) {
upb_fhandlers *f = NULL;
for (int k = 0; k < 8; k++) {
f = upb_inttable_lookup(&m->fieldtab, (j << 3) | k);
if (f) break;
}
upb_itofhandlers_ent *e = upb_inttable_lookup(&m->fieldtab, j);
upb_fhandlers *f = e ? e->f : NULL;
if (f) {
m->tablearray[j] = d->jit_code + dasm_getpclabel(d, f->jit_pclabel);
m->tablearray[j] =
plan->jit_code + dasm_getpclabel(plan, f->jit_pclabel);
} else {
// Don't handle unknown fields yet.
m->tablearray[j] = d->jit_code + dasm_getpclabel(d, 0);
// TODO: extend the JIT to handle unknown fields.
// For the moment we exit the JIT for any unknown field.
m->tablearray[j] = globals[UPB_JIT_GLOBAL_exit_jit];
}
}
}
dasm_free(d);
dasm_free(plan);
free(globals);
mprotect(d->jit_code, d->jit_size, PROT_EXEC | PROT_READ);
mprotect(plan->jit_code, plan->jit_size, PROT_EXEC | PROT_READ);
// View with: objdump -M intel -D -b binary -mi386 -Mx86-64 /tmp/machine-code
// Or: ndisasm -b 64 /tmp/machine-code
FILE *f = fopen("/tmp/machine-code", "wb");
fwrite(d->jit_code, d->jit_size, 1, f);
fwrite(plan->jit_code, plan->jit_size, 1, f);
fclose(f);
}
void upb_decoder_freejit(upb_decoder *d) {
munmap(d->jit_code, d->jit_size);
free(d->debug_info);
// Releases the JIT resources held by a decoder plan: unmaps the
// plan->jit_size bytes of generated code at plan->jit_code and frees
// plan->debug_info.
static void upb_decoderplan_freejit(upb_decoderplan *plan) {
munmap(plan->jit_code, plan->jit_size);
free(plan->debug_info);
// TODO: unregister
// NOTE(review): presumably this refers to undoing the registration made by
// upb_reg_jit_gdb() when the code was generated -- confirm.
}
// Runs the plan's JIT-generated decoder on the decoder's current input, when
// that is possible.  The JIT is entered only if all of the following hold:
//   - the plan has generated code (d->plan->jit_code is non-NULL),
//   - the dispatcher is at the bottom of its stack (top == stack),
//   - d->ptr is non-NULL and lies below d->jit_end.
// Otherwise this function does nothing.
static void upb_decoder_enterjit(upb_decoder *d) {
if (d->plan->jit_code &&
d->dispatcher.top == d->dispatcher.stack &&
d->ptr && d->ptr < d->jit_end) {
#ifndef NDEBUG
// Debug builds only: place known sentinel values in these registers so the
// asserts after the call can check that the generated code restored them.
register uint64_t rbx asm ("rbx") = 11;
register uint64_t r12 asm ("r12") = 12;
register uint64_t r13 asm ("r13") = 13;
register uint64_t r14 asm ("r14") = 14;
register uint64_t r15 asm ("r15") = 15;
#endif
// Decodes as many fields as possible, updating d->ptr appropriately,
// before falling through to the slow(er) path.
// The start of jit_code is called as a function; its second argument is the
// jit_func of the message handlers selected by d->msg_offset.
void (*upb_jit_decode)(upb_decoder *d, void*) = (void*)d->plan->jit_code;
upb_jit_decode(d, d->plan->handlers->msgs[d->msg_offset]->jit_func);
assert(d->ptr <= d->end);
// Test that callee-save registers were properly restored.
// (These asserts compile away under NDEBUG, where the register variables
// above are not declared.)
assert(rbx == 11);
assert(r12 == 12);
assert(r13 == 13);
assert(r14 == 14);
assert(r15 == 15);
}
}

@ -12,8 +12,8 @@
#include "upb/pb/glue.h"
#include "upb/pb/textprinter.h"
void upb_strtomsg(const char *str, size_t len, void *msg, const upb_msgdef *md,
upb_status *status) {
bool upb_strtomsg(const char *str, size_t len, void *msg, const upb_msgdef *md,
bool allow_jit, upb_status *status) {
upb_stringsrc strsrc;
upb_stringsrc_init(&strsrc);
upb_stringsrc_reset(&strsrc, str, len);
@ -21,13 +21,21 @@ void upb_strtomsg(const char *str, size_t len, void *msg, const upb_msgdef *md,
upb_decoder d;
upb_handlers *h = upb_handlers_new();
upb_accessors_reghandlers(h, md);
upb_decoder_init(&d, h);
upb_decoderplan *p = upb_decoderplan_new(h, allow_jit);
upb_decoder_init(&d);
upb_handlers_unref(h);
upb_decoder_reset(&d, upb_stringsrc_allbytes(&strsrc), msg);
upb_decoder_decode(&d, status);
upb_decoder_resetplan(&d, p, 0);
upb_decoder_resetinput(&d, upb_stringsrc_allbytes(&strsrc), msg);
upb_success_t ret = upb_decoder_decode(&d);
// stringsrc and the handlers registered by upb_accessors_reghandlers()
// should not suspend.
assert((ret == UPB_OK) == upb_ok(upb_decoder_status(&d)));
if (status) upb_status_copy(status, upb_decoder_status(&d));
upb_stringsrc_uninit(&strsrc);
upb_decoder_uninit(&d);
upb_decoderplan_unref(p);
return ret == UPB_OK;
}
void *upb_filetonewmsg(const char *fname, const upb_msgdef *md, upb_status *s) {
@ -35,7 +43,7 @@ void *upb_filetonewmsg(const char *fname, const upb_msgdef *md, upb_status *s) {
size_t len;
char *data = upb_readfile(fname, &len);
if (!data) goto err;
upb_strtomsg(data, len, msg, md, s);
upb_strtomsg(data, len, msg, md, false, s);
if (!upb_ok(s)) goto err;
return msg;
@ -69,7 +77,6 @@ void upb_msgtotext(upb_string *str, upb_msg *msg, upb_msgdef *md,
}
#endif
// TODO: read->load.
upb_def **upb_load_defs_from_descriptor(const char *str, size_t len, int *n,
upb_status *status) {
upb_stringsrc strsrc;
@ -79,17 +86,21 @@ upb_def **upb_load_defs_from_descriptor(const char *str, size_t len, int *n,
upb_handlers *h = upb_handlers_new();
upb_descreader_reghandlers(h);
upb_decoderplan *p = upb_decoderplan_new(h, false);
upb_decoder d;
upb_decoder_init(&d, h);
upb_decoder_init(&d);
upb_handlers_unref(h);
upb_descreader r;
upb_descreader_init(&r);
upb_decoder_reset(&d, upb_stringsrc_allbytes(&strsrc), &r);
upb_decoder_resetplan(&d, p, 0);
upb_decoder_resetinput(&d, upb_stringsrc_allbytes(&strsrc), &r);
upb_decoder_decode(&d, status);
upb_success_t ret = upb_decoder_decode(&d);
if (status) upb_status_copy(status, upb_decoder_status(&d));
upb_stringsrc_uninit(&strsrc);
upb_decoder_uninit(&d);
if (!upb_ok(status)) {
upb_decoderplan_unref(p);
if (ret != UPB_OK) {
upb_descreader_uninit(&r);
return NULL;
}

@ -36,8 +36,8 @@ extern "C" {
// Decodes the given string, which must be in protobuf binary format, to the
// given upb_msg with msgdef "md", storing the status of the operation in "s".
void upb_strtomsg(const char *str, size_t len, void *msg,
const upb_msgdef *md, upb_status *s);
bool upb_strtomsg(const char *str, size_t len, void *msg,
const upb_msgdef *md, bool allow_jit, upb_status *s);
// Parses the given file into a new message of the given type. Caller owns
// the returned message (or NULL if an error occurred).

@ -19,6 +19,18 @@
extern "C" {
#endif
// The maximum number of bytes that it takes to encode a 64-bit varint.
// Note that with a better encoding this could be 9 (TODO: write up a
// wiki document about this).
#define UPB_PB_VARINT_MAX_LEN 10
/* Zig-zag encoding/decoding **************************************************/
// Zig-zag decodes a 32-bit value: the low bit selects the sign and the
// remaining bits carry the magnitude.
INLINE int32_t upb_zzdec_32(uint32_t n) {
  // All-ones when the low bit is set, zero otherwise.
  uint32_t sign_mask = -(int32_t)(n & 1);
  uint32_t magnitude = n >> 1;
  return magnitude ^ sign_mask;
}
// Zig-zag decodes a 64-bit value: the low bit selects the sign and the
// remaining bits carry the magnitude.
INLINE int64_t upb_zzdec_64(uint64_t n) {
  // All-ones when the low bit is set, zero otherwise.
  uint64_t sign_mask = -(int64_t)(n & 1);
  uint64_t magnitude = n >> 1;
  return magnitude ^ sign_mask;
}
// Zig-zag encodes a signed 32-bit value into an unsigned one.
INLINE uint32_t upb_zzenc_32(int32_t n) {
  // The left shift is done on the unsigned representation: left-shifting a
  // negative (or overflowing) signed value is undefined behavior in C.
  // The right shift still relies on ">>" of a negative value being an
  // arithmetic shift (implementation-defined), as the original code did.
  return ((uint32_t)n << 1) ^ (uint32_t)(n >> 31);
}
// Zig-zag encodes a signed 64-bit value into an unsigned one.
INLINE uint64_t upb_zzenc_64(int64_t n) {
  // The left shift is done on the unsigned representation: left-shifting a
  // negative (or overflowing) signed value is undefined behavior in C.
  // The right shift still relies on ">>" of a negative value being an
  // arithmetic shift (implementation-defined), as the original code did.
  return ((uint64_t)n << 1) ^ (uint64_t)(n >> 63);
}
/* Decoding *******************************************************************/
// All decoding functions return this struct by value.
@ -56,7 +68,7 @@ done:
INLINE upb_decoderet upb_vdecode_branch64(const char *p) {
uint64_t val;
uint64_t b;
upb_decoderet r = {(void*)0, 0};
upb_decoderet r = {NULL, 0};
b = *(p++); val = (b & 0x7f) ; if(!(b & 0x80)) goto done;
b = *(p++); val |= (b & 0x7f) << 7; if(!(b & 0x80)) goto done;
b = *(p++); val |= (b & 0x7f) << 14; if(!(b & 0x80)) goto done;
@ -124,17 +136,33 @@ INLINE int upb_value_size(uint64_t val) {
return val == 0 ? 1 : high_bit / 8 + 1;
}
// Writes the varint encoding of "val" into "buf" and returns the number of
// bytes written.  "buf" must have room for at least UPB_PB_VARINT_MAX_LEN
// bytes.
//
// TODO: benchmark and optimize if necessary.
INLINE size_t upb_vencode64(uint64_t val, char *buf) {
  size_t len = 0;
  // do/while, so that zero still produces its single 0x00 byte.
  do {
    uint8_t low7 = val & 0x7f;
    val >>= 7;
    // The continuation bit is set on every byte except the last one.
    buf[len++] = val ? (low7 | 0x80) : low7;
  } while (val);
  return len;
}
// Encodes a 32-bit varint, *not* sign-extended.
INLINE uint64_t upb_vencode32(uint32_t val) {
char buf[UPB_PB_VARINT_MAX_LEN];
size_t bytes = upb_vencode64(val, buf);
uint64_t ret = 0;
for (int bitpos = 0; val; bitpos+=8, val >>=7) {
if (bitpos > 0) ret |= (1 << (bitpos-1));
ret |= (val & 0x7f) << bitpos;
}
assert(bytes <= 5);
memcpy(&ret, buf, bytes);
assert(ret <= 0xffffffffff);
return ret;
}
#ifdef __cplusplus
} /* extern "C" */
#endif

@ -127,6 +127,8 @@ INLINE bool _upb_inttable_isarrkey(const upb_inttable *t, uint32_t k) {
// We have the caller specify the entry_size because fixing this as a literal
// (instead of reading table->entry_size) gives the compiler more ability to
// optimize.
//
// Note: All returned pointers are invalidated by inserts!
INLINE void *_upb_inttable_fastlookup(const upb_inttable *t, uint32_t key,
size_t entry_size, size_t value_size) {
upb_inttable_value *arrval =
@ -203,8 +205,11 @@ typedef struct {
} upb_inttable_iter;
upb_inttable_iter upb_inttable_begin(const upb_inttable *t);
upb_inttable_iter upb_inttable_next(const upb_inttable *t, upb_inttable_iter iter);
INLINE bool upb_inttable_done(upb_inttable_iter iter) { return iter.value == NULL; }
upb_inttable_iter upb_inttable_next(const upb_inttable *t,
upb_inttable_iter iter);
INLINE bool upb_inttable_done(upb_inttable_iter iter) {
return iter.value == NULL;
}
INLINE uint32_t upb_inttable_iter_key(upb_inttable_iter iter) {
return iter.key;
}

@ -15,29 +15,32 @@
#include "upb/bytestream.h"
#define alignof(t) offsetof(struct { char c; t x; }, x)
#define TYPE_INFO(wire_type, ctype, inmemory_type) \
{alignof(ctype), sizeof(ctype), wire_type, UPB_TYPE(inmemory_type), #ctype},
#define TYPE_INFO(wire_type, ctype, inmemory_type, is_numeric) \
{alignof(ctype), sizeof(ctype), wire_type, UPB_TYPE(inmemory_type), \
#ctype, is_numeric},
const upb_type_info upb_types[] = {
TYPE_INFO(UPB_WIRE_TYPE_END_GROUP, void*, MESSAGE) // ENDGROUP (fake)
TYPE_INFO(UPB_WIRE_TYPE_64BIT, double, DOUBLE) // DOUBLE
TYPE_INFO(UPB_WIRE_TYPE_32BIT, float, FLOAT) // FLOAT
TYPE_INFO(UPB_WIRE_TYPE_VARINT, int64_t, INT64) // INT64
TYPE_INFO(UPB_WIRE_TYPE_VARINT, uint64_t, UINT64) // UINT64
TYPE_INFO(UPB_WIRE_TYPE_VARINT, int32_t, INT32) // INT32
TYPE_INFO(UPB_WIRE_TYPE_64BIT, uint64_t, UINT64) // FIXED64
TYPE_INFO(UPB_WIRE_TYPE_32BIT, uint32_t, UINT32) // FIXED32
TYPE_INFO(UPB_WIRE_TYPE_VARINT, bool, BOOL) // BOOL
TYPE_INFO(UPB_WIRE_TYPE_DELIMITED, void*, STRING) // STRING
TYPE_INFO(UPB_WIRE_TYPE_START_GROUP, void*, MESSAGE) // GROUP
TYPE_INFO(UPB_WIRE_TYPE_DELIMITED, void*, MESSAGE) // MESSAGE
TYPE_INFO(UPB_WIRE_TYPE_DELIMITED, void*, STRING) // BYTES
TYPE_INFO(UPB_WIRE_TYPE_VARINT, uint32_t, UINT32) // UINT32
TYPE_INFO(UPB_WIRE_TYPE_VARINT, uint32_t, INT32) // ENUM
TYPE_INFO(UPB_WIRE_TYPE_32BIT, int32_t, INT32) // SFIXED32
TYPE_INFO(UPB_WIRE_TYPE_64BIT, int64_t, INT64) // SFIXED64
TYPE_INFO(UPB_WIRE_TYPE_VARINT, int32_t, INT32) // SINT32
TYPE_INFO(UPB_WIRE_TYPE_VARINT, int64_t, INT64) // SINT64
// END_GROUP is not real, but used to signify the pseudo-field that
// ends a group from within the group.
TYPE_INFO(UPB_WIRE_TYPE_END_GROUP, void*, MESSAGE, false) // ENDGROUP
TYPE_INFO(UPB_WIRE_TYPE_64BIT, double, DOUBLE, true) // DOUBLE
TYPE_INFO(UPB_WIRE_TYPE_32BIT, float, FLOAT, true) // FLOAT
TYPE_INFO(UPB_WIRE_TYPE_VARINT, int64_t, INT64, true) // INT64
TYPE_INFO(UPB_WIRE_TYPE_VARINT, uint64_t, UINT64, true) // UINT64
TYPE_INFO(UPB_WIRE_TYPE_VARINT, int32_t, INT32, true) // INT32
TYPE_INFO(UPB_WIRE_TYPE_64BIT, uint64_t, UINT64, true) // FIXED64
TYPE_INFO(UPB_WIRE_TYPE_32BIT, uint32_t, UINT32, true) // FIXED32
TYPE_INFO(UPB_WIRE_TYPE_VARINT, bool, BOOL, true) // BOOL
TYPE_INFO(UPB_WIRE_TYPE_DELIMITED, void*, STRING, false) // STRING
TYPE_INFO(UPB_WIRE_TYPE_START_GROUP, void*, MESSAGE, false) // GROUP
TYPE_INFO(UPB_WIRE_TYPE_DELIMITED, void*, MESSAGE, false) // MESSAGE
TYPE_INFO(UPB_WIRE_TYPE_DELIMITED, void*, STRING, false) // BYTES
TYPE_INFO(UPB_WIRE_TYPE_VARINT, uint32_t, UINT32, true) // UINT32
TYPE_INFO(UPB_WIRE_TYPE_VARINT, uint32_t, INT32, true) // ENUM
TYPE_INFO(UPB_WIRE_TYPE_32BIT, int32_t, INT32, true) // SFIXED32
TYPE_INFO(UPB_WIRE_TYPE_64BIT, int64_t, INT64, true) // SFIXED64
TYPE_INFO(UPB_WIRE_TYPE_VARINT, int32_t, INT32, true) // SINT32
TYPE_INFO(UPB_WIRE_TYPE_VARINT, int64_t, INT64, true) // SINT64
};
#ifdef NDEBUG
@ -66,13 +69,13 @@ void upb_status_seterrf(upb_status *s, const char *msg, ...) {
}
void upb_status_seterrliteral(upb_status *status, const char *msg) {
status->code = UPB_ERROR;
status->error = true;
status->str = msg;
status->space = NULL;
}
void upb_status_copy(upb_status *to, const upb_status *from) {
to->status = from->status;
to->error = from->error;
to->eof = from->eof;
to->code = from->code;
to->space = from->space;
@ -92,15 +95,20 @@ const char *upb_status_getstr(const upb_status *_status) {
// Function is logically const but can modify internal state to materialize
// the string.
upb_status *status = (upb_status*)_status;
if (status->str == NULL && status->space && status->space->code_to_string) {
if (status->str == NULL && status->space) {
if (status->space->code_to_string) {
status->space->code_to_string(status->code, status->buf, status->bufsize);
status->str = status->buf;
} else {
upb_status_seterrf(status, "No message, error space=%s, code=%d\n",
status->space->name, status->code);
}
}
return status->str;
}
void upb_status_clear(upb_status *status) {
status->status = UPB_OK;
status->error = false;
status->eof = false;
status->code = 0;
status->space = NULL;
@ -114,19 +122,38 @@ void upb_status_setcode(upb_status *status, upb_errorspace *space, int code) {
}
void upb_status_fromerrno(upb_status *status) {
if (errno == 0) {
status->status = UPB_OK;
} else if (errno == EAGAIN || errno == EWOULDBLOCK) {
status->status = UPB_WOULDBLOCK;
} else {
status->status = UPB_ERROR;
}
if (errno != 0 && !upb_errno_is_wouldblock()) {
status->error = true;
upb_status_setcode(status, &upb_posix_errorspace, errno);
}
}
// Returns true if the current value of errno is one of the "operation would
// block" codes (EAGAIN or EWOULDBLOCK).  Each code is only tested on
// platforms whose <errno.h> defines it.
bool upb_errno_is_wouldblock(void) {
#ifdef EAGAIN
  if (errno == EAGAIN) return true;
#endif
#ifdef EWOULDBLOCK
  if (errno == EWOULDBLOCK) return true;
#endif
  return false;
}
// code_to_string callback for the POSIX error space: writes a message for
// the errno value "code" into "buf" (capacity "len" bytes).  Returns true if
// a complete message was written and false if it did not fit.
bool upb_posix_codetostr(int code, char *buf, size_t len) {
  // NOTE(review): this tests for the "-1 and errno" failure convention only.
  // Some strerror_r() implementations report failure by returning a positive
  // error number (and the GNU variant returns a char*) -- confirm which
  // variant this file is built against.
  if (strerror_r(code, buf, len) == -1) {
    if (errno == EINVAL) {
      // snprintf() returns the length the full message needs; the message
      // fit only if that is strictly less than the buffer size.
      int needed = snprintf(buf, len, "Invalid POSIX error number %d\n", code);
      return needed >= 0 && (size_t)needed < len;
    } else if (errno == ERANGE) {
      // Buffer too small for the message.
      return false;
    }
    assert(false);
  }
  return true;
}
upb_errorspace upb_posix_errorspace = {"POSIX", NULL}; // TODO
upb_errorspace upb_posix_errorspace = {"POSIX", &upb_posix_codetostr};
int upb_vrprintf(char **buf, uint32_t *size, uint32_t ofs,
int upb_vrprintf(char **buf, size_t *size, size_t ofs,
const char *fmt, va_list args) {
// Try once without reallocating. We have to va_copy because we might have
// to call vsnprintf again.
@ -141,7 +168,7 @@ int upb_vrprintf(char **buf, uint32_t *size, uint32_t ofs,
// Need to print again, because some characters were truncated. vsnprintf
// will not write the entire string unless you give it space to store the
// NULL terminator also.
while (*size < (ofs + true_len + 1)) *size = UPB_MAX(*size * 2, 2);
*size = (ofs + true_len + 1);
char *newbuf = realloc(*buf, *size);
if (!newbuf) return -1;
vsnprintf(newbuf + ofs, true_len + 1, fmt, args);

@ -10,10 +10,12 @@
#ifndef UPB_H_
#define UPB_H_
#include <stdbool.h>
#include <stdint.h>
#include <assert.h>
#include <stdarg.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <string.h>
#include "descriptor_const.h"
#include "atomic.h"
@ -26,6 +28,12 @@ extern "C" {
#define INLINE static inline
#endif
#ifdef __GNUC__
#define UPB_NORETURN __attribute__((__noreturn__))
#else
#define UPB_NORETURN
#endif
#define UPB_MAX(x, y) ((x) > (y) ? (x) : (y))
#define UPB_MIN(x, y) ((x) < (y) ? (x) : (y))
#define UPB_INDEX(base, i, m) (void*)((char*)(base) + ((i)*(m)))
@ -115,6 +123,7 @@ typedef struct {
uint8_t native_wire_type;
uint8_t inmemory_type; // For example, INT32, SINT32, and SFIXED32 -> INT32
const char *ctype;
bool is_numeric; // Only numeric types can be packed.
} upb_type_info;
// A static array of info about all of the field types, indexed by type number.
@ -176,6 +185,7 @@ typedef struct {
return val.val.membername; \
} \
INLINE void upb_value_set ## name(upb_value *val, ctype cval) { \
memset(val, 0, sizeof(*val)); \
SET_TYPE(val->type, proto_type); \
val->val.membername = cval; \
} \
@ -206,27 +216,31 @@ extern upb_value UPB_NO_VALUE;
/* upb_status *****************************************************************/
enum {
typedef enum {
UPB_OK, // The operation completed successfully.
UPB_WOULDBLOCK, // Stream is nonblocking and the operation would block.
UPB_SUSPENDED, // The operation was suspended and may be resumed later.
UPB_ERROR, // An error occurred.
};
} upb_success_t;
typedef struct {
const char *name;
// Writes a NULL-terminated string to "buf" containing an error message for
// the given error code, returning false if the message was too large to fit.
bool (*code_to_string)(int code, char *buf, uint32_t len);
bool (*code_to_string)(int code, char *buf, size_t len);
} upb_errorspace;
typedef struct {
char status;
bool error;
bool eof;
int code; // Can be set to a more specific code (defined by error space).
// Specific status code defined by some error space (optional).
int code;
upb_errorspace *space;
// Error message (optional).
const char *str; // NULL when no message is present. NULL-terminated.
char *buf; // Owned by the status.
uint32_t bufsize;
size_t bufsize;
} upb_status;
#define UPB_STATUS_INIT {UPB_OK, false, 0, NULL, NULL, NULL, 0}
@ -234,7 +248,7 @@ typedef struct {
void upb_status_init(upb_status *status);
void upb_status_uninit(upb_status *status);
INLINE bool upb_ok(const upb_status *status) { return status->code == UPB_OK; }
INLINE bool upb_ok(const upb_status *status) { return !status->error; }
INLINE bool upb_eof(const upb_status *status) { return status->eof; }
void upb_status_clear(upb_status *status);
@ -248,6 +262,7 @@ void upb_status_copy(upb_status *to, const upb_status *from);
extern upb_errorspace upb_posix_errorspace;
void upb_status_fromerrno(upb_status *status);
bool upb_errno_is_wouldblock();
// Like vasprintf (which allocates a string large enough for the result), but
// uses *buf (which can be NULL) as a starting point and reallocates it only if
@ -255,7 +270,7 @@ void upb_status_fromerrno(upb_status *status);
// of the buffer. Starts writing at the given offset into the string; bytes
// preceding this offset are unaffected. Returns the new length of the string,
// or -1 on memory allocation failure.
int upb_vrprintf(char **buf, uint32_t *size, uint32_t ofs,
int upb_vrprintf(char **buf, size_t *size, size_t ofs,
const char *fmt, va_list args);
#ifdef __cplusplus

Loading…
Cancel
Save