Protocol Buffers - Google's data interchange format (grpc依赖)
https://developers.google.com/protocol-buffers/
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
467 lines
15 KiB
467 lines
15 KiB
/* |
|
* upb - a minimalist implementation of protocol buffers. |
|
* |
|
* Copyright (c) 2009-2013 Google Inc. See LICENSE for details. |
|
* Author: Josh Haberman <jhaberman@gmail.com> |
|
* |
|
* upb::pb::Decoder implements a high performance, streaming, resumable decoder |
|
* for the binary protobuf format. |
|
*/ |
|
|
|
#ifndef UPB_DECODER_H_ |
|
#define UPB_DECODER_H_ |
|
|
|
#include "upb/table.int.h" |
|
#include "upb/sink.h" |
|
|
|
#ifdef __cplusplus |
|
namespace upb { |
|
namespace pb { |
|
class CodeCache; |
|
class Decoder; |
|
class DecoderMethod; |
|
class DecoderMethodOptions; |
|
} // namespace pb |
|
} // namespace upb |
|
|
|
typedef upb::pb::CodeCache upb_pbcodecache; |
|
typedef upb::pb::Decoder upb_pbdecoder; |
|
typedef upb::pb::DecoderMethod upb_pbdecodermethod; |
|
typedef upb::pb::DecoderMethodOptions upb_pbdecodermethodopts; |
|
#else |
|
struct upb_pbdecoder; |
|
struct upb_pbdecodermethod; |
|
struct upb_pbdecodermethodopts; |
|
struct upb_pbcodecache; |
|
|
|
typedef struct upb_pbdecoder upb_pbdecoder; |
|
typedef struct upb_pbdecodermethod upb_pbdecodermethod; |
|
typedef struct upb_pbdecodermethodopts upb_pbdecodermethodopts; |
|
typedef struct upb_pbcodecache upb_pbcodecache; |
|
#endif |
|
|
|
// The maximum that any submessages can be nested. Matches proto2's limit. |
|
// This specifies the size of the decoder's statically-sized array and therefore |
|
// setting it high will cause the upb::pb::Decoder object to be larger. |
|
// |
|
// If necessary we can add a runtime-settable property to Decoder that allow |
|
// this to be larger than the compile-time setting, but this would add |
|
// complexity, particularly since we would have to decide how/if to give users |
|
// the ability to set a custom memory allocation function. |
|
#define UPB_DECODER_MAX_NESTING 64 |
|
|
|
// Internal-only struct used by the decoder. |
|
typedef struct { |
|
#ifdef __cplusplus |
|
private: |
|
#endif |
|
// Space optimization note: we store two pointers here that the JIT |
|
// doesn't need at all; the upb_handlers* inside the sink and |
|
// the dispatch table pointer. We can optimze so that the JIT uses |
|
// smaller stack frames than the interpreter. The only thing we need |
|
// to guarantee is that the fallback routines can find end_ofs. |
|
|
|
#ifdef __cplusplus |
|
char sink[sizeof(upb_sink)]; |
|
#else |
|
upb_sink sink; |
|
#endif |
|
// The absolute stream offset of the end-of-frame delimiter. |
|
// Non-delimited frames (groups and non-packed repeated fields) reuse the |
|
// delimiter of their parent, even though the frame may not end there. |
|
// |
|
// NOTE: the JIT stores a slightly different value here for non-top frames. |
|
// It stores the value relative to the end of the enclosed message. But the |
|
// top frame is still stored the same way, which is important for ensuring |
|
// that calls from the JIT into C work correctly. |
|
uint64_t end_ofs; |
|
const uint32_t *base; |
|
|
|
// 0 indicates a length-delimited field. |
|
// A positive number indicates a known group. |
|
// A negative number indicates an unknown group. |
|
int32_t groupnum; |
|
upb_inttable *dispatch; // Not used by the JIT. |
|
} upb_pbdecoder_frame; |
|
|
|
#ifdef __cplusplus |
|
|
|
// The parameters one uses to construct a DecoderMethod. |
|
// TODO(haberman): move allowjit here? Seems more convenient for users. |
|
class upb::pb::DecoderMethodOptions { |
|
public: |
|
// Parameter represents the destination handlers that this method will push |
|
// to. |
|
explicit DecoderMethodOptions(const Handlers* dest_handlers); |
|
|
|
// Should the decoder push submessages to lazy handlers for fields that have |
|
// them? The caller should set this iff the lazy handlers expect data that is |
|
// in protobuf binary format and the caller wishes to lazy parse it. |
|
void set_lazy(bool lazy); |
|
|
|
private: |
|
#else |
|
struct upb_pbdecodermethodopts { |
|
#endif |
|
const upb_handlers *handlers; |
|
bool lazy; |
|
}; |
|
|
|
#ifdef __cplusplus |
|
|
|
// Represents the code to parse a protobuf according to a destination Handlers. |
|
class upb::pb::DecoderMethod /* : public upb::RefCounted */ { |
|
public: |
|
// From upb::ReferenceCounted. |
|
void Ref(const void* owner) const; |
|
void Unref(const void* owner) const; |
|
void DonateRef(const void* from, const void* to) const; |
|
void CheckRef(const void* owner) const; |
|
|
|
// The destination handlers that are statically bound to this method. |
|
// This method is only capable of outputting to a sink that uses these |
|
// handlers. |
|
const Handlers* dest_handlers() const; |
|
|
|
// The input handlers for this decoder method. |
|
const BytesHandler* input_handler() const; |
|
|
|
// Whether this method is native. |
|
bool is_native() const; |
|
|
|
// Convenience method for generating a DecoderMethod without explicitly |
|
// creating a CodeCache. |
|
static reffed_ptr<const DecoderMethod> New(const DecoderMethodOptions& opts); |
|
|
|
private: |
|
UPB_DISALLOW_POD_OPS(DecoderMethod, upb::pb::DecoderMethod); |
|
#else |
|
struct upb_pbdecodermethod { |
|
#endif |
|
upb_refcounted base; |
|
|
|
// While compiling, the base is relative in "ofs", after compiling it is |
|
// absolute in "ptr". |
|
union { |
|
uint32_t ofs; // PC offset of method. |
|
void *ptr; // Pointer to bytecode or machine code for this method. |
|
} code_base; |
|
|
|
// The decoder method group to which this method belongs. We own a ref. |
|
// Owning a ref on the entire group is more coarse-grained than is strictly |
|
// necessary; all we truly require is that methods we directly reference |
|
// outlive us, while the group could contain many other messages we don't |
|
// require. But the group represents the messages that were |
|
// allocated+compiled together, so it makes the most sense to free them |
|
// together also. |
|
const upb_refcounted *group; |
|
|
|
// Whether this method is native code or bytecode. |
|
bool is_native_; |
|
|
|
// The handler one calls to invoke this method. |
|
upb_byteshandler input_handler_; |
|
|
|
// The destination handlers this method is bound to. We own a ref. |
|
const upb_handlers *dest_handlers_; |
|
|
|
// The dispatch table layout is: |
|
// [field number] -> [ 48-bit offset ][ 8-bit wt2 ][ 8-bit wt1 ] |
|
// |
|
// If wt1 matches, jump to the 48-bit offset. If wt2 matches, lookup |
|
// (UPB_MAX_FIELDNUMBER + fieldnum) and jump there. |
|
// |
|
// We need two wire types because of packed/non-packed compatibility. A |
|
// primitive repeated field can use either wire type and be valid. While we |
|
// could key the table on fieldnum+wiretype, the table would be 8x sparser. |
|
// |
|
// Storing two wire types in the primary value allows us to quickly rule out |
|
// the second wire type without needing to do a separate lookup (this case is |
|
// less common than an unknown field). |
|
upb_inttable dispatch; |
|
}; |
|
|
|
#ifdef __cplusplus |
|
|
|
// A Decoder receives binary protobuf data on its input sink and pushes the |
|
// decoded data to its output sink. |
|
class upb::pb::Decoder { |
|
public: |
|
// Constructs a decoder instance for the given method, which must outlive this |
|
// decoder. Any errors during parsing will be set on the given status, which |
|
// must also outlive this decoder. |
|
Decoder(const DecoderMethod* method, Status* status); |
|
~Decoder(); |
|
|
|
// Returns the DecoderMethod this decoder is parsing from. |
|
// TODO(haberman): Do users need to be able to rebind this? |
|
const DecoderMethod* method() const; |
|
|
|
// Resets the state of the decoder. |
|
void Reset(); |
|
|
|
// Resets the output sink of the Decoder. |
|
// The given sink must match method()->dest_handlers(). |
|
// |
|
// This must be called at least once before the decoder can be used. It may |
|
// only be called with the decoder is in a state where it was just created or |
|
// reset with pipeline.Reset(). The given sink must be from the same pipeline |
|
// as this decoder. |
|
bool ResetOutput(Sink* sink); |
|
|
|
// The sink on which this decoder receives input. |
|
BytesSink* input(); |
|
|
|
private: |
|
UPB_DISALLOW_COPY_AND_ASSIGN(Decoder); |
|
#else |
|
struct upb_pbdecoder { |
|
#endif |
|
// Our input sink. |
|
upb_bytessink input_; |
|
|
|
// The decoder method we are parsing with (owned). |
|
const upb_pbdecodermethod *method_; |
|
|
|
size_t call_len; |
|
const uint32_t *pc, *last; |
|
|
|
// Current input buffer and its stream offset. |
|
const char *buf, *ptr, *end, *checkpoint; |
|
|
|
// End of the delimited region, relative to ptr, or NULL if not in this buf. |
|
const char *delim_end; |
|
|
|
// End of the delimited region, relative to ptr, or end if not in this buf. |
|
const char *data_end; |
|
|
|
// Overall stream offset of "buf." |
|
uint64_t bufstart_ofs; |
|
|
|
// Buffer for residual bytes not parsed from the previous buffer. |
|
// The maximum number of residual bytes we require is 12; a five-byte |
|
// unknown tag plus an eight-byte value, less one because the value |
|
// is only a partial value. |
|
char residual[12]; |
|
char *residual_end; |
|
|
|
// Stores the user buffer passed to our decode function. |
|
const char *buf_param; |
|
size_t size_param; |
|
const upb_bufhandle *handle; |
|
|
|
#ifdef UPB_USE_JIT_X64 |
|
// Used momentarily by the generated code to store a value while a user |
|
// function is called. |
|
uint32_t tmp_len; |
|
|
|
const void *saved_rsp; |
|
#endif |
|
|
|
upb_status *status; |
|
|
|
// Our internal stack. |
|
upb_pbdecoder_frame *top, *limit; |
|
upb_pbdecoder_frame stack[UPB_DECODER_MAX_NESTING]; |
|
#ifdef UPB_USE_JIT_X64 |
|
// Each native stack frame needs two pointers, plus we need a few frames for |
|
// the enter/exit trampolines. |
|
const uint32_t *callstack[(UPB_DECODER_MAX_NESTING * 2) + 10]; |
|
#else |
|
const uint32_t *callstack[UPB_DECODER_MAX_NESTING]; |
|
#endif |
|
}; |
|
|
|
#ifdef __cplusplus |
|
|
|
// A class for caching protobuf processing code, whether bytecode for the |
|
// interpreted decoder or machine code for the JIT. |
|
// |
|
// This class is not thread-safe. |
|
class upb::pb::CodeCache { |
|
public: |
|
CodeCache(); |
|
~CodeCache(); |
|
|
|
// Whether the cache is allowed to generate machine code. Defaults to true. |
|
// There is no real reason to turn it off except for testing or if you are |
|
// having a specific problem with the JIT. |
|
// |
|
// Note that allow_jit = true does not *guarantee* that the code will be JIT |
|
// compiled. If this platform is not supported or the JIT was not compiled |
|
// in, the code may still be interpreted. |
|
bool allow_jit() const; |
|
|
|
// This may only be called when the object is first constructed, and prior to |
|
// any code generation, otherwise returns false and does nothing. |
|
bool set_allow_jit(bool allow); |
|
|
|
// Returns a DecoderMethod that can push data to the given handlers. |
|
// If a suitable method already exists, it will be returned from the cache. |
|
// |
|
// Specifying the destination handlers here allows the DecoderMethod to be |
|
// statically bound to the destination handlers if possible, which can allow |
|
// more efficient decoding. However the returned method may or may not |
|
// actually be statically bound. But in all cases, the returned method can |
|
// push data to the given handlers. |
|
const DecoderMethod *GetDecoderMethod(const DecoderMethodOptions& opts); |
|
|
|
// If/when someone needs to explicitly create a dynamically-bound |
|
// DecoderMethod*, we can add a method to get it here. |
|
|
|
private: |
|
UPB_DISALLOW_COPY_AND_ASSIGN(CodeCache); |
|
#else |
|
struct upb_pbcodecache { |
|
#endif |
|
bool allow_jit_; |
|
|
|
// Array of mgroups. |
|
upb_inttable groups; |
|
}; |
|
|
|
|
|
#ifdef __cplusplus |
|
extern "C" { |
|
#endif |
|
|
|
void upb_pbdecoder_init(upb_pbdecoder *d, const upb_pbdecodermethod *method, |
|
upb_status *status); |
|
void upb_pbdecoder_uninit(upb_pbdecoder *d); |
|
void upb_pbdecoder_reset(upb_pbdecoder *d); |
|
const upb_pbdecodermethod *upb_pbdecoder_method(const upb_pbdecoder *d); |
|
bool upb_pbdecoder_resetoutput(upb_pbdecoder *d, upb_sink *sink); |
|
upb_bytessink *upb_pbdecoder_input(upb_pbdecoder *d); |
|
|
|
void upb_pbdecodermethodopts_init(upb_pbdecodermethodopts *opts, |
|
const upb_handlers *h); |
|
void upb_pbdecodermethodopts_setlazy(upb_pbdecodermethodopts *opts, bool lazy); |
|
|
|
void upb_pbdecodermethod_ref(const upb_pbdecodermethod *m, const void *owner); |
|
void upb_pbdecodermethod_unref(const upb_pbdecodermethod *m, const void *owner); |
|
void upb_pbdecodermethod_donateref(const upb_pbdecodermethod *m, |
|
const void *from, const void *to); |
|
void upb_pbdecodermethod_checkref(const upb_pbdecodermethod *m, |
|
const void *owner); |
|
const upb_handlers *upb_pbdecodermethod_desthandlers( |
|
const upb_pbdecodermethod *m); |
|
const upb_byteshandler *upb_pbdecodermethod_inputhandler( |
|
const upb_pbdecodermethod *m); |
|
bool upb_pbdecodermethod_isnative(const upb_pbdecodermethod *m); |
|
const upb_pbdecodermethod *upb_pbdecodermethod_new( |
|
const upb_pbdecodermethodopts *opts, const void *owner); |
|
|
|
void upb_pbcodecache_init(upb_pbcodecache *c); |
|
void upb_pbcodecache_uninit(upb_pbcodecache *c); |
|
bool upb_pbcodecache_allowjit(const upb_pbcodecache *c); |
|
bool upb_pbcodecache_setallowjit(upb_pbcodecache *c, bool allow); |
|
const upb_pbdecodermethod *upb_pbcodecache_getdecodermethod( |
|
upb_pbcodecache *c, const upb_pbdecodermethodopts *opts); |
|
|
|
#ifdef __cplusplus |
|
} /* extern "C" */ |
|
#endif |
|
|
|
#ifdef __cplusplus |
|
|
|
namespace upb { |
|
|
|
template<> |
|
class Pointer<pb::DecoderMethod> { |
|
public: |
|
explicit Pointer(pb::DecoderMethod* ptr) : ptr_(ptr) {} |
|
operator pb::DecoderMethod*() { return ptr_; } |
|
operator RefCounted*() { return UPB_UPCAST(ptr_); } |
|
private: |
|
pb::DecoderMethod* ptr_; |
|
}; |
|
|
|
template<> |
|
class Pointer<const pb::DecoderMethod> { |
|
public: |
|
explicit Pointer(const pb::DecoderMethod* ptr) : ptr_(ptr) {} |
|
operator const pb::DecoderMethod*() { return ptr_; } |
|
operator const RefCounted*() { return UPB_UPCAST(ptr_); } |
|
private: |
|
const pb::DecoderMethod* ptr_; |
|
}; |
|
|
|
namespace pb { |
|
|
|
inline Decoder::Decoder(const DecoderMethod* m, Status* s) { |
|
upb_pbdecoder_init(this, m, s); |
|
} |
|
inline Decoder::~Decoder() { |
|
upb_pbdecoder_uninit(this); |
|
} |
|
inline const DecoderMethod* Decoder::method() const { |
|
return upb_pbdecoder_method(this); |
|
} |
|
inline void Decoder::Reset() { |
|
upb_pbdecoder_reset(this); |
|
} |
|
inline bool Decoder::ResetOutput(Sink* sink) { |
|
return upb_pbdecoder_resetoutput(this, sink); |
|
} |
|
inline BytesSink* Decoder::input() { |
|
return upb_pbdecoder_input(this); |
|
} |
|
|
|
inline DecoderMethodOptions::DecoderMethodOptions(const Handlers* h) { |
|
upb_pbdecodermethodopts_init(this, h); |
|
} |
|
inline void DecoderMethodOptions::set_lazy(bool lazy) { |
|
upb_pbdecodermethodopts_setlazy(this, lazy); |
|
} |
|
|
|
inline void DecoderMethod::Ref(const void *owner) const { |
|
upb_pbdecodermethod_ref(this, owner); |
|
} |
|
inline void DecoderMethod::Unref(const void *owner) const { |
|
upb_pbdecodermethod_unref(this, owner); |
|
} |
|
inline void DecoderMethod::DonateRef(const void *from, const void *to) const { |
|
upb_pbdecodermethod_donateref(this, from, to); |
|
} |
|
inline void DecoderMethod::CheckRef(const void *owner) const { |
|
upb_pbdecodermethod_checkref(this, owner); |
|
} |
|
inline const Handlers* DecoderMethod::dest_handlers() const { |
|
return upb_pbdecodermethod_desthandlers(this); |
|
} |
|
inline const BytesHandler* DecoderMethod::input_handler() const { |
|
return upb_pbdecodermethod_inputhandler(this); |
|
} |
|
inline bool DecoderMethod::is_native() const { |
|
return upb_pbdecodermethod_isnative(this); |
|
} |
|
// static |
|
inline reffed_ptr<const DecoderMethod> DecoderMethod::New( |
|
const DecoderMethodOptions &opts) { |
|
const upb_pbdecodermethod *m = upb_pbdecodermethod_new(&opts, &m); |
|
return reffed_ptr<const DecoderMethod>(m, &m); |
|
} |
|
|
|
inline CodeCache::CodeCache() { |
|
upb_pbcodecache_init(this); |
|
} |
|
inline CodeCache::~CodeCache() { |
|
upb_pbcodecache_uninit(this); |
|
} |
|
inline bool CodeCache::allow_jit() const { |
|
return upb_pbcodecache_allowjit(this); |
|
} |
|
inline bool CodeCache::set_allow_jit(bool allow) { |
|
return upb_pbcodecache_setallowjit(this, allow); |
|
} |
|
inline const DecoderMethod *CodeCache::GetDecoderMethod( |
|
const DecoderMethodOptions& opts) { |
|
return upb_pbcodecache_getdecodermethod(this, &opts); |
|
} |
|
|
|
} // namespace pb |
|
} // namespace upb |
|
|
|
#endif // __cplusplus |
|
|
|
#endif /* UPB_DECODER_H_ */
|
|
|