Protocol Buffers - Google's data interchange format (grpc依赖)
https://developers.google.com/protocol-buffers/
You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
403 lines
17 KiB
403 lines
17 KiB
// Protocol Buffers - Google's data interchange format |
|
// Copyright 2023 Google LLC. All rights reserved. |
|
// |
|
// Use of this source code is governed by a BSD-style |
|
// license that can be found in the LICENSE file or at |
|
// https://developers.google.com/open-source/licenses/bsd |
|
|
|
#ifndef UPB_WIRE_EPS_COPY_INPUT_STREAM_H_ |
|
#define UPB_WIRE_EPS_COPY_INPUT_STREAM_H_ |
|
|
|
#include <string.h> |
|
|
|
#include "upb/mem/arena.h" |
|
|
|
// Must be last. |
|
#include "upb/port/def.inc" |
|
|
|
#ifdef __cplusplus |
|
extern "C" { |
|
#endif |
|
|
|
// Upper bound, in bytes, on the wire-format encoding of a single protobuf
// field. The decoder wants exactly one bounds check per field, so the input
// stream promises that once upb_EpsCopyInputStream_IsDone() has been called,
// this many bytes can be read with no further bounds check. When necessary the
// stream copies data into a patch buffer to uphold that promise.
#define kUpb_EpsCopyInputStream_SlopBytes 16
|
|
|
typedef struct { |
|
const char* end; // Can read up to SlopBytes bytes beyond this. |
|
const char* limit_ptr; // For bounds checks, = end + UPB_MIN(limit, 0) |
|
uintptr_t input_delta; // Diff between the original input pointer and patch |
|
int limit; // Submessage limit relative to end |
|
bool error; // To distinguish between EOF and error. |
|
bool aliasing; |
|
char patch[kUpb_EpsCopyInputStream_SlopBytes * 2]; |
|
} upb_EpsCopyInputStream; |
|
|
|
// Returns true if the stream is in the error state. A stream enters the error |
|
// state when the user reads past a limit (caught in IsDone()) or the |
|
// ZeroCopyInputStream returns an error. |
|
UPB_INLINE bool upb_EpsCopyInputStream_IsError(upb_EpsCopyInputStream* e) { |
|
return e->error; |
|
} |
|
|
|
typedef const char* upb_EpsCopyInputStream_BufferFlipCallback( |
|
upb_EpsCopyInputStream* e, const char* old_end, const char* new_start); |
|
|
|
typedef const char* upb_EpsCopyInputStream_IsDoneFallbackFunc( |
|
upb_EpsCopyInputStream* e, const char* ptr, int overrun); |
|
|
|
// Initializes a upb_EpsCopyInputStream using the contents of the buffer |
|
// [*ptr, size]. Updates `*ptr` as necessary to guarantee that at least |
|
// kUpb_EpsCopyInputStream_SlopBytes are available to read. |
|
UPB_INLINE void upb_EpsCopyInputStream_Init(upb_EpsCopyInputStream* e, |
|
const char** ptr, size_t size, |
|
bool enable_aliasing) { |
|
if (size <= kUpb_EpsCopyInputStream_SlopBytes) { |
|
memset(&e->patch, 0, 32); |
|
if (size) memcpy(&e->patch, *ptr, size); |
|
e->input_delta = (uintptr_t)*ptr - (uintptr_t)e->patch; |
|
*ptr = e->patch; |
|
e->end = *ptr + size; |
|
e->limit = 0; |
|
} else { |
|
e->end = *ptr + size - kUpb_EpsCopyInputStream_SlopBytes; |
|
e->limit = kUpb_EpsCopyInputStream_SlopBytes; |
|
e->input_delta = 0; |
|
} |
|
e->aliasing = enable_aliasing; |
|
e->limit_ptr = e->end; |
|
e->error = false; |
|
} |
|
|
|
// Result of probing the stream position with
// upb_EpsCopyInputStream_IsDoneStatus().
typedef enum {
  // The stream position sits exactly at a limit.
  kUpb_IsDoneStatus_Done = 0,

  // The stream position has not reached a limit.
  kUpb_IsDoneStatus_NotDone = 1,

  // The stream position has not reached a limit, but the stream must be
  // flipped to a new buffer before any more data can be read.
  kUpb_IsDoneStatus_NeedFallback = 2,
} upb_IsDoneStatus;
|
|
|
// Returns the status of the current stream position. This is a low-level |
|
// function, it is simpler to call upb_EpsCopyInputStream_IsDone() if possible. |
|
UPB_INLINE upb_IsDoneStatus upb_EpsCopyInputStream_IsDoneStatus( |
|
upb_EpsCopyInputStream* e, const char* ptr, int* overrun) { |
|
*overrun = ptr - e->end; |
|
if (UPB_LIKELY(ptr < e->limit_ptr)) { |
|
return kUpb_IsDoneStatus_NotDone; |
|
} else if (UPB_LIKELY(*overrun == e->limit)) { |
|
return kUpb_IsDoneStatus_Done; |
|
} else { |
|
return kUpb_IsDoneStatus_NeedFallback; |
|
} |
|
} |
|
|
|
// Returns true if the stream has hit a limit, either the current delimited |
|
// limit or the overall end-of-stream. As a side effect, this function may flip |
|
// the pointer to a new buffer if there are less than |
|
// kUpb_EpsCopyInputStream_SlopBytes of data to be read in the current buffer. |
|
// |
|
// Postcondition: if the function returns false, there are at least |
|
// kUpb_EpsCopyInputStream_SlopBytes of data available to read at *ptr. |
|
UPB_INLINE bool upb_EpsCopyInputStream_IsDoneWithCallback( |
|
upb_EpsCopyInputStream* e, const char** ptr, |
|
upb_EpsCopyInputStream_IsDoneFallbackFunc* func) { |
|
int overrun; |
|
switch (upb_EpsCopyInputStream_IsDoneStatus(e, *ptr, &overrun)) { |
|
case kUpb_IsDoneStatus_Done: |
|
return true; |
|
case kUpb_IsDoneStatus_NotDone: |
|
return false; |
|
case kUpb_IsDoneStatus_NeedFallback: |
|
*ptr = func(e, *ptr, overrun); |
|
return *ptr == NULL; |
|
} |
|
UPB_UNREACHABLE(); |
|
} |
|
|
|
const char* _upb_EpsCopyInputStream_IsDoneFallbackNoCallback( |
|
upb_EpsCopyInputStream* e, const char* ptr, int overrun); |
|
|
|
// A simpler version of IsDoneWithCallback() that does not support a buffer flip |
|
// callback. Useful in cases where we do not need to insert custom logic at |
|
// every buffer flip. |
|
// |
|
// If this returns true, the user must call upb_EpsCopyInputStream_IsError() |
|
// to distinguish between EOF and error. |
|
UPB_INLINE bool upb_EpsCopyInputStream_IsDone(upb_EpsCopyInputStream* e, |
|
const char** ptr) { |
|
return upb_EpsCopyInputStream_IsDoneWithCallback( |
|
e, ptr, _upb_EpsCopyInputStream_IsDoneFallbackNoCallback); |
|
} |
|
|
|
// Returns the total number of bytes that are safe to read from the current |
|
// buffer without reading uninitialized or unallocated memory. |
|
// |
|
// Note that this check does not respect any semantic limits on the stream, |
|
// either limits from PushLimit() or the overall stream end, so some of these |
|
// bytes may have unpredictable, nonsense values in them. The guarantee is only |
|
// that the bytes are valid to read from the perspective of the C language |
|
// (ie. you can read without triggering UBSAN or ASAN). |
|
UPB_INLINE size_t upb_EpsCopyInputStream_BytesAvailable( |
|
upb_EpsCopyInputStream* e, const char* ptr) { |
|
return (e->end - ptr) + kUpb_EpsCopyInputStream_SlopBytes; |
|
} |
|
|
|
// Returns true if the given delimited field size is valid (it does not extend |
|
// beyond any previously-pushed limits). `ptr` should point to the beginning |
|
// of the field data, after the delimited size. |
|
// |
|
// Note that this does *not* guarantee that all of the data for this field is in |
|
// the current buffer. |
|
UPB_INLINE bool upb_EpsCopyInputStream_CheckSize( |
|
const upb_EpsCopyInputStream* e, const char* ptr, int size) { |
|
UPB_ASSERT(size >= 0); |
|
return ptr - e->end + size <= e->limit; |
|
} |
|
|
|
UPB_INLINE bool _upb_EpsCopyInputStream_CheckSizeAvailable( |
|
upb_EpsCopyInputStream* e, const char* ptr, int size, bool submessage) { |
|
// This is one extra branch compared to the more normal: |
|
// return (size_t)(end - ptr) < size; |
|
// However it is one less computation if we are just about to use "ptr + len": |
|
// https://godbolt.org/z/35YGPz |
|
// In microbenchmarks this shows a small improvement. |
|
uintptr_t uptr = (uintptr_t)ptr; |
|
uintptr_t uend = (uintptr_t)e->limit_ptr; |
|
uintptr_t res = uptr + (size_t)size; |
|
if (!submessage) uend += kUpb_EpsCopyInputStream_SlopBytes; |
|
// NOTE: this check depends on having a linear address space. This is not |
|
// technically guaranteed by uintptr_t. |
|
bool ret = res >= uptr && res <= uend; |
|
if (size < 0) UPB_ASSERT(!ret); |
|
return ret; |
|
} |
|
|
|
// Returns true if the given delimited field size is valid (it does not extend |
|
// beyond any previously-pushed limited) *and* all of the data for this field is |
|
// available to be read in the current buffer. |
|
// |
|
// If the size is negative, this function will always return false. This |
|
// property can be useful in some cases. |
|
UPB_INLINE bool upb_EpsCopyInputStream_CheckDataSizeAvailable( |
|
upb_EpsCopyInputStream* e, const char* ptr, int size) { |
|
return _upb_EpsCopyInputStream_CheckSizeAvailable(e, ptr, size, false); |
|
} |
|
|
|
// Returns true if the given sub-message size is valid (it does not extend |
|
// beyond any previously-pushed limited) *and* all of the data for this |
|
// sub-message is available to be parsed in the current buffer. |
|
// |
|
// This implies that all fields from the sub-message can be parsed from the |
|
// current buffer while maintaining the invariant that we always have at least |
|
// kUpb_EpsCopyInputStream_SlopBytes of data available past the beginning of |
|
// any individual field start. |
|
// |
|
// If the size is negative, this function will always return false. This |
|
// property can be useful in some cases. |
|
UPB_INLINE bool upb_EpsCopyInputStream_CheckSubMessageSizeAvailable( |
|
upb_EpsCopyInputStream* e, const char* ptr, int size) { |
|
return _upb_EpsCopyInputStream_CheckSizeAvailable(e, ptr, size, true); |
|
} |
|
|
|
// Returns true if aliasing_enabled=true was passed to |
|
// upb_EpsCopyInputStream_Init() when this stream was initialized. |
|
UPB_INLINE bool upb_EpsCopyInputStream_AliasingEnabled( |
|
upb_EpsCopyInputStream* e) { |
|
return e->aliasing; |
|
} |
|
|
|
// Returns true if aliasing_enabled=true was passed to |
|
// upb_EpsCopyInputStream_Init() when this stream was initialized *and* we can |
|
// alias into the region [ptr, size] in an input buffer. |
|
UPB_INLINE bool upb_EpsCopyInputStream_AliasingAvailable( |
|
upb_EpsCopyInputStream* e, const char* ptr, size_t size) { |
|
// When EpsCopyInputStream supports streaming, this will need to become a |
|
// runtime check. |
|
return e->aliasing && |
|
upb_EpsCopyInputStream_CheckDataSizeAvailable(e, ptr, size); |
|
} |
|
|
|
// Returns a pointer into an input buffer that corresponds to the parsing |
|
// pointer `ptr`. The returned pointer may be the same as `ptr`, but also may |
|
// be different if we are currently parsing out of the patch buffer. |
|
UPB_INLINE const char* upb_EpsCopyInputStream_GetInputPtr( |
|
upb_EpsCopyInputStream* e, const char* ptr) { |
|
return (const char*)(((uintptr_t)ptr) + e->input_delta); |
|
} |
|
|
|
// Returns a pointer into an input buffer that corresponds to the parsing |
|
// pointer `ptr`. The returned pointer may be the same as `ptr`, but also may |
|
// be different if we are currently parsing out of the patch buffer. |
|
// |
|
// REQUIRES: Aliasing must be available for the given pointer. If the input is a |
|
// flat buffer and aliasing is enabled, then aliasing will always be available. |
|
UPB_INLINE const char* upb_EpsCopyInputStream_GetAliasedPtr( |
|
upb_EpsCopyInputStream* e, const char* ptr) { |
|
UPB_ASSUME(upb_EpsCopyInputStream_AliasingAvailable(e, ptr, 0)); |
|
return upb_EpsCopyInputStream_GetInputPtr(e, ptr); |
|
} |
|
|
|
// Reads string data from the input, aliasing into the input buffer instead of |
|
// copying. The parsing pointer is passed in `*ptr`, and will be updated if |
|
// necessary to point to the actual input buffer. Returns the new parsing |
|
// pointer, which will be advanced past the string data. |
|
// |
|
// REQUIRES: Aliasing must be available for this data region (test with |
|
// upb_EpsCopyInputStream_AliasingAvailable(). |
|
UPB_INLINE const char* upb_EpsCopyInputStream_ReadStringAliased( |
|
upb_EpsCopyInputStream* e, const char** ptr, size_t size) { |
|
UPB_ASSUME(upb_EpsCopyInputStream_AliasingAvailable(e, *ptr, size)); |
|
const char* ret = *ptr + size; |
|
*ptr = upb_EpsCopyInputStream_GetAliasedPtr(e, *ptr); |
|
UPB_ASSUME(ret != NULL); |
|
return ret; |
|
} |
|
|
|
// Skips `size` bytes of data from the input and returns a pointer past the end. |
|
// Returns NULL on end of stream or error. |
|
UPB_INLINE const char* upb_EpsCopyInputStream_Skip(upb_EpsCopyInputStream* e, |
|
const char* ptr, int size) { |
|
if (!upb_EpsCopyInputStream_CheckDataSizeAvailable(e, ptr, size)) return NULL; |
|
return ptr + size; |
|
} |
|
|
|
// Copies `size` bytes of data from the input `ptr` into the buffer `to`, and |
|
// returns a pointer past the end. Returns NULL on end of stream or error. |
|
UPB_INLINE const char* upb_EpsCopyInputStream_Copy(upb_EpsCopyInputStream* e, |
|
const char* ptr, void* to, |
|
int size) { |
|
if (!upb_EpsCopyInputStream_CheckDataSizeAvailable(e, ptr, size)) return NULL; |
|
memcpy(to, ptr, size); |
|
return ptr + size; |
|
} |
|
|
|
// Reads string data from the stream and advances the pointer accordingly. |
|
// If aliasing was enabled when the stream was initialized, then the returned |
|
// pointer will point into the input buffer if possible, otherwise new data |
|
// will be allocated from arena and copied into. We may be forced to copy even |
|
// if aliasing was enabled if the input data spans input buffers. |
|
// |
|
// Returns NULL if memory allocation failed, or we reached a premature EOF. |
|
UPB_INLINE const char* upb_EpsCopyInputStream_ReadString( |
|
upb_EpsCopyInputStream* e, const char** ptr, size_t size, |
|
upb_Arena* arena) { |
|
if (upb_EpsCopyInputStream_AliasingAvailable(e, *ptr, size)) { |
|
return upb_EpsCopyInputStream_ReadStringAliased(e, ptr, size); |
|
} else { |
|
// We need to allocate and copy. |
|
if (!upb_EpsCopyInputStream_CheckDataSizeAvailable(e, *ptr, size)) { |
|
return NULL; |
|
} |
|
UPB_ASSERT(arena); |
|
char* data = (char*)upb_Arena_Malloc(arena, size); |
|
if (!data) return NULL; |
|
const char* ret = upb_EpsCopyInputStream_Copy(e, *ptr, data, size); |
|
*ptr = data; |
|
return ret; |
|
} |
|
} |
|
|
|
// Asserts the stream invariant tying `limit_ptr` to `end` and `limit`:
// limit_ptr == end + UPB_MIN(0, limit).
UPB_INLINE void _upb_EpsCopyInputStream_CheckLimit(upb_EpsCopyInputStream* e) {
  UPB_ASSERT(e->end + UPB_MIN(0, e->limit) == e->limit_ptr);
}
|
|
|
// Pushes a limit onto the stack of limits for the current stream. The limit |
|
// will extend for `size` bytes beyond the position in `ptr`. Future calls to |
|
// upb_EpsCopyInputStream_IsDone() will return `true` when the stream position |
|
// reaches this limit. |
|
// |
|
// Returns a delta that the caller must store and supply to PopLimit() below. |
|
UPB_INLINE int upb_EpsCopyInputStream_PushLimit(upb_EpsCopyInputStream* e, |
|
const char* ptr, int size) { |
|
int limit = size + (int)(ptr - e->end); |
|
int delta = e->limit - limit; |
|
_upb_EpsCopyInputStream_CheckLimit(e); |
|
UPB_ASSERT(limit <= e->limit); |
|
e->limit = limit; |
|
e->limit_ptr = e->end + UPB_MIN(0, limit); |
|
_upb_EpsCopyInputStream_CheckLimit(e); |
|
return delta; |
|
} |
|
|
|
// Pops the last limit that was pushed on this stream. This may only be called |
|
// once IsDone() returns true. The user must pass the delta that was returned |
|
// from PushLimit(). |
|
UPB_INLINE void upb_EpsCopyInputStream_PopLimit(upb_EpsCopyInputStream* e, |
|
const char* ptr, |
|
int saved_delta) { |
|
UPB_ASSERT(ptr - e->end == e->limit); |
|
_upb_EpsCopyInputStream_CheckLimit(e); |
|
e->limit += saved_delta; |
|
e->limit_ptr = e->end + UPB_MIN(0, e->limit); |
|
_upb_EpsCopyInputStream_CheckLimit(e); |
|
} |
|
|
|
UPB_INLINE const char* _upb_EpsCopyInputStream_IsDoneFallbackInline( |
|
upb_EpsCopyInputStream* e, const char* ptr, int overrun, |
|
upb_EpsCopyInputStream_BufferFlipCallback* callback) { |
|
if (overrun < e->limit) { |
|
// Need to copy remaining data into patch buffer. |
|
UPB_ASSERT(overrun < kUpb_EpsCopyInputStream_SlopBytes); |
|
const char* old_end = ptr; |
|
const char* new_start = &e->patch[0] + overrun; |
|
memset(e->patch + kUpb_EpsCopyInputStream_SlopBytes, 0, |
|
kUpb_EpsCopyInputStream_SlopBytes); |
|
memcpy(e->patch, e->end, kUpb_EpsCopyInputStream_SlopBytes); |
|
ptr = new_start; |
|
e->end = &e->patch[kUpb_EpsCopyInputStream_SlopBytes]; |
|
e->limit -= kUpb_EpsCopyInputStream_SlopBytes; |
|
e->limit_ptr = e->end + e->limit; |
|
UPB_ASSERT(ptr < e->limit_ptr); |
|
e->input_delta = (uintptr_t)old_end - (uintptr_t)new_start; |
|
return callback(e, old_end, new_start); |
|
} else { |
|
UPB_ASSERT(overrun > e->limit); |
|
e->error = true; |
|
return callback(e, NULL, NULL); |
|
} |
|
} |
|
|
|
typedef const char* upb_EpsCopyInputStream_ParseDelimitedFunc( |
|
upb_EpsCopyInputStream* e, const char* ptr, void* ctx); |
|
|
|
// Tries to perform a fast-path handling of the given delimited message data. |
|
// If the sub-message beginning at `*ptr` and extending for `len` is short and |
|
// fits within this buffer, calls `func` with `ctx` as a parameter, where the |
|
// pushing and popping of limits is handled automatically and with lower cost |
|
// than the normal PushLimit()/PopLimit() sequence. |
|
UPB_FORCEINLINE bool upb_EpsCopyInputStream_TryParseDelimitedFast( |
|
upb_EpsCopyInputStream* e, const char** ptr, int len, |
|
upb_EpsCopyInputStream_ParseDelimitedFunc* func, void* ctx) { |
|
if (!upb_EpsCopyInputStream_CheckSubMessageSizeAvailable(e, *ptr, len)) { |
|
return false; |
|
} |
|
|
|
// Fast case: Sub-message is <128 bytes and fits in the current buffer. |
|
// This means we can preserve limit/limit_ptr verbatim. |
|
const char* saved_limit_ptr = e->limit_ptr; |
|
int saved_limit = e->limit; |
|
e->limit_ptr = *ptr + len; |
|
e->limit = e->limit_ptr - e->end; |
|
UPB_ASSERT(e->limit_ptr == e->end + UPB_MIN(0, e->limit)); |
|
*ptr = func(e, *ptr, ctx); |
|
e->limit_ptr = saved_limit_ptr; |
|
e->limit = saved_limit; |
|
UPB_ASSERT(e->limit_ptr == e->end + UPB_MIN(0, e->limit)); |
|
return true; |
|
} |
|
|
|
#ifdef __cplusplus |
|
} /* extern "C" */ |
|
#endif |
|
|
|
#include "upb/port/undef.inc" |
|
|
|
#endif // UPB_WIRE_EPS_COPY_INPUT_STREAM_H_
|
|
|