Separated out buffering code into upb_EpsCopyInputStream.

This mirrors the structure of C++ protobuf, which has an EpsCopyInputStream class.

This will lay the foundation for making EpsCopyInputStream capable of true streaming, by reading its input from a ZeroCopyInputStream.  It also lets us test EpsCopyInputStream separately from the decoder: see the new unit test that fuzzes upb_EpsCopyInputStream.

After this CL is submitted, the two decoders (the normal decoder and the fast decoder) should no longer be accessing the members of upb_EpsCopyInputStream.

PiperOrigin-RevId: 494400285
pull/13171/head
Joshua Haberman 2 years ago committed by Copybara-Service
parent 9e89142283
commit 68d1d91475
  1. 10
      BUILD
  2. 46
      upb/wire/decode.c
  3. 265
      upb/wire/decode_fast.c
  4. 99
      upb/wire/decode_internal.h
  5. 285
      upb/wire/eps_copy_input_stream.h
  6. 234
      upb/wire/eps_copy_input_stream_test.cc

10
BUILD

@ -924,6 +924,7 @@ cc_library(
"upb/wire/decode_fast.h",
"upb/wire/decode_internal.h",
"upb/wire/encode.h",
"upb/wire/eps_copy_input_stream.h",
"upb/wire/swap_internal.h",
"upb/wire/types.h",
],
@ -941,6 +942,15 @@ cc_library(
],
)
cc_test(
name = "eps_copy_input_stream_test",
srcs = ["upb/wire/eps_copy_input_stream_test.cc"],
deps = [
":wire_internal",
"@com_google_googletest//:gtest_main",
],
)
cc_library(
name = "hash",
srcs = [

@ -34,6 +34,7 @@
#include "upb/mini_table/enum_internal.h"
#include "upb/wire/common_internal.h"
#include "upb/wire/decode_internal.h"
#include "upb/wire/eps_copy_input_stream.h"
#include "upb/wire/swap_internal.h"
#include "upb/wire/types.h"
@ -94,6 +95,7 @@ const char* _upb_FastDecoder_ErrorJmp(upb_Decoder* d, int status) {
UPB_LONGJMP(d->err, status);
return NULL;
}
static void _upb_Decoder_VerifyUtf8(upb_Decoder* d, const char* buf, int len) {
if (!_upb_Decoder_VerifyUtf8Inline(buf, len)) {
_upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_BadUtf8);
@ -169,7 +171,8 @@ static const char* upb_Decoder_DecodeSize(upb_Decoder* d, const char* ptr,
uint32_t* size) {
uint64_t size64;
ptr = _upb_Decoder_DecodeVarint(d, ptr, &size64);
if (size64 >= INT32_MAX || ptr - d->end + (int)size64 > d->limit) {
if (size64 >= INT32_MAX ||
!upb_EpsCopyInputStream_CheckSize(&d->input, ptr, (int)size64)) {
_upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed);
}
*size = size64;
@ -216,15 +219,6 @@ static upb_Message* _upb_Decoder_NewSubMessage(
return msg;
}
UPB_NOINLINE
const char* _upb_Decoder_IsDoneFallback(upb_Decoder* d, const char* ptr,
int overrun) {
int status;
ptr = _upb_Decoder_IsDoneFallbackInline(d, ptr, overrun, &status);
if (ptr == NULL) _upb_Decoder_ErrorJmp(d, status);
return ptr;
}
static const char* _upb_Decoder_ReadString(upb_Decoder* d, const char* ptr,
int size, upb_StringView* str) {
if (d->options & kUpb_DecodeOption_AliasString) {
@ -260,11 +254,11 @@ UPB_FORCEINLINE
static const char* _upb_Decoder_DecodeSubMessage(
upb_Decoder* d, const char* ptr, upb_Message* submsg,
const upb_MiniTableSub* subs, const upb_MiniTableField* field, int size) {
int saved_delta = _upb_Decoder_PushLimit(d, ptr, size);
int saved_delta = upb_EpsCopyInputStream_PushLimit(&d->input, ptr, size);
const upb_MiniTable* subl = subs[field->submsg_index].submsg;
UPB_ASSERT(subl);
ptr = _upb_Decoder_RecurseSubMessage(d, ptr, submsg, subl, DECODE_NOGROUP);
_upb_Decoder_PopLimit(d, ptr, saved_delta);
upb_EpsCopyInputStream_PopLimit(&d->input, ptr, saved_delta);
return ptr;
}
@ -411,7 +405,7 @@ static const char* _upb_Decoder_DecodeVarintPacked(
upb_Decoder* d, const char* ptr, upb_Array* arr, wireval* val,
const upb_MiniTableField* field, int lg2) {
int scale = 1 << lg2;
int saved_limit = _upb_Decoder_PushLimit(d, ptr, val->size);
int saved_limit = upb_EpsCopyInputStream_PushLimit(&d->input, ptr, val->size);
char* out = UPB_PTR_AT(_upb_array_ptr(arr), arr->size << lg2, void);
while (!_upb_Decoder_IsDone(d, &ptr)) {
wireval elem;
@ -424,7 +418,7 @@ static const char* _upb_Decoder_DecodeVarintPacked(
memcpy(out, &elem, scale);
out += scale;
}
_upb_Decoder_PopLimit(d, ptr, saved_limit);
upb_EpsCopyInputStream_PopLimit(&d->input, ptr, saved_limit);
return ptr;
}
@ -434,7 +428,7 @@ static const char* _upb_Decoder_DecodeEnumPacked(
const upb_MiniTableSub* subs, const upb_MiniTableField* field,
wireval* val) {
const upb_MiniTableEnum* e = subs[field->submsg_index].subenum;
int saved_limit = _upb_Decoder_PushLimit(d, ptr, val->size);
int saved_limit = upb_EpsCopyInputStream_PushLimit(&d->input, ptr, val->size);
char* out = UPB_PTR_AT(_upb_array_ptr(arr), arr->size * 4, void);
while (!_upb_Decoder_IsDone(d, &ptr)) {
wireval elem;
@ -450,7 +444,7 @@ static const char* _upb_Decoder_DecodeEnumPacked(
memcpy(out, &elem, 4);
out += 4;
}
_upb_Decoder_PopLimit(d, ptr, saved_limit);
upb_EpsCopyInputStream_PopLimit(&d->input, ptr, saved_limit);
return ptr;
}
@ -1198,7 +1192,7 @@ static const char* _upb_Decoder_DecodeMessage(upb_Decoder* d, const char* ptr,
d->debug_tagstart = ptr;
#endif
UPB_ASSERT(ptr < d->limit_ptr);
UPB_ASSERT(ptr < d->input.limit_ptr);
ptr = _upb_Decoder_DecodeTag(d, ptr, &tag);
field_number = tag >> 3;
wire_type = tag & 7;
@ -1256,6 +1250,13 @@ static upb_DecodeStatus _upb_Decoder_DecodeTop(struct upb_Decoder* d,
return kUpb_DecodeStatus_Ok;
}
UPB_NOINLINE
const char* _upb_Decoder_IsDoneFallback(upb_EpsCopyInputStream* e,
const char* ptr, int overrun) {
return _upb_EpsCopyInputStream_IsDoneFallbackInline(
e, ptr, overrun, _upb_Decoder_BufferFlipCallback);
}
upb_DecodeStatus upb_Decode(const char* buf, size_t size, void* msg,
const upb_MiniTable* l,
const upb_ExtensionRegistry* extreg, int options,
@ -1263,20 +1264,11 @@ upb_DecodeStatus upb_Decode(const char* buf, size_t size, void* msg,
upb_Decoder state;
unsigned depth = (unsigned)options >> 16;
if (size <= 16) {
memset(&state.patch, 0, 32);
if (size) memcpy(&state.patch, buf, size);
buf = state.patch;
state.end = buf + size;
state.limit = 0;
if (upb_EpsCopyInputStream_Init(&state.input, &buf, size)) {
options &= ~kUpb_DecodeOption_AliasString; // Can't alias patch buf.
} else {
state.end = buf + size - 16;
state.limit = 16;
}
state.extreg = extreg;
state.limit_ptr = state.end;
state.unknown = NULL;
state.depth = depth ? depth : 64;
state.end_group = DECODE_NOGROUP;

@ -69,28 +69,27 @@ typedef enum {
UPB_NOINLINE
static const char* fastdecode_isdonefallback(UPB_PARSE_PARAMS) {
int overrun = data;
int status;
ptr = _upb_Decoder_IsDoneFallbackInline(d, ptr, overrun, &status);
if (ptr == NULL) _upb_FastDecoder_ErrorJmp(d, status);
ptr = _upb_EpsCopyInputStream_IsDoneFallbackInline(
&d->input, ptr, overrun, _upb_Decoder_BufferFlipCallback);
data = _upb_FastDecoder_LoadTag(ptr);
UPB_MUSTTAIL return _upb_FastDecoder_TagDispatch(UPB_PARSE_ARGS);
}
UPB_FORCEINLINE
static const char* fastdecode_dispatch(UPB_PARSE_PARAMS) {
if (UPB_UNLIKELY(ptr >= d->limit_ptr)) {
int overrun = ptr - d->end;
if (UPB_LIKELY(overrun == d->limit)) {
// Parse is finished.
int overrun;
switch (upb_EpsCopyInputStream_IsDoneStatus(&d->input, ptr, &overrun)) {
case kUpb_IsDoneStatus_Done:
*(uint32_t*)msg |= hasbits; // Sync hasbits.
const upb_MiniTable* l = decode_totablep(table);
return UPB_UNLIKELY(l->required_count)
? _upb_Decoder_CheckRequired(d, ptr, msg, l)
: ptr;
} else {
case kUpb_IsDoneStatus_NotDone:
break;
case kUpb_IsDoneStatus_NeedFallback:
data = overrun;
UPB_MUSTTAIL return fastdecode_isdonefallback(UPB_PARSE_ARGS);
}
}
// Read two bytes of tag data (for a one-byte tag, the high byte is junk).
@ -128,37 +127,17 @@ static const char* fastdecode_longsize(const char* ptr, int* size) {
}
UPB_FORCEINLINE
static bool fastdecode_boundscheck(const char* ptr, size_t len,
const char* end) {
uintptr_t uptr = (uintptr_t)ptr;
uintptr_t uend = (uintptr_t)end + 16;
uintptr_t res = uptr + len;
return res < uptr || res > uend;
}
UPB_FORCEINLINE
static bool fastdecode_boundscheck2(const char* ptr, size_t len,
const char* end) {
// This is one extra branch compared to the more normal:
// return (size_t)(end - ptr) < size;
// However it is one less computation if we are just about to use "ptr + len":
// https://godbolt.org/z/35YGPz
// In microbenchmarks this shows an overall 4% improvement.
uintptr_t uptr = (uintptr_t)ptr;
uintptr_t uend = (uintptr_t)end;
uintptr_t res = uptr + len;
return res < uptr || res > uend;
}
typedef const char* fastdecode_delimfunc(upb_Decoder* d, const char* ptr,
void* ctx);
UPB_FORCEINLINE
static const char* fastdecode_delimited(upb_Decoder* d, const char* ptr,
fastdecode_delimfunc* func, void* ctx) {
static const char* fastdecode_delimited(
upb_Decoder* d, const char* ptr,
upb_EpsCopyInputStream_ParseDelimitedFunc* func, void* ctx) {
ptr++;
// Sign-extend so varint greater than one byte becomes negative, causing
// fast delimited parse to fail.
int len = (int8_t)ptr[-1];
if (fastdecode_boundscheck2(ptr, len, d->limit_ptr)) {
if (!upb_EpsCopyInputStream_TryParseDelimitedFast(&d->input, &ptr, len, func,
ctx)) {
// Slow case: Sub-message is >=128 bytes and/or exceeds the current buffer.
// If it exceeds the buffer limit, limit/limit_ptr will change during
// sub-message parsing, so we need to preserve delta, not limit.
@ -170,25 +149,13 @@ static const char* fastdecode_delimited(upb_Decoder* d, const char* ptr,
return NULL;
}
}
if (ptr - d->end + (int)len > d->limit) {
if (!upb_EpsCopyInputStream_CheckSize(&d->input, ptr, len)) {
// Corrupt wire format: invalid limit.
return NULL;
}
int delta = _upb_Decoder_PushLimit(d, ptr, len);
ptr = func(d, ptr, ctx);
_upb_Decoder_PopLimit(d, ptr, delta);
} else {
// Fast case: Sub-message is <128 bytes and fits in the current buffer.
// This means we can preserve limit/limit_ptr verbatim.
const char* saved_limit_ptr = d->limit_ptr;
int saved_limit = d->limit;
d->limit_ptr = ptr + len;
d->limit = d->limit_ptr - d->end;
UPB_ASSERT(d->limit_ptr == d->end + UPB_MIN(0, d->limit));
ptr = func(d, ptr, ctx);
d->limit_ptr = saved_limit_ptr;
d->limit = saved_limit;
UPB_ASSERT(d->limit_ptr == d->end + UPB_MIN(0, d->limit));
int delta = upb_EpsCopyInputStream_PushLimit(&d->input, ptr, len);
ptr = func(&d->input, ptr, ctx);
upb_EpsCopyInputStream_PopLimit(&d->input, ptr, delta);
}
return ptr;
}
@ -429,8 +396,9 @@ typedef struct {
} fastdecode_varintdata;
UPB_FORCEINLINE
static const char* fastdecode_topackedvarint(upb_Decoder* d, const char* ptr,
void* ctx) {
static const char* fastdecode_topackedvarint(upb_EpsCopyInputStream* e,
const char* ptr, void* ctx) {
upb_Decoder* d = (upb_Decoder*)e;
fastdecode_varintdata* data = ctx;
void* dst = data->dst;
uint64_t val;
@ -578,7 +546,8 @@ TAGBYTES(p)
ptr = fastdecode_longsize(ptr, &size); \
} \
\
if (UPB_UNLIKELY(fastdecode_boundscheck(ptr, size, d->limit_ptr) || \
if (UPB_UNLIKELY(!upb_EpsCopyInputStream_CheckDataSizeAvailable( \
&d->input, ptr, size) || \
(size % valbytes) != 0)) { \
_upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); \
} \
@ -670,7 +639,8 @@ static const char* fastdecode_verifyutf8(upb_Decoder* d, const char* ptr,
ptr = fastdecode_longsize(ptr, &size); \
} \
\
if (UPB_UNLIKELY(fastdecode_boundscheck(ptr, size, d->limit_ptr))) { \
if (UPB_UNLIKELY(!upb_EpsCopyInputStream_CheckDataSizeAvailable( \
&d->input, ptr, size))) { \
dst->size = 0; \
_upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); \
} \
@ -723,91 +693,92 @@ static void fastdecode_docopy(upb_Decoder* d, const char* ptr, uint32_t size,
UPB_POISON_MEMORY_REGION(data + size, copy - size);
}
#define FASTDECODE_COPYSTRING(d, ptr, msg, table, hasbits, data, tagbytes, \
card, validate_utf8) \
upb_StringView* dst; \
fastdecode_arr farr; \
int64_t size; \
size_t arena_has; \
size_t common_has; \
char* buf; \
\
UPB_ASSERT((d->options & kUpb_DecodeOption_AliasString) == 0); \
UPB_ASSERT(fastdecode_checktag(data, tagbytes)); \
\
dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, &farr, \
sizeof(upb_StringView), card); \
\
again: \
if (card == CARD_r) { \
dst = fastdecode_resizearr(d, dst, &farr, sizeof(upb_StringView)); \
} \
\
size = (uint8_t)ptr[tagbytes]; \
ptr += tagbytes + 1; \
dst->size = size; \
\
buf = d->arena.head.ptr; \
arena_has = _upb_ArenaHas(&d->arena); \
common_has = UPB_MIN(arena_has, (d->end - ptr) + 16); \
\
if (UPB_LIKELY(size <= 15 - tagbytes)) { \
if (arena_has < 16) goto longstr; \
d->arena.head.ptr += 16; \
memcpy(buf, ptr - tagbytes - 1, 16); \
dst->data = buf + tagbytes + 1; \
} else if (UPB_LIKELY(size <= 32)) { \
if (UPB_UNLIKELY(common_has < 32)) goto longstr; \
fastdecode_docopy(d, ptr, size, 32, buf, dst); \
} else if (UPB_LIKELY(size <= 64)) { \
if (UPB_UNLIKELY(common_has < 64)) goto longstr; \
fastdecode_docopy(d, ptr, size, 64, buf, dst); \
} else if (UPB_LIKELY(size < 128)) { \
if (UPB_UNLIKELY(common_has < 128)) goto longstr; \
fastdecode_docopy(d, ptr, size, 128, buf, dst); \
} else { \
goto longstr; \
} \
\
ptr += size; \
\
if (card == CARD_r) { \
if (validate_utf8 && \
!_upb_Decoder_VerifyUtf8Inline(dst->data, dst->size)) { \
_upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_BadUtf8); \
} \
fastdecode_nextret ret = fastdecode_nextrepeated( \
d, dst, &ptr, &farr, data, tagbytes, sizeof(upb_StringView)); \
switch (ret.next) { \
case FD_NEXT_SAMEFIELD: \
dst = ret.dst; \
goto again; \
case FD_NEXT_OTHERFIELD: \
data = ret.tag; \
UPB_MUSTTAIL return _upb_FastDecoder_TagDispatch(UPB_PARSE_ARGS); \
case FD_NEXT_ATLIMIT: \
return ptr; \
} \
} \
\
if (card != CARD_r && validate_utf8) { \
data = (uint64_t)dst; \
UPB_MUSTTAIL return fastdecode_verifyutf8(UPB_PARSE_ARGS); \
} \
\
UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS); \
\
longstr: \
if (card == CARD_r) { \
fastdecode_commitarr(dst + 1, &farr, sizeof(upb_StringView)); \
} \
ptr--; \
if (validate_utf8) { \
UPB_MUSTTAIL return fastdecode_longstring_utf8(d, ptr, msg, table, \
hasbits, (uint64_t)dst); \
} else { \
UPB_MUSTTAIL return fastdecode_longstring_noutf8(d, ptr, msg, table, \
hasbits, (uint64_t)dst); \
#define FASTDECODE_COPYSTRING(d, ptr, msg, table, hasbits, data, tagbytes, \
card, validate_utf8) \
upb_StringView* dst; \
fastdecode_arr farr; \
int64_t size; \
size_t arena_has; \
size_t common_has; \
char* buf; \
\
UPB_ASSERT((d->options & kUpb_DecodeOption_AliasString) == 0); \
UPB_ASSERT(fastdecode_checktag(data, tagbytes)); \
\
dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, &farr, \
sizeof(upb_StringView), card); \
\
again: \
if (card == CARD_r) { \
dst = fastdecode_resizearr(d, dst, &farr, sizeof(upb_StringView)); \
} \
\
size = (uint8_t)ptr[tagbytes]; \
ptr += tagbytes + 1; \
dst->size = size; \
\
buf = d->arena.head.ptr; \
arena_has = _upb_ArenaHas(&d->arena); \
common_has = UPB_MIN(arena_has, \
upb_EpsCopyInputStream_BytesAvailable(&d->input, ptr)); \
\
if (UPB_LIKELY(size <= 15 - tagbytes)) { \
if (arena_has < 16) goto longstr; \
d->arena.head.ptr += 16; \
memcpy(buf, ptr - tagbytes - 1, 16); \
dst->data = buf + tagbytes + 1; \
} else if (UPB_LIKELY(size <= 32)) { \
if (UPB_UNLIKELY(common_has < 32)) goto longstr; \
fastdecode_docopy(d, ptr, size, 32, buf, dst); \
} else if (UPB_LIKELY(size <= 64)) { \
if (UPB_UNLIKELY(common_has < 64)) goto longstr; \
fastdecode_docopy(d, ptr, size, 64, buf, dst); \
} else if (UPB_LIKELY(size < 128)) { \
if (UPB_UNLIKELY(common_has < 128)) goto longstr; \
fastdecode_docopy(d, ptr, size, 128, buf, dst); \
} else { \
goto longstr; \
} \
\
ptr += size; \
\
if (card == CARD_r) { \
if (validate_utf8 && \
!_upb_Decoder_VerifyUtf8Inline(dst->data, dst->size)) { \
_upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_BadUtf8); \
} \
fastdecode_nextret ret = fastdecode_nextrepeated( \
d, dst, &ptr, &farr, data, tagbytes, sizeof(upb_StringView)); \
switch (ret.next) { \
case FD_NEXT_SAMEFIELD: \
dst = ret.dst; \
goto again; \
case FD_NEXT_OTHERFIELD: \
data = ret.tag; \
UPB_MUSTTAIL return _upb_FastDecoder_TagDispatch(UPB_PARSE_ARGS); \
case FD_NEXT_ATLIMIT: \
return ptr; \
} \
} \
\
if (card != CARD_r && validate_utf8) { \
data = (uint64_t)dst; \
UPB_MUSTTAIL return fastdecode_verifyutf8(UPB_PARSE_ARGS); \
} \
\
UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS); \
\
longstr: \
if (card == CARD_r) { \
fastdecode_commitarr(dst + 1, &farr, sizeof(upb_StringView)); \
} \
ptr--; \
if (validate_utf8) { \
UPB_MUSTTAIL return fastdecode_longstring_utf8(d, ptr, msg, table, \
hasbits, (uint64_t)dst); \
} else { \
UPB_MUSTTAIL return fastdecode_longstring_noutf8(d, ptr, msg, table, \
hasbits, (uint64_t)dst); \
}
#define FASTDECODE_STRING(d, ptr, msg, table, hasbits, data, tagbytes, card, \
@ -837,7 +808,8 @@ static void fastdecode_docopy(upb_Decoder* d, const char* ptr, uint32_t size,
dst->data = ptr; \
dst->size = size; \
\
if (UPB_UNLIKELY(fastdecode_boundscheck(ptr, size, d->end))) { \
if (UPB_UNLIKELY(!upb_EpsCopyInputStream_CheckDataSizeAvailable( \
&d->input, ptr, size))) { \
ptr--; \
if (validate_utf8) { \
return fastdecode_longstring_utf8(d, ptr, msg, table, hasbits, \
@ -950,8 +922,9 @@ typedef struct {
} fastdecode_submsgdata;
UPB_FORCEINLINE
static const char* fastdecode_tosubmsg(upb_Decoder* d, const char* ptr,
void* ctx) {
static const char* fastdecode_tosubmsg(upb_EpsCopyInputStream* e,
const char* ptr, void* ctx) {
upb_Decoder* d = (upb_Decoder*)e;
fastdecode_submsgdata* submsg = ctx;
ptr = fastdecode_dispatch(d, ptr, submsg->msg, submsg->table, 0, 0);
UPB_ASSUME(ptr != NULL);

@ -36,6 +36,7 @@
#include "upb/mem/arena_internal.h"
#include "upb/message/internal.h"
#include "upb/wire/decode.h"
#include "upb/wire/eps_copy_input_stream.h"
#include "utf8_range.h"
// Must be last.
@ -44,18 +45,14 @@
#define DECODE_NOGROUP (uint32_t) - 1
typedef struct upb_Decoder {
const char* end; /* Can read up to 16 bytes slop beyond this. */
const char* limit_ptr; /* = end + UPB_MIN(limit, 0) */
upb_Message* unknown_msg; /* Used for preserving unknown data. */
const char* unknown; /* Start of unknown data, preserve at buffer flip. */
const upb_ExtensionRegistry*
extreg; /* For looking up extensions during the parse. */
int limit; /* Submessage limit relative to end. */
int depth; /* Tracks recursion depth to bound stack usage. */
uint32_t end_group; /* field number of END_GROUP tag, else DECODE_NOGROUP */
upb_EpsCopyInputStream input;
const upb_ExtensionRegistry* extreg;
const char* unknown; // Start of unknown data, preserve at buffer flip
upb_Message* unknown_msg; // Pointer to preserve data to
int depth; // Tracks recursion depth to bound stack usage.
uint32_t end_group; // field number of END_GROUP tag, else DECODE_NOGROUP.
uint16_t options;
bool missing_required;
char patch[32];
upb_Arena arena;
jmp_buf err;
@ -114,49 +111,29 @@ UPB_INLINE const upb_MiniTable* decode_totablep(intptr_t table) {
return (const upb_MiniTable*)(table >> 8);
}
UPB_INLINE
const char* _upb_Decoder_IsDoneFallbackInline(upb_Decoder* d, const char* ptr,
int overrun, int* status) {
if (overrun < d->limit) {
/* Need to copy remaining data into patch buffer. */
UPB_ASSERT(overrun < 16);
if (d->unknown) {
if (!_upb_Message_AddUnknown(d->unknown_msg, d->unknown, ptr - d->unknown,
&d->arena)) {
*status = kUpb_DecodeStatus_OutOfMemory;
return NULL;
}
d->unknown = &d->patch[0] + overrun;
}
memset(d->patch + 16, 0, 16);
memcpy(d->patch, d->end, 16);
ptr = &d->patch[0] + overrun;
d->end = &d->patch[16];
d->limit -= 16;
d->limit_ptr = d->end + d->limit;
d->options &= ~kUpb_DecodeOption_AliasString;
UPB_ASSERT(ptr < d->limit_ptr);
return ptr;
} else {
*status = kUpb_DecodeStatus_Malformed;
return NULL;
}
const char* _upb_Decoder_IsDoneFallback(upb_EpsCopyInputStream* e,
const char* ptr, int overrun);
UPB_INLINE bool _upb_Decoder_IsDone(upb_Decoder* d, const char** ptr) {
return upb_EpsCopyInputStream_IsDone(&d->input, ptr,
&_upb_Decoder_IsDoneFallback);
}
const char* _upb_Decoder_IsDoneFallback(upb_Decoder* d, const char* ptr,
int overrun);
UPB_INLINE const char* _upb_Decoder_BufferFlipCallback(
upb_EpsCopyInputStream* e, const char* old_end, const char* new_start) {
upb_Decoder* d = (upb_Decoder*)e;
if (!old_end) _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed);
UPB_INLINE
bool _upb_Decoder_IsDone(upb_Decoder* d, const char** ptr) {
int overrun = *ptr - d->end;
if (UPB_LIKELY(*ptr < d->limit_ptr)) {
return false;
} else if (UPB_LIKELY(overrun == d->limit)) {
return true;
} else {
*ptr = _upb_Decoder_IsDoneFallback(d, *ptr, overrun);
return false;
if (d->unknown) {
if (!_upb_Message_AddUnknown(d->unknown_msg, d->unknown,
old_end - d->unknown, &d->arena)) {
_upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory);
}
d->unknown = new_start;
}
d->options &= ~kUpb_DecodeOption_AliasString;
return new_start;
}
#if UPB_FASTTABLE
@ -182,30 +159,6 @@ UPB_INLINE uint32_t _upb_FastDecoder_LoadTag(const char* ptr) {
return tag;
}
UPB_INLINE void _upb_Decoder_CheckLimit(upb_Decoder* d) {
UPB_ASSERT(d->limit_ptr == d->end + UPB_MIN(0, d->limit));
}
UPB_INLINE int _upb_Decoder_PushLimit(upb_Decoder* d, const char* ptr,
int size) {
int limit = size + (int)(ptr - d->end);
int delta = d->limit - limit;
_upb_Decoder_CheckLimit(d);
d->limit = limit;
d->limit_ptr = d->end + UPB_MIN(0, limit);
_upb_Decoder_CheckLimit(d);
return delta;
}
UPB_INLINE void _upb_Decoder_PopLimit(upb_Decoder* d, const char* ptr,
int saved_delta) {
UPB_ASSERT(ptr - d->end == d->limit);
_upb_Decoder_CheckLimit(d);
d->limit += saved_delta;
d->limit_ptr = d->end + UPB_MIN(0, d->limit);
_upb_Decoder_CheckLimit(d);
}
#include "upb/port/undef.inc"
#endif /* UPB_WIRE_DECODE_INTERNAL_H_ */

@ -0,0 +1,285 @@
/*
* Copyright (c) 2009-2021, Google LLC
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Google LLC nor the
* names of its contributors may be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY DIRECT,
* INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef UPB_WIRE_EPS_COPY_INPUT_STREAM_H_
#define UPB_WIRE_EPS_COPY_INPUT_STREAM_H_
#include <string.h>
// Must be last.
#include "upb/port/def.inc"
// The maximum number of bytes a single protobuf field can take up in the
// wire format. We only want to do one bounds check per field, so the input
// stream guarantees that after upb_EpsCopyInputStream_IsDone() is called,
// the decoder can read this many bytes without performing another bounds
// check. The stream will copy into a patch buffer as necessary to guarantee
// this invariant.
#define kUpb_EpsCopyInputStream_SlopBytes 16
// Buffered view over a contiguous input region, together with a small patch
// buffer that the stream switches to when too little real input remains to
// read kUpb_EpsCopyInputStream_SlopBytes past `end`.
typedef struct {
const char* end; // Can read up to SlopBytes bytes beyond this.
const char* limit_ptr; // For bounds checks, = end + UPB_MIN(limit, 0)
int limit; // Submessage limit relative to end
// Scratch storage of twice the slop size. Input is copied here by Init()
// (for short inputs) and by the IsDone fallback, so that reads past `end`
// stay inside this array.
char patch[kUpb_EpsCopyInputStream_SlopBytes * 2];
} upb_EpsCopyInputStream;
// Callback invoked by _upb_EpsCopyInputStream_IsDoneFallbackInline(). After
// the stream has flipped to the patch buffer it receives the position in the
// old buffer (`old_end`) and the equivalent position in the patch buffer
// (`new_start`). When no flip is possible because `overrun` is not below the
// limit, both arguments are NULL. Whatever the callback returns is returned
// from the fallback.
typedef const char* upb_EpsCopyInputStream_BufferFlipCallback(
upb_EpsCopyInputStream* e, const char* old_end, const char* new_start);
// Fallback handed to upb_EpsCopyInputStream_IsDone(). It is called with the
// current position and its offset from `e->end` when the stream status is
// kUpb_IsDoneStatus_NeedFallback. The returned pointer replaces the caller's
// position; a NULL return makes IsDone() report true.
typedef const char* upb_EpsCopyInputStream_IsDoneFallbackFunc(
upb_EpsCopyInputStream* e, const char* ptr, int overrun);
// Initializes a upb_EpsCopyInputStream to read the `size` bytes starting at
// `*ptr`.
//
// If the input is no larger than kUpb_EpsCopyInputStream_SlopBytes, it is
// copied into the stream's zero-filled patch buffer and `*ptr` is redirected
// there, so that kUpb_EpsCopyInputStream_SlopBytes can always be read past
// the end of the data. Otherwise the caller's buffer is used in place, with
// `end` set SlopBytes before the real end of the input.
//
// Returns true if `*ptr` was updated to point into the patch buffer (the
// caller must then not alias the original buffer), false otherwise.
UPB_INLINE bool upb_EpsCopyInputStream_Init(upb_EpsCopyInputStream* e,
                                            const char** ptr, size_t size) {
  bool ret;
  if (size <= kUpb_EpsCopyInputStream_SlopBytes) {
    // Zero the whole patch buffer (sized from the array itself rather than a
    // magic number) so bytes past the copied input read as zero.
    memset(&e->patch, 0, sizeof(e->patch));
    // `*ptr` may be NULL when size == 0, so only copy when there is data.
    if (size) memcpy(&e->patch, *ptr, size);
    *ptr = e->patch;
    e->end = *ptr + size;
    e->limit = 0;
    ret = true;
  } else {
    // Hold back the final SlopBytes: they are reachable as slop past `end`,
    // and `limit` records that the true end lies SlopBytes beyond `end`.
    e->end = *ptr + size - kUpb_EpsCopyInputStream_SlopBytes;
    e->limit = kUpb_EpsCopyInputStream_SlopBytes;
    ret = false;
  }
  e->limit_ptr = e->end;
  return ret;
}
// Result of upb_EpsCopyInputStream_IsDoneStatus().
typedef enum {
// The current stream position is at a limit.
kUpb_IsDoneStatus_Done,
// The current stream position is not at a limit.
kUpb_IsDoneStatus_NotDone,
// The current stream position is at or past `limit_ptr` but not exactly at
// the limit. The fallback must be run: it either flips the stream to a new
// buffer before more data can be read, or, if the position has gone past
// the limit, hands the condition to the buffer-flip callback with NULL
// arguments.
kUpb_IsDoneStatus_NeedFallback,
} upb_IsDoneStatus;
// Classifies the stream position `ptr` and stores its offset from `e->end`
// in `*overrun` (always written, whatever the result).
//
// This is the low-level primitive; prefer upb_EpsCopyInputStream_IsDone()
// unless the caller needs to handle the fallback case itself.
UPB_INLINE upb_IsDoneStatus upb_EpsCopyInputStream_IsDoneStatus(
    upb_EpsCopyInputStream* e, const char* ptr, int* overrun) {
  *overrun = ptr - e->end;
  // Common case: still strictly before the bounds-check pointer.
  if (UPB_LIKELY(ptr < e->limit_ptr)) return kUpb_IsDoneStatus_NotDone;
  // At or beyond limit_ptr: done only if we sit exactly on the limit.
  return UPB_LIKELY(*overrun == e->limit) ? kUpb_IsDoneStatus_Done
                                          : kUpb_IsDoneStatus_NeedFallback;
}
// Returns true if the stream has hit a limit, either the current delimited
// limit or the overall end-of-stream. As a side effect, this function may flip
// the pointer to a new buffer if there are fewer than
// kUpb_EpsCopyInputStream_SlopBytes of data to be read in the current buffer.
//
// `func` is only called when the stream needs the fallback path; its return
// value replaces `*ptr`. If `func` returns NULL, this function returns true.
//
// Postcondition: if the function returns false, there are at least
// kUpb_EpsCopyInputStream_SlopBytes of data available to read at *ptr.
UPB_INLINE bool upb_EpsCopyInputStream_IsDone(
    upb_EpsCopyInputStream* e, const char** ptr,
    upb_EpsCopyInputStream_IsDoneFallbackFunc* func) {
  int overrun;
  switch (upb_EpsCopyInputStream_IsDoneStatus(e, *ptr, &overrun)) {
    case kUpb_IsDoneStatus_Done:
      return true;
    case kUpb_IsDoneStatus_NotDone:
      return false;
    case kUpb_IsDoneStatus_NeedFallback:
      break;
  }
  // The fallback is handled after the switch so that every control path ends
  // in a return statement; previously the function could fall off its end.
  *ptr = func(e, *ptr, overrun);
  return *ptr == NULL;
}
// Returns how many bytes starting at `ptr` may be read from the current
// buffer without touching uninitialized or unallocated memory: the distance
// to `end` plus the slop region that follows it.
//
// This is purely a memory-safety bound. It ignores semantic limits (those set
// by PushLimit() and the overall end of the stream), so some of the bytes
// counted here may hold unpredictable, meaningless values. The only guarantee
// is that reading them is valid C (it will not trip UBSAN or ASAN).
UPB_INLINE size_t upb_EpsCopyInputStream_BytesAvailable(
    upb_EpsCopyInputStream* e, const char* ptr) {
  return (size_t)(kUpb_EpsCopyInputStream_SlopBytes + (e->end - ptr));
}
// Validates a delimited field size against the current limit. `ptr` must
// point at the first byte of the field data (just past the encoded size), and
// `size` must be non-negative. Returns true when the field ends at or before
// the innermost pushed limit.
//
// A true result does *not* mean that the whole field is present in the
// current buffer.
UPB_INLINE bool upb_EpsCopyInputStream_CheckSize(
    const upb_EpsCopyInputStream* e, const char* ptr, int size) {
  UPB_ASSERT(size >= 0);
  // Both sides are offsets relative to `end`.
  return e->limit >= ptr - e->end + size;
}
// Shared implementation of the two *SizeAvailable() checks. Returns true when
// [ptr, ptr + size) ends at or before `limit_ptr` (for sub-messages) or at or
// before `limit_ptr` plus the slop region (for plain data). A negative `size`
// is expected to yield false.
UPB_INLINE bool _upb_EpsCopyInputStream_CheckSizeAvailable(
    upb_EpsCopyInputStream* e, const char* ptr, int size, bool submessage) {
  // Computing "ptr + size" and comparing costs one more branch than the usual
  //   return (size_t)(end - ptr) < size;
  // but saves a computation when the caller is about to use "ptr + len":
  //   https://godbolt.org/z/35YGPz
  // Microbenchmarks show a small win for this form.
  //
  // NOTE: the comparison assumes a linear address space, which uintptr_t does
  // not strictly guarantee.
  const uintptr_t first = (uintptr_t)ptr;
  const uintptr_t last = first + (size_t)size;
  const uintptr_t bound =
      (uintptr_t)e->limit_ptr +
      (submessage ? 0 : kUpb_EpsCopyInputStream_SlopBytes);
  // `last < first` means the addition wrapped around.
  const bool fits = !(last < first) && !(last > bound);
  if (size < 0) UPB_ASSERT(!fits);
  return fits;
}
// Returns true if the given delimited field size is valid (it does not extend
// beyond any previously-pushed limits) *and* all of the data for this field is
// available to be read in the current buffer.
//
// If the size is negative, this function will always return false. This
// property can be useful in some cases.
//
// NOTE(review): the underlying check compares against `limit_ptr` plus
// kUpb_EpsCopyInputStream_SlopBytes; confirm that this bound matches the
// "previously-pushed limits" wording above.
UPB_INLINE bool upb_EpsCopyInputStream_CheckDataSizeAvailable(
upb_EpsCopyInputStream* e, const char* ptr, int size) {
return _upb_EpsCopyInputStream_CheckSizeAvailable(e, ptr, size, false);
}
// Returns true if the given sub-message size is valid (it does not extend
// beyond any previously-pushed limits) *and* all of the data for this
// sub-message is available to be parsed in the current buffer.
//
// This implies that all fields from the sub-message can be parsed from the
// current buffer while maintaining the invariant that we always have at least
// kUpb_EpsCopyInputStream_SlopBytes of data available past the beginning of
// any individual field start.
//
// If the size is negative, this function will always return false. This
// property can be useful in some cases.
UPB_INLINE bool upb_EpsCopyInputStream_CheckSubMessageSizeAvailable(
upb_EpsCopyInputStream* e, const char* ptr, int size) {
// submessage=true: the bound is `limit_ptr` itself, with no slop added.
return _upb_EpsCopyInputStream_CheckSizeAvailable(e, ptr, size, true);
}
// Debug check of the stream invariant that ties `limit_ptr` to `end` and
// `limit`: limit_ptr == end + UPB_MIN(0, limit).
UPB_INLINE void _upb_EpsCopyInputStream_CheckLimit(upb_EpsCopyInputStream* e) {
UPB_ASSERT(e->limit_ptr == e->end + UPB_MIN(0, e->limit));
}
// Installs a new innermost limit that ends `size` bytes after `ptr`. Until it
// is popped, upb_EpsCopyInputStream_IsDone() reports true once the stream
// position reaches that point.
//
// Returns the difference between the previous limit and the new one. The
// caller must keep this value and hand it back to PopLimit().
UPB_INLINE int upb_EpsCopyInputStream_PushLimit(upb_EpsCopyInputStream* e,
                                                const char* ptr, int size) {
  // Limits are stored as offsets relative to `end`.
  const int new_limit = (int)(ptr - e->end) + size;
  const int saved_delta = e->limit - new_limit;
  _upb_EpsCopyInputStream_CheckLimit(e);
  e->limit = new_limit;
  e->limit_ptr = e->end + UPB_MIN(0, new_limit);
  _upb_EpsCopyInputStream_CheckLimit(e);
  return saved_delta;
}
// Removes the innermost limit and restores the one that was active before the
// matching PushLimit(). `ptr` must sit exactly on the limit being popped
// (i.e. IsDone() has returned true), and `saved_delta` must be the value that
// PushLimit() returned.
UPB_INLINE void upb_EpsCopyInputStream_PopLimit(upb_EpsCopyInputStream* e,
                                                const char* ptr,
                                                int saved_delta) {
  UPB_ASSERT(ptr - e->end == e->limit);
  _upb_EpsCopyInputStream_CheckLimit(e);
  // Adding the delta back recovers the outer limit, still relative to `end`.
  const int outer_limit = e->limit + saved_delta;
  e->limit = outer_limit;
  e->limit_ptr = e->end + UPB_MIN(0, outer_limit);
  _upb_EpsCopyInputStream_CheckLimit(e);
}
// Slow path taken when the stream position is at or past `limit_ptr` without
// being exactly on the limit. `overrun` is the offset of `ptr` from `e->end`.
//
// If `overrun` is below the limit, the SlopBytes following `e->end` are copied
// to the front of the patch buffer, the stream is re-pointed at the patch
// buffer, and `callback` is invoked with the old position and the equivalent
// new position. Otherwise `callback` is invoked with NULL for both pointers.
// In either case the callback's return value is returned.
UPB_INLINE const char* _upb_EpsCopyInputStream_IsDoneFallbackInline(
upb_EpsCopyInputStream* e, const char* ptr, int overrun,
upb_EpsCopyInputStream_BufferFlipCallback* callback) {
if (overrun < e->limit) {
// Need to copy remaining data into patch buffer.
UPB_ASSERT(overrun < kUpb_EpsCopyInputStream_SlopBytes);
const char* old_end = ptr;
// The same logical position, expressed inside the patch buffer.
const char* new_start = &e->patch[0] + overrun;
// Zero the second half of the patch buffer, then copy the slop bytes that
// follow the old `end` into the first half. This must happen before `e->end`
// is reassigned below.
memset(e->patch + kUpb_EpsCopyInputStream_SlopBytes, 0,
kUpb_EpsCopyInputStream_SlopBytes);
memcpy(e->patch, e->end, kUpb_EpsCopyInputStream_SlopBytes);
ptr = new_start;
// `end` moves to the midpoint of the patch buffer; since `limit` is relative
// to `end`, it shrinks by the SlopBytes that were just consumed.
e->end = &e->patch[kUpb_EpsCopyInputStream_SlopBytes];
e->limit -= kUpb_EpsCopyInputStream_SlopBytes;
// NOTE(review): no UPB_MIN clamp here, unlike PushLimit/PopLimit; presumably
// `limit` is never positive at this point -- confirm.
e->limit_ptr = e->end + e->limit;
UPB_ASSERT(ptr < e->limit_ptr);
return callback(e, old_end, new_start);
} else {
// Position is not below the limit: no flip; report via NULL arguments.
return callback(e, NULL, NULL);
}
}
// Parser callback used by upb_EpsCopyInputStream_TryParseDelimitedFast(). It
// is called with the stream, the position of the delimited data, and the
// caller-supplied `ctx`; the pointer it returns becomes the caller's new
// position.
typedef const char* upb_EpsCopyInputStream_ParseDelimitedFunc(
upb_EpsCopyInputStream* e, const char* ptr, void* ctx);
// Tries to perform a fast-path handling of the given delimited message data.
// If the sub-message beginning at `*ptr` and extending for `len` is short and
// fits within this buffer, calls `func` with `ctx` as a parameter, where the
// pushing and popping of limits is handled automatically and with lower cost
// than the normal PushLimit()/PopLimit() sequence.
//
// Returns true if `func` was called, in which case `*ptr` has been updated to
// the pointer `func` returned. Returns false, without calling `func` or
// changing any state, if the sub-message does not fit in the current buffer
// (including when `len` is negative).
UPB_FORCEINLINE bool upb_EpsCopyInputStream_TryParseDelimitedFast(
upb_EpsCopyInputStream* e, const char** ptr, int len,
upb_EpsCopyInputStream_ParseDelimitedFunc* func, void* ctx) {
if (!upb_EpsCopyInputStream_CheckSubMessageSizeAvailable(e, *ptr, len)) {
return false;
}
// Fast case: Sub-message is <128 bytes and fits in the current buffer.
// This means we can preserve limit/limit_ptr verbatim.
// NOTE(review): the <128 byte bound is not checked here; presumably callers
// pass a negative `len` for longer sizes, which the check above rejects --
// confirm.
const char* saved_limit_ptr = e->limit_ptr;
int saved_limit = e->limit;
// Temporarily narrow the limit to the end of the sub-message.
e->limit_ptr = *ptr + len;
e->limit = e->limit_ptr - e->end;
UPB_ASSERT(e->limit_ptr == e->end + UPB_MIN(0, e->limit));
*ptr = func(e, *ptr, ctx);
// Restore the saved values directly rather than via a delta.
e->limit_ptr = saved_limit_ptr;
e->limit = saved_limit;
UPB_ASSERT(e->limit_ptr == e->end + UPB_MIN(0, e->limit));
return true;
}
#include "upb/port/undef.inc"
#endif // UPB_WIRE_EPS_COPY_INPUT_STREAM_H_

@ -0,0 +1,234 @@
#include "upb/wire/eps_copy_input_stream.h"
#include <string.h>
#include <string>
#include "gtest/gtest.h"
// begin:google_only
// #include "testing/fuzzing/fuzztest.h"
// end:google_only
namespace {
// A stream initialized over zero bytes must report that it is done on the very
// first IsDone() call. No fallback function is supplied (null).
TEST(EpsCopyInputStreamTest, ZeroSize) {
  upb_EpsCopyInputStream stream;
  // C++ source: use nullptr rather than NULL for both the data pointer and the
  // fallback function pointer.
  const char* ptr = nullptr;
  upb_EpsCopyInputStream_Init(&stream, &ptr, 0);
  EXPECT_TRUE(upb_EpsCopyInputStream_IsDone(&stream, &ptr, nullptr));
}
// begin:google_only
//
// // We create a simple, trivial implementation of the stream that we can test
// // our real implementation against.
//
// class FakeStream {
// public:
// FakeStream(const std::string& data) : data_(data), offset_(0) {
// limits_.push_back(data.size());
// }
//
// // If we reached one or more limits correctly, returns the number of limits
// // ended. If we tried to read beyond the current limit, returns -1.
// // Otherwise, for simple success, returns 0.
// int ReadData(int n, std::string* data) {
// if (n > BytesUntilLimit()) return -1;
//
// data->assign(data_.data() + offset_, n);
// offset_ += n;
//
// int end_limit_count = 0;
//
// while (BytesUntilLimit() == 0) {
// if (PopLimit()) {
// end_limit_count++;
// } else {
// eof_ = true;
// break;
// }
// }
//
// return end_limit_count;
// }
//
// bool TryPushLimit(int limit) {
// if (!CheckSize(limit)) return false;
// limits_.push_back(offset_ + limit);
// return true;
// }
//
// bool IsEof() const { return eof_; }
//
// private:
// int BytesUntilLimit() const { return limits_.back() - offset_; }
// bool CheckSize(int size) const { return BytesUntilLimit() >= size; }
//
// // Return false on EOF.
// bool PopLimit() {
// limits_.pop_back();
// return !limits_.empty();
// }
//
// std::string data_;
// // Limits, specified in absolute stream terms.
// std::vector<int> limits_;
// int offset_;
// bool eof_ = false;
// };
//
// char tmp_buf[kUpb_EpsCopyInputStream_SlopBytes];
//
// class EpsStream {
// public:
// EpsStream(const std::string& data) : data_(data) {
// ptr_ = data_.data();
// upb_EpsCopyInputStream_Init(&eps_, &ptr_, data_.size());
// }
//
// // Returns -1 on error, otherwise the number of limits that ended.
// int ReadData(int n, std::string* data) {
// // We want to verify that we can read kUpb_EpsCopyInputStream_SlopBytes
// // safely, even if we haven't actually been requested to read that much.
// // We copy to a global buffer so the copy can't be optimized away.
// memcpy(&tmp_buf, ptr_, kUpb_EpsCopyInputStream_SlopBytes);
// data->assign(tmp_buf, n);
// ptr_ += n;
//
// int end_limit_count = 0;
//
// while (IsAtLimit()) {
// if (error_) return -1;
// if (PopLimit()) {
// end_limit_count++;
// } else {
// eof_ = true; // EOF.
// break;
// }
// }
//
// return error_ ? -1 : end_limit_count;
// }
//
// bool TryPushLimit(int limit) {
// if (!upb_EpsCopyInputStream_CheckSize(&eps_, ptr_, limit)) return false;
// deltas_.push_back(upb_EpsCopyInputStream_PushLimit(&eps_, ptr_, limit));
// return true;
// }
//
// bool IsEof() const { return eof_; }
//
// private:
// bool IsAtLimit() {
// return upb_EpsCopyInputStream_IsDone(&eps_, &ptr_,
// &EpsStream::IsDoneFallback);
// }
//
// // Return false on EOF.
// bool PopLimit() {
// if (deltas_.empty()) return false;
// upb_EpsCopyInputStream_PopLimit(&eps_, ptr_, deltas_.back());
// deltas_.pop_back();
// return true;
// }
//
// static const char* IsDoneFallback(upb_EpsCopyInputStream* e, const char* ptr,
// int overrun) {
// return _upb_EpsCopyInputStream_IsDoneFallbackInline(
// e, ptr, overrun, &EpsStream::BufferFlipCallback);
// }
//
// static const char* BufferFlipCallback(upb_EpsCopyInputStream* e,
// const char* old_end,
// const char* new_start) {
// EpsStream* stream = reinterpret_cast<EpsStream*>(e);
// if (!old_end) stream->error_ = true;
// return new_start;
// }
//
// upb_EpsCopyInputStream eps_;
// std::string data_;
// const char* ptr_;
// std::vector<int> deltas_;
// bool error_ = false;
// bool eof_ = false;
// };
//
// // Reads N bytes from the given position.
// struct ReadOp {
// int bytes;
// };
//
// // Pushes a new limit of N bytes from the current position.
// struct PushLimitOp {
// int bytes;
// };
//
// typedef std::variant<ReadOp, PushLimitOp> Op;
//
// struct EpsCopyTestScript {
// int data_size;
// std::vector<Op> ops;
// };
//
// auto ArbitraryEpsCopyTestScript() {
// using ::fuzztest::InRange;
// using ::fuzztest::NonNegative;
// using ::fuzztest::StructOf;
// using ::fuzztest::VariantOf;
// using ::fuzztest::VectorOf;
//
// int max_data_size = 512;
//
// return StructOf<EpsCopyTestScript>(
// InRange(0, max_data_size), // data_size
// VectorOf(VariantOf(
// // ReadOp
// StructOf<ReadOp>(InRange(0, kUpb_EpsCopyInputStream_SlopBytes)),
// // PushLimitOp
// StructOf<PushLimitOp>(NonNegative<int>()))));
// }
//
// // Runs a test that creates both a real stream and a fake stream, and
// // validates that they have the same behavior.
// void TestAgainstFakeStream(const EpsCopyTestScript& script) {
// std::string data(script.data_size, 'x');
// for (int i = 0; i < script.data_size; ++i) {
// data[i] = static_cast<char>(i & 0xff);
// }
//
// FakeStream fake_stream(data);
// EpsStream eps_stream(data);
//
// for (const auto& op : script.ops) {
// if (const ReadOp* read_op = std::get_if<ReadOp>(&op)) {
// std::string data_fake;
// std::string data_eps;
// int fake_result = fake_stream.ReadData(read_op->bytes, &data_fake);
// int eps_result = eps_stream.ReadData(read_op->bytes, &data_eps);
// EXPECT_EQ(fake_result, eps_result);
// if (fake_result == -1) break; // Error
// EXPECT_EQ(data_fake, data_eps);
// EXPECT_EQ(fake_stream.IsEof(), eps_stream.IsEof());
// if (fake_stream.IsEof()) break;
// } else if (const PushLimitOp* push = std::get_if<PushLimitOp>(&op)) {
// EXPECT_EQ(fake_stream.TryPushLimit(push->bytes),
// eps_stream.TryPushLimit(push->bytes));
// }
// }
// }
//
// FUZZ_TEST(EpsCopyFuzzTest, TestAgainstFakeStream)
// .WithDomains(ArbitraryEpsCopyTestScript());
//
// TEST(EpsCopyFuzzTest, TestAgainstFakeStreamRegression) {
// TestAgainstFakeStream({299,
// {
// PushLimitOp{2},
// ReadOp{14},
// }});
// }
//
// end:google_only
} // namespace
Loading…
Cancel
Save