Moved aliasing logic for string field parsing into EpsCopyInputStream.

Moving the logic down to EpsCopyInputStream makes it easier to test and reuse this functionality.

We also implement aliasing for the final bytes of the patch buffer, which has never been supported before.  We used to always force a copy for any data parsed out of the patch buffer at the end of the stream.

Much of this logic is ported directly from the C++ EpsCopyInputStream class.

PiperOrigin-RevId: 498091644
pull/13171/head
Joshua Haberman 2 years ago committed by Copybara-Service
parent f50e8b221b
commit a48af3f824
  1. 1
      BUILD
  2. 19
      upb/wire/decode.c
  3. 43
      upb/wire/decode_fast.c
  4. 2
      upb/wire/decode_internal.h
  5. 81
      upb/wire/eps_copy_input_stream.h
  6. 118
      upb/wire/eps_copy_input_stream_test.cc

@ -956,6 +956,7 @@ cc_test(
name = "eps_copy_input_stream_test",
srcs = ["upb/wire/eps_copy_input_stream_test.cc"],
deps = [
":upb",
":wire_internal",
"@com_google_googletest//:gtest_main",
],

@ -221,16 +221,12 @@ static upb_Message* _upb_Decoder_NewSubMessage(
static const char* _upb_Decoder_ReadString(upb_Decoder* d, const char* ptr,
int size, upb_StringView* str) {
if (d->options & kUpb_DecodeOption_AliasString) {
str->data = ptr;
} else {
char* data = upb_Arena_Malloc(&d->arena, size);
if (!data) _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory);
memcpy(data, ptr, size);
str->data = data;
}
const char* str_ptr = ptr;
ptr = upb_EpsCopyInputStream_ReadString(&d->input, &str_ptr, size, &d->arena);
if (!ptr) _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory);
str->data = str_ptr;
str->size = size;
return ptr + size;
return ptr;
}
UPB_FORCEINLINE
@ -1264,9 +1260,8 @@ upb_DecodeStatus upb_Decode(const char* buf, size_t size, void* msg,
upb_Decoder state;
unsigned depth = (unsigned)options >> 16;
if (upb_EpsCopyInputStream_Init(&state.input, &buf, size)) {
options &= ~kUpb_DecodeOption_AliasString; // Can't alias patch buf.
}
upb_EpsCopyInputStream_Init(&state.input, &buf, size,
options & kUpb_DecodeOption_AliasString);
state.extreg = extreg;
state.unknown = NULL;

@ -639,26 +639,17 @@ static const char* fastdecode_verifyutf8(upb_Decoder* d, const char* ptr,
ptr = fastdecode_longsize(ptr, &size); \
} \
\
if (UPB_UNLIKELY(!upb_EpsCopyInputStream_CheckDataSizeAvailable( \
&d->input, ptr, size))) { \
if (UPB_UNLIKELY(!upb_EpsCopyInputStream_CheckSize(&d->input, ptr, size))) { \
dst->size = 0; \
_upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); \
} \
\
if (d->options & kUpb_DecodeOption_AliasString) { \
dst->data = ptr; \
dst->size = size; \
} else { \
char* data = upb_Arena_Malloc(&d->arena, size); \
if (!data) { \
_upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory); \
} \
memcpy(data, ptr, size); \
dst->data = data; \
const char* s_ptr = ptr; \
ptr = upb_EpsCopyInputStream_ReadString(&d->input, &s_ptr, size, &d->arena); \
if (!ptr) _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory); \
dst->data = s_ptr; \
dst->size = size; \
} \
\
ptr += size; \
if (validate_utf8) { \
data = (uint64_t)dst; \
UPB_MUSTTAIL return fastdecode_verifyutf8(UPB_PARSE_ARGS); \
@ -702,7 +693,7 @@ static void fastdecode_docopy(upb_Decoder* d, const char* ptr, uint32_t size,
size_t common_has; \
char* buf; \
\
UPB_ASSERT((d->options & kUpb_DecodeOption_AliasString) == 0); \
UPB_ASSERT(!upb_EpsCopyInputStream_AliasingAvailable(&d->input, ptr, 0)); \
UPB_ASSERT(fastdecode_checktag(data, tagbytes)); \
\
dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, &farr, \
@ -791,7 +782,8 @@ static void fastdecode_docopy(upb_Decoder* d, const char* ptr, uint32_t size,
RETURN_GENERIC("string field tag mismatch\n"); \
} \
\
if (UPB_UNLIKELY((d->options & kUpb_DecodeOption_AliasString) == 0)) { \
if (UPB_UNLIKELY( \
!upb_EpsCopyInputStream_AliasingAvailable(&d->input, ptr, 0))) { \
UPB_MUSTTAIL return copyfunc(UPB_PARSE_ARGS); \
} \
\
@ -805,11 +797,9 @@ static void fastdecode_docopy(upb_Decoder* d, const char* ptr, uint32_t size,
\
size = (int8_t)ptr[tagbytes]; \
ptr += tagbytes + 1; \
dst->data = ptr; \
dst->size = size; \
\
if (UPB_UNLIKELY(!upb_EpsCopyInputStream_CheckDataSizeAvailable( \
&d->input, ptr, size))) { \
if (UPB_UNLIKELY( \
!upb_EpsCopyInputStream_AliasingAvailable(&d->input, ptr, size))) { \
ptr--; \
if (validate_utf8) { \
return fastdecode_longstring_utf8(d, ptr, msg, table, hasbits, \
@ -820,7 +810,10 @@ static void fastdecode_docopy(upb_Decoder* d, const char* ptr, uint32_t size,
} \
} \
\
ptr += size; \
dst->data = ptr; \
dst->size = size; \
ptr = upb_EpsCopyInputStream_ReadStringAliased(&d->input, &dst->data, \
dst->size); \
\
if (card == CARD_r) { \
if (validate_utf8 && \
@ -832,14 +825,6 @@ static void fastdecode_docopy(upb_Decoder* d, const char* ptr, uint32_t size,
switch (ret.next) { \
case FD_NEXT_SAMEFIELD: \
dst = ret.dst; \
if (UPB_UNLIKELY((d->options & kUpb_DecodeOption_AliasString) == 0)) { \
/* Buffer flipped and we can't alias any more. Bounce to */ \
/* copyfunc(), but via dispatch since we need to reload table */ \
/* data also. */ \
fastdecode_commitarr(dst, &farr, sizeof(upb_StringView)); \
data = ret.tag; \
UPB_MUSTTAIL return _upb_FastDecoder_TagDispatch(UPB_PARSE_ARGS); \
} \
goto again; \
case FD_NEXT_OTHERFIELD: \
data = ret.tag; \

@ -131,8 +131,6 @@ UPB_INLINE const char* _upb_Decoder_BufferFlipCallback(
}
d->unknown = new_start;
}
d->options &= ~kUpb_DecodeOption_AliasString;
return new_start;
}

@ -30,6 +30,8 @@
#include <string.h>
#include "upb/mem/arena.h"
// Must be last.
#include "upb/port/def.inc"
@ -41,9 +43,16 @@
// this invariant.
#define kUpb_EpsCopyInputStream_SlopBytes 16
// Sentinel values for upb_EpsCopyInputStream.aliasing. Any stored value that
// is not one of these sentinels is treated as a byte delta (see
// upb_EpsCopyInputStream_ReadStringAliased()).
enum {
// Aliasing is disabled: upb_EpsCopyInputStream_Init() stores this when
// enable_aliasing is false, and upb_EpsCopyInputStream_AliasingEnabled()
// returns false for it.
kUpb_EpsCopyInputStream_NoAliasing = 0,
// NOTE(review): not referenced by any code visible in this change; it sits
// below the NoDelta threshold that upb_EpsCopyInputStream_AliasingAvailable()
// tests, so a stream holding it would not alias. Confirm intended use.
kUpb_EpsCopyInputStream_OnPatch = 1,
// Aliasing is enabled and stream pointers need no adjustment (delta of zero).
// Also the lowest value that upb_EpsCopyInputStream_AliasingAvailable()
// accepts as "aliasing possible".
kUpb_EpsCopyInputStream_NoDelta = 2
};
// State for reading an input buffer with kUpb_EpsCopyInputStream_SlopBytes of
// readable slack past `end`.
typedef struct {
const char* end; // Can read up to SlopBytes bytes beyond this.
const char* limit_ptr; // For bounds checks, = end + UPB_MIN(limit, 0)
// Aliasing state: kUpb_EpsCopyInputStream_NoAliasing when aliasing is
// disabled, kUpb_EpsCopyInputStream_NoDelta when stream pointers can be
// handed out unchanged, otherwise a byte delta that
// upb_EpsCopyInputStream_ReadStringAliased() adds to a stream pointer to
// obtain the address it returns to the caller.
uintptr_t aliasing;
int limit; // Submessage limit relative to end
// Scratch buffer of 2 * SlopBytes; upb_EpsCopyInputStream_Init() copies
// inputs of at most SlopBytes bytes here and reads from it instead.
char patch[kUpb_EpsCopyInputStream_SlopBytes * 2];
} upb_EpsCopyInputStream;
@ -56,14 +65,16 @@ typedef const char* upb_EpsCopyInputStream_IsDoneFallbackFunc(
// Initializes a upb_EpsCopyInputStream using the contents of the buffer
// [*ptr, size]. Updates `*ptr` as necessary to guarantee that at least
// kUpb_EpsCopyInputStream_SlopBytes, and returns true if the pointer has been
// updated.
UPB_INLINE bool upb_EpsCopyInputStream_Init(upb_EpsCopyInputStream* e,
const char** ptr, size_t size) {
// kUpb_EpsCopyInputStream_SlopBytes are available to read.
UPB_INLINE void upb_EpsCopyInputStream_Init(upb_EpsCopyInputStream* e,
const char** ptr, size_t size,
bool enable_aliasing) {
bool ret;
if (size <= kUpb_EpsCopyInputStream_SlopBytes) {
memset(&e->patch, 0, 32);
if (size) memcpy(&e->patch, *ptr, size);
e->aliasing = enable_aliasing ? (uintptr_t)*ptr - (uintptr_t)e->patch
: kUpb_EpsCopyInputStream_NoAliasing;
*ptr = e->patch;
e->end = *ptr + size;
e->limit = 0;
@ -71,10 +82,11 @@ UPB_INLINE bool upb_EpsCopyInputStream_Init(upb_EpsCopyInputStream* e,
} else {
e->end = *ptr + size - kUpb_EpsCopyInputStream_SlopBytes;
e->limit = kUpb_EpsCopyInputStream_SlopBytes;
e->aliasing = enable_aliasing ? kUpb_EpsCopyInputStream_NoDelta
: kUpb_EpsCopyInputStream_NoAliasing;
ret = false;
}
e->limit_ptr = e->end;
return ret;
}
typedef enum {
@ -195,6 +207,62 @@ UPB_INLINE bool upb_EpsCopyInputStream_CheckSubMessageSizeAvailable(
return _upb_EpsCopyInputStream_CheckSizeAvailable(e, ptr, size, true);
}
// Reports whether aliasing was requested for this stream, i.e. whether
// `enable_aliasing` was true in the call to upb_EpsCopyInputStream_Init()
// that set up `e`.
//
// This says nothing about whether a particular region can be aliased; use
// upb_EpsCopyInputStream_AliasingAvailable() for that.
UPB_INLINE bool upb_EpsCopyInputStream_AliasingEnabled(
    upb_EpsCopyInputStream* e) {
  // The NoAliasing sentinel is the only state that means "disabled".
  const bool disabled = (e->aliasing == kUpb_EpsCopyInputStream_NoAliasing);
  return !disabled;
}
// Reports whether the `size` bytes starting at `ptr` can be returned to the
// caller as an alias instead of being copied.
//
// This requires both that the bytes pass
// upb_EpsCopyInputStream_CheckDataSizeAvailable() and that the stream's
// aliasing state is kUpb_EpsCopyInputStream_NoDelta or a delta value (any
// state below NoDelta means aliasing is off or unavailable).
UPB_INLINE bool upb_EpsCopyInputStream_AliasingAvailable(
    upb_EpsCopyInputStream* e, const char* ptr, size_t size) {
  // When EpsCopyInputStream supports streaming, this will need to become a
  // runtime check.
  if (!upb_EpsCopyInputStream_CheckDataSizeAvailable(e, ptr, size)) {
    return false;
  }
  return e->aliasing >= kUpb_EpsCopyInputStream_NoDelta;
}
// Reads `size` bytes of string data without copying.
//
// Precondition (stated via UPB_ASSUME):
// upb_EpsCopyInputStream_AliasingAvailable(e, *ptr, size) is true.
//
// On entry `*ptr` is the current stream position. On return `*ptr` holds the
// address the caller should use for the string data: the stream position
// shifted by the stream's aliasing delta (zero when the state is
// kUpb_EpsCopyInputStream_NoDelta).
//
// Returns the stream position just past the string (`*ptr + size`, computed
// before `*ptr` is adjusted); never NULL.
UPB_INLINE const char* upb_EpsCopyInputStream_ReadStringAliased(
    upb_EpsCopyInputStream* e, const char** ptr, size_t size) {
  UPB_ASSUME(upb_EpsCopyInputStream_AliasingAvailable(e, *ptr, size));

  // Advance in stream coordinates first; the aliased address below may lie in
  // a different buffer.
  const char* next = *ptr + size;

  // NoDelta is a sentinel, not a real offset: it stands for "add nothing".
  uintptr_t offset = 0;
  if (e->aliasing != kUpb_EpsCopyInputStream_NoDelta) {
    offset = e->aliasing;
  }
  *ptr = (const char*)((uintptr_t)*ptr + offset);

  UPB_ASSUME(next != NULL);
  return next;
}
// Reads `size` bytes of string data starting at the stream position `*ptr`.
//
// When upb_EpsCopyInputStream_AliasingAvailable() holds for the region, the
// data is not copied and `*ptr` is set by
// upb_EpsCopyInputStream_ReadStringAliased(). Otherwise `size` bytes are
// allocated from `arena`, the data is copied into them, and `*ptr` is set to
// the copy. A copy can therefore happen even on a stream that was initialized
// with aliasing enabled.
//
// Returns the stream position just past the string, or NULL if the region
// fails upb_EpsCopyInputStream_CheckDataSizeAvailable() (premature EOF) or the
// arena allocation fails. `*ptr` is left unchanged when NULL is returned.
UPB_INLINE const char* upb_EpsCopyInputStream_ReadString(
    upb_EpsCopyInputStream* e, const char** ptr, size_t size,
    upb_Arena* arena) {
  // Fast path: hand back a pointer without copying.
  if (upb_EpsCopyInputStream_AliasingAvailable(e, *ptr, size)) {
    return upb_EpsCopyInputStream_ReadStringAliased(e, ptr, size);
  }

  // Slow path: we need to allocate and copy.
  if (!upb_EpsCopyInputStream_CheckDataSizeAvailable(e, *ptr, size)) {
    return NULL;
  }

  const char* src = *ptr;
  char* copy = (char*)upb_Arena_Malloc(arena, size);
  if (copy == NULL) {
    return NULL;
  }
  memcpy(copy, src, size);
  *ptr = copy;

  const char* next = src + size;
  UPB_ASSUME(next != NULL);
  return next;
}
// Consistency check: asserts that `limit_ptr` equals `end` when `limit` is
// non-negative, and `end + limit` (a position before `end`) when `limit` is
// negative. Reads the stream only; nothing is modified.
UPB_INLINE void _upb_EpsCopyInputStream_CheckLimit(upb_EpsCopyInputStream* e) {
UPB_ASSERT(e->limit_ptr == e->end + UPB_MIN(0, e->limit));
}
@ -245,6 +313,9 @@ UPB_INLINE const char* _upb_EpsCopyInputStream_IsDoneFallbackInline(
e->limit -= kUpb_EpsCopyInputStream_SlopBytes;
e->limit_ptr = e->end + e->limit;
UPB_ASSERT(ptr < e->limit_ptr);
if (e->aliasing != kUpb_EpsCopyInputStream_NoAliasing) {
e->aliasing = (uintptr_t)old_end - (uintptr_t)new_start;
}
return callback(e, old_end, new_start);
} else {
return callback(e, NULL, NULL);

@ -5,6 +5,7 @@
#include <string>
#include "gtest/gtest.h"
#include "upb/upb.hpp"
// begin:google_only
// #include "testing/fuzzing/fuzztest.h"
// end:google_only
@ -14,7 +15,7 @@ namespace {
TEST(EpsCopyInputStreamTest, ZeroSize) {
upb_EpsCopyInputStream stream;
const char* ptr = NULL;
upb_EpsCopyInputStream_Init(&stream, &ptr, 0);
upb_EpsCopyInputStream_Init(&stream, &ptr, 0, false);
EXPECT_TRUE(upb_EpsCopyInputStream_IsDone(&stream, &ptr, NULL));
}
@ -81,20 +82,49 @@ TEST(EpsCopyInputStreamTest, ZeroSize) {
//
// class EpsStream {
// public:
// EpsStream(const std::string& data) : data_(data) {
// EpsStream(const std::string& data, bool enable_aliasing)
// : data_(data), enable_aliasing_(enable_aliasing) {
// ptr_ = data_.data();
// upb_EpsCopyInputStream_Init(&eps_, &ptr_, data_.size());
// upb_EpsCopyInputStream_Init(&eps_, &ptr_, data_.size(), enable_aliasing);
// }
//
// // Returns false at EOF or error.
// int ReadData(int n, std::string* data) {
// EXPECT_LE(n, kUpb_EpsCopyInputStream_SlopBytes);
// // We want to verify that we can read kUpb_EpsCopyInputStream_SlopBytes
// // safely, even if we haven't actually been requested to read that much.
// // We copy to a global buffer so the copy can't be optimized away.
// memcpy(&tmp_buf, ptr_, kUpb_EpsCopyInputStream_SlopBytes);
// data->assign(tmp_buf, n);
// ptr_ += n;
// return PopLimits();
// }
//
// int ReadString(int n, std::string* data) {
// if (!upb_EpsCopyInputStream_CheckSize(&eps_, ptr_, n)) return -1;
// const char* str_data = ptr_;
// ptr_ = upb_EpsCopyInputStream_ReadString(&eps_, &str_data, n, arena_.ptr());
// if (!ptr_) return -1;
// if (enable_aliasing_ && n) {
// EXPECT_GE(reinterpret_cast<uintptr_t>(str_data),
// reinterpret_cast<uintptr_t>(data_.data()));
// EXPECT_LT(reinterpret_cast<uintptr_t>(str_data),
// reinterpret_cast<uintptr_t>(data_.data() + data_.size()));
// }
// data->assign(str_data, n);
// return PopLimits();
// }
//
// bool TryPushLimit(int limit) {
// if (!upb_EpsCopyInputStream_CheckSize(&eps_, ptr_, limit)) return false;
// deltas_.push_back(upb_EpsCopyInputStream_PushLimit(&eps_, ptr_, limit));
// return true;
// }
//
// bool IsEof() const { return eof_; }
//
// private:
// int PopLimits() {
// int end_limit_count = 0;
//
// while (IsAtLimit()) {
@ -110,15 +140,6 @@ TEST(EpsCopyInputStreamTest, ZeroSize) {
// return error_ ? -1 : end_limit_count;
// }
//
// bool TryPushLimit(int limit) {
// if (!upb_EpsCopyInputStream_CheckSize(&eps_, ptr_, limit)) return false;
// deltas_.push_back(upb_EpsCopyInputStream_PushLimit(&eps_, ptr_, limit));
// return true;
// }
//
// bool IsEof() const { return eof_; }
//
// private:
// bool IsAtLimit() {
// return upb_EpsCopyInputStream_IsDone(&eps_, &ptr_,
// &EpsStream::IsDoneFallback);
@ -150,12 +171,18 @@ TEST(EpsCopyInputStreamTest, ZeroSize) {
// std::string data_;
// const char* ptr_;
// std::vector<int> deltas_;
// upb::Arena arena_;
// bool error_ = false;
// bool eof_ = false;
// bool enable_aliasing_;
// };
//
// // Reads N bytes from the given position.
// struct ReadOp {
// int bytes; // Must be <= kUpb_EpsCopyInputStream_SlopBytes.
// };
//
// struct ReadStringOp {
// int bytes;
// };
//
@ -164,14 +191,16 @@ TEST(EpsCopyInputStreamTest, ZeroSize) {
// int bytes;
// };
//
// typedef std::variant<ReadOp, PushLimitOp> Op;
// typedef std::variant<ReadOp, ReadStringOp, PushLimitOp> Op;
//
// struct EpsCopyTestScript {
// int data_size;
// bool enable_aliasing;
// std::vector<Op> ops;
// };
//
// auto ArbitraryEpsCopyTestScript() {
// using ::fuzztest::Arbitrary;
// using ::fuzztest::InRange;
// using ::fuzztest::NonNegative;
// using ::fuzztest::StructOf;
@ -182,9 +211,12 @@ TEST(EpsCopyInputStreamTest, ZeroSize) {
//
// return StructOf<EpsCopyTestScript>(
// InRange(0, max_data_size), // data_size
// Arbitrary<bool>(), // enable_aliasing
// VectorOf(VariantOf(
// // ReadOp
// StructOf<ReadOp>(InRange(0, kUpb_EpsCopyInputStream_SlopBytes)),
// // ReadStringOp
// StructOf<ReadStringOp>(NonNegative<int>()),
// // PushLimitOp
// StructOf<PushLimitOp>(NonNegative<int>()))));
// }
@ -198,7 +230,7 @@ TEST(EpsCopyInputStreamTest, ZeroSize) {
// }
//
// FakeStream fake_stream(data);
// EpsStream eps_stream(data);
// EpsStream eps_stream(data, script.enable_aliasing);
//
// for (const auto& op : script.ops) {
// if (const ReadOp* read_op = std::get_if<ReadOp>(&op)) {
@ -211,24 +243,82 @@ TEST(EpsCopyInputStreamTest, ZeroSize) {
// EXPECT_EQ(data_fake, data_eps);
// EXPECT_EQ(fake_stream.IsEof(), eps_stream.IsEof());
// if (fake_stream.IsEof()) break;
// } else if (const ReadStringOp* read_op = std::get_if<ReadStringOp>(&op)) {
// std::string data_fake;
// std::string data_eps;
// int fake_result = fake_stream.ReadData(read_op->bytes, &data_fake);
// int eps_result = eps_stream.ReadString(read_op->bytes, &data_eps);
// EXPECT_EQ(fake_result, eps_result);
// if (fake_result == -1) break; // Error
// EXPECT_EQ(data_fake, data_eps);
// EXPECT_EQ(fake_stream.IsEof(), eps_stream.IsEof());
// if (fake_stream.IsEof()) break;
// } else if (const PushLimitOp* push = std::get_if<PushLimitOp>(&op)) {
// EXPECT_EQ(fake_stream.TryPushLimit(push->bytes),
// eps_stream.TryPushLimit(push->bytes));
// } else {
// EXPECT_TRUE(false); // Unknown op.
// }
// }
// }
//
// // Test with:
// // $ blaze run --config=fuzztest third_party/upb:eps_copy_input_stream_test \
// // -- --gunit_fuzz=
// FUZZ_TEST(EpsCopyFuzzTest, TestAgainstFakeStream)
// .WithDomains(ArbitraryEpsCopyTestScript());
//
// TEST(EpsCopyFuzzTest, TestAgainstFakeStreamRegression) {
// TestAgainstFakeStream({299,
// false,
// {
// PushLimitOp{2},
// ReadOp{14},
// }});
// }
//
// TEST(EpsCopyFuzzTest, AliasingEnabledZeroSizeReadString) {
// TestAgainstFakeStream({510, true, {ReadStringOp{0}}});
// }
//
// TEST(EpsCopyFuzzTest, AliasingDisabledZeroSizeReadString) {
// TestAgainstFakeStream({510, false, {ReadStringOp{0}}});
// }
//
// TEST(EpsCopyFuzzTest, ReadStringZero) {
// TestAgainstFakeStream({0, true, {ReadStringOp{0}}});
// }
//
// TEST(EpsCopyFuzzTest, ReadZero) {
// TestAgainstFakeStream({0, true, {ReadOp{0}}});
// }
//
// TEST(EpsCopyFuzzTest, ReadZeroTwice) {
// TestAgainstFakeStream({0, true, {ReadOp{0}, ReadOp{0}}});
// }
//
// TEST(EpsCopyFuzzTest, ReadStringZeroThenRead) {
// TestAgainstFakeStream({0, true, {ReadStringOp{0}, ReadOp{0}}});
// }
//
// TEST(EpsCopyFuzzTest, ReadStringOverflowsBufferButNotLimit) {
// TestAgainstFakeStream({351,
// false,
// {
// ReadOp{7},
// PushLimitOp{2147483647},
// ReadStringOp{344},
// }});
// }
//
// TEST(EpsCopyFuzzTest, LastBufferAliasing) {
// TestAgainstFakeStream({27, true, {ReadOp{12}, ReadStringOp{3}}});
// }
//
// TEST(EpsCopyFuzzTest, FirstBufferAliasing) {
// TestAgainstFakeStream({7, true, {ReadStringOp{3}}});
// }
//
// end:google_only
} // namespace

Loading…
Cancel
Save