From d1b851c9bc41f1e0c4567a322d36bb25da773db2 Mon Sep 17 00:00:00 2001 From: Protobuf Team Bot Date: Fri, 15 Nov 2024 14:52:27 -0800 Subject: [PATCH] Add unknown fields during group decode in a single call, to permit aliasing PiperOrigin-RevId: 697008785 --- upb/message/BUILD | 2 ++ upb/message/test.cc | 2 ++ upb/wire/decode.c | 25 ++++++++++++--------- upb/wire/eps_copy_input_stream.h | 38 +++++++++++++++----------------- upb/wire/internal/decoder.h | 15 ++++--------- 5 files changed, 41 insertions(+), 41 deletions(-) diff --git a/upb/message/BUILD b/upb/message/BUILD index 7bb8df663d..43d391d512 100644 --- a/upb/message/BUILD +++ b/upb/message/BUILD @@ -369,11 +369,13 @@ cc_test( ":message_test_upb_proto", ":message_test_upb_proto_reflection", "//upb:base", + "//upb:eps_copy_input_stream", "//upb:json", "//upb:mem", "//upb:mini_table", "//upb:reflection", "//upb:wire", + "//upb:wire_reader", "//upb/test:fuzz_util", "//upb/test:test_messages_proto3_upb_proto", "@com_google_googletest//:gtest", diff --git a/upb/message/test.cc b/upb/message/test.cc index a96a7789fd..7ee644248a 100644 --- a/upb/message/test.cc +++ b/upb/message/test.cc @@ -37,6 +37,8 @@ #include "upb/test/fuzz_util.h" #include "upb/wire/decode.h" #include "upb/wire/encode.h" +#include "upb/wire/eps_copy_input_stream.h" +#include "upb/wire/types.h" void VerifyMessage(const upb_test_TestExtensions* ext_msg) { EXPECT_TRUE(upb_test_TestExtensions_has_optional_int32_ext(ext_msg)); diff --git a/upb/wire/decode.c b/upb/wire/decode.c index 09a6b13d95..4818b7ddc0 100644 --- a/upb/wire/decode.c +++ b/upb/wire/decode.c @@ -394,7 +394,7 @@ bool _upb_Decoder_CheckEnum(upb_Decoder* d, const char* ptr, upb_Message* msg, const uint32_t tag = ((uint32_t)field->UPB_PRIVATE(number) << 3) | kUpb_WireType_Varint; upb_Message* unknown_msg = - field->UPB_PRIVATE(mode) & kUpb_LabelFlags_IsExtension ? d->unknown_msg + field->UPB_PRIVATE(mode) & kUpb_LabelFlags_IsExtension ? d->original_msg : msg; char buf[2 * kUpb_Decoder_EncodeVarint32MaxSize]; char* end = buf; @@ -1181,7 +1181,7 @@ const char* _upb_Decoder_DecodeKnownField(upb_Decoder* d, const char* ptr, if (UPB_UNLIKELY(!ext)) { _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory); } - d->unknown_msg = msg; + d->original_msg = msg; msg = (upb_Message*)&ext->data; if (upb_MiniTableField_IsSubMessage(&ext->ext->UPB_PRIVATE(field))) { ext_sub.UPB_PRIVATE(submsg) = @@ -1226,7 +1226,8 @@ static const char* _upb_Decoder_DecodeUnknownField(upb_Decoder* d, // Since unknown fields are the uncommon case, we do a little extra work here // to walk backwards through the buffer to find the field start. This frees // up a register in the fast paths (when the field is known), which leads to - // significant speedups in benchmarks. + // significant speedups in benchmarks. Note that ptr may point into the slop + // space, beyond the normal end of the input buffer. const char* start = ptr; if (wire_type == kUpb_WireType_Delimited) ptr += val.size; @@ -1252,15 +1253,20 @@ static const char* _upb_Decoder_DecodeUnknownField(upb_Decoder* d, start = _upb_Decoder_ReverseSkipVarint(start, tag); assert(start == d->debug_tagstart); + const char* input_start = + upb_EpsCopyInputStream_GetInputPtr(&d->input, start); if (wire_type == kUpb_WireType_StartGroup) { - d->unknown = start; - d->unknown_msg = msg; ptr = _upb_Decoder_DecodeUnknownGroup(d, ptr, field_number); - start = d->unknown; - d->unknown = NULL; } - if (!UPB_PRIVATE(_upb_Message_AddUnknown)(msg, start, ptr - start, - &d->arena)) { + // Normally, bounds checks for fixed or varint fields are performed after + // the field is parsed; it's OK for the field to overrun the end of the + // buffer, because it'll just read into slop space. However, because this + // path reads bytes from the input buffer rather than the patch buffer, + // bounds checks are needed before adding the unknown field. + _upb_Decoder_IsDone(d, &ptr); + const char* input_ptr = upb_EpsCopyInputStream_GetInputPtr(&d->input, ptr); + if (!UPB_PRIVATE(_upb_Message_AddUnknown)( + msg, input_start, input_ptr - input_start, &d->arena)) { _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory); } } else if (wire_type == kUpb_WireType_StartGroup) { @@ -1393,7 +1399,6 @@ upb_DecodeStatus upb_Decode(const char* buf, size_t size, upb_Message* msg, options & kUpb_DecodeOption_AliasString); decoder.extreg = extreg; - decoder.unknown = NULL; decoder.depth = depth ? depth : kUpb_WireFormat_DefaultDepthLimit; decoder.end_group = DECODE_NOGROUP; decoder.options = (uint16_t)options; diff --git a/upb/wire/eps_copy_input_stream.h b/upb/wire/eps_copy_input_stream.h index 690ab49666..2307f364a7 100644 --- a/upb/wire/eps_copy_input_stream.h +++ b/upb/wire/eps_copy_input_stream.h @@ -27,18 +27,13 @@ extern "C" { // this invariant. #define kUpb_EpsCopyInputStream_SlopBytes 16 -enum { - kUpb_EpsCopyInputStream_NoAliasing = 0, - kUpb_EpsCopyInputStream_OnPatch = 1, - kUpb_EpsCopyInputStream_NoDelta = 2 -}; - typedef struct { const char* end; // Can read up to SlopBytes bytes beyond this. const char* limit_ptr; // For bounds checks, = end + UPB_MIN(limit, 0) - uintptr_t aliasing; + uintptr_t input_delta; // Diff between the original input pointer and patch int limit; // Submessage limit relative to end bool error; // To distinguish between EOF and error. + bool aliasing; char patch[kUpb_EpsCopyInputStream_SlopBytes * 2]; } upb_EpsCopyInputStream; @@ -64,17 +59,16 @@ UPB_INLINE void upb_EpsCopyInputStream_Init(upb_EpsCopyInputStream* e, if (size <= kUpb_EpsCopyInputStream_SlopBytes) { memset(&e->patch, 0, 32); if (size) memcpy(&e->patch, *ptr, size); - e->aliasing = enable_aliasing ? (uintptr_t)*ptr - (uintptr_t)e->patch - : kUpb_EpsCopyInputStream_NoAliasing; + e->input_delta = (uintptr_t)*ptr - (uintptr_t)e->patch; *ptr = e->patch; e->end = *ptr + size; e->limit = 0; } else { e->end = *ptr + size - kUpb_EpsCopyInputStream_SlopBytes; e->limit = kUpb_EpsCopyInputStream_SlopBytes; - e->aliasing = enable_aliasing ? kUpb_EpsCopyInputStream_NoDelta - : kUpb_EpsCopyInputStream_NoAliasing; + e->input_delta = 0; } + e->aliasing = enable_aliasing; e->limit_ptr = e->end; e->error = false; } @@ -217,7 +211,7 @@ UPB_INLINE bool upb_EpsCopyInputStream_CheckSubMessageSizeAvailable( // upb_EpsCopyInputStream_Init() when this stream was initialized. UPB_INLINE bool upb_EpsCopyInputStream_AliasingEnabled( upb_EpsCopyInputStream* e) { - return e->aliasing != kUpb_EpsCopyInputStream_NoAliasing; + return e->aliasing; } // Returns true if aliasing_enabled=true was passed to @@ -227,8 +221,16 @@ UPB_INLINE bool upb_EpsCopyInputStream_AliasingAvailable( upb_EpsCopyInputStream* e, const char* ptr, size_t size) { // When EpsCopyInputStream supports streaming, this will need to become a // runtime check. - return upb_EpsCopyInputStream_CheckDataSizeAvailable(e, ptr, size) && - e->aliasing >= kUpb_EpsCopyInputStream_NoDelta; + return e->aliasing && + upb_EpsCopyInputStream_CheckDataSizeAvailable(e, ptr, size); +} + +// Returns a pointer into an input buffer that corresponds to the parsing +// pointer `ptr`. The returned pointer may be the same as `ptr`, but also may +// be different if we are currently parsing out of the patch buffer. +UPB_INLINE const char* upb_EpsCopyInputStream_GetInputPtr( + upb_EpsCopyInputStream* e, const char* ptr) { + return (const char*)(((uintptr_t)ptr) + e->input_delta); } // Returns a pointer into an input buffer that corresponds to the parsing @@ -240,9 +242,7 @@ UPB_INLINE bool upb_EpsCopyInputStream_AliasingAvailable( UPB_INLINE const char* upb_EpsCopyInputStream_GetAliasedPtr( upb_EpsCopyInputStream* e, const char* ptr) { UPB_ASSUME(upb_EpsCopyInputStream_AliasingAvailable(e, ptr, 0)); - uintptr_t delta = - e->aliasing == kUpb_EpsCopyInputStream_NoDelta ? 0 : e->aliasing; - return (const char*)((uintptr_t)ptr + delta); + return upb_EpsCopyInputStream_GetInputPtr(e, ptr); } // Reads string data from the input, aliasing into the input buffer instead of @@ -356,9 +356,7 @@ UPB_INLINE const char* _upb_EpsCopyInputStream_IsDoneFallbackInline( e->limit -= kUpb_EpsCopyInputStream_SlopBytes; e->limit_ptr = e->end + e->limit; UPB_ASSERT(ptr < e->limit_ptr); - if (e->aliasing != kUpb_EpsCopyInputStream_NoAliasing) { - e->aliasing = (uintptr_t)old_end - (uintptr_t)new_start; - } + e->input_delta = (uintptr_t)old_end - (uintptr_t)new_start; return callback(e, old_end, new_start); } else { UPB_ASSERT(overrun > e->limit); diff --git a/upb/wire/internal/decoder.h b/upb/wire/internal/decoder.h index 5ca1e1b2b2..c67fbcf5ec 100644 --- a/upb/wire/internal/decoder.h +++ b/upb/wire/internal/decoder.h @@ -13,6 +13,8 @@ #ifndef UPB_WIRE_INTERNAL_DECODER_H_ #define UPB_WIRE_INTERNAL_DECODER_H_ +#include + #include "upb/mem/internal/arena.h" #include "upb/message/internal/message.h" #include "upb/wire/decode.h" @@ -22,13 +24,12 @@ // Must be last. #include "upb/port/def.inc" -#define DECODE_NOGROUP (uint32_t) - 1 +#define DECODE_NOGROUP (uint32_t)-1 typedef struct upb_Decoder { upb_EpsCopyInputStream input; const upb_ExtensionRegistry* extreg; - const char* unknown; // Start of unknown data, preserve at buffer flip - upb_Message* unknown_msg; // Pointer to preserve data to + upb_Message* original_msg; // Pointer to preserve data to int depth; // Tracks recursion depth to bound stack usage. uint32_t end_group; // field number of END_GROUP tag, else DECODE_NOGROUP. uint16_t options; @@ -88,14 +89,6 @@ UPB_INLINE const char* _upb_Decoder_BufferFlipCallback( upb_EpsCopyInputStream* e, const char* old_end, const char* new_start) { upb_Decoder* d = (upb_Decoder*)e; if (!old_end) _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); - - if (d->unknown) { - if (!UPB_PRIVATE(_upb_Message_AddUnknown)( - d->unknown_msg, d->unknown, old_end - d->unknown, &d->arena)) { - _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory); - } - d->unknown = new_start; - } return new_start; }