diff --git a/php/ext/google/protobuf/php-upb.c b/php/ext/google/protobuf/php-upb.c index d320460b9f..bbe14088ee 100644 --- a/php/ext/google/protobuf/php-upb.c +++ b/php/ext/google/protobuf/php-upb.c @@ -6331,7 +6331,7 @@ bool _upb_Decoder_CheckEnum(upb_Decoder* d, const char* ptr, upb_Message* msg, const uint32_t tag = ((uint32_t)field->UPB_PRIVATE(number) << 3) | kUpb_WireType_Varint; upb_Message* unknown_msg = - field->UPB_PRIVATE(mode) & kUpb_LabelFlags_IsExtension ? d->unknown_msg + field->UPB_PRIVATE(mode) & kUpb_LabelFlags_IsExtension ? d->original_msg : msg; char buf[2 * kUpb_Decoder_EncodeVarint32MaxSize]; char* end = buf; @@ -7118,7 +7118,7 @@ const char* _upb_Decoder_DecodeKnownField(upb_Decoder* d, const char* ptr, if (UPB_UNLIKELY(!ext)) { _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory); } - d->unknown_msg = msg; + d->original_msg = msg; msg = (upb_Message*)&ext->data; if (upb_MiniTableField_IsSubMessage(&ext->ext->UPB_PRIVATE(field))) { ext_sub.UPB_PRIVATE(submsg) = @@ -7163,7 +7163,8 @@ static const char* _upb_Decoder_DecodeUnknownField(upb_Decoder* d, // Since unknown fields are the uncommon case, we do a little extra work here // to walk backwards through the buffer to find the field start. This frees // up a register in the fast paths (when the field is known), which leads to - // significant speedups in benchmarks. + // significant speedups in benchmarks. Note that ptr may point into the slop + // space, beyond the normal end of the input buffer. const char* start = ptr; if (wire_type == kUpb_WireType_Delimited) ptr += val.size; @@ -7189,15 +7190,20 @@ static const char* _upb_Decoder_DecodeUnknownField(upb_Decoder* d, start = _upb_Decoder_ReverseSkipVarint(start, tag); assert(start == d->debug_tagstart); + const char* input_start = + upb_EpsCopyInputStream_GetInputPtr(&d->input, start); if (wire_type == kUpb_WireType_StartGroup) { - d->unknown = start; - d->unknown_msg = msg; ptr = _upb_Decoder_DecodeUnknownGroup(d, ptr, field_number); - start = d->unknown; - d->unknown = NULL; } - if (!UPB_PRIVATE(_upb_Message_AddUnknown)(msg, start, ptr - start, - &d->arena)) { + // Normally, bounds checks for fixed or varint fields are performed after + // the field is parsed; it's OK for the field to overrun the end of the + // buffer, because it'll just read into slop space. However, because this + // path reads bytes from the input buffer rather than the patch buffer, + // bounds checks are needed before adding the unknown field. + _upb_Decoder_IsDone(d, &ptr); + const char* input_ptr = upb_EpsCopyInputStream_GetInputPtr(&d->input, ptr); + if (!UPB_PRIVATE(_upb_Message_AddUnknown)( + msg, input_start, input_ptr - input_start, &d->arena)) { _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory); } } else if (wire_type == kUpb_WireType_StartGroup) { @@ -7330,7 +7336,6 @@ upb_DecodeStatus upb_Decode(const char* buf, size_t size, upb_Message* msg, options & kUpb_DecodeOption_AliasString); decoder.extreg = extreg; - decoder.unknown = NULL; decoder.depth = depth ? depth : kUpb_WireFormat_DefaultDepthLimit; decoder.end_group = DECODE_NOGROUP; decoder.options = (uint16_t)options; diff --git a/php/ext/google/protobuf/php-upb.h b/php/ext/google/protobuf/php-upb.h index 94f9042e95..5882d6e698 100644 --- a/php/ext/google/protobuf/php-upb.h +++ b/php/ext/google/protobuf/php-upb.h @@ -763,18 +763,13 @@ extern "C" { // this invariant. #define kUpb_EpsCopyInputStream_SlopBytes 16 -enum { - kUpb_EpsCopyInputStream_NoAliasing = 0, - kUpb_EpsCopyInputStream_OnPatch = 1, - kUpb_EpsCopyInputStream_NoDelta = 2 -}; - typedef struct { const char* end; // Can read up to SlopBytes bytes beyond this. const char* limit_ptr; // For bounds checks, = end + UPB_MIN(limit, 0) - uintptr_t aliasing; + uintptr_t input_delta; // Diff between the original input pointer and patch int limit; // Submessage limit relative to end bool error; // To distinguish between EOF and error. + bool aliasing; char patch[kUpb_EpsCopyInputStream_SlopBytes * 2]; } upb_EpsCopyInputStream; @@ -800,17 +795,16 @@ UPB_INLINE void upb_EpsCopyInputStream_Init(upb_EpsCopyInputStream* e, if (size <= kUpb_EpsCopyInputStream_SlopBytes) { memset(&e->patch, 0, 32); if (size) memcpy(&e->patch, *ptr, size); - e->aliasing = enable_aliasing ? (uintptr_t)*ptr - (uintptr_t)e->patch - : kUpb_EpsCopyInputStream_NoAliasing; + e->input_delta = (uintptr_t)*ptr - (uintptr_t)e->patch; *ptr = e->patch; e->end = *ptr + size; e->limit = 0; } else { e->end = *ptr + size - kUpb_EpsCopyInputStream_SlopBytes; e->limit = kUpb_EpsCopyInputStream_SlopBytes; - e->aliasing = enable_aliasing ? kUpb_EpsCopyInputStream_NoDelta - : kUpb_EpsCopyInputStream_NoAliasing; + e->input_delta = 0; } + e->aliasing = enable_aliasing; e->limit_ptr = e->end; e->error = false; } @@ -953,7 +947,7 @@ UPB_INLINE bool upb_EpsCopyInputStream_CheckSubMessageSizeAvailable( // upb_EpsCopyInputStream_Init() when this stream was initialized. UPB_INLINE bool upb_EpsCopyInputStream_AliasingEnabled( upb_EpsCopyInputStream* e) { - return e->aliasing != kUpb_EpsCopyInputStream_NoAliasing; + return e->aliasing; } // Returns true if aliasing_enabled=true was passed to @@ -963,8 +957,16 @@ UPB_INLINE bool upb_EpsCopyInputStream_AliasingAvailable( upb_EpsCopyInputStream* e, const char* ptr, size_t size) { // When EpsCopyInputStream supports streaming, this will need to become a // runtime check. - return upb_EpsCopyInputStream_CheckDataSizeAvailable(e, ptr, size) && - e->aliasing >= kUpb_EpsCopyInputStream_NoDelta; + return e->aliasing && + upb_EpsCopyInputStream_CheckDataSizeAvailable(e, ptr, size); +} + +// Returns a pointer into an input buffer that corresponds to the parsing +// pointer `ptr`. The returned pointer may be the same as `ptr`, but also may +// be different if we are currently parsing out of the patch buffer. +UPB_INLINE const char* upb_EpsCopyInputStream_GetInputPtr( + upb_EpsCopyInputStream* e, const char* ptr) { + return (const char*)(((uintptr_t)ptr) + e->input_delta); } // Returns a pointer into an input buffer that corresponds to the parsing @@ -976,9 +978,7 @@ UPB_INLINE bool upb_EpsCopyInputStream_AliasingAvailable( UPB_INLINE const char* upb_EpsCopyInputStream_GetAliasedPtr( upb_EpsCopyInputStream* e, const char* ptr) { UPB_ASSUME(upb_EpsCopyInputStream_AliasingAvailable(e, ptr, 0)); - uintptr_t delta = - e->aliasing == kUpb_EpsCopyInputStream_NoDelta ? 0 : e->aliasing; - return (const char*)((uintptr_t)ptr + delta); + return upb_EpsCopyInputStream_GetInputPtr(e, ptr); } // Reads string data from the input, aliasing into the input buffer instead of @@ -1092,9 +1092,7 @@ UPB_INLINE const char* _upb_EpsCopyInputStream_IsDoneFallbackInline( e->limit -= kUpb_EpsCopyInputStream_SlopBytes; e->limit_ptr = e->end + e->limit; UPB_ASSERT(ptr < e->limit_ptr); - if (e->aliasing != kUpb_EpsCopyInputStream_NoAliasing) { - e->aliasing = (uintptr_t)old_end - (uintptr_t)new_start; - } + e->input_delta = (uintptr_t)old_end - (uintptr_t)new_start; return callback(e, old_end, new_start); } else { UPB_ASSERT(overrun > e->limit); @@ -14267,17 +14265,18 @@ enum { #ifndef UPB_WIRE_INTERNAL_DECODER_H_ #define UPB_WIRE_INTERNAL_DECODER_H_ +#include + #include "utf8_range.h" // Must be last. -#define DECODE_NOGROUP (uint32_t) - 1 +#define DECODE_NOGROUP (uint32_t)-1 typedef struct upb_Decoder { upb_EpsCopyInputStream input; const upb_ExtensionRegistry* extreg; - const char* unknown; // Start of unknown data, preserve at buffer flip - upb_Message* unknown_msg; // Pointer to preserve data to + upb_Message* original_msg; // Pointer to preserve data to int depth; // Tracks recursion depth to bound stack usage. uint32_t end_group; // field number of END_GROUP tag, else DECODE_NOGROUP. uint16_t options; @@ -14337,14 +14336,6 @@ UPB_INLINE const char* _upb_Decoder_BufferFlipCallback( upb_EpsCopyInputStream* e, const char* old_end, const char* new_start) { upb_Decoder* d = (upb_Decoder*)e; if (!old_end) _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); - - if (d->unknown) { - if (!UPB_PRIVATE(_upb_Message_AddUnknown)( - d->unknown_msg, d->unknown, old_end - d->unknown, &d->arena)) { - _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory); - } - d->unknown = new_start; - } return new_start; } diff --git a/ruby/ext/google/protobuf_c/ruby-upb.c b/ruby/ext/google/protobuf_c/ruby-upb.c index debe573b46..188598f8f1 100644 --- a/ruby/ext/google/protobuf_c/ruby-upb.c +++ b/ruby/ext/google/protobuf_c/ruby-upb.c @@ -6331,7 +6331,7 @@ bool _upb_Decoder_CheckEnum(upb_Decoder* d, const char* ptr, upb_Message* msg, const uint32_t tag = ((uint32_t)field->UPB_PRIVATE(number) << 3) | kUpb_WireType_Varint; upb_Message* unknown_msg = - field->UPB_PRIVATE(mode) & kUpb_LabelFlags_IsExtension ? d->unknown_msg + field->UPB_PRIVATE(mode) & kUpb_LabelFlags_IsExtension ? d->original_msg : msg; char buf[2 * kUpb_Decoder_EncodeVarint32MaxSize]; char* end = buf; @@ -7118,7 +7118,7 @@ const char* _upb_Decoder_DecodeKnownField(upb_Decoder* d, const char* ptr, if (UPB_UNLIKELY(!ext)) { _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory); } - d->unknown_msg = msg; + d->original_msg = msg; msg = (upb_Message*)&ext->data; if (upb_MiniTableField_IsSubMessage(&ext->ext->UPB_PRIVATE(field))) { ext_sub.UPB_PRIVATE(submsg) = @@ -7163,7 +7163,8 @@ static const char* _upb_Decoder_DecodeUnknownField(upb_Decoder* d, // Since unknown fields are the uncommon case, we do a little extra work here // to walk backwards through the buffer to find the field start. This frees // up a register in the fast paths (when the field is known), which leads to - // significant speedups in benchmarks. + // significant speedups in benchmarks. Note that ptr may point into the slop + // space, beyond the normal end of the input buffer. const char* start = ptr; if (wire_type == kUpb_WireType_Delimited) ptr += val.size; @@ -7189,15 +7190,20 @@ static const char* _upb_Decoder_DecodeUnknownField(upb_Decoder* d, start = _upb_Decoder_ReverseSkipVarint(start, tag); assert(start == d->debug_tagstart); + const char* input_start = + upb_EpsCopyInputStream_GetInputPtr(&d->input, start); if (wire_type == kUpb_WireType_StartGroup) { - d->unknown = start; - d->unknown_msg = msg; ptr = _upb_Decoder_DecodeUnknownGroup(d, ptr, field_number); - start = d->unknown; - d->unknown = NULL; } - if (!UPB_PRIVATE(_upb_Message_AddUnknown)(msg, start, ptr - start, - &d->arena)) { + // Normally, bounds checks for fixed or varint fields are performed after + // the field is parsed; it's OK for the field to overrun the end of the + // buffer, because it'll just read into slop space. However, because this + // path reads bytes from the input buffer rather than the patch buffer, + // bounds checks are needed before adding the unknown field. + _upb_Decoder_IsDone(d, &ptr); + const char* input_ptr = upb_EpsCopyInputStream_GetInputPtr(&d->input, ptr); + if (!UPB_PRIVATE(_upb_Message_AddUnknown)( + msg, input_start, input_ptr - input_start, &d->arena)) { _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory); } } else if (wire_type == kUpb_WireType_StartGroup) { @@ -7330,7 +7336,6 @@ upb_DecodeStatus upb_Decode(const char* buf, size_t size, upb_Message* msg, options & kUpb_DecodeOption_AliasString); decoder.extreg = extreg; - decoder.unknown = NULL; decoder.depth = depth ? depth : kUpb_WireFormat_DefaultDepthLimit; decoder.end_group = DECODE_NOGROUP; decoder.options = (uint16_t)options; diff --git a/ruby/ext/google/protobuf_c/ruby-upb.h b/ruby/ext/google/protobuf_c/ruby-upb.h index c63adc744b..c8605816b6 100755 --- a/ruby/ext/google/protobuf_c/ruby-upb.h +++ b/ruby/ext/google/protobuf_c/ruby-upb.h @@ -765,18 +765,13 @@ extern "C" { // this invariant. #define kUpb_EpsCopyInputStream_SlopBytes 16 -enum { - kUpb_EpsCopyInputStream_NoAliasing = 0, - kUpb_EpsCopyInputStream_OnPatch = 1, - kUpb_EpsCopyInputStream_NoDelta = 2 -}; - typedef struct { const char* end; // Can read up to SlopBytes bytes beyond this. const char* limit_ptr; // For bounds checks, = end + UPB_MIN(limit, 0) - uintptr_t aliasing; + uintptr_t input_delta; // Diff between the original input pointer and patch int limit; // Submessage limit relative to end bool error; // To distinguish between EOF and error. + bool aliasing; char patch[kUpb_EpsCopyInputStream_SlopBytes * 2]; } upb_EpsCopyInputStream; @@ -802,17 +797,16 @@ UPB_INLINE void upb_EpsCopyInputStream_Init(upb_EpsCopyInputStream* e, if (size <= kUpb_EpsCopyInputStream_SlopBytes) { memset(&e->patch, 0, 32); if (size) memcpy(&e->patch, *ptr, size); - e->aliasing = enable_aliasing ? (uintptr_t)*ptr - (uintptr_t)e->patch - : kUpb_EpsCopyInputStream_NoAliasing; + e->input_delta = (uintptr_t)*ptr - (uintptr_t)e->patch; *ptr = e->patch; e->end = *ptr + size; e->limit = 0; } else { e->end = *ptr + size - kUpb_EpsCopyInputStream_SlopBytes; e->limit = kUpb_EpsCopyInputStream_SlopBytes; - e->aliasing = enable_aliasing ? kUpb_EpsCopyInputStream_NoDelta - : kUpb_EpsCopyInputStream_NoAliasing; + e->input_delta = 0; } + e->aliasing = enable_aliasing; e->limit_ptr = e->end; e->error = false; } @@ -955,7 +949,7 @@ UPB_INLINE bool upb_EpsCopyInputStream_CheckSubMessageSizeAvailable( // upb_EpsCopyInputStream_Init() when this stream was initialized. UPB_INLINE bool upb_EpsCopyInputStream_AliasingEnabled( upb_EpsCopyInputStream* e) { - return e->aliasing != kUpb_EpsCopyInputStream_NoAliasing; + return e->aliasing; } // Returns true if aliasing_enabled=true was passed to @@ -965,8 +959,16 @@ UPB_INLINE bool upb_EpsCopyInputStream_AliasingAvailable( upb_EpsCopyInputStream* e, const char* ptr, size_t size) { // When EpsCopyInputStream supports streaming, this will need to become a // runtime check. - return upb_EpsCopyInputStream_CheckDataSizeAvailable(e, ptr, size) && - e->aliasing >= kUpb_EpsCopyInputStream_NoDelta; + return e->aliasing && + upb_EpsCopyInputStream_CheckDataSizeAvailable(e, ptr, size); +} + +// Returns a pointer into an input buffer that corresponds to the parsing +// pointer `ptr`. The returned pointer may be the same as `ptr`, but also may +// be different if we are currently parsing out of the patch buffer. +UPB_INLINE const char* upb_EpsCopyInputStream_GetInputPtr( + upb_EpsCopyInputStream* e, const char* ptr) { + return (const char*)(((uintptr_t)ptr) + e->input_delta); } // Returns a pointer into an input buffer that corresponds to the parsing @@ -978,9 +980,7 @@ UPB_INLINE bool upb_EpsCopyInputStream_AliasingAvailable( UPB_INLINE const char* upb_EpsCopyInputStream_GetAliasedPtr( upb_EpsCopyInputStream* e, const char* ptr) { UPB_ASSUME(upb_EpsCopyInputStream_AliasingAvailable(e, ptr, 0)); - uintptr_t delta = - e->aliasing == kUpb_EpsCopyInputStream_NoDelta ? 0 : e->aliasing; - return (const char*)((uintptr_t)ptr + delta); + return upb_EpsCopyInputStream_GetInputPtr(e, ptr); } // Reads string data from the input, aliasing into the input buffer instead of @@ -1094,9 +1094,7 @@ UPB_INLINE const char* _upb_EpsCopyInputStream_IsDoneFallbackInline( e->limit -= kUpb_EpsCopyInputStream_SlopBytes; e->limit_ptr = e->end + e->limit; UPB_ASSERT(ptr < e->limit_ptr); - if (e->aliasing != kUpb_EpsCopyInputStream_NoAliasing) { - e->aliasing = (uintptr_t)old_end - (uintptr_t)new_start; - } + e->input_delta = (uintptr_t)old_end - (uintptr_t)new_start; return callback(e, old_end, new_start); } else { UPB_ASSERT(overrun > e->limit); @@ -14269,17 +14267,18 @@ enum { #ifndef UPB_WIRE_INTERNAL_DECODER_H_ #define UPB_WIRE_INTERNAL_DECODER_H_ +#include + #include "utf8_range.h" // Must be last. -#define DECODE_NOGROUP (uint32_t) - 1 +#define DECODE_NOGROUP (uint32_t)-1 typedef struct upb_Decoder { upb_EpsCopyInputStream input; const upb_ExtensionRegistry* extreg; - const char* unknown; // Start of unknown data, preserve at buffer flip - upb_Message* unknown_msg; // Pointer to preserve data to + upb_Message* original_msg; // Pointer to preserve data to int depth; // Tracks recursion depth to bound stack usage. uint32_t end_group; // field number of END_GROUP tag, else DECODE_NOGROUP. uint16_t options; @@ -14339,14 +14338,6 @@ UPB_INLINE const char* _upb_Decoder_BufferFlipCallback( upb_EpsCopyInputStream* e, const char* old_end, const char* new_start) { upb_Decoder* d = (upb_Decoder*)e; if (!old_end) _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); - - if (d->unknown) { - if (!UPB_PRIVATE(_upb_Message_AddUnknown)( - d->unknown_msg, d->unknown, old_end - d->unknown, &d->arena)) { - _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory); - } - d->unknown = new_start; - } return new_start; }