diff --git a/BUILD b/BUILD index 462f15e465..a3558edf37 100644 --- a/BUILD +++ b/BUILD @@ -266,12 +266,14 @@ cc_library( visibility = ["//visibility:public"], deps = [ ":collections_internal", + ":eps_copy_input_stream", ":hash", ":message_internal", ":mini_table_internal", ":port", ":upb", ":wire", + ":wire_reader", ], ) @@ -934,30 +936,59 @@ cc_library( "upb/wire/decode_fast.h", "upb/wire/decode_internal.h", "upb/wire/encode.h", - "upb/wire/eps_copy_input_stream.h", "upb/wire/swap_internal.h", - "upb/wire/types.h", ], copts = UPB_DEFAULT_COPTS, visibility = ["//:__subpackages__"], deps = [ ":base", ":collections_internal", + ":eps_copy_input_stream", ":hash", ":mem_internal", ":message_internal", ":mini_table_internal", ":port", + ":wire_types", "@utf8_range", ], ) +cc_library( + name = "wire_types", + hdrs = ["upb/wire/types.h"], + visibility = ["//visibility:public"], +) + +cc_library( + name = "eps_copy_input_stream", + srcs = ["upb/wire/eps_copy_input_stream.c"], + hdrs = ["upb/wire/eps_copy_input_stream.h"], + visibility = ["//visibility:public"], + deps = [ + ":mem", + ":port", + ], +) + +cc_library( + name = "wire_reader", + srcs = ["upb/wire/reader.c"], + hdrs = ["upb/wire/reader.h"], + visibility = ["//visibility:public"], + deps = [ + ":eps_copy_input_stream", + ":port", + ":wire_types", + ], +) + cc_test( name = "eps_copy_input_stream_test", srcs = ["upb/wire/eps_copy_input_stream_test.cc"], deps = [ + ":eps_copy_input_stream", ":upb", - ":wire_internal", "@com_google_googletest//:gtest_main", ], ) @@ -1014,6 +1045,7 @@ upb_amalgamation( ":base", ":collections_internal", ":descriptor_upb_proto", + ":eps_copy_input_stream", ":fastdecode", ":hash", ":lex", @@ -1026,6 +1058,8 @@ upb_amalgamation( ":reflection_internal", ":upb", ":wire_internal", + ":wire_reader", + ":wire_types", ], strip_import_prefix = ["src"], ) @@ -1049,6 +1083,7 @@ upb_amalgamation( ":collections_internal", ":descriptor_upb_proto", ":descriptor_upb_proto_reflection", + 
":eps_copy_input_stream", ":fastdecode", ":hash", ":json", @@ -1062,6 +1097,8 @@ upb_amalgamation( ":reflection_internal", ":upb", ":wire_internal", + ":wire_reader", + ":wire_types", ], prefix = "php-", strip_import_prefix = ["src"], @@ -1086,6 +1123,7 @@ upb_amalgamation( ":base", ":collections_internal", ":descriptor_upb_proto", + ":eps_copy_input_stream", ":fastdecode", ":hash", ":json", @@ -1099,6 +1137,8 @@ upb_amalgamation( ":reflection_internal", ":upb", ":wire_internal", + ":wire_reader", + ":wire_types", ], prefix = "ruby-", strip_import_prefix = ["src"], diff --git a/upb/message/accessors.c b/upb/message/accessors.c index 4359441a33..8178e359b8 100644 --- a/upb/message/accessors.c +++ b/upb/message/accessors.c @@ -33,64 +33,12 @@ #include "upb/message/message.h" #include "upb/wire/decode.h" #include "upb/wire/encode.h" -#include "upb/wire/types.h" +#include "upb/wire/eps_copy_input_stream.h" +#include "upb/wire/reader.h" // Must be last. #include "upb/port/def.inc" -typedef struct { - const char* ptr; - uint64_t val; -} decode_vret; - -UPB_NOINLINE -static decode_vret decode_longvarint64(const char* ptr, uint64_t val) { - decode_vret ret = {NULL, 0}; - uint64_t byte; - int i; - for (i = 1; i < 10; i++) { - byte = (uint8_t)ptr[i]; - val += (byte - 1) << (i * 7); - if (!(byte & 0x80)) { - ret.ptr = ptr + i + 1; - ret.val = val; - return ret; - } - } - return ret; -} - -UPB_FORCEINLINE -static const char* decode_varint64(const char* ptr, uint64_t* val) { - uint64_t byte = (uint8_t)*ptr; - if (UPB_LIKELY((byte & 0x80) == 0)) { - *val = byte; - return ptr + 1; - } else { - decode_vret res = decode_longvarint64(ptr, byte); - if (!res.ptr) return NULL; - *val = res.val; - return res.ptr; - } -} - -UPB_FORCEINLINE -static const char* decode_tag(const char* ptr, uint32_t* val) { - uint64_t byte = (uint8_t)*ptr; - if (UPB_LIKELY((byte & 0x80) == 0)) { - *val = (uint32_t)byte; - return ptr + 1; - } else { - const char* start = ptr; - decode_vret res = 
decode_longvarint64(ptr, byte); - if (!res.ptr || res.ptr - start > 5 || res.val > UINT32_MAX) { - return NULL; // Malformed. - } - *val = (uint32_t)res.val; - return res.ptr; - } -} - // Parses unknown data by merging into existing base_message or creating a // new message usingg mini_table. static upb_UnknownToMessageRet upb_MiniTable_ParseUnknownMessage( @@ -108,8 +56,8 @@ static upb_UnknownToMessageRet upb_MiniTable_ParseUnknownMessage( const char* data = unknown_data; uint32_t tag; uint64_t message_len = 0; - data = decode_tag(data, &tag); - data = decode_varint64(data, &message_len); + data = upb_WireReader_ReadTag(data, &tag); + data = upb_WireReader_ReadVarint(data, &message_len); upb_DecodeStatus status = upb_Decode(data, message_len, ret.message, mini_table, NULL, decode_options, arena); if (status == kUpb_DecodeStatus_OutOfMemory) { @@ -192,131 +140,39 @@ upb_GetExtensionAsBytes_Status upb_MiniTable_GetExtensionAsBytes( const char* data = result.ptr; uint32_t tag; uint64_t message_len = 0; - data = decode_tag(data, &tag); - data = decode_varint64(data, &message_len); + data = upb_WireReader_ReadTag(data, &tag); + data = upb_WireReader_ReadVarint(data, &message_len); *extension_data = data; *len = message_len; return kUpb_GetExtensionAsBytes_Ok; } -static const char* UnknownFieldSet_SkipGroup(const char* ptr, const char* end, - int group_number); - -static const char* UnknownFieldSet_SkipField(const char* ptr, const char* end, - uint32_t tag) { - int field_number = tag >> 3; - int wire_type = tag & 7; - switch (wire_type) { - case kUpb_WireType_Varint: { - uint64_t val; - return decode_varint64(ptr, &val); - } - case kUpb_WireType_64Bit: - if (end - ptr < 8) return NULL; - return ptr + 8; - case kUpb_WireType_32Bit: - if (end - ptr < 4) return NULL; - return ptr + 4; - case kUpb_WireType_Delimited: { - uint64_t size; - ptr = decode_varint64(ptr, &size); - if (!ptr || end - ptr < size) return NULL; - return ptr + size; - } - case kUpb_WireType_StartGroup: 
- return UnknownFieldSet_SkipGroup(ptr, end, field_number); - case kUpb_WireType_EndGroup: - return NULL; - default: - assert(0); - return NULL; - } -} - -static const char* UnknownFieldSet_SkipGroup(const char* ptr, const char* end, - int group_number) { - uint32_t end_tag = (group_number << 3) | kUpb_WireType_EndGroup; - while (true) { - if (ptr == end) return NULL; - uint64_t tag; - ptr = decode_varint64(ptr, &tag); - if (!ptr) return NULL; - if (tag == end_tag) return ptr; - ptr = UnknownFieldSet_SkipField(ptr, end, (uint32_t)tag); - if (!ptr) return NULL; - } - return ptr; +upb_FindUnknownRet upb_FindUnknownRet_ParseError() { + return (upb_FindUnknownRet){.status = kUpb_FindUnknown_ParseError}; } -enum { - kUpb_MessageSet_StartItemTag = (1 << 3) | kUpb_WireType_StartGroup, - kUpb_MessageSet_EndItemTag = (1 << 3) | kUpb_WireType_EndGroup, - kUpb_MessageSet_TypeIdTag = (2 << 3) | kUpb_WireType_Varint, - kUpb_MessageSet_MessageTag = (3 << 3) | kUpb_WireType_Delimited, -}; - upb_FindUnknownRet upb_MiniTable_FindUnknown(const upb_Message* msg, uint32_t field_number) { + const int depth_limit = 100; // TODO: this should be a parameter size_t size; upb_FindUnknownRet ret; const char* ptr = upb_Message_GetUnknown(msg, &size); - if (size == 0) { - ret.status = kUpb_FindUnknown_NotPresent; - ret.ptr = NULL; - ret.len = 0; - return ret; - } - const char* end = ptr + size; - uint64_t uint64_val; + upb_EpsCopyInputStream stream; + upb_EpsCopyInputStream_Init(&stream, &ptr, size, true); - while (ptr < end) { - uint32_t tag = 0; - int field; - int wire_type; + while (!upb_EpsCopyInputStream_IsDone(&stream, &ptr)) { + uint32_t tag; const char* unknown_begin = ptr; - ptr = decode_tag(ptr, &tag); - field = tag >> 3; - wire_type = tag & 7; - switch (wire_type) { - case kUpb_WireType_EndGroup: - ret.status = kUpb_FindUnknown_ParseError; - return ret; - case kUpb_WireType_Varint: - ptr = decode_varint64(ptr, &uint64_val); - if (!ptr) { - ret.status = kUpb_FindUnknown_ParseError; - 
return ret; - } - break; - case kUpb_WireType_32Bit: - ptr += 4; - break; - case kUpb_WireType_64Bit: - ptr += 8; - break; - case kUpb_WireType_Delimited: - // Read size. - ptr = decode_varint64(ptr, &uint64_val); - if (uint64_val >= INT32_MAX || !ptr) { - ret.status = kUpb_FindUnknown_ParseError; - return ret; - } - ptr += uint64_val; - break; - case kUpb_WireType_StartGroup: - // tag >> 3 specifies the group number, recurse and skip - // until we see group end tag. - ptr = UnknownFieldSet_SkipGroup(ptr, end, field_number); - break; - default: - ret.status = kUpb_FindUnknown_ParseError; - return ret; - } - if (field_number == field) { + ptr = upb_WireReader_ReadTag(ptr, &tag); + if (!ptr) return upb_FindUnknownRet_ParseError(); + ptr = upb_WireReader_SkipValue(ptr, tag, depth_limit, &stream); + if (!ptr) return upb_FindUnknownRet_ParseError(); + if (field_number == upb_WireReader_GetFieldNumber(tag)) { ret.status = kUpb_FindUnknown_Ok; ret.ptr = unknown_begin; ret.len = ptr - unknown_begin; + upb_EpsCopyInputStream_ReadStringAliased(&stream, &ret.ptr, ret.len); return ret; } } diff --git a/upb/util/BUILD b/upb/util/BUILD index 2776132f78..4221570a5c 100644 --- a/upb/util/BUILD +++ b/upb/util/BUILD @@ -124,6 +124,7 @@ cc_test( deps = [ ":compare", "//:wire_internal", + "//:wire_types", "@com_google_absl//absl/strings", "@com_google_googletest//:gtest_main", ], diff --git a/upb/wire/decode_internal.h b/upb/wire/decode_internal.h index 03d2a05b2f..14cc518c2e 100644 --- a/upb/wire/decode_internal.h +++ b/upb/wire/decode_internal.h @@ -115,8 +115,8 @@ const char* _upb_Decoder_IsDoneFallback(upb_EpsCopyInputStream* e, const char* ptr, int overrun); UPB_INLINE bool _upb_Decoder_IsDone(upb_Decoder* d, const char** ptr) { - return upb_EpsCopyInputStream_IsDone(&d->input, ptr, - &_upb_Decoder_IsDoneFallback); + return upb_EpsCopyInputStream_IsDoneWithCallback( + &d->input, ptr, &_upb_Decoder_IsDoneFallback); } UPB_INLINE const char* _upb_Decoder_BufferFlipCallback( diff 
--git a/upb/wire/eps_copy_input_stream.c b/upb/wire/eps_copy_input_stream.c new file mode 100644 index 0000000000..ebbe40adf8 --- /dev/null +++ b/upb/wire/eps_copy_input_stream.c @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2009-2021, Google LLC + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Google LLC nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "upb/wire/eps_copy_input_stream.h" + +static const char* _upb_EpsCopyInputStream_NoOpCallback( + upb_EpsCopyInputStream* e, const char* old_end, const char* new_start) { + return new_start; +} + +const char* _upb_EpsCopyInputStream_IsDoneFallbackNoCallback( + upb_EpsCopyInputStream* e, const char* ptr, int overrun) { + return _upb_EpsCopyInputStream_IsDoneFallbackInline( + e, ptr, overrun, _upb_EpsCopyInputStream_NoOpCallback); +} diff --git a/upb/wire/eps_copy_input_stream.h b/upb/wire/eps_copy_input_stream.h index 234a918d4d..98d79d2359 100644 --- a/upb/wire/eps_copy_input_stream.h +++ b/upb/wire/eps_copy_input_stream.h @@ -54,6 +54,7 @@ typedef struct { const char* limit_ptr; // For bounds checks, = end + UPB_MIN(limit, 0) uintptr_t aliasing; int limit; // Submessage limit relative to end + bool error; // To distinguish between EOF and error. char patch[kUpb_EpsCopyInputStream_SlopBytes * 2]; } upb_EpsCopyInputStream; @@ -87,6 +88,7 @@ UPB_INLINE void upb_EpsCopyInputStream_Init(upb_EpsCopyInputStream* e, ret = false; } e->limit_ptr = e->end; + e->error = false; } typedef enum { @@ -122,7 +124,7 @@ UPB_INLINE upb_IsDoneStatus upb_EpsCopyInputStream_IsDoneStatus( // // Postcondition: if the function returns false, there are at least // kUpb_EpsCopyInputStream_SlopBytes of data available to read at *ptr. -UPB_INLINE bool upb_EpsCopyInputStream_IsDone( +UPB_INLINE bool upb_EpsCopyInputStream_IsDoneWithCallback( upb_EpsCopyInputStream* e, const char** ptr, upb_EpsCopyInputStream_IsDoneFallbackFunc* func) { int overrun; @@ -137,6 +139,21 @@ UPB_INLINE bool upb_EpsCopyInputStream_IsDone( } } +const char* _upb_EpsCopyInputStream_IsDoneFallbackNoCallback( + upb_EpsCopyInputStream* e, const char* ptr, int overrun); + +// A simpler version of IsDoneWithCallback() that does not support a buffer flip +// callback. Useful in cases where we do not need to insert custom logic at +// every buffer flip. 
+// +// If this returns true, the user must call upb_EpsCopyInputStream_IsError() +// to distinguish between EOF and error. +UPB_INLINE bool upb_EpsCopyInputStream_IsDone(upb_EpsCopyInputStream* e, + const char** ptr) { + return upb_EpsCopyInputStream_IsDoneWithCallback( + e, ptr, _upb_EpsCopyInputStream_IsDoneFallbackNoCallback); +} + // Returns the total number of bytes that are safe to read from the current // buffer without reading uninitialized or unallocated memory. // @@ -318,6 +335,7 @@ UPB_INLINE const char* _upb_EpsCopyInputStream_IsDoneFallbackInline( } return callback(e, old_end, new_start); } else { + e->error = true; return callback(e, NULL, NULL); } } diff --git a/upb/wire/eps_copy_input_stream_test.cc b/upb/wire/eps_copy_input_stream_test.cc index 3ccb29db7f..398a09fed7 100644 --- a/upb/wire/eps_copy_input_stream_test.cc +++ b/upb/wire/eps_copy_input_stream_test.cc @@ -16,7 +16,7 @@ TEST(EpsCopyInputStreamTest, ZeroSize) { upb_EpsCopyInputStream stream; const char* ptr = NULL; upb_EpsCopyInputStream_Init(&stream, &ptr, 0, false); - EXPECT_TRUE(upb_EpsCopyInputStream_IsDone(&stream, &ptr, NULL)); + EXPECT_TRUE(upb_EpsCopyInputStream_IsDoneWithCallback(&stream, &ptr, NULL)); } // begin:google_only @@ -141,8 +141,8 @@ TEST(EpsCopyInputStreamTest, ZeroSize) { // } // // bool IsAtLimit() { -// return upb_EpsCopyInputStream_IsDone(&eps_, &ptr_, -// &EpsStream::IsDoneFallback); +// return upb_EpsCopyInputStream_IsDoneWithCallback( +// &eps_, &ptr_, &EpsStream::IsDoneFallback); // } // // // Return false on EOF. diff --git a/upb/wire/reader.c b/upb/wire/reader.c new file mode 100644 index 0000000000..1ba0b479b3 --- /dev/null +++ b/upb/wire/reader.c @@ -0,0 +1,66 @@ +/* + * Copyright (c) 2009-2021, Google LLC + * All rights reserved. 
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Google LLC nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "upb/wire/reader.h"
+#include "upb/wire/eps_copy_input_stream.h"
+#include "upb/wire/types.h"
+
+// Must be last.
+#include "upb/port/def.inc"
+
+UPB_NOINLINE _upb_WireReader_ReadLongVarintRet
+_upb_WireReader_ReadLongVarint(const char* ptr, uint64_t val) {
+  _upb_WireReader_ReadLongVarintRet ret = {NULL, 0};  // ptr == NULL: malformed.
+  uint64_t byte;
+  int i;
+  for (i = 1; i < 10; i++) {  // Byte 0 is already folded into `val`.
+    byte = (uint8_t)ptr[i];
+    val += (byte - 1) << (i * 7);  // -1 cancels the previous byte's 0x80 bit.
+    if (!(byte & 0x80)) {
+      ret.ptr = ptr + i + 1;
+      ret.val = val;
+      return ret;
+    }
+  }
+  return ret;  // Ten bytes with no terminator: report failure.
+}
+
+const char* upb_WireReader_SkipGroup(const char* ptr, uint32_t tag,
+                                     int depth_limit,
+                                     upb_EpsCopyInputStream* stream) {
+  if (--depth_limit == 0) return NULL;  // Nesting budget exhausted.
+  uint32_t end_group_tag = (tag & ~7ULL) | kUpb_WireType_EndGroup;
+  while (!upb_EpsCopyInputStream_IsDone(stream, &ptr)) {
+    uint32_t field_tag;  // Distinct name: do not shadow the `tag` parameter.
+    ptr = upb_WireReader_ReadTag(ptr, &field_tag);
+    if (!ptr) return NULL;
+    if (field_tag == end_group_tag) return ptr;
+    ptr = upb_WireReader_SkipValue(ptr, field_tag, depth_limit, stream);
+    if (!ptr) return NULL;
+  }
+  return NULL;  // Input ended before the matching END_GROUP tag: malformed.
+}
diff --git a/upb/wire/reader.h b/upb/wire/reader.h
new file mode 100644
index 0000000000..8cd42f6c3e
--- /dev/null
+++ b/upb/wire/reader.h
@@ -0,0 +1,171 @@
+/*
+ * Copyright (c) 2009-2021, Google LLC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Google LLC nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef UPB_WIRE_READER_H_
+#define UPB_WIRE_READER_H_
+
+#include "upb/wire/eps_copy_input_stream.h"
+#include "upb/wire/types.h"
+
+// Must be last.
+#include "upb/port/def.inc"
+
+// The upb_WireReader interface is suitable for general-purpose parsing of
+// protobuf binary wire format. It is designed to be used along with
+// upb_EpsCopyInputStream for buffering, and all parsing routines in this file
+// assume that at least kUpb_EpsCopyInputStream_SlopBytes worth of data is
+// available to read without any bounds checks.
+
+typedef struct {
+  const char* ptr;  // Past the end of the varint, or NULL if malformed.
+  uint64_t val;     // Decoded value; meaningful only when `ptr` is non-NULL.
+} _upb_WireReader_ReadLongVarintRet;
+
+_upb_WireReader_ReadLongVarintRet _upb_WireReader_ReadLongVarint(
+    const char* ptr, uint64_t val);
+
+static UPB_FORCEINLINE const char* _upb_WireReader_ReadVarint(const char* ptr,
+                                                              uint64_t* val,
+                                                              int maxlen,
+                                                              uint64_t maxval) {
+  uint64_t byte = (uint8_t)*ptr;
+  if (UPB_LIKELY((byte & 0x80) == 0)) {  // Fast path: one-byte varint.
+    *val = byte;
+    return ptr + 1;
+  }
+  const char* start = ptr;
+  _upb_WireReader_ReadLongVarintRet res =
+      _upb_WireReader_ReadLongVarint(ptr, byte);
+  if (!res.ptr || (maxlen < 10 && res.ptr - start > maxlen) ||
+      res.val > maxval) {
+    return NULL;  // Malformed, longer than `maxlen`, or above `maxval`.
+  }
+  *val = res.val;
+  return res.ptr;
+}
+
+// Parses a tag into `tag`, and returns a pointer past the end of the tag, or
+// NULL if there was an error in the tag data.
+//
+// REQUIRES: there must be at least 10 bytes of data available at `ptr`.
+// Bounds checks must be performed before calling this function, preferably
+// by calling upb_EpsCopyInputStream_IsDone().
+static UPB_FORCEINLINE const char* upb_WireReader_ReadTag(const char* ptr,
+                                                          uint32_t* tag) {
+  uint64_t val;
+  ptr = _upb_WireReader_ReadVarint(ptr, &val, 5, UINT32_MAX);
+  if (!ptr) return NULL;
+  *tag = (uint32_t)val;  // Lossless: val <= UINT32_MAX was enforced above.
+  return ptr;
+}
+
+// Given a tag, returns the field number.
+UPB_INLINE uint32_t upb_WireReader_GetFieldNumber(uint32_t tag) {
+  return tag >> 3;
+}
+
+// Given a tag, returns the wire type (the low three bits of the tag).
+UPB_INLINE uint8_t upb_WireReader_GetWireType(uint32_t tag) { return tag & 7; }
+
+UPB_INLINE const char* upb_WireReader_ReadVarint(const char* ptr,
+                                                 uint64_t* val) {
+  return _upb_WireReader_ReadVarint(ptr, val, 10, UINT64_MAX);
+}
+
+// Skips data for a varint, returning a pointer past the end of the varint, or
+// NULL if there was an error in the varint data.
+//
+// REQUIRES: there must be at least 10 bytes of data available at `ptr`.
+// Bounds checks must be performed before calling this function, preferably
+// by calling upb_EpsCopyInputStream_IsDone().
+UPB_INLINE const char* upb_WireReader_SkipVarint(const char* ptr) {
+  uint64_t val;
+  return upb_WireReader_ReadVarint(ptr, &val);
+}
+
+// Reads a varint giving the size of a delimited field into `size`, returning a
+// pointer past the varint, or NULL if the varint is malformed or too large.
+//
+// REQUIRES: there must be at least 10 bytes of data available at `ptr`.
+// Bounds checks must be performed before calling this function, preferably
+// by calling upb_EpsCopyInputStream_IsDone().
+UPB_INLINE const char* upb_WireReader_ReadSize(const char* ptr, int* size) {
+  uint64_t size64;
+  ptr = upb_WireReader_ReadVarint(ptr, &size64);
+  if (!ptr || size64 >= INT32_MAX) return NULL;  // Malformed or too large.
+  *size = size64;  // size64 < INT32_MAX was checked on the line above.
+  return ptr;
+}
+
+// Skips data for a group, returning a pointer past the end of the group, or
+// NULL if there was an error parsing the group. The `tag` argument should be
+// the start group tag that begins the group. The `depth_limit` argument
+// indicates how many levels of recursion the group is allowed to have before
+// reporting a parse error (this limit exists to protect against stack
+// overflow).
+const char* upb_WireReader_SkipGroup(const char* ptr, uint32_t tag,
+                                     int depth_limit,
+                                     upb_EpsCopyInputStream* stream);
+
+// Skips data for a wire value of any type, returning a pointer past the end of
+// the data, or NULL if there was an error parsing the value. The `tag` argument
+// should be the tag that was just parsed. The `depth_limit` argument indicates
+// how many levels of recursion a group is allowed to have before reporting a
+// parse error (this limit exists to protect against stack overflow).
+//
+// REQUIRES: there must be at least 10 bytes of data available at `ptr`.
+// Bounds checks must be performed before calling this function, preferably
+// by calling upb_EpsCopyInputStream_IsDone().
+UPB_INLINE const char* upb_WireReader_SkipValue(
+    const char* ptr, uint32_t tag, int depth_limit,
+    upb_EpsCopyInputStream* stream) {
+  switch (upb_WireReader_GetWireType(tag)) {
+    case kUpb_WireType_Varint:
+      return upb_WireReader_SkipVarint(ptr);
+    case kUpb_WireType_32Bit:
+      return ptr + 4;  // Fixed-width 4-byte payload.
+    case kUpb_WireType_64Bit:
+      return ptr + 8;  // Fixed-width 8-byte payload.
+    case kUpb_WireType_Delimited: {
+      int size;
+      ptr = upb_WireReader_ReadSize(ptr, &size);
+      if (!ptr) return NULL;
+      ptr += size;  // NOTE(review): no check against `stream` here - confirm.
+      return ptr;
+    }
+    case kUpb_WireType_StartGroup:
+      return upb_WireReader_SkipGroup(ptr, tag, depth_limit, stream);
+    case kUpb_WireType_EndGroup:
+      return NULL;  // Should be handled before now.
+    default:
+      return NULL;  // Unknown wire type.
+  }
+}
+
+#include "upb/port/undef.inc"
+
+#endif  // UPB_WIRE_READER_H_