From 175c96565f7ed97c07a78da0698c5e929dc7ef0d Mon Sep 17 00:00:00 2001 From: Jan Tattermusch Date: Tue, 31 Mar 2020 18:55:31 +0200 Subject: [PATCH] bring in new files --- .../src/Google.Protobuf/CodedInputStream.cs | 1179 ++--------------- csharp/src/Google.Protobuf/IBufferMessage.cs | 63 + csharp/src/Google.Protobuf/ParseContext.cs | 322 +++++ .../Google.Protobuf/ParserInternalState.cs | 116 ++ .../src/Google.Protobuf/ParsingPrimitives.cs | 726 ++++++++++ .../ParsingPrimitivesMessages.cs | 231 ++++ .../ParsingPrimitivesWrappers.cs | 352 +++++ .../Google.Protobuf/SegmentedBufferHelper.cs | 283 ++++ 8 files changed, 2196 insertions(+), 1076 deletions(-) create mode 100644 csharp/src/Google.Protobuf/IBufferMessage.cs create mode 100644 csharp/src/Google.Protobuf/ParseContext.cs create mode 100644 csharp/src/Google.Protobuf/ParserInternalState.cs create mode 100644 csharp/src/Google.Protobuf/ParsingPrimitives.cs create mode 100644 csharp/src/Google.Protobuf/ParsingPrimitivesMessages.cs create mode 100644 csharp/src/Google.Protobuf/ParsingPrimitivesWrappers.cs create mode 100644 csharp/src/Google.Protobuf/SegmentedBufferHelper.cs diff --git a/csharp/src/Google.Protobuf/CodedInputStream.cs b/csharp/src/Google.Protobuf/CodedInputStream.cs index 9976f58234..248f7e4a3d 100644 --- a/csharp/src/Google.Protobuf/CodedInputStream.cs +++ b/csharp/src/Google.Protobuf/CodedInputStream.cs @@ -34,6 +34,9 @@ using Google.Protobuf.Collections; using System; using System.Collections.Generic; using System.IO; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using System.Security; namespace Google.Protobuf { @@ -64,17 +67,6 @@ namespace Google.Protobuf /// private readonly byte[] buffer; - /// - /// The index of the buffer at which we need to refill from the stream (if there is one). - /// - private int bufferSize; - - private int bufferSizeAfterLimit = 0; - /// - /// The position within the current buffer (i.e. 
the next byte to read) - /// - private int bufferPos = 0; - /// /// The stream to read further input from, or null if the byte array buffer was provided /// directly on construction, with no further data available. @@ -82,38 +74,15 @@ namespace Google.Protobuf private readonly Stream input; /// - /// The last tag we read. 0 indicates we've read to the end of the stream - /// (or haven't read anything yet). + /// The parser state is kept separately so that other parse implementations can reuse the same + /// parsing primitives. /// - private uint lastTag = 0; - - /// - /// The next tag, used to store the value read by PeekTag. - /// - private uint nextTag = 0; - private bool hasNextTag = false; + private ParserInternalState state; internal const int DefaultRecursionLimit = 100; internal const int DefaultSizeLimit = Int32.MaxValue; internal const int BufferSize = 4096; - /// - /// The total number of bytes read before the current buffer. The - /// total bytes read up to the current position can be computed as - /// totalBytesRetired + bufferPos. - /// - private int totalBytesRetired = 0; - - /// - /// The absolute position of the end of the current message. - /// - private int currentLimit = int.MaxValue; - - private int recursionDepth = 0; - - private readonly int recursionLimit; - private readonly int sizeLimit; - #region Construction // Note that the checks are performed such that we don't end up checking obviously-valid things // like non-null references for arrays we've just created. 
@@ -170,11 +139,15 @@ namespace Google.Protobuf { this.input = input; this.buffer = buffer; - this.bufferPos = bufferPos; - this.bufferSize = bufferSize; - this.sizeLimit = DefaultSizeLimit; - this.recursionLimit = DefaultRecursionLimit; + this.state.bufferPos = bufferPos; + this.state.bufferSize = bufferSize; + this.state.sizeLimit = DefaultSizeLimit; + this.state.recursionLimit = DefaultRecursionLimit; + this.state.segmentedBufferHelper = new SegmentedBufferHelper(this); + this.state.codedInputStream = this; this.leaveOpen = leaveOpen; + + this.state.currentLimit = int.MaxValue; } /// @@ -196,8 +169,8 @@ namespace Google.Protobuf { throw new ArgumentOutOfRangeException("recursionLimit!", "Recursion limit must be positive"); } - this.sizeLimit = sizeLimit; - this.recursionLimit = recursionLimit; + this.state.sizeLimit = sizeLimit; + this.state.recursionLimit = recursionLimit; } #endregion @@ -230,9 +203,9 @@ namespace Google.Protobuf { if (input != null) { - return input.Position - ((bufferSize + bufferSizeAfterLimit) - bufferPos); + return input.Position - ((state.bufferSize + state.bufferSizeAfterLimit) - state.bufferPos); } - return bufferPos; + return state.bufferPos; } } @@ -240,7 +213,7 @@ namespace Google.Protobuf /// Returns the last tag read, or 0 if no tags have been read or we've read beyond /// the end of the stream. /// - internal uint LastTag { get { return lastTag; } } + internal uint LastTag { get { return state.lastTag; } } /// /// Returns the size limit for this stream. @@ -253,7 +226,7 @@ namespace Google.Protobuf /// /// The size limit. /// - public int SizeLimit { get { return sizeLimit; } } + public int SizeLimit { get { return state.sizeLimit; } } /// /// Returns the recursion limit for this stream. This limit is applied whilst reading messages, @@ -265,17 +238,31 @@ namespace Google.Protobuf /// /// The recursion limit for this stream. 
/// - public int RecursionLimit { get { return recursionLimit; } } + public int RecursionLimit { get { return state.recursionLimit; } } /// /// Internal-only property; when set to true, unknown fields will be discarded while parsing. /// - internal bool DiscardUnknownFields { get; set; } + internal bool DiscardUnknownFields + { + get { return state.DiscardUnknownFields; } + set { state.DiscardUnknownFields = value; } + } /// /// Internal-only property; provides extension identifiers to compatible messages while parsing. /// - internal ExtensionRegistry ExtensionRegistry { get; set; } + internal ExtensionRegistry ExtensionRegistry + { + get { return state.ExtensionRegistry; } + set { state.ExtensionRegistry = value; } + } + + internal byte[] InternalBuffer => buffer; + + internal Stream InternalInputStream => input; + + internal ref ParserInternalState InternalState => ref state; /// /// Disposes of this instance, potentially closing any underlying stream. @@ -302,17 +289,7 @@ namespace Google.Protobuf /// tag read was not the one specified internal void CheckReadEndOfStreamTag() { - if (lastTag != 0) - { - throw InvalidProtocolBufferException.MoreDataAvailable(); - } - } - - internal void CheckLastTagWas(uint expectedTag) - { - if (lastTag != expectedTag) { - throw InvalidProtocolBufferException.InvalidEndTag(); - } + ParsingPrimitivesMessages.CheckReadEndOfStreamTag(ref state); } #endregion @@ -325,16 +302,8 @@ namespace Google.Protobuf /// public uint PeekTag() { - if (hasNextTag) - { - return nextTag; - } - - uint savedLast = lastTag; - nextTag = ReadTag(); - hasNextTag = true; - lastTag = savedLast; // Undo the side effect of ReadTag - return nextTag; + var span = new ReadOnlySpan(buffer); + return ParsingPrimitives.PeekTag(ref span, ref state); } /// @@ -348,54 +317,8 @@ namespace Google.Protobuf /// The next field tag, or 0 for end of stream. (0 is never a valid tag.) 
public uint ReadTag() { - if (hasNextTag) - { - lastTag = nextTag; - hasNextTag = false; - return lastTag; - } - - // Optimize for the incredibly common case of having at least two bytes left in the buffer, - // and those two bytes being enough to get the tag. This will be true for fields up to 4095. - if (bufferPos + 2 <= bufferSize) - { - int tmp = buffer[bufferPos++]; - if (tmp < 128) - { - lastTag = (uint)tmp; - } - else - { - int result = tmp & 0x7f; - if ((tmp = buffer[bufferPos++]) < 128) - { - result |= tmp << 7; - lastTag = (uint) result; - } - else - { - // Nope, rewind and go the potentially slow route. - bufferPos -= 2; - lastTag = ReadRawVarint32(); - } - } - } - else - { - if (IsAtEnd) - { - lastTag = 0; - return 0; - } - - lastTag = ReadRawVarint32(); - } - if (WireFormat.GetTagFieldNumber(lastTag) == 0) - { - // If we actually read a tag with a field of 0, that's not a valid tag. - throw InvalidProtocolBufferException.InvalidTag(); - } - return lastTag; + var span = new ReadOnlySpan(buffer); + return ParsingPrimitives.ParseTag(ref span, ref state); } /// @@ -413,32 +336,8 @@ namespace Google.Protobuf /// The last read operation read to the end of the logical stream public void SkipLastField() { - if (lastTag == 0) - { - throw new InvalidOperationException("SkipLastField cannot be called at the end of a stream"); - } - switch (WireFormat.GetTagWireType(lastTag)) - { - case WireFormat.WireType.StartGroup: - SkipGroup(lastTag); - break; - case WireFormat.WireType.EndGroup: - throw new InvalidProtocolBufferException( - "SkipLastField called on an end-group tag, indicating that the corresponding start-group was missing"); - case WireFormat.WireType.Fixed32: - ReadFixed32(); - break; - case WireFormat.WireType.Fixed64: - ReadFixed64(); - break; - case WireFormat.WireType.LengthDelimited: - var length = ReadLength(); - SkipRawBytes(length); - break; - case WireFormat.WireType.Varint: - ReadRawVarint32(); - break; - } + var span = new ReadOnlySpan(buffer); 
+ ParsingPrimitivesMessages.SkipLastField(ref span, ref state); } /// @@ -446,37 +345,8 @@ namespace Google.Protobuf /// internal void SkipGroup(uint startGroupTag) { - // Note: Currently we expect this to be the way that groups are read. We could put the recursion - // depth changes into the ReadTag method instead, potentially... - recursionDepth++; - if (recursionDepth >= recursionLimit) - { - throw InvalidProtocolBufferException.RecursionLimitExceeded(); - } - uint tag; - while (true) - { - tag = ReadTag(); - if (tag == 0) - { - throw InvalidProtocolBufferException.TruncatedMessage(); - } - // Can't call SkipLastField for this case- that would throw. - if (WireFormat.GetTagWireType(tag) == WireFormat.WireType.EndGroup) - { - break; - } - // This recursion will allow us to handle nested groups. - SkipLastField(); - } - int startField = WireFormat.GetTagFieldNumber(startGroupTag); - int endField = WireFormat.GetTagFieldNumber(tag); - if (startField != endField) - { - throw new InvalidProtocolBufferException( - $"Mismatched end-group tag. 
Started with field {startField}; ended with field {endField}"); - } - recursionDepth--; + var span = new ReadOnlySpan(buffer); + ParsingPrimitivesMessages.SkipGroup(ref span, ref state, startGroupTag); } /// @@ -484,33 +354,8 @@ namespace Google.Protobuf /// public double ReadDouble() { - if (bufferPos + 8 <= bufferSize) - { - if (BitConverter.IsLittleEndian) - { - var result = BitConverter.ToDouble(buffer, bufferPos); - bufferPos += 8; - return result; - } - else - { - var bytes = new byte[8]; - bytes[0] = buffer[bufferPos + 7]; - bytes[1] = buffer[bufferPos + 6]; - bytes[2] = buffer[bufferPos + 5]; - bytes[3] = buffer[bufferPos + 4]; - bytes[4] = buffer[bufferPos + 3]; - bytes[5] = buffer[bufferPos + 2]; - bytes[6] = buffer[bufferPos + 1]; - bytes[7] = buffer[bufferPos]; - bufferPos += 8; - return BitConverter.ToDouble(bytes, 0); - } - } - else - { - return BitConverter.Int64BitsToDouble((long)ReadRawLittleEndian64()); - } + var span = new ReadOnlySpan(buffer); + return ParsingPrimitives.ParseDouble(ref span, ref state); } /// @@ -518,21 +363,8 @@ namespace Google.Protobuf /// public float ReadFloat() { - if (BitConverter.IsLittleEndian && 4 <= bufferSize - bufferPos) - { - float ret = BitConverter.ToSingle(buffer, bufferPos); - bufferPos += 4; - return ret; - } - else - { - byte[] rawBytes = ReadRawBytes(4); - if (!BitConverter.IsLittleEndian) - { - ByteArray.Reverse(rawBytes); - } - return BitConverter.ToSingle(rawBytes, 0); - } + var span = new ReadOnlySpan(buffer); + return ParsingPrimitives.ParseFloat(ref span, ref state); } /// @@ -589,21 +421,8 @@ namespace Google.Protobuf public string ReadString() { int length = ReadLength(); - // No need to read any data for an empty string. - if (length == 0) - { - return ""; - } - if (length <= bufferSize - bufferPos && length > 0) - { - // Fast path: We already have the bytes in a contiguous buffer, so - // just copy directly from it. 
- String result = CodedOutputStream.Utf8Encoding.GetString(buffer, bufferPos, length); - bufferPos += length; - return result; - } - // Slow path: Build a byte array first then copy it. - return CodedOutputStream.Utf8Encoding.GetString(ReadRawBytes(length), 0, length); + var span = new ReadOnlySpan(buffer); + return ParsingPrimitives.ReadRawString(ref span, ref state, length); } /// @@ -611,22 +430,17 @@ namespace Google.Protobuf /// public void ReadMessage(IMessage builder) { - int length = ReadLength(); - if (recursionDepth >= recursionLimit) + var span = new ReadOnlySpan(buffer); + var ctx = new CodedInputReader(ref span, ref state); + try { - throw InvalidProtocolBufferException.RecursionLimitExceeded(); + ParsingPrimitivesMessages.ReadMessage(ref ctx, builder); } - int oldLimit = PushLimit(length); - ++recursionDepth; - builder.MergeFrom(this); - CheckReadEndOfStreamTag(); - // Check that we've read exactly as much data as expected. - if (!ReachedLimit) + finally { - throw InvalidProtocolBufferException.TruncatedMessage(); + // store the state + state = ctx.state; } - --recursionDepth; - PopLimit(oldLimit); } /// @@ -634,33 +448,15 @@ namespace Google.Protobuf /// public void ReadGroup(IMessage builder) { - if (recursionDepth >= recursionLimit) + var ctx = new ParseContext(this); + try { - throw InvalidProtocolBufferException.RecursionLimitExceeded(); + ParsingPrimitivesMessages.ReadGroup(ref ctx, builder); } - ++recursionDepth; - - uint tag = lastTag; - int fieldNumber = WireFormat.GetTagFieldNumber(tag); - - builder.MergeFrom(this); - CheckLastTagWas(WireFormat.MakeTag(fieldNumber, WireFormat.WireType.EndGroup)); - --recursionDepth; - } - - /// - /// Reads an embedded group unknown field from the stream. 
- /// - internal void ReadGroup(int fieldNumber, UnknownFieldSet set) - { - if (recursionDepth >= recursionLimit) + finally { - throw InvalidProtocolBufferException.RecursionLimitExceeded(); + ctx.CopyStateTo(this); } - ++recursionDepth; - set.MergeGroupFrom(this); - CheckLastTagWas(WireFormat.MakeTag(fieldNumber, WireFormat.WireType.EndGroup)); - --recursionDepth; } /// @@ -669,12 +465,12 @@ namespace Google.Protobuf public ByteString ReadBytes() { int length = ReadLength(); - if (length <= bufferSize - bufferPos && length > 0) + if (length <= state.bufferSize - state.bufferPos && length > 0) { // Fast path: We already have the bytes in a contiguous buffer, so // just copy directly from it. - ByteString result = ByteString.CopyFrom(buffer, bufferPos, length); - bufferPos += length; + ByteString result = ByteString.CopyFrom(buffer, state.bufferPos, length); + state.bufferPos += length; return result; } else @@ -722,7 +518,7 @@ namespace Google.Protobuf /// public int ReadSInt32() { - return DecodeZigZag32(ReadRawVarint32()); + return ParsingPrimitives.DecodeZigZag32(ReadRawVarint32()); } /// @@ -730,7 +526,7 @@ namespace Google.Protobuf /// public long ReadSInt64() { - return DecodeZigZag64(ReadRawVarint64()); + return ParsingPrimitives.DecodeZigZag64(ReadRawVarint64()); } /// @@ -742,7 +538,8 @@ namespace Google.Protobuf /// public int ReadLength() { - return (int) ReadRawVarint32(); + var span = new ReadOnlySpan(buffer); + return ParsingPrimitives.ParseLength(ref span, ref state); } /// @@ -752,323 +549,14 @@ namespace Google.Protobuf /// public bool MaybeConsumeTag(uint tag) { - if (PeekTag() == tag) - { - hasNextTag = false; - return true; - } - return false; - } - - internal static float? ReadFloatWrapperLittleEndian(CodedInputStream input) - { - // length:1 + tag:1 + value:4 = 6 bytes - if (input.bufferPos + 6 <= input.bufferSize) - { - // The entire wrapper message is already contained in `buffer`. 
- int length = input.buffer[input.bufferPos]; - if (length == 0) - { - input.bufferPos++; - return 0F; - } - // tag:1 + value:4 = length of 5 bytes - // field=1, type=32-bit = tag of 13 - if (length != 5 || input.buffer[input.bufferPos + 1] != 13) - { - return ReadFloatWrapperSlow(input); - } - var result = BitConverter.ToSingle(input.buffer, input.bufferPos + 2); - input.bufferPos += 6; - return result; - } - else - { - return ReadFloatWrapperSlow(input); - } - } - - internal static float? ReadFloatWrapperSlow(CodedInputStream input) - { - int length = input.ReadLength(); - if (length == 0) - { - return 0F; - } - int finalBufferPos = input.totalBytesRetired + input.bufferPos + length; - float result = 0F; - do - { - // field=1, type=32-bit = tag of 13 - if (input.ReadTag() == 13) - { - result = input.ReadFloat(); - } - else - { - input.SkipLastField(); - } - } - while (input.totalBytesRetired + input.bufferPos < finalBufferPos); - return result; - } - - internal static double? ReadDoubleWrapperLittleEndian(CodedInputStream input) - { - // length:1 + tag:1 + value:8 = 10 bytes - if (input.bufferPos + 10 <= input.bufferSize) - { - // The entire wrapper message is already contained in `buffer`. - int length = input.buffer[input.bufferPos]; - if (length == 0) - { - input.bufferPos++; - return 0D; - } - // tag:1 + value:8 = length of 9 bytes - // field=1, type=64-bit = tag of 9 - if (length != 9 || input.buffer[input.bufferPos + 1] != 9) - { - return ReadDoubleWrapperSlow(input); - } - var result = BitConverter.ToDouble(input.buffer, input.bufferPos + 2); - input.bufferPos += 10; - return result; - } - else - { - return ReadDoubleWrapperSlow(input); - } - } - - internal static double? 
ReadDoubleWrapperSlow(CodedInputStream input) - { - int length = input.ReadLength(); - if (length == 0) - { - return 0D; - } - int finalBufferPos = input.totalBytesRetired + input.bufferPos + length; - double result = 0D; - do - { - // field=1, type=64-bit = tag of 9 - if (input.ReadTag() == 9) - { - result = input.ReadDouble(); - } - else - { - input.SkipLastField(); - } - } - while (input.totalBytesRetired + input.bufferPos < finalBufferPos); - return result; - } - - internal static bool? ReadBoolWrapper(CodedInputStream input) - { - return ReadUInt64Wrapper(input) != 0; - } - - internal static uint? ReadUInt32Wrapper(CodedInputStream input) - { - // length:1 + tag:1 + value:5(varint32-max) = 7 bytes - if (input.bufferPos + 7 <= input.bufferSize) - { - // The entire wrapper message is already contained in `buffer`. - int pos0 = input.bufferPos; - int length = input.buffer[input.bufferPos++]; - if (length == 0) - { - return 0; - } - // Length will always fit in a single byte. - if (length >= 128) - { - input.bufferPos = pos0; - return ReadUInt32WrapperSlow(input); - } - int finalBufferPos = input.bufferPos + length; - // field=1, type=varint = tag of 8 - if (input.buffer[input.bufferPos++] != 8) - { - input.bufferPos = pos0; - return ReadUInt32WrapperSlow(input); - } - var result = input.ReadUInt32(); - // Verify this message only contained a single field. - if (input.bufferPos != finalBufferPos) - { - input.bufferPos = pos0; - return ReadUInt32WrapperSlow(input); - } - return result; - } - else - { - return ReadUInt32WrapperSlow(input); - } - } - - private static uint? 
ReadUInt32WrapperSlow(CodedInputStream input) - { - int length = input.ReadLength(); - if (length == 0) - { - return 0; - } - int finalBufferPos = input.totalBytesRetired + input.bufferPos + length; - uint result = 0; - do - { - // field=1, type=varint = tag of 8 - if (input.ReadTag() == 8) - { - result = input.ReadUInt32(); - } - else - { - input.SkipLastField(); - } - } - while (input.totalBytesRetired + input.bufferPos < finalBufferPos); - return result; - } - - internal static int? ReadInt32Wrapper(CodedInputStream input) - { - return (int?)ReadUInt32Wrapper(input); - } - - internal static ulong? ReadUInt64Wrapper(CodedInputStream input) - { - // field=1, type=varint = tag of 8 - const int expectedTag = 8; - // length:1 + tag:1 + value:10(varint64-max) = 12 bytes - if (input.bufferPos + 12 <= input.bufferSize) - { - // The entire wrapper message is already contained in `buffer`. - int pos0 = input.bufferPos; - int length = input.buffer[input.bufferPos++]; - if (length == 0) - { - return 0L; - } - // Length will always fit in a single byte. - if (length >= 128) - { - input.bufferPos = pos0; - return ReadUInt64WrapperSlow(input); - } - int finalBufferPos = input.bufferPos + length; - if (input.buffer[input.bufferPos++] != expectedTag) - { - input.bufferPos = pos0; - return ReadUInt64WrapperSlow(input); - } - var result = input.ReadUInt64(); - // Verify this message only contained a single field. - if (input.bufferPos != finalBufferPos) - { - input.bufferPos = pos0; - return ReadUInt64WrapperSlow(input); - } - return result; - } - else - { - return ReadUInt64WrapperSlow(input); - } - } - - internal static ulong? 
ReadUInt64WrapperSlow(CodedInputStream input) - { - // field=1, type=varint = tag of 8 - const int expectedTag = 8; - int length = input.ReadLength(); - if (length == 0) - { - return 0L; - } - int finalBufferPos = input.totalBytesRetired + input.bufferPos + length; - ulong result = 0L; - do - { - if (input.ReadTag() == expectedTag) - { - result = input.ReadUInt64(); - } - else - { - input.SkipLastField(); - } - } - while (input.totalBytesRetired + input.bufferPos < finalBufferPos); - return result; - } - - internal static long? ReadInt64Wrapper(CodedInputStream input) - { - return (long?)ReadUInt64Wrapper(input); + var span = new ReadOnlySpan(buffer); + return ParsingPrimitives.MaybeConsumeTag(ref span, ref state, tag); } #endregion #region Underlying reading primitives - /// - /// Same code as ReadRawVarint32, but read each byte individually, checking for - /// buffer overflow. - /// - private uint SlowReadRawVarint32() - { - int tmp = ReadRawByte(); - if (tmp < 128) - { - return (uint) tmp; - } - int result = tmp & 0x7f; - if ((tmp = ReadRawByte()) < 128) - { - result |= tmp << 7; - } - else - { - result |= (tmp & 0x7f) << 7; - if ((tmp = ReadRawByte()) < 128) - { - result |= tmp << 14; - } - else - { - result |= (tmp & 0x7f) << 14; - if ((tmp = ReadRawByte()) < 128) - { - result |= tmp << 21; - } - else - { - result |= (tmp & 0x7f) << 21; - result |= (tmp = ReadRawByte()) << 28; - if (tmp >= 128) - { - // Discard upper 32 bits. - for (int i = 0; i < 5; i++) - { - if (ReadRawByte() < 128) - { - return (uint) result; - } - } - throw InvalidProtocolBufferException.MalformedVarint(); - } - } - } - } - return (uint) result; - } - /// /// Reads a raw Varint from the stream. If larger than 32 bits, discard the upper bits. /// This method is optimised for the case where we've got lots of data in the buffer. 
@@ -1077,58 +565,8 @@ namespace Google.Protobuf /// internal uint ReadRawVarint32() { - if (bufferPos + 5 > bufferSize) - { - return SlowReadRawVarint32(); - } - - int tmp = buffer[bufferPos++]; - if (tmp < 128) - { - return (uint) tmp; - } - int result = tmp & 0x7f; - if ((tmp = buffer[bufferPos++]) < 128) - { - result |= tmp << 7; - } - else - { - result |= (tmp & 0x7f) << 7; - if ((tmp = buffer[bufferPos++]) < 128) - { - result |= tmp << 14; - } - else - { - result |= (tmp & 0x7f) << 14; - if ((tmp = buffer[bufferPos++]) < 128) - { - result |= tmp << 21; - } - else - { - result |= (tmp & 0x7f) << 21; - result |= (tmp = buffer[bufferPos++]) << 28; - if (tmp >= 128) - { - // Discard upper 32 bits. - // Note that this has to use ReadRawByte() as we only ensure we've - // got at least 5 bytes at the start of the method. This lets us - // use the fast path in more cases, and we rarely hit this section of code. - for (int i = 0; i < 5; i++) - { - if (ReadRawByte() < 128) - { - return (uint) result; - } - } - throw InvalidProtocolBufferException.MalformedVarint(); - } - } - } - } - return (uint) result; + var span = new ReadOnlySpan(buffer); + return ParsingPrimitives.ParseRawVarint32(ref span, ref state); } /// @@ -1142,35 +580,7 @@ namespace Google.Protobuf /// internal static uint ReadRawVarint32(Stream input) { - int result = 0; - int offset = 0; - for (; offset < 32; offset += 7) - { - int b = input.ReadByte(); - if (b == -1) - { - throw InvalidProtocolBufferException.TruncatedMessage(); - } - result |= (b & 0x7f) << offset; - if ((b & 0x80) == 0) - { - return (uint) result; - } - } - // Keep reading up to 64 bits. 
- for (; offset < 64; offset += 7) - { - int b = input.ReadByte(); - if (b == -1) - { - throw InvalidProtocolBufferException.TruncatedMessage(); - } - if ((b & 0x80) == 0) - { - return (uint) result; - } - } - throw InvalidProtocolBufferException.MalformedVarint(); + return ParsingPrimitives.ReadRawVarint32(input); } /// @@ -1178,44 +588,8 @@ namespace Google.Protobuf /// internal ulong ReadRawVarint64() { - if (bufferPos + 10 <= bufferSize) - { - ulong result = buffer[bufferPos++]; - if (result < 128) - { - return result; - } - result &= 0x7f; - int shift = 7; - do - { - byte b = buffer[bufferPos++]; - result |= (ulong)(b & 0x7F) << shift; - if (b < 0x80) - { - return result; - } - shift += 7; - } - while (shift < 64); - } - else - { - int shift = 0; - ulong result = 0; - do - { - byte b = ReadRawByte(); - result |= (ulong)(b & 0x7F) << shift; - if (b < 0x80) - { - return result; - } - shift += 7; - } - while (shift < 64); - } - throw InvalidProtocolBufferException.MalformedVarint(); + var span = new ReadOnlySpan(buffer); + return ParsingPrimitives.ParseRawVarint64(ref span, ref state); } /// @@ -1223,32 +597,8 @@ namespace Google.Protobuf /// internal uint ReadRawLittleEndian32() { - if (bufferPos + 4 <= bufferSize) - { - if (BitConverter.IsLittleEndian) - { - var result = BitConverter.ToUInt32(buffer, bufferPos); - bufferPos += 4; - return result; - } - else - { - uint b1 = buffer[bufferPos]; - uint b2 = buffer[bufferPos + 1]; - uint b3 = buffer[bufferPos + 2]; - uint b4 = buffer[bufferPos + 3]; - bufferPos += 4; - return b1 | (b2 << 8) | (b3 << 16) | (b4 << 24); - } - } - else - { - uint b1 = ReadRawByte(); - uint b2 = ReadRawByte(); - uint b3 = ReadRawByte(); - uint b4 = ReadRawByte(); - return b1 | (b2 << 8) | (b3 << 16) | (b4 << 24); - } + var span = new ReadOnlySpan(buffer); + return ParsingPrimitives.ParseRawLittleEndian32(ref span, ref state); } /// @@ -1256,70 +606,8 @@ namespace Google.Protobuf /// internal ulong ReadRawLittleEndian64() { - if 
(bufferPos + 8 <= bufferSize) - { - if (BitConverter.IsLittleEndian) - { - var result = BitConverter.ToUInt64(buffer, bufferPos); - bufferPos += 8; - return result; - } - else - { - ulong b1 = buffer[bufferPos]; - ulong b2 = buffer[bufferPos + 1]; - ulong b3 = buffer[bufferPos + 2]; - ulong b4 = buffer[bufferPos + 3]; - ulong b5 = buffer[bufferPos + 4]; - ulong b6 = buffer[bufferPos + 5]; - ulong b7 = buffer[bufferPos + 6]; - ulong b8 = buffer[bufferPos + 7]; - bufferPos += 8; - return b1 | (b2 << 8) | (b3 << 16) | (b4 << 24) - | (b5 << 32) | (b6 << 40) | (b7 << 48) | (b8 << 56); - } - } - else - { - ulong b1 = ReadRawByte(); - ulong b2 = ReadRawByte(); - ulong b3 = ReadRawByte(); - ulong b4 = ReadRawByte(); - ulong b5 = ReadRawByte(); - ulong b6 = ReadRawByte(); - ulong b7 = ReadRawByte(); - ulong b8 = ReadRawByte(); - return b1 | (b2 << 8) | (b3 << 16) | (b4 << 24) - | (b5 << 32) | (b6 << 40) | (b7 << 48) | (b8 << 56); - } - } - - /// - /// Decode a 32-bit value with ZigZag encoding. - /// - /// - /// ZigZag encodes signed integers into values that can be efficiently - /// encoded with varint. (Otherwise, negative values must be - /// sign-extended to 64 bits to be varint encoded, thus always taking - /// 10 bytes on the wire.) - /// - internal static int DecodeZigZag32(uint n) - { - return (int)(n >> 1) ^ -(int)(n & 1); - } - - /// - /// Decode a 32-bit value with ZigZag encoding. - /// - /// - /// ZigZag encodes signed integers into values that can be efficiently - /// encoded with varint. (Otherwise, negative values must be - /// sign-extended to 64 bits to be varint encoded, thus always taking - /// 10 bytes on the wire.) - /// - internal static long DecodeZigZag64(ulong n) - { - return (long)(n >> 1) ^ -(long)(n & 1); + var span = new ReadOnlySpan(buffer); + return ParsingPrimitives.ParseRawLittleEndian64(ref span, ref state); } #endregion @@ -1333,37 +621,7 @@ namespace Google.Protobuf /// The old limit. 
internal int PushLimit(int byteLimit) { - if (byteLimit < 0) - { - throw InvalidProtocolBufferException.NegativeSize(); - } - byteLimit += totalBytesRetired + bufferPos; - int oldLimit = currentLimit; - if (byteLimit > oldLimit) - { - throw InvalidProtocolBufferException.TruncatedMessage(); - } - currentLimit = byteLimit; - - RecomputeBufferSizeAfterLimit(); - - return oldLimit; - } - - private void RecomputeBufferSizeAfterLimit() - { - bufferSize += bufferSizeAfterLimit; - int bufferEnd = totalBytesRetired + bufferSize; - if (bufferEnd > currentLimit) - { - // Limit is in current buffer. - bufferSizeAfterLimit = bufferEnd - currentLimit; - bufferSize -= bufferSizeAfterLimit; - } - else - { - bufferSizeAfterLimit = 0; - } + return SegmentedBufferHelper.PushLimit(ref state, byteLimit); } /// @@ -1371,8 +629,7 @@ namespace Google.Protobuf /// internal void PopLimit(int oldLimit) { - currentLimit = oldLimit; - RecomputeBufferSizeAfterLimit(); + SegmentedBufferHelper.PopLimit(ref state, oldLimit); } /// @@ -1383,12 +640,7 @@ namespace Google.Protobuf { get { - if (currentLimit == int.MaxValue) - { - return false; - } - int currentAbsolutePosition = totalBytesRetired + bufferPos; - return currentAbsolutePosition >= currentLimit; + return SegmentedBufferHelper.IsReachedLimit(ref state); } } @@ -1399,7 +651,11 @@ namespace Google.Protobuf /// public bool IsAtEnd { - get { return bufferPos == bufferSize && !RefillBuffer(false); } + get + { + var span = new ReadOnlySpan(buffer); + return SegmentedBufferHelper.IsAtEnd(ref span, ref state); + } } /// @@ -1413,69 +669,8 @@ namespace Google.Protobuf /// private bool RefillBuffer(bool mustSucceed) { - if (bufferPos < bufferSize) - { - throw new InvalidOperationException("RefillBuffer() called when buffer wasn't empty."); - } - - if (totalBytesRetired + bufferSize == currentLimit) - { - // Oops, we hit a limit. 
- if (mustSucceed) - { - throw InvalidProtocolBufferException.TruncatedMessage(); - } - else - { - return false; - } - } - - totalBytesRetired += bufferSize; - - bufferPos = 0; - bufferSize = (input == null) ? 0 : input.Read(buffer, 0, buffer.Length); - if (bufferSize < 0) - { - throw new InvalidOperationException("Stream.Read returned a negative count"); - } - if (bufferSize == 0) - { - if (mustSucceed) - { - throw InvalidProtocolBufferException.TruncatedMessage(); - } - else - { - return false; - } - } - else - { - RecomputeBufferSizeAfterLimit(); - int totalBytesRead = - totalBytesRetired + bufferSize + bufferSizeAfterLimit; - if (totalBytesRead < 0 || totalBytesRead > sizeLimit) - { - throw InvalidProtocolBufferException.SizeLimitExceeded(); - } - return true; - } - } - - /// - /// Read one byte from the input. - /// - /// - /// the end of the stream or the current limit was reached - /// - internal byte ReadRawByte() - { - if (bufferPos == bufferSize) - { - RefillBuffer(true); - } - return buffer[bufferPos++]; + var span = new ReadOnlySpan(buffer); + return state.segmentedBufferHelper.RefillBuffer(ref span, ref state, mustSucceed); } /// @@ -1486,193 +681,25 @@ namespace Google.Protobuf /// internal byte[] ReadRawBytes(int size) { - if (size < 0) - { - throw InvalidProtocolBufferException.NegativeSize(); - } - - if (totalBytesRetired + bufferPos + size > currentLimit) - { - // Read to the end of the stream (up to the current limit) anyway. - SkipRawBytes(currentLimit - totalBytesRetired - bufferPos); - // Then fail. - throw InvalidProtocolBufferException.TruncatedMessage(); - } - - if (size <= bufferSize - bufferPos) - { - // We have all the bytes we need already. - byte[] bytes = new byte[size]; - ByteArray.Copy(buffer, bufferPos, bytes, 0, size); - bufferPos += size; - return bytes; - } - else if (size < buffer.Length) - { - // Reading more bytes than are in the buffer, but not an excessive number - // of bytes. 
We can safely allocate the resulting array ahead of time. - - // First copy what we have. - byte[] bytes = new byte[size]; - int pos = bufferSize - bufferPos; - ByteArray.Copy(buffer, bufferPos, bytes, 0, pos); - bufferPos = bufferSize; - - // We want to use RefillBuffer() and then copy from the buffer into our - // byte array rather than reading directly into our byte array because - // the input may be unbuffered. - RefillBuffer(true); - - while (size - pos > bufferSize) - { - Buffer.BlockCopy(buffer, 0, bytes, pos, bufferSize); - pos += bufferSize; - bufferPos = bufferSize; - RefillBuffer(true); - } - - ByteArray.Copy(buffer, 0, bytes, pos, size - pos); - bufferPos = size - pos; - - return bytes; - } - else - { - // The size is very large. For security reasons, we can't allocate the - // entire byte array yet. The size comes directly from the input, so a - // maliciously-crafted message could provide a bogus very large size in - // order to trick the app into allocating a lot of memory. We avoid this - // by allocating and reading only a small chunk at a time, so that the - // malicious message must actually *be* extremely large to cause - // problems. Meanwhile, we limit the allowed size of a message elsewhere. - - // Remember the buffer markers since we'll have to copy the bytes out of - // it later. - int originalBufferPos = bufferPos; - int originalBufferSize = bufferSize; - - // Mark the current buffer consumed. - totalBytesRetired += bufferSize; - bufferPos = 0; - bufferSize = 0; - - // Read all the rest of the bytes we need. - int sizeLeft = size - (originalBufferSize - originalBufferPos); - List chunks = new List(); - - while (sizeLeft > 0) - { - byte[] chunk = new byte[Math.Min(sizeLeft, buffer.Length)]; - int pos = 0; - while (pos < chunk.Length) - { - int n = (input == null) ? 
-1 : input.Read(chunk, pos, chunk.Length - pos); - if (n <= 0) - { - throw InvalidProtocolBufferException.TruncatedMessage(); - } - totalBytesRetired += n; - pos += n; - } - sizeLeft -= chunk.Length; - chunks.Add(chunk); - } - - // OK, got everything. Now concatenate it all into one buffer. - byte[] bytes = new byte[size]; - - // Start by copying the leftover bytes from this.buffer. - int newPos = originalBufferSize - originalBufferPos; - ByteArray.Copy(buffer, originalBufferPos, bytes, 0, newPos); - - // And now all the chunks. - foreach (byte[] chunk in chunks) - { - Buffer.BlockCopy(chunk, 0, bytes, newPos, chunk.Length); - newPos += chunk.Length; - } - - // Done. - return bytes; - } - } - - /// - /// Reads and discards bytes. - /// - /// the end of the stream - /// or the current limit was reached - private void SkipRawBytes(int size) - { - if (size < 0) - { - throw InvalidProtocolBufferException.NegativeSize(); - } - - if (totalBytesRetired + bufferPos + size > currentLimit) - { - // Read to the end of the stream anyway. - SkipRawBytes(currentLimit - totalBytesRetired - bufferPos); - // Then fail. - throw InvalidProtocolBufferException.TruncatedMessage(); - } - - if (size <= bufferSize - bufferPos) - { - // We have all the bytes we need already. - bufferPos += size; - } - else - { - // Skipping more bytes than are in the buffer. First skip what we have. - int pos = bufferSize - bufferPos; - - // ROK 5/7/2013 Issue #54: should retire all bytes in buffer (bufferSize) - // totalBytesRetired += pos; - totalBytesRetired += bufferSize; - - bufferPos = 0; - bufferSize = 0; - - // Then skip directly from the InputStream for the rest. 
- if (pos < size) - { - if (input == null) - { - throw InvalidProtocolBufferException.TruncatedMessage(); - } - SkipImpl(size - pos); - totalBytesRetired += size - pos; - } - } + var span = new ReadOnlySpan(buffer); + return ParsingPrimitives.ReadRawBytes(ref span, ref state, size); } /// - /// Abstraction of skipping to cope with streams which can't really skip. + /// Reads a top-level message or a nested message after the limits for this message have been pushed. + /// (parser will proceed until the end of the current limit) + /// NOTE: this method needs to be public because it's invoked by the generated code - e.g. msg.MergeFrom(CodedInputStream input) method /// - private void SkipImpl(int amountToSkip) + public void ReadRawMessage(IMessage message) { - if (input.CanSeek) + var ctx = new ParseContext(this); + try { - long previousPosition = input.Position; - input.Position += amountToSkip; - if (input.Position != previousPosition + amountToSkip) - { - throw InvalidProtocolBufferException.TruncatedMessage(); - } + ParsingPrimitivesMessages.ReadRawMessage(ref ctx, message); } - else + finally { - byte[] skipBuffer = new byte[Math.Min(1024, amountToSkip)]; - while (amountToSkip > 0) - { - int bytesRead = input.Read(skipBuffer, 0, Math.Min(skipBuffer.Length, amountToSkip)); - if (bytesRead <= 0) - { - throw InvalidProtocolBufferException.TruncatedMessage(); - } - amountToSkip -= bytesRead; - } + ctx.CopyStateTo(this); } } #endregion diff --git a/csharp/src/Google.Protobuf/IBufferMessage.cs b/csharp/src/Google.Protobuf/IBufferMessage.cs new file mode 100644 index 0000000000..ddd49b1bb4 --- /dev/null +++ b/csharp/src/Google.Protobuf/IBufferMessage.cs @@ -0,0 +1,63 @@ +#region Copyright notice and license +// Protocol Buffers - Google's data interchange format +// Copyright 2008 Google Inc. All rights reserved. 
+// https://developers.google.com/protocol-buffers/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +#endregion + +namespace Google.Protobuf +{ +#if GOOGLE_PROTOBUF_SUPPORT_SYSTEM_MEMORY + /// + /// Interface for a Protocol Buffers message, supporting + /// and + /// serialization operations. + /// + public interface IBufferMessage : IMessage + { + /// + /// Merges the data from the specified with the current message. + /// + /// See the user guide for precise merge semantics. + /// to read data from. Must not be null. 
+ void MergeFrom(ref CodedInputReader input); + + /// + /// Internal implementation of merging data from given parse context into this message. + /// Users should never invoke this method directly. + /// + void MergeFrom_Internal(ref ParseContext ctx); + + /// + /// Writes the data to the given . + /// + /// to write the data to. Must not be null. + void WriteTo(ref CodedOutputWriter output); + } +#endif +} diff --git a/csharp/src/Google.Protobuf/ParseContext.cs b/csharp/src/Google.Protobuf/ParseContext.cs new file mode 100644 index 0000000000..4b0b76d453 --- /dev/null +++ b/csharp/src/Google.Protobuf/ParseContext.cs @@ -0,0 +1,322 @@ +#region Copyright notice and license +// Protocol Buffers - Google's data interchange format +// Copyright 2008 Google Inc. All rights reserved. +// https://developers.google.com/protocol-buffers/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +#endregion + +using System; +using System.Buffers; +using System.Buffers.Binary; +using System.Collections.Generic; +using System.IO; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using System.Security; +using System.Text; +using Google.Protobuf.Collections; + +namespace Google.Protobuf +{ + /// + /// An opaque struct that represents the current parsing state and is passed along + /// as the parsing proceeds. + /// All the public methods are intended to be invoked only by the generated code, + /// users should never invoke them directly. + /// + [SecuritySafeCritical] + public ref struct ParseContext + { + internal const int DefaultRecursionLimit = 100; + internal const int DefaultSizeLimit = Int32.MaxValue; + + internal ReadOnlySpan buffer; + internal ParserInternalState state; + + internal ParseContext(ref ReadOnlySpan buffer, ref ParserInternalState state) + { + this.buffer = buffer; + this.state = state; + } + + /// + /// Creates a ParseContext instance from CodedInputStream. + /// WARNING: internally this copies the CodedInputStream's state, so after done with the ParseContext, + /// the CodedInputStream's state needs to be updated. 
+ /// + internal ParseContext(CodedInputStream input) + { + this.buffer = new ReadOnlySpan(input.InternalBuffer); + // TODO: ideally we would use a reference to the original state, but that doesn't seem possible + this.state = input.InternalState; // creates copy of the state + } + + internal ParseContext(ReadOnlySequence input) : this(input, DefaultRecursionLimit) + { + } + + internal ParseContext(ReadOnlySequence input, int recursionLimit) + { + this.buffer = default; + this.state = default; + this.state.lastTag = 0; + this.state.recursionDepth = 0; + this.state.sizeLimit = DefaultSizeLimit; + this.state.recursionLimit = recursionLimit; + this.state.currentLimit = int.MaxValue; + this.state.segmentedBufferHelper = new SegmentedBufferHelper(input, out this.buffer); + this.state.bufferPos = 0; + this.state.bufferSize = this.buffer.Length; + this.state.codedInputStream = null; + + this.state.DiscardUnknownFields = false; + this.state.ExtensionRegistry = null; + } + + /// + /// Returns the last tag read, or 0 if no tags have been read or we've read beyond + /// the end of the input. + /// + internal uint LastTag { get { return state.lastTag; } } + + /// + /// Internal-only property; when set to true, unknown fields will be discarded while parsing. + /// + internal bool DiscardUnknownFields { + get { return state.DiscardUnknownFields; } + set { state.DiscardUnknownFields = value; } + } + + /// + /// Internal-only property; provides extension identifiers to compatible messages while parsing. + /// + internal ExtensionRegistry ExtensionRegistry + { + get { return state.ExtensionRegistry; } + set { state.ExtensionRegistry = value; } + } + + /// + /// Reads a field tag, returning the tag of 0 for "end of input". + /// + /// + /// If this method returns 0, it doesn't necessarily mean the end of all + /// the data in this CodedInputReader; it may be the end of the logical input + /// for an embedded message, for example. 
+ /// + /// The next field tag, or 0 for end of input. (0 is never a valid tag.) + [MethodImpl(MethodImplOptions.AggressiveInlining)] + + public uint ReadTag() + { + return ParsingPrimitives.ParseTag(ref buffer, ref state); + } + + /// + /// Reads a double field from the input. + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public double ReadDouble() + { + return ParsingPrimitives.ParseDouble(ref buffer, ref state); + } + + /// + /// Reads a float field from the input. + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public float ReadFloat() + { + return ParsingPrimitives.ParseFloat(ref buffer, ref state); + } + + /// + /// Reads a uint64 field from the input. + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public ulong ReadUInt64() + { + return ParsingPrimitives.ParseRawVarint64(ref buffer, ref state); + } + + /// + /// Reads an int64 field from the input. + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public long ReadInt64() + { + return (long)ParsingPrimitives.ParseRawVarint64(ref buffer, ref state); + } + + /// + /// Reads an int32 field from the input. + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public int ReadInt32() + { + return (int)ParsingPrimitives.ParseRawVarint32(ref buffer, ref state); + } + + /// + /// Reads a fixed64 field from the input. + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public ulong ReadFixed64() + { + return ParsingPrimitives.ParseRawLittleEndian64(ref buffer, ref state); + } + + /// + /// Reads a fixed32 field from the input. + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public uint ReadFixed32() + { + return ParsingPrimitives.ParseRawLittleEndian32(ref buffer, ref state); + } + + /// + /// Reads a bool field from the input. + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public bool ReadBool() + { + return ParsingPrimitives.ParseRawVarint64(ref buffer, ref state) != 0; + } + /// + /// Reads a string field from the input. 
+ /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public string ReadString() + { + int length = ParsingPrimitives.ParseLength(ref buffer, ref state); + return ParsingPrimitives.ReadRawString(ref buffer, ref state, length); + } + + /// + /// Reads an embedded message field value from the input. + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public void ReadMessage(IMessage message) + { + // TODO: add a fallback if IMessage does not implement IBufferMessage + ParsingPrimitivesMessages.ReadMessage(ref this, message); + } + + /// + /// Reads an embedded group field from the input. + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public void ReadGroup(IMessage message) + { + ParsingPrimitivesMessages.ReadGroup(ref this, message); + } + + /// + /// Reads a bytes field value from the input. + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public ByteString ReadBytes() + { + int length = ParsingPrimitives.ParseLength(ref buffer, ref state); + return ByteString.AttachBytes(ParsingPrimitives.ReadRawBytes(ref buffer, ref state, length)); + } + /// + /// Reads a uint32 field value from the input. + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public uint ReadUInt32() + { + return ParsingPrimitives.ParseRawVarint32(ref buffer, ref state); + } + + /// + /// Reads an enum field value from the input. + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public int ReadEnum() + { + // Currently just a pass-through, but it's nice to separate it logically from ReadInt32. + return (int)ParsingPrimitives.ParseRawVarint32(ref buffer, ref state); + } + + /// + /// Reads an sfixed32 field value from the input. + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public int ReadSFixed32() + { + return (int)ParsingPrimitives.ParseRawLittleEndian32(ref buffer, ref state); + } + + /// + /// Reads an sfixed64 field value from the input. 
+ /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public long ReadSFixed64() + { + return (long)ParsingPrimitives.ParseRawLittleEndian64(ref buffer, ref state); + } + + /// + /// Reads an sint32 field value from the input. + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public int ReadSInt32() + { + return ParsingPrimitives.DecodeZigZag32(ParsingPrimitives.ParseRawVarint32(ref buffer, ref state)); + } + + /// + /// Reads an sint64 field value from the input. + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public long ReadSInt64() + { + return ParsingPrimitives.DecodeZigZag64(ParsingPrimitives.ParseRawVarint64(ref buffer, ref state)); + } + + /// + /// Reads a length for length-delimited data. + /// + /// + /// This is internally just reading a varint, but this method exists + /// to make the calling code clearer. + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public int ReadLength() + { + return (int)ParsingPrimitives.ParseRawVarint32(ref buffer, ref state); + } + + internal void CopyStateTo(CodedInputStream input) + { + input.InternalState = state; + } + } +} \ No newline at end of file diff --git a/csharp/src/Google.Protobuf/ParserInternalState.cs b/csharp/src/Google.Protobuf/ParserInternalState.cs new file mode 100644 index 0000000000..5875173d8a --- /dev/null +++ b/csharp/src/Google.Protobuf/ParserInternalState.cs @@ -0,0 +1,116 @@ +#region Copyright notice and license +// Protocol Buffers - Google's data interchange format +// Copyright 2008 Google Inc. All rights reserved. +// https://developers.google.com/protocol-buffers/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. 
+// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +#endregion + +using System; +using System.Buffers; +using System.Buffers.Binary; +using System.Collections.Generic; +using System.IO; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using System.Security; +using System.Text; +using Google.Protobuf.Collections; + +namespace Google.Protobuf +{ + + // warning: this is a mutable struct, so it must only ever be passed by ref! + internal struct ParserInternalState + { + // NOTE: the Span representing the current buffer is kept separate so that this doesn't have to be a ref struct and so it can + // be included in CodedInputStream's internal state + + /// + /// The position within the current buffer (i.e. 
the next byte to read) + /// + internal int bufferPos; + + /// + /// Size of the current buffer + /// + internal int bufferSize; + + /// + /// If we are currently inside a length-delimited block, this is the number of + /// bytes in the buffer that are still available once we leave the delimited block. + /// + internal int bufferSizeAfterLimit; + + /// + /// The absolute position of the end of the current length-delimited block (including totalBytesRetired) + /// + internal int currentLimit; + + /// + /// The total number of bytes consumed before the start of the current buffer. The + /// total bytes read up to the current position can be computed as + /// totalBytesRetired + bufferPos. + /// + internal int totalBytesRetired; + + internal int recursionDepth; // current recursion depth + + internal SegmentedBufferHelper segmentedBufferHelper; + + // TODO: remember if this context is supposed to call MergeFrom(CodedInputStream cis) or MergeFrom(ref ParseContext ....) for submessages. + // if non-null, the top level parse method was started with given cis as an argument + internal CodedInputStream codedInputStream; + + + /// + /// The last tag we read. 0 indicates we've read to the end of the stream + /// (or haven't read anything yet). + /// + internal uint lastTag; + + /// + /// The next tag, used to store the value read by PeekTag. + /// + internal uint nextTag; + internal bool hasNextTag; + + // TODO: these fields are configuration, they should be readonly + internal int sizeLimit; + internal int recursionLimit; + + /// + /// Internal-only property; when set to true, unknown fields will be discarded while parsing. + /// + internal bool DiscardUnknownFields { get; set; } + + /// + /// Internal-only property; provides extension identifiers to compatible messages while parsing. 
+ /// + internal ExtensionRegistry ExtensionRegistry { get; set; } + } +} \ No newline at end of file diff --git a/csharp/src/Google.Protobuf/ParsingPrimitives.cs b/csharp/src/Google.Protobuf/ParsingPrimitives.cs new file mode 100644 index 0000000000..93852344ed --- /dev/null +++ b/csharp/src/Google.Protobuf/ParsingPrimitives.cs @@ -0,0 +1,726 @@ +#region Copyright notice and license +// Protocol Buffers - Google's data interchange format +// Copyright 2008 Google Inc. All rights reserved. +// https://developers.google.com/protocol-buffers/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +#endregion + +using System; +using System.Buffers; +using System.Buffers.Binary; +using System.Collections.Generic; +using System.IO; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using System.Security; +using System.Text; +using Google.Protobuf.Collections; + +namespace Google.Protobuf +{ + /// + /// Primitives for parsing protobuf wire format. + /// + internal static class ParsingPrimitives + { + + /// + /// Reads a length for length-delimited data. + /// + /// + /// This is internally just reading a varint, but this method exists + /// to make the calling code clearer. + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + + public static int ParseLength(ref ReadOnlySpan buffer, ref ParserInternalState state) + { + return (int)ParseRawVarint32(ref buffer, ref state); + } + + /// + /// Parses the next tag. + /// If the end of logical stream was reached, an invalid tag of 0 is returned. + /// + public static uint ParseTag(ref ReadOnlySpan buffer, ref ParserInternalState state) + { + // The "nextTag" logic is there only as an optimization for reading non-packed repeated / map + // fields and is strictly speaking not necessary. + // TODO(jtattermusch): look into simplifying the ParseTag logic. 
+ if (state.hasNextTag) + { + state.lastTag = state.nextTag; + state.hasNextTag = false; + return state.lastTag; + } + + // Optimize for the incredibly common case of having at least two bytes left in the buffer, + // and those two bytes being enough to get the tag. This will be true for fields up to 4095. + if (state.bufferPos + 2 <= state.bufferSize) + { + int tmp = buffer[state.bufferPos++]; + if (tmp < 128) + { + state.lastTag = (uint)tmp; + } + else + { + int result = tmp & 0x7f; + if ((tmp = buffer[state.bufferPos++]) < 128) + { + result |= tmp << 7; + state.lastTag = (uint) result; + } + else + { + // Nope, rewind and go the potentially slow route. + state.bufferPos -= 2; + state.lastTag = ParsingPrimitives.ParseRawVarint32(ref buffer, ref state); + } + } + } + else + { + if (SegmentedBufferHelper.IsAtEnd(ref buffer, ref state)) + { + state.lastTag = 0; + return 0; + } + + state.lastTag = ParsingPrimitives.ParseRawVarint32(ref buffer, ref state); + } + if (WireFormat.GetTagFieldNumber(state.lastTag) == 0) + { + // If we actually read a tag with a field of 0, that's not a valid tag. + throw InvalidProtocolBufferException.InvalidTag(); + } + return state.lastTag; + } + + /// + /// Peeks at the next tag in the stream. If it matches , + /// the tag is consumed and the method returns true; otherwise, the + /// stream is left in the original position and the method returns false. + /// + public static bool MaybeConsumeTag(ref ReadOnlySpan buffer, ref ParserInternalState state, uint tag) + { + if (PeekTag(ref buffer, ref state) == tag) + { + state.hasNextTag = false; + return true; + } + return false; + } + + /// + /// Peeks at the next field tag. This is like calling , but the + /// tag is not consumed. (So a subsequent call to will return the + /// same value.) 
+ /// + public static uint PeekTag(ref ReadOnlySpan buffer, ref ParserInternalState state) + { + if (state.hasNextTag) + { + return state.nextTag; + } + + uint savedLast = state.lastTag; + state.nextTag = ParseTag(ref buffer, ref state); + state.hasNextTag = true; + state.lastTag = savedLast; // Undo the side effect of ReadTag + return state.nextTag; + } + + /// + /// Parses a raw varint. + /// + public static ulong ParseRawVarint64(ref ReadOnlySpan buffer, ref ParserInternalState state) + { + if (state.bufferPos + 10 > state.bufferSize) + { + return ParseRawVarint64SlowPath(ref buffer, ref state); + } + + ulong result = buffer[state.bufferPos++]; + if (result < 128) + { + return result; + } + result &= 0x7f; + int shift = 7; + do + { + byte b = buffer[state.bufferPos++]; + result |= (ulong)(b & 0x7F) << shift; + if (b < 0x80) + { + return result; + } + shift += 7; + } + while (shift < 64); + + throw InvalidProtocolBufferException.MalformedVarint(); + } + + private static ulong ParseRawVarint64SlowPath(ref ReadOnlySpan buffer, ref ParserInternalState state) + { + int shift = 0; + ulong result = 0; + do + { + byte b = ReadRawByte(ref buffer, ref state); + result |= (ulong)(b & 0x7F) << shift; + if (b < 0x80) + { + return result; + } + shift += 7; + } + while (shift < 64); + + throw InvalidProtocolBufferException.MalformedVarint(); + } + + /// + /// Parses a raw Varint. If larger than 32 bits, discard the upper bits. + /// This method is optimised for the case where we've got lots of data in the buffer. + /// That means we can check the size just once, then just read directly from the buffer + /// without constant rechecking of the buffer length. 
+ /// + public static uint ParseRawVarint32(ref ReadOnlySpan buffer, ref ParserInternalState state) + { + if (state.bufferPos + 5 > state.bufferSize) + { + return ParseRawVarint32SlowPath(ref buffer, ref state); + } + + int tmp = buffer[state.bufferPos++]; + if (tmp < 128) + { + return (uint)tmp; + } + int result = tmp & 0x7f; + if ((tmp = buffer[state.bufferPos++]) < 128) + { + result |= tmp << 7; + } + else + { + result |= (tmp & 0x7f) << 7; + if ((tmp = buffer[state.bufferPos++]) < 128) + { + result |= tmp << 14; + } + else + { + result |= (tmp & 0x7f) << 14; + if ((tmp = buffer[state.bufferPos++]) < 128) + { + result |= tmp << 21; + } + else + { + result |= (tmp & 0x7f) << 21; + result |= (tmp = buffer[state.bufferPos++]) << 28; + if (tmp >= 128) + { + // Discard upper 32 bits. + // Note that this has to use ReadRawByte() as we only ensure we've + // got at least 5 bytes at the start of the method. This lets us + // use the fast path in more cases, and we rarely hit this section of code. + for (int i = 0; i < 5; i++) + { + if (ReadRawByte(ref buffer, ref state) < 128) + { + return (uint) result; + } + } + throw InvalidProtocolBufferException.MalformedVarint(); + } + } + } + } + return (uint)result; + } + + private static uint ParseRawVarint32SlowPath(ref ReadOnlySpan buffer, ref ParserInternalState state) + { + int tmp = ReadRawByte(ref buffer, ref state); + if (tmp < 128) + { + return (uint) tmp; + } + int result = tmp & 0x7f; + if ((tmp = ReadRawByte(ref buffer, ref state)) < 128) + { + result |= tmp << 7; + } + else + { + result |= (tmp & 0x7f) << 7; + if ((tmp = ReadRawByte(ref buffer, ref state)) < 128) + { + result |= tmp << 14; + } + else + { + result |= (tmp & 0x7f) << 14; + if ((tmp = ReadRawByte(ref buffer, ref state)) < 128) + { + result |= tmp << 21; + } + else + { + result |= (tmp & 0x7f) << 21; + result |= (tmp = ReadRawByte(ref buffer, ref state)) << 28; + if (tmp >= 128) + { + // Discard upper 32 bits. 
+ for (int i = 0; i < 5; i++) + { + if (ReadRawByte(ref buffer, ref state) < 128) + { + return (uint) result; + } + } + throw InvalidProtocolBufferException.MalformedVarint(); + } + } + } + } + return (uint) result; + } + + /// + /// Parses a 32-bit little-endian integer. + /// + public static uint ParseRawLittleEndian32(ref ReadOnlySpan buffer, ref ParserInternalState state) + { + const int length = sizeof(uint); + if (state.bufferPos + length > state.bufferSize) + { + return ParseRawLittleEndian32SlowPath(ref buffer, ref state); + } + uint result = BinaryPrimitives.ReadUInt32LittleEndian(buffer.Slice(state.bufferPos, length)); + state.bufferPos += length; + return result; + } + + private static uint ParseRawLittleEndian32SlowPath(ref ReadOnlySpan buffer, ref ParserInternalState state) + { + uint b1 = ReadRawByte(ref buffer, ref state); + uint b2 = ReadRawByte(ref buffer, ref state); + uint b3 = ReadRawByte(ref buffer, ref state); + uint b4 = ReadRawByte(ref buffer, ref state); + return b1 | (b2 << 8) | (b3 << 16) | (b4 << 24); + } + + /// + /// Parses a 64-bit little-endian integer. 
+ /// + public static ulong ParseRawLittleEndian64(ref ReadOnlySpan buffer, ref ParserInternalState state) + { + const int length = sizeof(ulong); + if (state.bufferPos + length > state.bufferSize) + { + return ParseRawLittleEndian64SlowPath(ref buffer, ref state); + } + ulong result = BinaryPrimitives.ReadUInt64LittleEndian(buffer.Slice(state.bufferPos, length)); + state.bufferPos += length; + return result; + } + + private static ulong ParseRawLittleEndian64SlowPath(ref ReadOnlySpan buffer, ref ParserInternalState state) + { + ulong b1 = ReadRawByte(ref buffer, ref state); + ulong b2 = ReadRawByte(ref buffer, ref state); + ulong b3 = ReadRawByte(ref buffer, ref state); + ulong b4 = ReadRawByte(ref buffer, ref state); + ulong b5 = ReadRawByte(ref buffer, ref state); + ulong b6 = ReadRawByte(ref buffer, ref state); + ulong b7 = ReadRawByte(ref buffer, ref state); + ulong b8 = ReadRawByte(ref buffer, ref state); + return b1 | (b2 << 8) | (b3 << 16) | (b4 << 24) + | (b5 << 32) | (b6 << 40) | (b7 << 48) | (b8 << 56); + } + + /// + /// Parses a double value. + /// + public static double ParseDouble(ref ReadOnlySpan buffer, ref ParserInternalState state) + { + const int length = sizeof(double); + if (!BitConverter.IsLittleEndian || state.bufferPos + length > state.bufferSize) + { + return BitConverter.Int64BitsToDouble((long)ParseRawLittleEndian64(ref buffer, ref state)); + } + // ReadUnaligned uses processor architecture for endianness. + double result = Unsafe.ReadUnaligned(ref MemoryMarshal.GetReference(buffer.Slice(state.bufferPos, length))); + state.bufferPos += length; + return result; + } + + /// + /// Parses a float value. + /// + public static float ParseFloat(ref ReadOnlySpan buffer, ref ParserInternalState state) + { + const int length = sizeof(float); + if (!BitConverter.IsLittleEndian || state.bufferPos + length > state.bufferSize) + { + return ParseFloatSlow(ref buffer, ref state); + } + // ReadUnaligned uses processor architecture for endianness. 
+ float result = Unsafe.ReadUnaligned(ref MemoryMarshal.GetReference(buffer.Slice(state.bufferPos, length))); + state.bufferPos += length; + return result; + } + + private static unsafe float ParseFloatSlow(ref ReadOnlySpan buffer, ref ParserInternalState state) + { + const int length = sizeof(float); + byte* stackBuffer = stackalloc byte[length]; + Span tempSpan = new Span(stackBuffer, length); + for (int i = 0; i < length; i++) + { + tempSpan[i] = ReadRawByte(ref buffer, ref state); + } + + // Content is little endian. Reverse if needed to match endianness of architecture. + if (!BitConverter.IsLittleEndian) + { + tempSpan.Reverse(); + } + return Unsafe.ReadUnaligned(ref MemoryMarshal.GetReference(tempSpan)); + } + + /// + /// Reads a fixed size of bytes from the input. + /// + /// + /// the end of the stream or the current limit was reached + /// + public static byte[] ReadRawBytes(ref ReadOnlySpan buffer, ref ParserInternalState state, int size) + { + if (size < 0) + { + throw InvalidProtocolBufferException.NegativeSize(); + } + + if (state.totalBytesRetired + state.bufferPos + size > state.currentLimit) + { + // Read to the end of the stream (up to the current limit) anyway. + SkipRawBytes(ref buffer, ref state, state.currentLimit - state.totalBytesRetired - state.bufferPos); + // Then fail. + throw InvalidProtocolBufferException.TruncatedMessage(); + } + + if (size <= state.bufferSize - state.bufferPos) + { + // We have all the bytes we need already. + byte[] bytes = new byte[size]; + buffer.Slice(state.bufferPos, size).CopyTo(bytes); + state.bufferPos += size; + return bytes; + } + else if (size < buffer.Length || size < state.segmentedBufferHelper.TotalLength) + { + // Reading more bytes than are in the buffer, but not an excessive number + // of bytes. We can safely allocate the resulting array ahead of time. + + // First copy what we have. 
+ byte[] bytes = new byte[size]; + var bytesSpan = new Span(bytes); + int pos = state.bufferSize - state.bufferPos; + buffer.Slice(state.bufferPos, pos).CopyTo(bytesSpan.Slice(0, pos)); + state.bufferPos = state.bufferSize; + + // We want to use RefillBuffer() and then copy from the buffer into our + // byte array rather than reading directly into our byte array because + // the input may be unbuffered. + state.segmentedBufferHelper.RefillBuffer(ref buffer, ref state, true); + + while (size - pos > state.bufferSize) + { + buffer.Slice(0, state.bufferSize) + .CopyTo(bytesSpan.Slice(pos, state.bufferSize)); + pos += state.bufferSize; + state.bufferPos = state.bufferSize; + state.segmentedBufferHelper.RefillBuffer(ref buffer, ref state, true); + } + + buffer.Slice(0, size - pos) + .CopyTo(bytesSpan.Slice(pos, size - pos)); + state.bufferPos = size - pos; + + return bytes; + } + else + { + // The size is very large. For security reasons, we can't allocate the + // entire byte array yet. The size comes directly from the input, so a + // maliciously-crafted message could provide a bogus very large size in + // order to trick the app into allocating a lot of memory. We avoid this + // by allocating and reading only a small chunk at a time, so that the + // malicious message must actually *be* extremely large to cause + // problems. Meanwhile, we limit the allowed size of a message elsewhere. + + List chunks = new List(); + + int pos = state.bufferSize - state.bufferPos; + byte[] firstChunk = new byte[pos]; + buffer.Slice(state.bufferPos, pos).CopyTo(firstChunk); + chunks.Add(firstChunk); + state.bufferPos = state.bufferSize; + + // Read all the rest of the bytes we need. 
+ int sizeLeft = size - pos; + while (sizeLeft > 0) + { + state.segmentedBufferHelper.RefillBuffer(ref buffer, ref state, true); + byte[] chunk = new byte[Math.Min(sizeLeft, state.bufferSize)]; + + buffer.Slice(0, chunk.Length) + .CopyTo(chunk); + state.bufferPos += chunk.Length; + sizeLeft -= chunk.Length; + chunks.Add(chunk); + } + + // OK, got everything. Now concatenate it all into one buffer. + byte[] bytes = new byte[size]; + int newPos = 0; + foreach (byte[] chunk in chunks) + { + Buffer.BlockCopy(chunk, 0, bytes, newPos, chunk.Length); + newPos += chunk.Length; + } + + // Done. + return bytes; + } + } + + /// + /// Reads and discards bytes. + /// + /// the end of the stream + /// or the current limit was reached + public static void SkipRawBytes(ref ReadOnlySpan buffer, ref ParserInternalState state, int size) + { + if (size < 0) + { + throw InvalidProtocolBufferException.NegativeSize(); + } + + if (state.totalBytesRetired + state.bufferPos + size > state.currentLimit) + { + // Read to the end of the stream anyway. + SkipRawBytes(ref buffer, ref state, state.currentLimit - state.totalBytesRetired - state.bufferPos); + // Then fail. + throw InvalidProtocolBufferException.TruncatedMessage(); + } + + if (size <= state.bufferSize - state.bufferPos) + { + // We have all the bytes we need already. + state.bufferPos += size; + } + else + { + // TODO: do we need to support skipping in seekable Streams? + + // Skipping more bytes than are in the buffer. First skip what we have. 
+ int pos = state.bufferSize - state.bufferPos; + state.bufferPos = state.bufferSize; + + state.segmentedBufferHelper.RefillBuffer(ref buffer, ref state, true); + + while (size - pos > state.bufferSize) + { + pos += state.bufferSize; + state.bufferPos = state.bufferSize; + state.segmentedBufferHelper.RefillBuffer(ref buffer, ref state, true); + } + + state.bufferPos = size - pos; + } + } + + public static string ReadString(ref ReadOnlySpan buffer, ref ParserInternalState state) + { + int length = ParsingPrimitives.ParseLength(ref buffer, ref state); + return ParsingPrimitives.ReadRawString(ref buffer, ref state, length); + } + + /// + /// Reads a bytes field value from the input. + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static ByteString ReadBytes(ref ReadOnlySpan buffer, ref ParserInternalState state) + { + int length = ParsingPrimitives.ParseLength(ref buffer, ref state); + return ByteString.AttachBytes(ParsingPrimitives.ReadRawBytes(ref buffer, ref state, length)); + } + + /// + /// Reads a UTF-8 string from the next "length" bytes. + /// + /// + /// the end of the stream or the current limit was reached + /// + public static string ReadRawString(ref ReadOnlySpan buffer, ref ParserInternalState state, int length) + { + // No need to read any data for an empty string. + if (length == 0) + { + return string.Empty; + } + + if (length < 0) + { + throw InvalidProtocolBufferException.NegativeSize(); + } + +#if GOOGLE_PROTOBUF_SUPPORT_FAST_STRING + if (length <= state.bufferSize - state.bufferPos && length > 0) + { + // Fast path: all bytes to decode appear in the same span. + ReadOnlySpan data = buffer.Slice(state.bufferPos, length); + + string value; + unsafe + { + fixed (byte* sourceBytes = &MemoryMarshal.GetReference(data)) + { + value = CodedOutputStream.Utf8Encoding.GetString(sourceBytes, length); + } + } + + state.bufferPos += length; + return value; + } +#endif + + // TODO: what if GOOGLE_PROTOBUF_SUPPORT_FAST_STRING is not supported? 
+ // -> can we still try to grab an array from the span? + // if (length <= state.bufferSize - state.bufferPos && length > 0) + // { + // // Fast path: We already have the bytes in a contiguous buffer, so + // // just copy directly from it. + // String result = CodedOutputStream.Utf8Encoding.GetString(buffer, state.bufferPos, length); + // state.bufferPos += length; + // return result; + // } + + // TODO: creating a char[] and decoding into it and then creating a string from that array might be more efficient + // Slow path: Build a byte array first then copy it. + return CodedOutputStream.Utf8Encoding.GetString(ReadRawBytes(ref buffer, ref state, length), 0, length); + } + + private static byte ReadRawByte(ref ReadOnlySpan buffer, ref ParserInternalState state) + { + if (state.bufferPos == state.bufferSize) + { + state.segmentedBufferHelper.RefillBuffer(ref buffer, ref state, true); + } + return buffer[state.bufferPos++]; + } + + /// + /// Reads a varint from the input one byte at a time, so that it does not + /// read any bytes after the end of the varint. If you simply wrapped the + /// stream in a CodedInputStream and used ReadRawVarint32(Stream) + /// then you would probably end up reading past the end of the varint since + /// CodedInputStream buffers its input. + /// + /// + /// + public static uint ReadRawVarint32(Stream input) + { + int result = 0; + int offset = 0; + for (; offset < 32; offset += 7) + { + int b = input.ReadByte(); + if (b == -1) + { + throw InvalidProtocolBufferException.TruncatedMessage(); + } + result |= (b & 0x7f) << offset; + if ((b & 0x80) == 0) + { + return (uint) result; + } + } + // Keep reading up to 64 bits. 
+ for (; offset < 64; offset += 7) + { + int b = input.ReadByte(); + if (b == -1) + { + throw InvalidProtocolBufferException.TruncatedMessage(); + } + if ((b & 0x80) == 0) + { + return (uint) result; + } + } + throw InvalidProtocolBufferException.MalformedVarint(); + } + + /// + /// Decode a 32-bit value with ZigZag encoding. + /// + /// + /// ZigZag encodes signed integers into values that can be efficiently + /// encoded with varint. (Otherwise, negative values must be + /// sign-extended to 32 bits to be varint encoded, thus always taking + /// 5 bytes on the wire.) + /// + public static int DecodeZigZag32(uint n) + { + return (int)(n >> 1) ^ -(int)(n & 1); + } + + /// + /// Decode a 64-bit value with ZigZag encoding. + /// + /// + /// ZigZag encodes signed integers into values that can be efficiently + /// encoded with varint. (Otherwise, negative values must be + /// sign-extended to 64 bits to be varint encoded, thus always taking + /// 10 bytes on the wire.) + /// + public static long DecodeZigZag64(ulong n) + { + return (long)(n >> 1) ^ -(long)(n & 1); + } + } +} \ No newline at end of file diff --git a/csharp/src/Google.Protobuf/ParsingPrimitivesMessages.cs b/csharp/src/Google.Protobuf/ParsingPrimitivesMessages.cs new file mode 100644 index 0000000000..5be010cc44 --- /dev/null +++ b/csharp/src/Google.Protobuf/ParsingPrimitivesMessages.cs @@ -0,0 +1,231 @@ +#region Copyright notice and license +// Protocol Buffers - Google's data interchange format +// Copyright 2008 Google Inc. All rights reserved. +// https://developers.google.com/protocol-buffers/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. 
+// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+#endregion + +using System; +using System.Buffers; +using System.IO; +using System.Runtime.CompilerServices; + +namespace Google.Protobuf +{ + /// + /// Reading and skipping messages / groups + /// + internal static class ParsingPrimitivesMessages + { + public static void SkipLastField(ref ReadOnlySpan buffer, ref ParserInternalState state) + { + if (state.lastTag == 0) + { + throw new InvalidOperationException("SkipLastField cannot be called at the end of a stream"); + } + switch (WireFormat.GetTagWireType(state.lastTag)) + { + case WireFormat.WireType.StartGroup: + SkipGroup(ref buffer, ref state, state.lastTag); + break; + case WireFormat.WireType.EndGroup: + throw new InvalidProtocolBufferException( + "SkipLastField called on an end-group tag, indicating that the corresponding start-group was missing"); + case WireFormat.WireType.Fixed32: + ParsingPrimitives.ParseRawLittleEndian32(ref buffer, ref state); + break; + case WireFormat.WireType.Fixed64: + ParsingPrimitives.ParseRawLittleEndian64(ref buffer, ref state); + break; + case WireFormat.WireType.LengthDelimited: + var length = ParsingPrimitives.ParseLength(ref buffer, ref state); + ParsingPrimitives.SkipRawBytes(ref buffer, ref state, length); + break; + case WireFormat.WireType.Varint: + ParsingPrimitives.ParseRawVarint32(ref buffer, ref state); + break; + } + } + + /// + /// Skip a group. + /// + public static void SkipGroup(ref ReadOnlySpan buffer, ref ParserInternalState state, uint startGroupTag) + { + // Note: Currently we expect this to be the way that groups are read. We could put the recursion + // depth changes into the ReadTag method instead, potentially... 
+ state.recursionDepth++; + if (state.recursionDepth >= state.recursionLimit) + { + throw InvalidProtocolBufferException.RecursionLimitExceeded(); + } + uint tag; + while (true) + { + tag = ParsingPrimitives.ParseTag(ref buffer, ref state); + if (tag == 0) + { + throw InvalidProtocolBufferException.TruncatedMessage(); + } + // Can't call SkipLastField for this case- that would throw. + if (WireFormat.GetTagWireType(tag) == WireFormat.WireType.EndGroup) + { + break; + } + // This recursion will allow us to handle nested groups. + SkipLastField(ref buffer, ref state); + } + int startField = WireFormat.GetTagFieldNumber(startGroupTag); + int endField = WireFormat.GetTagFieldNumber(tag); + if (startField != endField) + { + throw new InvalidProtocolBufferException( + $"Mismatched end-group tag. Started with field {startField}; ended with field {endField}"); + } + state.recursionDepth--; + } + + public static void ReadMessage(ref CodedInputReader ctx, IMessage message) + { + int length = ParsingPrimitives.ParseLength(ref ctx.buffer, ref ctx.state); + if (ctx.state.recursionDepth >= ctx.state.recursionLimit) + { + throw InvalidProtocolBufferException.RecursionLimitExceeded(); + } + int oldLimit = SegmentedBufferHelper.PushLimit(ref ctx.state, length); + ++ctx.state.recursionDepth; + + ReadRawMessage(ref ctx, message); + + CheckReadEndOfStreamTag(ref ctx.state); + // Check that we've read exactly as much data as expected. 
+ if (!SegmentedBufferHelper.IsReachedLimit(ref ctx.state)) + { + throw InvalidProtocolBufferException.TruncatedMessage(); + } + --ctx.state.recursionDepth; + SegmentedBufferHelper.PopLimit(ref ctx.state, oldLimit); + } + + public static void ReadMessage(ref ParseContext ctx, IMessage message) + { + int length = ParsingPrimitives.ParseLength(ref ctx.buffer, ref ctx.state); + if (ctx.state.recursionDepth >= ctx.state.recursionLimit) + { + throw InvalidProtocolBufferException.RecursionLimitExceeded(); + } + int oldLimit = SegmentedBufferHelper.PushLimit(ref ctx.state, length); + ++ctx.state.recursionDepth; + + ReadRawMessage(ref ctx, message); + + CheckReadEndOfStreamTag(ref ctx.state); + // Check that we've read exactly as much data as expected. + if (!SegmentedBufferHelper.IsReachedLimit(ref ctx.state)) + { + throw InvalidProtocolBufferException.TruncatedMessage(); + } + --ctx.state.recursionDepth; + SegmentedBufferHelper.PopLimit(ref ctx.state, oldLimit); + } + + public static void ReadGroup(ref CodedInputReader ctx, IMessage message) + { + if (ctx.state.recursionDepth >= ctx.state.recursionLimit) + { + throw InvalidProtocolBufferException.RecursionLimitExceeded(); + } + ++ctx.state.recursionDepth; + + ReadRawMessage(ref ctx, message); + + --ctx.state.recursionDepth; + } + + public static void ReadGroup(ref ParseContext ctx, IMessage message) + { + if (ctx.state.recursionDepth >= ctx.state.recursionLimit) + { + throw InvalidProtocolBufferException.RecursionLimitExceeded(); + } + ++ctx.state.recursionDepth; + + ReadRawMessage(ref ctx, message); + + --ctx.state.recursionDepth; + } + + public static void ReadRawMessage(ref CodedInputReader ctx, IMessage message) + { + if (message is IBufferMessage bufferMessage) + { + bufferMessage.MergeFrom(ref ctx); + } + else + { + if (ctx.state.codedInputStream == null) + { + // TODO: improve the msg + throw new InvalidProtocolBufferException("Cannot parse message with current parse context. 
Do you need to regenerate the code?"); + } + message.MergeFrom(ctx.state.codedInputStream); + } + } + + public static void ReadRawMessage(ref ParseContext ctx, IMessage message) + { + if (message is IBufferMessage bufferMessage) + { + bufferMessage.MergeFrom_Internal(ref ctx); + } + else + { + if (ctx.state.codedInputStream == null) + { + // TODO: improve the msg + throw new InvalidProtocolBufferException("Cannot parse message with current parse context. Do you need to regenerate the code?"); + } + message.MergeFrom(ctx.state.codedInputStream); + } + } + + /// + /// Verifies that the last call to ReadTag() returned tag 0 - in other words, + /// we've reached the end of the stream when we expected to. + /// + /// The + /// tag read was not the one specified + public static void CheckReadEndOfStreamTag(ref ParserInternalState state) + { + if (state.lastTag != 0) + { + throw InvalidProtocolBufferException.MoreDataAvailable(); + } + } + } +} \ No newline at end of file diff --git a/csharp/src/Google.Protobuf/ParsingPrimitivesWrappers.cs b/csharp/src/Google.Protobuf/ParsingPrimitivesWrappers.cs new file mode 100644 index 0000000000..fce78ac588 --- /dev/null +++ b/csharp/src/Google.Protobuf/ParsingPrimitivesWrappers.cs @@ -0,0 +1,352 @@ +#region Copyright notice and license +// Protocol Buffers - Google's data interchange format +// Copyright 2008 Google Inc. All rights reserved. +// https://developers.google.com/protocol-buffers/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. 
+// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +#endregion + +using System; +using System.Buffers; +using System.Buffers.Binary; +using System.Collections.Generic; +using System.IO; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using System.Security; +using System.Text; +using Google.Protobuf.Collections; + +namespace Google.Protobuf +{ + /// + /// Fast parsing primitives for wrapper types + /// + internal static class ParsingPrimitivesWrappers + { + internal static float? ReadFloatWrapperLittleEndian(ref ReadOnlySpan buffer, ref ParserInternalState state) + { + // length:1 + tag:1 + value:4 = 6 bytes + if (state.bufferPos + 6 <= state.bufferSize) + { + // The entire wrapper message is already contained in `buffer`. 
+ int length = buffer[state.bufferPos]; + if (length == 0) + { + state.bufferPos++; + return 0F; + } + // tag:1 + value:4 = length of 5 bytes + // field=1, type=32-bit = tag of 13 + if (length != 5 || buffer[state.bufferPos + 1] != 13) + { + return ReadFloatWrapperSlow(ref buffer, ref state); + } + state.bufferPos += 2; + return ParsingPrimitives.ParseFloat(ref buffer, ref state); + } + else + { + return ReadFloatWrapperSlow(ref buffer, ref state); + } + } + + internal static float? ReadFloatWrapperSlow(ref ReadOnlySpan buffer, ref ParserInternalState state) + { + int length = ParsingPrimitives.ParseLength(ref buffer, ref state); + if (length == 0) + { + return 0F; + } + int finalBufferPos = state.totalBytesRetired + state.bufferPos + length; + float result = 0F; + do + { + // field=1, type=32-bit = tag of 13 + if (ParsingPrimitives.ParseTag(ref buffer, ref state) == 13) + { + result = ParsingPrimitives.ParseFloat(ref buffer, ref state); + } + else + { + ParsingPrimitivesMessages.SkipLastField(ref buffer, ref state); + } + } + while (state.totalBytesRetired + state.bufferPos < finalBufferPos); + return result; + } + + internal static double? ReadDoubleWrapperLittleEndian(ref ReadOnlySpan buffer, ref ParserInternalState state) + { + // length:1 + tag:1 + value:8 = 10 bytes + if (state.bufferPos + 10 <= state.bufferSize) + { + // The entire wrapper message is already contained in `buffer`. + int length = buffer[state.bufferPos]; + if (length == 0) + { + state.bufferPos++; + return 0D; + } + // tag:1 + value:8 = length of 9 bytes + // field=1, type=64-bit = tag of 9 + if (length != 9 || buffer[state.bufferPos + 1] != 9) + { + return ReadDoubleWrapperSlow(ref buffer, ref state); + } + state.bufferPos += 2; + return ParsingPrimitives.ParseDouble(ref buffer, ref state); + } + else + { + return ReadDoubleWrapperSlow(ref buffer, ref state); + } + } + + internal static double? 
ReadDoubleWrapperSlow(ref ReadOnlySpan buffer, ref ParserInternalState state) + { + int length = ParsingPrimitives.ParseLength(ref buffer, ref state); + if (length == 0) + { + return 0D; + } + int finalBufferPos = state.totalBytesRetired + state.bufferPos + length; + double result = 0D; + do + { + // field=1, type=64-bit = tag of 9 + if (ParsingPrimitives.ParseTag(ref buffer, ref state) == 9) + { + result = ParsingPrimitives.ParseDouble(ref buffer, ref state); + } + else + { + ParsingPrimitivesMessages.SkipLastField(ref buffer, ref state); + } + } + while (state.totalBytesRetired + state.bufferPos < finalBufferPos); + return result; + } + + internal static bool? ReadBoolWrapper(ref ReadOnlySpan buffer, ref ParserInternalState state) + { + return ReadUInt64Wrapper(ref buffer, ref state) != 0; + } + + internal static uint? ReadUInt32Wrapper(ref ReadOnlySpan buffer, ref ParserInternalState state) + { + // length:1 + tag:1 + value:5(varint32-max) = 7 bytes + if (state.bufferPos + 7 <= state.bufferSize) + { + // The entire wrapper message is already contained in `buffer`. + int pos0 = state.bufferPos; + int length = buffer[state.bufferPos++]; + if (length == 0) + { + return 0; + } + // Length will always fit in a single byte. + if (length >= 128) + { + state.bufferPos = pos0; + return ReadUInt32WrapperSlow(ref buffer, ref state); + } + int finalBufferPos = state.bufferPos + length; + // field=1, type=varint = tag of 8 + if (buffer[state.bufferPos++] != 8) + { + state.bufferPos = pos0; + return ReadUInt32WrapperSlow(ref buffer, ref state); + } + var result = ParsingPrimitives.ParseRawVarint32(ref buffer, ref state); + // Verify this message only contained a single field. + if (state.bufferPos != finalBufferPos) + { + state.bufferPos = pos0; + return ReadUInt32WrapperSlow(ref buffer, ref state); + } + return result; + } + else + { + return ReadUInt32WrapperSlow(ref buffer, ref state); + } + } + + internal static uint? 
ReadUInt32WrapperSlow(ref ReadOnlySpan buffer, ref ParserInternalState state) + { + int length = ParsingPrimitives.ParseLength(ref buffer, ref state); + if (length == 0) + { + return 0; + } + int finalBufferPos = state.totalBytesRetired + state.bufferPos + length; + uint result = 0; + do + { + // field=1, type=varint = tag of 8 + if (ParsingPrimitives.ParseTag(ref buffer, ref state) == 8) + { + result = ParsingPrimitives.ParseRawVarint32(ref buffer, ref state); + } + else + { + ParsingPrimitivesMessages.SkipLastField(ref buffer, ref state); + } + } + while (state.totalBytesRetired + state.bufferPos < finalBufferPos); + return result; + } + + internal static int? ReadInt32Wrapper(ref ReadOnlySpan buffer, ref ParserInternalState state) + { + return (int?)ReadUInt32Wrapper(ref buffer, ref state); + } + + internal static ulong? ReadUInt64Wrapper(ref ReadOnlySpan buffer, ref ParserInternalState state) + { + // field=1, type=varint = tag of 8 + const int expectedTag = 8; + // length:1 + tag:1 + value:10(varint64-max) = 12 bytes + if (state.bufferPos + 12 <= state.bufferSize) + { + // The entire wrapper message is already contained in `buffer`. + int pos0 = state.bufferPos; + int length = buffer[state.bufferPos++]; + if (length == 0) + { + return 0L; + } + // Length will always fit in a single byte. + if (length >= 128) + { + state.bufferPos = pos0; + return ReadUInt64WrapperSlow(ref buffer, ref state); + } + int finalBufferPos = state.bufferPos + length; + if (buffer[state.bufferPos++] != expectedTag) + { + state.bufferPos = pos0; + return ReadUInt64WrapperSlow(ref buffer, ref state); + } + var result = ParsingPrimitives.ParseRawVarint64(ref buffer, ref state); + // Verify this message only contained a single field. + if (state.bufferPos != finalBufferPos) + { + state.bufferPos = pos0; + return ReadUInt64WrapperSlow(ref buffer, ref state); + } + return result; + } + else + { + return ReadUInt64WrapperSlow(ref buffer, ref state); + } + } + + internal static ulong? 
ReadUInt64WrapperSlow(ref ReadOnlySpan buffer, ref ParserInternalState state) + { + // field=1, type=varint = tag of 8 + const int expectedTag = 8; + int length = ParsingPrimitives.ParseLength(ref buffer, ref state); + if (length == 0) + { + return 0L; + } + int finalBufferPos = state.totalBytesRetired + state.bufferPos + length; + ulong result = 0L; + do + { + if (ParsingPrimitives.ParseTag(ref buffer, ref state) == expectedTag) + { + result = ParsingPrimitives.ParseRawVarint64(ref buffer, ref state); + } + else + { + ParsingPrimitivesMessages.SkipLastField(ref buffer, ref state); + } + } + while (state.totalBytesRetired + state.bufferPos < finalBufferPos); + return result; + } + + internal static long? ReadInt64Wrapper(ref ReadOnlySpan buffer, ref ParserInternalState state) + { + return (long?)ReadUInt64Wrapper(ref buffer, ref state); + } + + internal static float? ReadFloatWrapperLittleEndian(ref ParseContext ctx) + { + return ParsingPrimitivesWrappers.ReadFloatWrapperLittleEndian(ref ctx.buffer, ref ctx.state); + } + + internal static float? ReadFloatWrapperSlow(ref ParseContext ctx) + { + return ParsingPrimitivesWrappers.ReadFloatWrapperSlow(ref ctx.buffer, ref ctx.state); + } + + internal static double? ReadDoubleWrapperLittleEndian(ref ParseContext ctx) + { + return ParsingPrimitivesWrappers.ReadDoubleWrapperLittleEndian(ref ctx.buffer, ref ctx.state); + } + + internal static double? ReadDoubleWrapperSlow(ref ParseContext ctx) + { + return ParsingPrimitivesWrappers.ReadDoubleWrapperSlow(ref ctx.buffer, ref ctx.state); + } + + internal static bool? ReadBoolWrapper(ref ParseContext ctx) + { + return ParsingPrimitivesWrappers.ReadBoolWrapper(ref ctx.buffer, ref ctx.state); + } + + internal static uint? ReadUInt32Wrapper(ref ParseContext ctx) + { + return ParsingPrimitivesWrappers.ReadUInt32Wrapper(ref ctx.buffer, ref ctx.state); + } + + internal static int? 
ReadInt32Wrapper(ref ParseContext ctx) + { + return ParsingPrimitivesWrappers.ReadInt32Wrapper(ref ctx.buffer, ref ctx.state); + } + + internal static ulong? ReadUInt64Wrapper(ref ParseContext ctx) + { + return ParsingPrimitivesWrappers.ReadUInt64Wrapper(ref ctx.buffer, ref ctx.state); + } + + internal static ulong? ReadUInt64WrapperSlow(ref ParseContext ctx) + { + return ParsingPrimitivesWrappers.ReadUInt64WrapperSlow(ref ctx.buffer, ref ctx.state); + } + + internal static long? ReadInt64Wrapper(ref ParseContext ctx) + { + return ParsingPrimitivesWrappers.ReadInt64Wrapper(ref ctx.buffer, ref ctx.state); + } + } +} \ No newline at end of file diff --git a/csharp/src/Google.Protobuf/SegmentedBufferHelper.cs b/csharp/src/Google.Protobuf/SegmentedBufferHelper.cs new file mode 100644 index 0000000000..2c9db88e95 --- /dev/null +++ b/csharp/src/Google.Protobuf/SegmentedBufferHelper.cs @@ -0,0 +1,283 @@ +#region Copyright notice and license +// Protocol Buffers - Google's data interchange format +// Copyright 2008 Google Inc. All rights reserved. +// https://developers.google.com/protocol-buffers/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#endregion
+
+using System;
+using System.Buffers;
+using System.IO;
+using System.Runtime.CompilerServices;
+
+namespace Google.Protobuf
+{
+    /// <summary>
+    /// Abstraction for reading from a stream / read only sequence.
+    /// Parsing from the buffer is a loop of reading from current buffer / refreshing the buffer once done.
+    /// </summary>
+    internal struct SegmentedBufferHelper
+    {
+        // Total number of bytes in the source when it is known up front
+        // (single/multi segment sequence, or a CodedInputStream without a backing stream);
+        // null when the source is a CodedInputStream that reads from a Stream.
+        private readonly int? totalLength;
+        // Walks the segments of a multi-segment sequence; left at default for every other source.
+        private ReadOnlySequence<byte>.Enumerator readOnlySequenceEnumerator;
+        // Non-null only when this helper was created from a CodedInputStream.
+        private readonly CodedInputStream codedInputStream;
+
+        /// <summary>
+        /// Creates a helper that reads from a <see cref="ReadOnlySequence{T}"/> of bytes.
+        /// </summary>
+        /// <param name="sequence">The sequence to read from.</param>
+        /// <param name="firstSpan">
+        /// Receives the only segment when <paramref name="sequence"/> is single-segment;
+        /// otherwise receives an empty span and the first refill fetches the first segment.
+        /// </param>
+        public SegmentedBufferHelper(ReadOnlySequence<byte> sequence, out ReadOnlySpan<byte> firstSpan)
+        {
+            this.codedInputStream = null;
+            if (sequence.IsSingleSegment)
+            {
+                firstSpan = sequence.First.Span;
+                this.totalLength = firstSpan.Length;
+                this.readOnlySequenceEnumerator = default;
+            }
+            else
+            {
+                // TODO(jtattermusch): try to initialize the first segment, otherwise the
+                // very first read will result in slowpath (because the first thing to do is to
+                // refill to get the first buffer segment)
+                firstSpan = default;
+                // NOTE(review): sequence.Length is a long; this narrowing cast is unchecked, so a
+                // sequence longer than int.MaxValue would yield a wrong total length - confirm
+                // whether callers can ever pass such a sequence.
+                this.totalLength = (int) sequence.Length;
+                this.readOnlySequenceEnumerator = sequence.GetEnumerator();
+            }
+        }
+
+        /// <summary>
+        /// Creates a helper that refills from the given <see cref="CodedInputStream"/>.
+        /// The total length is only known when the stream has no underlying input stream,
+        /// in which case it is the length of its internal buffer.
+        /// </summary>
+        /// <param name="codedInputStream">The coded input stream to refill from.</param>
+        public SegmentedBufferHelper(CodedInputStream codedInputStream)
+        {
+            this.totalLength = codedInputStream.InternalInputStream == null ? (int?)codedInputStream.InternalBuffer.Length : null;
+            this.readOnlySequenceEnumerator = default;
+            this.codedInputStream = codedInputStream;
+        }
+
+        /// <summary>
+        /// Refills the current buffer from whichever source this helper was created for.
+        /// </summary>
+        /// <param name="buffer">The current buffer; replaced when reading from a sequence.</param>
+        /// <param name="state">The parser state whose buffer bookkeeping is updated.</param>
+        /// <param name="mustSucceed">
+        /// When true, running out of data (or hitting the current limit) throws
+        /// <see cref="InvalidProtocolBufferException"/>; when false it returns false instead.
+        /// </param>
+        /// <returns>true if more data was made available; false otherwise.</returns>
+        public bool RefillBuffer(ref ReadOnlySpan<byte> buffer, ref ParserInternalState state, bool mustSucceed)
+        {
+            if (codedInputStream != null)
+            {
+                return RefillFromCodedInputStream(ref buffer, ref state, mustSucceed);
+            }
+            else
+            {
+                return RefillFromReadOnlySequence(ref buffer, ref state, mustSucceed);
+            }
+        }
+
+        /// <summary>
+        /// Total length of the underlying source in bytes, or null when it is not known.
+        /// </summary>
+        public int? TotalLength => totalLength;
+
+        /// <summary>
+        /// The coded input stream this helper refills from, or null when reading from a sequence.
+        /// </summary>
+        public CodedInputStream CodedInputStream => codedInputStream;
+
+        /// <summary>
+        /// Sets currentLimit to (current position) + byteLimit. This is called
+        /// when descending into a length-delimited embedded message. The previous
+        /// limit is returned.
+        /// </summary>
+        /// <param name="state">The parser state whose limit is updated.</param>
+        /// <param name="byteLimit">Number of bytes, counted from the current position, that may still be read.</param>
+        /// <returns>The old limit.</returns>
+        /// <exception cref="InvalidProtocolBufferException">
+        /// <paramref name="byteLimit"/> is negative, or the new absolute limit would lie beyond the
+        /// limit currently in effect (including the case where it does not fit in an int).
+        /// </exception>
+        public static int PushLimit(ref ParserInternalState state, int byteLimit)
+        {
+            if (byteLimit < 0)
+            {
+                throw InvalidProtocolBufferException.NegativeSize();
+            }
+            // Compute the absolute limit in 64 bits: with 32-bit arithmetic a large byteLimit
+            // at a non-zero position wraps to a negative value, passes the comparison below
+            // and installs a negative currentLimit.
+            long newLimit = (long) byteLimit + state.totalBytesRetired + state.bufferPos;
+            int oldLimit = state.currentLimit;
+            if (newLimit > oldLimit)
+            {
+                throw InvalidProtocolBufferException.TruncatedMessage();
+            }
+            // Safe narrowing: newLimit <= oldLimit <= int.MaxValue at this point.
+            state.currentLimit = (int) newLimit;
+
+            RecomputeBufferSizeAfterLimit(ref state);
+
+            return oldLimit;
+        }
+
+        /// <summary>
+        /// Discards the current limit and reinstates <paramref name="oldLimit"/>
+        /// (the value previously returned by <see cref="PushLimit"/>).
+        /// </summary>
+        /// <param name="state">The parser state whose limit is updated.</param>
+        /// <param name="oldLimit">The limit to restore.</param>
+        public static void PopLimit(ref ParserInternalState state, int oldLimit)
+        {
+            state.currentLimit = oldLimit;
+            RecomputeBufferSizeAfterLimit(ref state);
+        }
+
+        /// <summary>
+        /// Returns whether or not all the data before the limit has been read.
+        /// </summary>
+        /// <returns>
+        /// false when no limit is set (currentLimit is int.MaxValue); otherwise true once the
+        /// absolute position has reached or passed the current limit.
+        /// </returns>
+        public static bool IsReachedLimit(ref ParserInternalState state)
+        {
+            if (state.currentLimit == int.MaxValue)
+            {
+                return false;
+            }
+            int currentAbsolutePosition = state.totalBytesRetired + state.bufferPos;
+            return currentAbsolutePosition >= state.currentLimit;
+        }
+
+        /// <summary>
+        /// Returns true if the stream has reached the end of the input. This is the
+        /// case if either the end of the underlying input source has been reached or
+        /// the stream has reached a limit created using PushLimit.
+        /// </summary>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static bool IsAtEnd(ref ReadOnlySpan<byte> buffer, ref ParserInternalState state)
+        {
+            // A refill is only attempted once the current buffer is fully consumed.
+            return state.bufferPos == state.bufferSize && !state.segmentedBufferHelper.RefillBuffer(ref buffer, ref state, false);
+        }
+
+        /// <summary>
+        /// Advances to the next non-empty segment of the sequence and makes it the current buffer.
+        /// </summary>
+        /// <returns>true if a non-empty segment was found; false if none is left and mustSucceed is false.</returns>
+        private bool RefillFromReadOnlySequence(ref ReadOnlySpan<byte> buffer, ref ParserInternalState state, bool mustSucceed)
+        {
+            CheckCurrentBufferIsEmpty(ref state);
+
+            if (state.totalBytesRetired + state.bufferSize == state.currentLimit)
+            {
+                // Oops, we hit a limit.
+                if (mustSucceed)
+                {
+                    throw InvalidProtocolBufferException.TruncatedMessage();
+                }
+                else
+                {
+                    return false;
+                }
+            }
+
+            // The consumed buffer is accounted for before it is replaced.
+            state.totalBytesRetired += state.bufferSize;
+
+            state.bufferPos = 0;
+            state.bufferSize = 0;
+            // Skip over empty segments so that a successful refill always yields data.
+            while (readOnlySequenceEnumerator.MoveNext())
+            {
+                buffer = readOnlySequenceEnumerator.Current.Span;
+                state.bufferSize = buffer.Length;
+                if (buffer.Length != 0)
+                {
+                    break;
+                }
+            }
+
+            if (state.bufferSize == 0)
+            {
+                if (mustSucceed)
+                {
+                    throw InvalidProtocolBufferException.TruncatedMessage();
+                }
+                else
+                {
+                    return false;
+                }
+            }
+            else
+            {
+                RecomputeBufferSizeAfterLimit(ref state);
+                int totalBytesRead =
+                    state.totalBytesRetired + state.bufferSize + state.bufferSizeAfterLimit;
+                // A negative total means the int sum wrapped around.
+                if (totalBytesRead < 0 || totalBytesRead > state.sizeLimit)
+                {
+                    throw InvalidProtocolBufferException.SizeLimitExceeded();
+                }
+                return true;
+            }
+        }
+
+        /// <summary>
+        /// Reads the next chunk from the coded input stream's underlying stream into its internal buffer.
+        /// </summary>
+        /// <returns>true if at least one byte was read; false if no data is left and mustSucceed is false.</returns>
+        private bool RefillFromCodedInputStream(ref ReadOnlySpan<byte> buffer, ref ParserInternalState state, bool mustSucceed)
+        {
+            CheckCurrentBufferIsEmpty(ref state);
+
+            if (state.totalBytesRetired + state.bufferSize == state.currentLimit)
+            {
+                // Oops, we hit a limit.
+                if (mustSucceed)
+                {
+                    throw InvalidProtocolBufferException.TruncatedMessage();
+                }
+                else
+                {
+                    return false;
+                }
+            }
+
+            Stream input = codedInputStream.InternalInputStream;
+
+            // The consumed buffer is accounted for before it is overwritten.
+            state.totalBytesRetired += state.bufferSize;
+
+            state.bufferPos = 0;
+            // NOTE(review): the read count is buffer.Length while the destination is InternalBuffer;
+            // presumably buffer always spans the whole InternalBuffer - confirm against the caller.
+            state.bufferSize = (input == null) ? 0 : input.Read(codedInputStream.InternalBuffer, 0, buffer.Length);
+            if (state.bufferSize < 0)
+            {
+                throw new InvalidOperationException("Stream.Read returned a negative count");
+            }
+            if (state.bufferSize == 0)
+            {
+                if (mustSucceed)
+                {
+                    throw InvalidProtocolBufferException.TruncatedMessage();
+                }
+                else
+                {
+                    return false;
+                }
+            }
+            else
+            {
+                RecomputeBufferSizeAfterLimit(ref state);
+                int totalBytesRead =
+                    state.totalBytesRetired + state.bufferSize + state.bufferSizeAfterLimit;
+                // A negative total means the int sum wrapped around.
+                if (totalBytesRead < 0 || totalBytesRead > state.sizeLimit)
+                {
+                    throw InvalidProtocolBufferException.SizeLimitExceeded();
+                }
+                return true;
+            }
+        }
+
+        /// <summary>
+        /// Splits the bytes held in the current buffer into the part before currentLimit
+        /// (bufferSize) and the part after it (bufferSizeAfterLimit).
+        /// </summary>
+        private static void RecomputeBufferSizeAfterLimit(ref ParserInternalState state)
+        {
+            // Undo the previous split first so bufferSize covers the whole buffer again.
+            state.bufferSize += state.bufferSizeAfterLimit;
+            int bufferEnd = state.totalBytesRetired + state.bufferSize;
+            if (bufferEnd > state.currentLimit)
+            {
+                // Limit is in current buffer.
+                state.bufferSizeAfterLimit = bufferEnd - state.currentLimit;
+                state.bufferSize -= state.bufferSizeAfterLimit;
+            }
+            else
+            {
+                state.bufferSizeAfterLimit = 0;
+            }
+        }
+
+        /// <summary>
+        /// Guards the refill methods: throws if the current buffer still has unread bytes.
+        /// </summary>
+        private static void CheckCurrentBufferIsEmpty(ref ParserInternalState state)
+        {
+            if (state.bufferPos < state.bufferSize)
+            {
+                throw new InvalidOperationException("RefillBuffer() called when buffer wasn't empty.");
+            }
+        }
+    }
+}
\ No newline at end of file