Change ByteString to use memory and support unsafe create without copy

pull/7645/head
James Newton-King 5 years ago
parent 7b352d318b
commit 79f5bad83c
No known key found for this signature in database
GPG Key ID: A66B2F456BF5526
  1. 5
      Makefile.am
  2. 72
      csharp/src/Google.Protobuf.Benchmarks/ByteStringBenchmark.cs
  3. 64
      csharp/src/Google.Protobuf.Test/ByteStringTest.cs
  4. 142
      csharp/src/Google.Protobuf/ByteString.cs
  5. 64
      csharp/src/Google.Protobuf/ByteStringAsync.cs
  6. 81
      csharp/src/Google.Protobuf/UnsafeByteOperations.cs

@ -89,6 +89,7 @@ csharp_EXTRA_DIST= \
csharp/src/Google.Protobuf.Benchmarks/BenchmarkDatasetConfig.cs \
csharp/src/Google.Protobuf.Benchmarks/BenchmarkMessage1Proto3.cs \
csharp/src/Google.Protobuf.Benchmarks/Benchmarks.cs \
csharp/src/Google.Protobuf.Benchmarks/ByteStringBenchmark.cs \
csharp/src/Google.Protobuf.Benchmarks/Google.Protobuf.Benchmarks.csproj \
csharp/src/Google.Protobuf.Benchmarks/GoogleMessageBenchmark.cs \
csharp/src/Google.Protobuf.Benchmarks/ParseMessagesBenchmark.cs \
@ -171,6 +172,7 @@ csharp_EXTRA_DIST= \
csharp/src/Google.Protobuf.sln \
csharp/src/Google.Protobuf/ByteArray.cs \
csharp/src/Google.Protobuf/ByteString.cs \
csharp/src/Google.Protobuf/ByteStringAsync.cs \
csharp/src/Google.Protobuf/CodedInputStream.cs \
csharp/src/Google.Protobuf/CodedOutputStream.ComputeSize.cs \
csharp/src/Google.Protobuf/CodedOutputStream.cs \
@ -268,7 +270,8 @@ csharp_EXTRA_DIST= \
csharp/src/Google.Protobuf/WriteContext.cs \
csharp/src/Google.Protobuf/WriteBufferHelper.cs \
csharp/src/Google.Protobuf/UnknownField.cs \
csharp/src/Google.Protobuf/UnknownFieldSet.cs
csharp/src/Google.Protobuf/UnknownFieldSet.cs \
csharp/src/Google.Protobuf/UnsafeByteOperations.cs
java_EXTRA_DIST= \
java/README.md \

@ -0,0 +1,72 @@
#region Copyright notice and license
// Protocol Buffers - Google's data interchange format
// Copyright 2019 Google Inc. All rights reserved.
// https://github.com/protocolbuffers/protobuf
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#endregion
using BenchmarkDotNet.Attributes;
namespace Google.Protobuf.Benchmarks
{
/// <summary>
/// Benchmarks that compare the two ways of building a <see cref="ByteString"/> from a byte array:
/// <see cref="ByteString.CopyFrom(byte[])"/> and <see cref="UnsafeByteOperations.UnsafeWrap"/>.
/// Each benchmark runs once per configured <see cref="PayloadSize"/>.
/// </summary>
[MemoryDiagnoser]
public class ByteStringBenchmark
{
    // Payload sizes (in bytes) fed to the benchmarks through PayloadSize.
    private const int Zero = 0;
    private const int Kilobyte = 1024;
    private const int _128Kilobytes = 128 * Kilobyte;
    private const int Megabyte = Kilobyte * Kilobyte;
    private const int _10Megabytes = 10 * Megabyte;

    // Source buffer shared by every benchmark invocation; allocated in GlobalSetup.
    private byte[] byteBuffer;

    /// <summary>
    /// Size in bytes of the buffer handed to each benchmark.
    /// </summary>
    [Params(Zero, Kilobyte, _128Kilobytes, Megabyte, _10Megabytes)]
    public int PayloadSize { get; set; }

    /// <summary>
    /// Allocates a buffer of <see cref="PayloadSize"/> bytes for the benchmarks to consume.
    /// </summary>
    [GlobalSetup]
    public void GlobalSetup() => byteBuffer = new byte[PayloadSize];

    /// <summary>
    /// Creates a <see cref="ByteString"/> through <c>ByteString.CopyFrom</c>.
    /// </summary>
    [Benchmark]
    public ByteString CopyFrom() => ByteString.CopyFrom(byteBuffer);

    /// <summary>
    /// Creates a <see cref="ByteString"/> through <c>UnsafeByteOperations.UnsafeWrap</c>.
    /// </summary>
    [Benchmark]
    public ByteString UnsafeWrap() => UnsafeByteOperations.UnsafeWrap(byteBuffer);
}
}

@ -34,6 +34,9 @@ using System;
using System.Text;
using NUnit.Framework;
using System.IO;
using System.Collections.Generic;
using System.Collections;
using System.Linq;
#if !NET35
using System.Threading.Tasks;
#endif
@ -54,6 +57,7 @@ namespace Google.Protobuf
EqualityTester.AssertInequality(b1, b3);
EqualityTester.AssertInequality(b1, b4);
EqualityTester.AssertInequality(b1, null);
EqualityTester.AssertEquality(ByteString.Empty, ByteString.Empty);
#pragma warning disable 1718 // Deliberately calling ==(b1, b1) and !=(b1, b1)
Assert.IsTrue(b1 == b1);
Assert.IsTrue(b1 == b2);
@ -63,6 +67,7 @@ namespace Google.Protobuf
Assert.IsTrue((ByteString) null == null);
Assert.IsFalse(b1 != b1);
Assert.IsFalse(b1 != b2);
Assert.IsTrue(ByteString.Empty == ByteString.Empty);
#pragma warning disable 1718
Assert.IsTrue(b1 != b3);
Assert.IsTrue(b1 != b4);
@ -154,6 +159,65 @@ namespace Google.Protobuf
Assert.AreEqual(3, bs[1]);
}
[Test]
public void CopyTo()
{
    // Copying a ByteString into an equally sized array must reproduce the source bytes.
    byte[] source = { 0, 1, 2, 3, 4, 5, 6 };
    ByteString bs = ByteString.CopyFrom(source);
    var destination = new byte[source.Length];

    bs.CopyTo(destination, 0);

    CollectionAssert.AreEqual(source, destination);
}
[Test]
public void GetEnumerator()
{
    byte[] source = { 0, 1, 2, 3, 4, 5, 6 };
    ByteString bs = ByteString.CopyFrom(source);

    // Generic enumerator: the first element is the first byte.
    IEnumerator<byte> typed = bs.GetEnumerator();
    Assert.IsTrue(typed.MoveNext());
    Assert.AreEqual(0, typed.Current);

    // Non-generic enumerator obtained through IEnumerable behaves the same way.
    IEnumerator untyped = ((IEnumerable)bs).GetEnumerator();
    Assert.IsTrue(untyped.MoveNext());
    Assert.AreEqual(0, untyped.Current);

    // Enumerating through LINQ yields the same bytes as the span view.
    byte[] fromSpan = bs.Span.ToArray();
    byte[] fromLinq = bs.ToArray();
    CollectionAssert.AreEqual(fromSpan, fromLinq);
}
[Test]
public void UnsafeWrap()
{
    byte[] backing = { 0, 1, 2, 3, 4, 5, 6 };

    // Wrap the three bytes starting at index 2 of the backing array.
    ReadOnlyMemory<byte> slice = backing.AsMemory(2, 3);
    ByteString bs = UnsafeByteOperations.UnsafeWrap(slice);
    ReadOnlySpan<byte> view = bs.Span;

    Assert.AreEqual(3, view.Length);
    Assert.AreEqual(2, view[0]);
    Assert.AreEqual(3, view[1]);
    Assert.AreEqual(4, view[2]);

    // A write to the backing array must show through the span: the data was wrapped, not copied.
    backing[2] = byte.MaxValue;
    Assert.AreEqual(byte.MaxValue, view[0]);
}
[Test]
public void WriteToStream()
{
    // Writing a ByteString to a stream must emit exactly the bytes it was created from.
    byte[] expected = { 0, 1, 2, 3, 4, 5, 6 };
    ByteString bs = ByteString.CopyFrom(expected);
    var sink = new MemoryStream();

    bs.WriteTo(sink);

    CollectionAssert.AreEqual(expected, sink.ToArray());
}
[Test]
public void ToStringUtf8()
{

@ -34,6 +34,7 @@ using System;
using System.Collections;
using System.Collections.Generic;
using System.IO;
using System.Runtime.InteropServices;
using System.Security;
using System.Text;
#if !NET35
@ -49,40 +50,26 @@ namespace Google.Protobuf
/// <summary>
/// Immutable array of bytes.
/// </summary>
[SecuritySafeCritical]
public sealed class ByteString : IEnumerable<byte>, IEquatable<ByteString>
{
private static readonly ByteString empty = new ByteString(new byte[0]);
private readonly byte[] bytes;
private readonly ReadOnlyMemory<byte> bytes;
/// <summary>
/// Unsafe operations that can cause IO Failure and/or other catastrophic side-effects.
/// Internal use only. Ensure that the provided memory is not mutated and belongs to this instance.
/// </summary>
internal static class Unsafe
{
/// <summary>
/// Constructs a new ByteString from the given byte array. The array is
/// *not* copied, and must not be modified after this constructor is called.
/// </summary>
internal static ByteString FromBytes(byte[] bytes)
{
return new ByteString(bytes);
}
}
/// <summary>
/// Internal use only. Ensure that the provided array is not mutated and belongs to this instance.
/// </summary>
internal static ByteString AttachBytes(byte[] bytes)
internal static ByteString AttachBytes(ReadOnlyMemory<byte> bytes)
{
return new ByteString(bytes);
}
/// <summary>
/// Constructs a new ByteString from the given byte array. The array is
/// Constructs a new ByteString from the given memory. The memory is
/// *not* copied, and must not be modified after this constructor is called.
/// </summary>
private ByteString(byte[] bytes)
private ByteString(ReadOnlyMemory<byte> bytes)
{
this.bytes = bytes;
}
@ -117,11 +104,7 @@ namespace Google.Protobuf
/// </summary>
public ReadOnlySpan<byte> Span
{
[SecuritySafeCritical]
get
{
return new ReadOnlySpan<byte>(bytes);
}
get { return bytes.Span; }
}
/// <summary>
@ -130,11 +113,7 @@ namespace Google.Protobuf
/// </summary>
public ReadOnlyMemory<byte> Memory
{
[SecuritySafeCritical]
get
{
return new ReadOnlyMemory<byte>(bytes);
}
get { return bytes; }
}
/// <summary>
@ -144,7 +123,7 @@ namespace Google.Protobuf
/// <returns>A byte array with the same data as this <c>ByteString</c>.</returns>
public byte[] ToByteArray()
{
return (byte[]) bytes.Clone();
return bytes.ToArray();
}
/// <summary>
@ -153,7 +132,16 @@ namespace Google.Protobuf
/// <returns>A base64 representation of this <c>ByteString</c>.</returns>
public string ToBase64()
{
return Convert.ToBase64String(bytes);
if (MemoryMarshal.TryGetArray(bytes, out ArraySegment<byte> segment))
{
// Fast path. ByteString was created with an array, so pass the underlying array.
return Convert.ToBase64String(segment.Array, segment.Offset, segment.Count);
}
else
{
// Slow path. ByteString is not backed by an array. Copy the memory to an array and pass it to ToBase64String.
return Convert.ToBase64String(bytes.ToArray());
}
}
/// <summary>
@ -197,21 +185,10 @@ namespace Google.Protobuf
/// <param name="stream">The stream to copy into a ByteString.</param>
/// <param name="cancellationToken">The cancellation token to use when reading from the stream, if any.</param>
/// <returns>A ByteString with content read from the given stream.</returns>
public async static Task<ByteString> FromStreamAsync(Stream stream, CancellationToken cancellationToken = default(CancellationToken))
public static Task<ByteString> FromStreamAsync(Stream stream, CancellationToken cancellationToken = default(CancellationToken))
{
ProtoPreconditions.CheckNotNull(stream, nameof(stream));
int capacity = stream.CanSeek ? checked((int) (stream.Length - stream.Position)) : 0;
var memoryStream = new MemoryStream(capacity);
// We have to specify the buffer size here, as there's no overload accepting the cancellation token
// alone. But it's documented to use 81920 by default if not specified.
await stream.CopyToAsync(memoryStream, 81920, cancellationToken);
#if NETSTANDARD1_1 || NETSTANDARD2_0
byte[] bytes = memoryStream.ToArray();
#else
// Avoid an extra copy if we can.
byte[] bytes = memoryStream.Length == memoryStream.Capacity ? memoryStream.GetBuffer() : memoryStream.ToArray();
#endif
return AttachBytes(bytes);
return ByteStringAsync.FromStreamAsyncCore(stream, cancellationToken);
}
#endif
@ -242,7 +219,6 @@ namespace Google.Protobuf
/// are copied, so further modifications to the span will not
/// be reflected in the returned <see cref="ByteString" />.
/// </summary>
[SecuritySafeCritical]
public static ByteString CopyFrom(ReadOnlySpan<byte> bytes)
{
return new ByteString(bytes.ToArray());
@ -270,7 +246,7 @@ namespace Google.Protobuf
/// </summary>
public byte this[int index]
{
get { return bytes[index]; }
get { return bytes.Span[index]; }
}
/// <summary>
@ -284,7 +260,18 @@ namespace Google.Protobuf
/// <returns>The result of decoding the binary data with the given decoding.</returns>
public string ToString(Encoding encoding)
{
return encoding.GetString(bytes, 0, bytes.Length);
if (MemoryMarshal.TryGetArray(bytes, out ArraySegment<byte> segment))
{
// Fast path. ByteString was created with an array.
return encoding.GetString(segment.Array, segment.Offset, segment.Count);
}
else
{
// Slow path. ByteString is not backed by an array. Copy the memory to an array and pass it to GetString.
// TODO: Consider using GetString overload that takes a pointer.
byte[] array = bytes.ToArray();
return encoding.GetString(array, 0, array.Length);
}
}
/// <summary>
@ -304,9 +291,10 @@ namespace Google.Protobuf
/// Returns an iterator over the bytes in this <see cref="ByteString"/>.
/// </summary>
/// <returns>An iterator over the bytes in this object.</returns>
[SecuritySafeCritical]
public IEnumerator<byte> GetEnumerator()
{
return ((IEnumerable<byte>) bytes).GetEnumerator();
return MemoryMarshal.ToEnumerable(bytes).GetEnumerator();
}
/// <summary>
@ -324,7 +312,17 @@ namespace Google.Protobuf
public CodedInputStream CreateCodedInput()
{
// We trust CodedInputStream not to reveal the provided byte array or modify it
return new CodedInputStream(bytes);
if (MemoryMarshal.TryGetArray(bytes, out ArraySegment<byte> segment) && segment.Count == bytes.Length)
{
// Fast path. ByteString was created with a complete array.
return new CodedInputStream(segment.Array);
}
else
{
// Slow path. ByteString is not backed by an array, or is a slice of an array.
// Copy the memory to a new array and pass it to CodedInputStream.
return new CodedInputStream(bytes.ToArray());
}
}
/// <summary>
@ -343,18 +341,8 @@ namespace Google.Protobuf
{
return false;
}
if (lhs.bytes.Length != rhs.bytes.Length)
{
return false;
}
for (int i = 0; i < lhs.Length; i++)
{
if (rhs.bytes[i] != lhs.bytes[i])
{
return false;
}
}
return true;
return lhs.bytes.Span.SequenceEqual(rhs.bytes.Span);
}
/// <summary>
@ -373,6 +361,7 @@ namespace Google.Protobuf
/// </summary>
/// <param name="obj">The object to compare this with.</param>
/// <returns><c>true</c> if <paramref name="obj"/> refers to an equal <see cref="ByteString"/>; <c>false</c> otherwise.</returns>
[SecuritySafeCritical]
public override bool Equals(object obj)
{
return this == (obj as ByteString);
@ -383,12 +372,15 @@ namespace Google.Protobuf
/// will return the same hash code.
/// </summary>
/// <returns>A hash code for this object.</returns>
[SecuritySafeCritical]
public override int GetHashCode()
{
ReadOnlySpan<byte> b = bytes.Span;
int ret = 23;
foreach (byte b in bytes)
for (int i = 0; i < b.Length; i++)
{
ret = (ret * 31) + b;
ret = (ret * 31) + b[i];
}
return ret;
}
@ -403,20 +395,12 @@ namespace Google.Protobuf
return this == other;
}
/// <summary>
/// Used internally by CodedOutputStream to avoid creating a copy for the write
/// </summary>
internal void WriteRawBytesTo(CodedOutputStream outputStream)
{
outputStream.WriteRawBytes(bytes, 0, bytes.Length);
}
/// <summary>
/// Copies the entire byte array to the destination array provided at the offset specified.
/// </summary>
public void CopyTo(byte[] array, int position)
{
ByteArray.Copy(bytes, 0, array, position, bytes.Length);
bytes.CopyTo(array.AsMemory(position));
}
/// <summary>
@ -424,7 +408,17 @@ namespace Google.Protobuf
/// </summary>
public void WriteTo(Stream outputStream)
{
outputStream.Write(bytes, 0, bytes.Length);
if (MemoryMarshal.TryGetArray(bytes, out ArraySegment<byte> segment))
{
// Fast path. ByteString was created with an array, so pass the underlying array.
outputStream.Write(segment.Array, segment.Offset, segment.Count);
}
else
{
// Slow path. ByteString is not backed by an array. Copy the memory to an array and pass it to Stream.Write.
var array = bytes.ToArray();
outputStream.Write(array, 0, array.Length);
}
}
}
}

@ -0,0 +1,64 @@
#region Copyright notice and license
// Protocol Buffers - Google's data interchange format
// Copyright 2008 Google Inc. All rights reserved.
// https://developers.google.com/protocol-buffers/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#endregion
using System;
using System.IO;
using System.Threading;
using System.Threading.Tasks;
namespace Google.Protobuf
{
/// <summary>
/// Hosts the asynchronous operations of <see cref="ByteString"/>.
/// The SecuritySafeCritical attribute can not be placed on types with async methods, so
/// ByteString's async methods live in this separate class; that allows
/// <see cref="ByteString"/> itself to be marked with SecuritySafeCritical.
/// </summary>
internal static class ByteStringAsync
{
#if !NET35
    /// <summary>
    /// Asynchronously copies <paramref name="stream"/>, from its current position to its end,
    /// into a new <see cref="ByteString"/>.
    /// </summary>
    /// <param name="stream">The stream to read from. Null checking is left to the caller.</param>
    /// <param name="cancellationToken">Token passed through to the underlying stream copy.</param>
    /// <returns>A task producing a ByteString that holds the bytes read from the stream.</returns>
    /// <exception cref="OverflowException">
    /// The stream is seekable and its remaining length does not fit in an <see cref="int"/>.
    /// </exception>
    internal static async Task<ByteString> FromStreamAsyncCore(Stream stream, CancellationToken cancellationToken)
    {
        // Pre-size the buffer when the remaining length is known; otherwise let it grow on demand.
        int capacity = stream.CanSeek ? checked((int)(stream.Length - stream.Position)) : 0;
        var memoryStream = new MemoryStream(capacity);
        // We have to specify the buffer size here, as there's no overload accepting the cancellation token
        // alone. But it's documented to use 81920 by default if not specified.
        // ConfigureAwait(false): this is library code that needs no particular context after the copy,
        // so do not capture the caller's synchronization context (avoids deadlocks when callers block
        // on the returned task).
        await stream.CopyToAsync(memoryStream, 81920, cancellationToken).ConfigureAwait(false);
#if NETSTANDARD1_1 || NETSTANDARD2_0
        byte[] bytes = memoryStream.ToArray();
#else
        // Avoid an extra copy if we can: when the buffer is exactly full, hand it over directly.
        byte[] bytes = memoryStream.Length == memoryStream.Capacity ? memoryStream.GetBuffer() : memoryStream.ToArray();
#endif
        return ByteString.AttachBytes(bytes);
    }
#endif
}
}

@ -0,0 +1,81 @@
#region Copyright notice and license
// Protocol Buffers - Google's data interchange format
// Copyright 2008 Google Inc. All rights reserved.
// https://developers.google.com/protocol-buffers/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#endregion
using System;
using System.Security;
namespace Google.Protobuf
{
/// <summary>
/// A collection of unsafe byte operations intended for advanced applications with high performance
/// requirements. They are called "unsafe" because they can expose the buffer backing a
/// <see cref="ByteString"/> to the application.
/// </summary>
/// <remarks>
/// <para>
/// Only call the methods in this class when it is guaranteed that the buffer backing the
/// <see cref="ByteString"/> will never change! Mutating a <see cref="ByteString"/> can lead to unexpected
/// and undesirable consequences in your application, and will likely be difficult to debug. Proceed with caution!
/// </para>
/// <para>
/// Such mutation can have a number of significant side effects with spooky-action-at-a-distance-like
/// behavior. In particular, if the bytes value changes out from under a Protocol Buffer:
/// </para>
/// <list type="bullet">
/// <item>
/// <description>serialization may throw</description>
/// </item>
/// <item>
/// <description>serialization may succeed but the wrong bytes may be written out</description>
/// </item>
/// <item>
/// <description>messages are no longer threadsafe</description>
/// </item>
/// <item>
/// <description>hashCode may be incorrect</description>
/// </item>
/// </list>
/// </remarks>
[SecuritySafeCritical]
public static class UnsafeByteOperations
{
    /// <summary>
    /// Creates a <see cref="ByteString" /> that wraps the given bytes. The bytes are not copied,
    /// and must not be modified while the <see cref="ByteString" /> is in use.
    /// This API is experimental and subject to change.
    /// </summary>
    /// <param name="bytes">The memory to wrap without copying.</param>
    /// <returns>A <see cref="ByteString" /> backed directly by <paramref name="bytes"/>.</returns>
    public static ByteString UnsafeWrap(ReadOnlyMemory<byte> bytes) => ByteString.AttachBytes(bytes);
}
Loading…
Cancel
Save