Merge pull request #8149 from JamesNK/jamesnk/writestring-small

Optimize writing small strings
pull/8243/head
Jan Tattermusch 4 years ago committed by GitHub
commit 4140735f05
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 24
      csharp/src/Google.Protobuf.Test/CodedOutputStreamTest.cs
  2. 56
      csharp/src/Google.Protobuf/WritingPrimitives.cs

@ -35,6 +35,7 @@ using System.IO;
using Google.Protobuf.TestProtos; using Google.Protobuf.TestProtos;
using Google.Protobuf.Buffers; using Google.Protobuf.Buffers;
using NUnit.Framework; using NUnit.Framework;
using System.Text;
namespace Google.Protobuf namespace Google.Protobuf
{ {
@ -516,5 +517,28 @@ namespace Google.Protobuf
var stream = new CodedOutputStream(new byte[10]); var stream = new CodedOutputStream(new byte[10]);
stream.Dispose(); stream.Dispose();
} }
/// <summary>
/// Round-trips strings of every length from 1 to 1024 characters through
/// WriteString and ReadString and checks each one is read back unchanged.
/// </summary>
[Test]
public void WriteStringsOfDifferentSizes()
{
    const int MaxLength = 1024;

    for (int length = 1; length <= MaxLength; length++)
    {
        // Build "0123456789012..." cut off at the current length.
        var digits = new StringBuilder();
        for (int index = 0; index < length; index++)
        {
            digits.Append(index % 10);
        }
        string expected = digits.ToString();

        // A fresh buffer per iteration, so bytes from an earlier string cannot affect this round trip.
        byte[] data = new byte[4096];
        var writer = new CodedOutputStream(data);
        writer.WriteString(expected);
        writer.Flush();

        // Decode from the same bytes and compare with what was written.
        var reader = new CodedInputStream(data);
        Assert.AreEqual(expected, reader.ReadString());
    }
}
} }
} }

@ -163,10 +163,25 @@ namespace Google.Protobuf
/// </summary> /// </summary>
public static void WriteString(ref Span<byte> buffer, ref WriterInternalState state, string value) public static void WriteString(ref Span<byte> buffer, ref WriterInternalState state, string value)
{ {
// Optimise the case where we have enough space to write const int MaxBytesPerChar = 3;
// the string directly to the buffer, which should be common. const int MaxSmallStringLength = 128 / MaxBytesPerChar;
// The string is small enough that the length will always be a 1 byte varint.
// Also there is enough space to write length + bytes to buffer.
// Write string directly to the buffer, and then write length.
// This saves calling GetByteCount on the string. We get the string length from GetBytes.
if (value.Length <= MaxSmallStringLength && buffer.Length - state.position - 1 >= value.Length * MaxBytesPerChar)
{
int indexOfLengthDelimiter = state.position++;
buffer[indexOfLengthDelimiter] = (byte)WriteStringToBuffer(buffer, ref state, value);
return;
}
int length = Utf8Encoding.GetByteCount(value); int length = Utf8Encoding.GetByteCount(value);
WriteLength(ref buffer, ref state, length); WriteLength(ref buffer, ref state, length);
// Optimise the case where we have enough space to write
// the string directly to the buffer, which should be common.
if (buffer.Length - state.position >= length) if (buffer.Length - state.position >= length)
{ {
if (length == value.Length) // Must be all ASCII... if (length == value.Length) // Must be all ASCII...
@ -179,35 +194,46 @@ namespace Google.Protobuf
} }
else else
{ {
WriteStringToBuffer(buffer, ref state, value);
}
}
else
{
// Opportunity for future optimization:
// Large strings that don't fit into the current buffer segment
// can probably be optimized by using Utf8Encoding.GetEncoder()
// but more benchmarks would need to be added as evidence.
byte[] bytes = Utf8Encoding.GetBytes(value);
WriteRawBytes(ref buffer, ref state, bytes);
}
}
private static int WriteStringToBuffer(Span<byte> buffer, ref WriterInternalState state, string value)
{
#if NETSTANDARD1_1 #if NETSTANDARD1_1
// slowpath when Encoding.GetBytes(Char*, Int32, Byte*, Int32) is not available // slowpath when Encoding.GetBytes(Char*, Int32, Byte*, Int32) is not available
byte[] bytes = Utf8Encoding.GetBytes(value); byte[] bytes = Utf8Encoding.GetBytes(value);
WriteRawBytes(ref buffer, ref state, bytes); WriteRawBytes(ref buffer, ref state, bytes);
return bytes.Length;
#else #else
ReadOnlySpan<char> source = value.AsSpan(); ReadOnlySpan<char> source = value.AsSpan();
int bytesUsed; int bytesUsed;
unsafe unsafe
{ {
fixed (char* sourceChars = &MemoryMarshal.GetReference(source)) fixed (char* sourceChars = &MemoryMarshal.GetReference(source))
fixed (byte* destinationBytes = &MemoryMarshal.GetReference(buffer.Slice(state.position))) fixed (byte* destinationBytes = &MemoryMarshal.GetReference(buffer))
{ {
bytesUsed = Utf8Encoding.GetBytes(sourceChars, source.Length, destinationBytes, buffer.Length); bytesUsed = Utf8Encoding.GetBytes(
sourceChars,
source.Length,
destinationBytes + state.position,
buffer.Length - state.position);
} }
} }
state.position += bytesUsed; state.position += bytesUsed;
return bytesUsed;
#endif #endif
} }
}
else
{
// Opportunity for future optimization:
// Large strings that don't fit into the current buffer segment
// can probably be optimized by using Utf8Encoding.GetEncoder()
// but more benchmarks would need to be added as evidence.
byte[] bytes = Utf8Encoding.GetBytes(value);
WriteRawBytes(ref buffer, ref state, bytes);
}
}
/// <summary> /// <summary>
/// Write a byte string, without a tag, to the stream. /// Write a byte string, without a tag, to the stream.

Loading…
Cancel
Save