From cb246b8032c7e8fe1eeb2c37f880d7259d69b68e Mon Sep 17 00:00:00 2001 From: James Newton-King Date: Wed, 16 Dec 2020 16:14:43 +1300 Subject: [PATCH] Optimize writing small strings --- .../CodedOutputStreamTest.cs | 24 +++++++ .../src/Google.Protobuf/WritingPrimitives.cs | 65 +++++++++++++------ 2 files changed, 70 insertions(+), 19 deletions(-) diff --git a/csharp/src/Google.Protobuf.Test/CodedOutputStreamTest.cs b/csharp/src/Google.Protobuf.Test/CodedOutputStreamTest.cs index 1c77e121d3..1e5333c965 100644 --- a/csharp/src/Google.Protobuf.Test/CodedOutputStreamTest.cs +++ b/csharp/src/Google.Protobuf.Test/CodedOutputStreamTest.cs @@ -35,6 +35,7 @@ using System.IO; using Google.Protobuf.TestProtos; using Google.Protobuf.Buffers; using NUnit.Framework; +using System.Text; namespace Google.Protobuf { @@ -516,5 +517,28 @@ namespace Google.Protobuf var stream = new CodedOutputStream(new byte[10]); stream.Dispose(); } + + [Test] + public void WriteStringsOfDifferentSizes() + { + for (int i = 1; i <= 1024; i++) + { + var buffer = new byte[4096]; + var output = new CodedOutputStream(buffer); + var sb = new StringBuilder(); + for (int j = 0; j < i; j++) + { + sb.Append((j % 10).ToString()); // incrementing numbers, repeating + } + var s = sb.ToString(); + output.WriteString(s); + + output.Flush(); + + // Verify written content + var input = new CodedInputStream(buffer); + Assert.AreEqual(s, input.ReadString()); + } + } } } \ No newline at end of file diff --git a/csharp/src/Google.Protobuf/WritingPrimitives.cs b/csharp/src/Google.Protobuf/WritingPrimitives.cs index 846df73502..7773905597 100644 --- a/csharp/src/Google.Protobuf/WritingPrimitives.cs +++ b/csharp/src/Google.Protobuf/WritingPrimitives.cs @@ -163,10 +163,26 @@ namespace Google.Protobuf /// </summary> public static void WriteString(ref Span<byte> buffer, ref WriterInternalState state, string value) { - // Optimise the case where we have enough space to write - // the string directly to the buffer, which should be common. 
+ const int MaxBytesPerChar = 3; + const int MaxSmallStringLength = 128 / MaxBytesPerChar; + + // The string is small enough that the length will always be a 1 byte varint. + // Also there is enough space to write length + bytes to buffer. + // Write string directly to the buffer, and then write length. + // This saves calling GetByteCount on the string. We get the string length from GetBytes. + if (value.Length <= MaxSmallStringLength && buffer.Length - state.position - 1 >= value.Length * MaxBytesPerChar) + { + // Get the original position, then increment it on state by 1, then write string to buffer. + // Method will return byte length, which is then set to the original position. + buffer[state.position++] = (byte)WriteStringToBuffer(buffer, ref state, value); + return; + } + int length = Utf8Encoding.GetByteCount(value); WriteLength(ref buffer, ref state, length); + + // Optimise the case where we have enough space to write + // the string directly to the buffer, which should be common. if (buffer.Length - state.position >= length) { if (length == value.Length) // Must be all ASCII... 
@@ -179,23 +195,7 @@ namespace Google.Protobuf } else { -#if NETSTANDARD1_1 - // slowpath when Encoding.GetBytes(Char*, Int32, Byte*, Int32) is not available - byte[] bytes = Utf8Encoding.GetBytes(value); - WriteRawBytes(ref buffer, ref state, bytes); -#else - ReadOnlySpan<char> source = value.AsSpan(); - int bytesUsed; - unsafe - { - fixed (char* sourceChars = &MemoryMarshal.GetReference(source)) - fixed (byte* destinationBytes = &MemoryMarshal.GetReference(buffer.Slice(state.position))) - { - bytesUsed = Utf8Encoding.GetBytes(sourceChars, source.Length, destinationBytes, buffer.Length); - } - } - state.position += bytesUsed; -#endif + WriteStringToBuffer(buffer, ref state, value); } } else @@ -209,6 +209,33 @@ namespace Google.Protobuf } } + private static int WriteStringToBuffer(Span<byte> buffer, ref WriterInternalState state, string value) + { +#if NETSTANDARD1_1 + // slowpath when Encoding.GetBytes(Char*, Int32, Byte*, Int32) is not available + byte[] bytes = Utf8Encoding.GetBytes(value); + WriteRawBytes(ref buffer, ref state, bytes); + return bytes.Length; +#else + ReadOnlySpan<char> source = value.AsSpan(); + int bytesUsed; + unsafe + { + fixed (char* sourceChars = &MemoryMarshal.GetReference(source)) + fixed (byte* destinationBytes = &MemoryMarshal.GetReference(buffer)) + { + bytesUsed = Utf8Encoding.GetBytes( + sourceChars, + source.Length, + destinationBytes + state.position, + buffer.Length - state.position); + } + } + state.position += bytesUsed; + return bytesUsed; +#endif + } + /// <summary> /// Write a byte string, without a tag, to the stream. /// The data is length-prefixed.