Add non-ASCII string benchmarks for the C# raw-primitive write benchmarks.

Reviewer notes:
- Line breaks were restored from a whitespace-collapsed copy of this patch. The
  positions of blank context lines were inferred from the hunk header counts
  and should be confirmed against the target files.
- Generic type arguments (Dictionary<int, string[]>, IEnumerable<int>, ...) were
  missing from that copy and have been restored from usage; confirm them
  against the target files.
- The index hashes are those of the original patch and do not reflect the
  edits made to the added lines.

diff --git a/csharp/src/Google.Protobuf.Benchmarks/ParseRawPrimitivesBenchmark.cs b/csharp/src/Google.Protobuf.Benchmarks/ParseRawPrimitivesBenchmark.cs
index 75658a9ef8..2f226a323c 100644
--- a/csharp/src/Google.Protobuf.Benchmarks/ParseRawPrimitivesBenchmark.cs
+++ b/csharp/src/Google.Protobuf.Benchmarks/ParseRawPrimitivesBenchmark.cs
@@ -463,5 +463,44 @@ namespace Google.Protobuf.Benchmarks
             }
             return str;
         }
+
+        /// <summary>
+        /// Creates a string that contains non-ASCII characters and whose size, as computed by
+        /// <see cref="CodedOutputStream.ComputeStringSize"/>, equals <paramref name="encodedSize"/>.
+        /// </summary>
+        /// <param name="encodedSize">The desired encoded size; must be at least 3.</param>
+        /// <returns>A string of U+00DC characters followed by any 'a' padding that was needed.</returns>
+        /// <exception cref="ArgumentException"><paramref name="encodedSize"/> is less than 3.</exception>
+        /// <exception cref="InvalidOperationException">
+        /// The generated string does not have the requested encoded size.
+        /// </exception>
+        public static string CreateNonAsciiStringWithEncodedSize(int encodedSize)
+        {
+            if (encodedSize < 3)
+            {
+                throw new ArgumentException("Illegal encoded size for a string with non-ascii chars.", nameof(encodedSize));
+            }
+            const char twoByteChar = '\u00DC'; // U-umlaut, UTF8 encoding has 2 bytes
+            var str = new string(twoByteChar, encodedSize / 2);
+            while (CodedOutputStream.ComputeStringSize(str) > encodedSize)
+            {
+                str = str.Substring(1);
+            }
+
+            // Add padding of ASCII characters to reach the desired encoded size.
+            while (CodedOutputStream.ComputeStringSize(str) < encodedSize)
+            {
+                str += 'a';
+            }
+
+            // Note that for a few specific encodedSize values, it might be impossible to generate
+            // the string with the desired encodedSize using the algorithm above. For testing purposes, checking that
+            // the encoded size we got is actually correct is good enough.
+            if (CodedOutputStream.ComputeStringSize(str) != encodedSize)
+            {
+                throw new InvalidOperationException("Generated string with wrong encodedSize");
+            }
+            return str;
+        }
     }
 }
diff --git a/csharp/src/Google.Protobuf.Benchmarks/WriteRawPrimitivesBenchmark.cs b/csharp/src/Google.Protobuf.Benchmarks/WriteRawPrimitivesBenchmark.cs
index 7dacc9c48c..4df43ed7e2 100644
--- a/csharp/src/Google.Protobuf.Benchmarks/WriteRawPrimitivesBenchmark.cs
+++ b/csharp/src/Google.Protobuf.Benchmarks/WriteRawPrimitivesBenchmark.cs
@@ -56,6 +56,9 @@ namespace Google.Protobuf.Benchmarks
         // key is the encodedSize of string values
         Dictionary<int, string[]> stringValues;
 
+        // key is the encodedSize of non-ASCII string values
+        Dictionary<int, string[]> nonAsciiStringValues;
+
         // key is the encodedSize of string values
         Dictionary<int, ByteString[]> byteStringValues;
 
@@ -66,6 +69,8 @@ namespace Google.Protobuf.Benchmarks
 
         public IEnumerable<int> StringEncodedSizes => new[] { 1, 4, 10, 105, 10080 };
 
+        public IEnumerable<int> NonAsciiStringEncodedSizes => new[] { 4, 10, 105, 10080 };
+
         [GlobalSetup]
         public void GlobalSetup()
         {
@@ -86,12 +91,19 @@ namespace Google.Protobuf.Benchmarks
             floatValues = CreateRandomFloats(random, BytesToWrite / sizeof(float));
 
             stringValues = new Dictionary<int, string[]>();
+            byteStringValues = new Dictionary<int, ByteString[]>();
 
             foreach(var encodedSize in StringEncodedSizes)
             {
                 stringValues.Add(encodedSize, CreateStrings(BytesToWrite / encodedSize, encodedSize));
                 byteStringValues.Add(encodedSize, CreateByteStrings(BytesToWrite / encodedSize, encodedSize));
             }
+
+            nonAsciiStringValues = new Dictionary<int, string[]>();
+            foreach(var encodedSize in NonAsciiStringEncodedSizes)
+            {
+                nonAsciiStringValues.Add(encodedSize, CreateNonAsciiStrings(BytesToWrite / encodedSize, encodedSize));
+            }
         }
 
         // Total number of bytes that each benchmark will write.
@@ -318,6 +330,45 @@ namespace Google.Protobuf.Benchmarks
             ctx.CheckNoSpaceLeft();
         }
 
+        /// <summary>
+        /// Writes every non-ASCII string stored for the given encoded size to
+        /// <c>outputBuffer</c> through a <see cref="CodedOutputStream"/>.
+        /// </summary>
+        /// <param name="encodedSize">Key into <c>nonAsciiStringValues</c>; supplied from <see cref="NonAsciiStringEncodedSizes"/>.</param>
+        [Benchmark]
+        [ArgumentsSource(nameof(NonAsciiStringEncodedSizes))]
+        public void WriteNonAsciiString_CodedOutputStream(int encodedSize)
+        {
+            var values = nonAsciiStringValues[encodedSize];
+            var cos = new CodedOutputStream(outputBuffer);
+            foreach (var value in values)
+            {
+                cos.WriteString(value);
+            }
+            cos.Flush();
+            cos.CheckNoSpaceLeft();
+        }
+
+        /// <summary>
+        /// Writes every non-ASCII string stored for the given encoded size to
+        /// <c>outputBuffer</c> through a <see cref="WriteContext"/> initialized over a span of that buffer.
+        /// </summary>
+        /// <param name="encodedSize">Key into <c>nonAsciiStringValues</c>; supplied from <see cref="NonAsciiStringEncodedSizes"/>.</param>
+        [Benchmark]
+        [ArgumentsSource(nameof(NonAsciiStringEncodedSizes))]
+        public void WriteNonAsciiString_WriteContext(int encodedSize)
+        {
+            var values = nonAsciiStringValues[encodedSize];
+            var span = new Span<byte>(outputBuffer);
+            WriteContext.Initialize(ref span, out WriteContext ctx);
+            foreach (var value in values)
+            {
+                ctx.WriteString(value);
+            }
+            ctx.Flush();
+            ctx.CheckNoSpaceLeft();
+        }
+
         [Benchmark]
         [ArgumentsSource(nameof(StringEncodedSizes))]
         public void WriteBytes_CodedOutputStream(int encodedSize)
@@ -399,6 +450,24 @@ namespace Google.Protobuf.Benchmarks
             return result;
         }
 
+        /// <summary>
+        /// Builds an array in which every element is the same non-ASCII string of the given encoded size.
+        /// </summary>
+        /// <param name="valueCount">Number of array elements to produce.</param>
+        /// <param name="encodedSize">Encoded size passed to <see cref="ParseRawPrimitivesBenchmark.CreateNonAsciiStringWithEncodedSize"/>.</param>
+        /// <returns>An array of <paramref name="valueCount"/> references to one generated string.</returns>
+        private static string[] CreateNonAsciiStrings(int valueCount, int encodedSize)
+        {
+            var str = ParseRawPrimitivesBenchmark.CreateNonAsciiStringWithEncodedSize(encodedSize);
+
+            var result = new string[valueCount];
+            for (int i = 0; i < valueCount; i++)
+            {
+                result[i] = str;
+            }
+            return result;
+        }
+
         private static ByteString[] CreateByteStrings(int valueCount, int encodedSize)
         {
             var str = ParseRawPrimitivesBenchmark.CreateStringWithEncodedSize(encodedSize);