From 4320d5d2dd2f7a88a9a8f88d5b7574cd0b7a63a8 Mon Sep 17 00:00:00 2001 From: Jon Skeet Date: Tue, 19 Jul 2022 11:46:31 +0100 Subject: [PATCH] Add tests for C# behavior around invalid UTF-8 These are primarily in order to document the current behavior, rather than asserting it's the "right" behavior. --- .../ParsingPrimitivesTest.cs | 63 +++++++++++++++++++ .../WritingPrimitivesTest.cs | 61 ++++++++++++++++++ 2 files changed, 124 insertions(+) create mode 100644 csharp/src/Google.Protobuf.Test/ParsingPrimitivesTest.cs create mode 100644 csharp/src/Google.Protobuf.Test/WritingPrimitivesTest.cs diff --git a/csharp/src/Google.Protobuf.Test/ParsingPrimitivesTest.cs b/csharp/src/Google.Protobuf.Test/ParsingPrimitivesTest.cs new file mode 100644 index 0000000000..4d0aa9e015 --- /dev/null +++ b/csharp/src/Google.Protobuf.Test/ParsingPrimitivesTest.cs @@ -0,0 +1,63 @@ +#region Copyright notice and license +// Protocol Buffers - Google's data interchange format +// Copyright 2022 Google Inc. All rights reserved. +// https://developers.google.com/protocol-buffers/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +#endregion + +using NUnit.Framework; +using System; +using System.Linq; + +namespace Google.Protobuf.Test; + +internal class ParsingPrimitivesTest +{ + // Documents how ReadRawString currently decodes invalid UTF-8: each invalid byte becomes U+FFFD. + // Test cases use integers rather than bytes as they're easier to specify in attributes. 
+ + [Test] + [TestCase("\ufffd", 255)] + [TestCase("A\ufffd", 65, 255)] + [TestCase("A\ufffd\ufffdB", 65, 255, 255, 66)] + // Overlong two-byte encoding of U+0020 (space): each of the two bytes is expected to decode as U+FFFD + [TestCase("\ufffd\ufffd", 0xc0, 0xa0)] + public void ReadRawString_NonUtf8(string expectedText, params int[] bytes) + { + var context = CreateContext(bytes); + string text = ParsingPrimitives.ReadRawString(ref context.buffer, ref context.state, bytes.Length); + Assert.AreEqual(expectedText, text); + } + + private static ParseContext CreateContext(int[] bytes) + { + byte[] actualBytes = bytes.Select(b => (byte) b).ToArray(); + ParseContext.Initialize(actualBytes.AsSpan(), out var context); + return context; + } +} diff --git a/csharp/src/Google.Protobuf.Test/WritingPrimitivesTest.cs b/csharp/src/Google.Protobuf.Test/WritingPrimitivesTest.cs new file mode 100644 index 0000000000..069df343fd --- /dev/null +++ b/csharp/src/Google.Protobuf.Test/WritingPrimitivesTest.cs @@ -0,0 +1,61 @@ +#region Copyright notice and license +// Protocol Buffers - Google's data interchange format +// Copyright 2022 Google Inc. All rights reserved. +// https://developers.google.com/protocol-buffers/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +#endregion + +using NUnit.Framework; +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; + +namespace Google.Protobuf.Test; + +internal class WritingPrimitivesTest +{ + [Test] + public void WriteRawString_IllFormedUnicodeString() + { + // Ill-formed UTF-16: 'X', an unpaired high surrogate, 'Y'. See https://codeblog.jonskeet.uk/2014/11/07/when-is-a-string-not-a-string/ + char c1 = '\u0058'; + char c2 = '\ud800'; + char c3 = '\u0059'; + string text = new string(new[] { c1, c2, c3 }); + Span<byte> buffer = new byte[10]; + WriteContext.Initialize(ref buffer, out var context); + WritingPrimitives.WriteString(ref context.buffer, ref context.state, text); + + // The unpaired high surrogate is expected as U+FFFD (EF BF BD), not in a "raw" form, between + // the ASCII characters; the leading 0x5 is the count of the five bytes that follow it. + byte[] expectedBytes = { 0x5, 0x58, 0xef, 0xbf, 0xbd, 0x59 }; + Assert.AreEqual(expectedBytes, buffer.Slice(0, context.state.position).ToArray()); + } +}