add C# deterministic serialization (#13160)

https://github.com/protocolbuffers/protobuf/issues/12881

@jskeet  review please :-) as this is my first contrib to protobuf, any design / performance feedback is very sought after

Closes #13160

COPYBARA_INTEGRATE_REVIEW=https://github.com/protocolbuffers/protobuf/pull/13160 from fmg-lydonchandra:feature/12881_cs_serialization_deterministic ab7e01b804
PiperOrigin-RevId: 568448399
pull/14167/head
Lydon Chandra 2 years ago committed by Copybara-Service
parent 67b9c76752
commit 96100bf1e4
  1. 53
      csharp/src/Google.Protobuf.Test/Collections/MapFieldTest.cs
  2. 75
      csharp/src/Google.Protobuf.Test/GeneratedMessageTest.cs
  3. 29
      csharp/src/Google.Protobuf/CodedOutputStream.cs
  4. 42
      csharp/src/Google.Protobuf/Collections/MapField.cs

@ -632,6 +632,59 @@ namespace Google.Protobuf.Collections
CollectionAssert.AreEquivalent(((IDictionary<string, string>)map).Values, ((IReadOnlyDictionary<string, string>)map).Values);
}
[Test]
public void SortIntKeys_RandomOrder()
{
var map = new MapField<int, string>() { { 1, "val" }, { -1, "val"}, { 0, "val" } };
var sortedList = map.GetSortedListCopy(map.ToList()).ToList();
var sortedKeys = sortedList.Select(kvp => kvp.Key);
CollectionAssert.AreEqual(new[] { -1, 0, 1 }, sortedKeys);
}
[Test]
public void SortIntKeys_Empty()
{
var map = new MapField<int, string> { };
var sortedList = map.GetSortedListCopy(map.ToList()).ToList();
var sortedKeys = sortedList.Select(kvp => kvp.Key);
Assert.IsEmpty(sortedKeys);
}
[Test]
public void SortStringKeys_RandomOrder()
{
var map = new MapField<string, string> { { "a", "val" }, { "c", "val" }, { "b", "val" } };
var sortedList = map.GetSortedListCopy(map.ToList()).ToList();
var sortedKeys = sortedList.Select(kvp => kvp.Key);
CollectionAssert.AreEqual(new[] { "a", "b", "c" }, sortedKeys);
}
[Test]
public void SortStringKeys_EnsureOrdinalSort()
{
var map = new MapField<string, string>
{
{ "i", "val" } , { "I", "val" }, { "ı", "val" }, { "İ", "val" }
};
var sortedList = map.GetSortedListCopy(map.ToList());
var sortedKeys = sortedList.Select(kvp => kvp.Key);
// Assert Ordinal sort I, i, ı, İ (Non-ordinal sort returns i, I, İ, ı)
// I == 0x49 , i == 0x69 , İ == 0x130 , ı == 0x131
CollectionAssert.AreEqual(new[] { "I", "i", "İ", "ı" }, sortedKeys);
}
[Test]
public void SortBoolKeys()
{
var map = new MapField<bool, string>
{
{ true, "val" } , { false, "val" }
};
var sortedList = map.GetSortedListCopy(map.ToList());
var sortedKeys = sortedList.Select(kvp => kvp.Key);
CollectionAssert.AreEqual(new[] { false, true }, sortedKeys);
}
private static KeyValuePair<TKey, TValue> NewKeyValuePair<TKey, TValue>(TKey key, TValue value)
{
return new KeyValuePair<TKey, TValue>(key, value);

@ -311,7 +311,7 @@ namespace Google.Protobuf
output.WriteTag(TestMap.MapInt32Int32FieldNumber, WireFormat.WireType.LengthDelimited);
var key = 10; // Field 1
var key = 10; // Field 1
var value = 20; // Field 2
var extra = 30; // Field 3
@ -668,6 +668,79 @@ namespace Google.Protobuf
});
}
[Test]
public void MapStringString_DeterministicTrue_ThenBytesIdentical()
{
// Define three strings consisting of different versions of the letter I.
// LATIN CAPITAL LETTER I (U+0049)
string capitalLetterI = "I";
// LATIN SMALL LETTER I (U+0069)
string smallLetterI = "i";
// LATIN SMALL LETTER DOTLESS I (U+0131)
string smallLetterDotlessI = "\u0131";
var testMap1 = new TestMap();
testMap1.MapStringString.Add(smallLetterDotlessI, "value_"+smallLetterDotlessI);
testMap1.MapStringString.Add(smallLetterI, "value_"+smallLetterI);
testMap1.MapStringString.Add(capitalLetterI, "content_"+capitalLetterI);
var bytes1 = SerializeTestMap(testMap1, true);
var testMap2 = new TestMap();
testMap2.MapStringString.Add(capitalLetterI, "content_"+capitalLetterI);
testMap2.MapStringString.Add(smallLetterI, "value_"+smallLetterI);
testMap2.MapStringString.Add(smallLetterDotlessI, "value_"+smallLetterDotlessI);
var bytes2 = SerializeTestMap(testMap2, true);
var parsedBytes2 = TestMap.Parser.ParseFrom(bytes2);
var parsedBytes1 = TestMap.Parser.ParseFrom(bytes1);
Assert.IsTrue(bytes1.SequenceEqual(bytes2));
}
[Test]
public void MapInt32Bytes_DeterministicTrue_ThenBytesIdentical()
{
var testMap1 = new TestMap();
testMap1.MapInt32Bytes.Add(1, ByteString.CopyFromUtf8("test1"));
testMap1.MapInt32Bytes.Add(2, ByteString.CopyFromUtf8("test2"));
var bytes1 = SerializeTestMap(testMap1, true);
var testMap2 = new TestMap();
testMap2.MapInt32Bytes.Add(2, ByteString.CopyFromUtf8("test2"));
testMap2.MapInt32Bytes.Add(1, ByteString.CopyFromUtf8("test1"));
var bytes2 = SerializeTestMap(testMap2, true);
Assert.IsTrue(bytes1.SequenceEqual(bytes2));
}
[Test]
public void MapInt32Bytes_DeterministicFalse_ThenBytesDifferent()
{
var testMap1 = new TestMap();
testMap1.MapInt32Bytes.Add(1, ByteString.CopyFromUtf8("test1"));
testMap1.MapInt32Bytes.Add(2, ByteString.CopyFromUtf8("test2"));
var bytes1 = SerializeTestMap(testMap1, false);
var testMap2 = new TestMap();
testMap2.MapInt32Bytes.Add(2, ByteString.CopyFromUtf8("test2"));
testMap2.MapInt32Bytes.Add(1, ByteString.CopyFromUtf8("test1"));
var bytes2 = SerializeTestMap(testMap2, false);
Assert.IsFalse(bytes1.SequenceEqual(bytes2));
}
private byte[] SerializeTestMap(TestMap testMap, bool deterministic)
{
using var memoryStream = new MemoryStream();
var codedOutputStream = new CodedOutputStream(memoryStream);
codedOutputStream.Deterministic = deterministic;
testMap.WriteTo(codedOutputStream);
codedOutputStream.Flush();
memoryStream.Seek(0, SeekOrigin.Begin);
return memoryStream.ToArray();
}
[Test]
public void DiscardUnknownFields_RealDataStillRead()
{

@ -137,6 +137,33 @@ namespace Google.Protobuf
}
}
/// <summary>
/// Configures whether or not serialization is deterministic.
/// </summary>
/// <remarks>
/// Deterministic serialization guarantees that for a given binary, equal messages (defined by the
/// equals methods in protos) will always be serialized to the same bytes. This implies:
/// <list type="bullet">
/// <item><description>Repeated serialization of a message will return the same bytes.</description></item>
/// <item><description>Different processes of the same binary (which may be executing on different machines)
/// will serialize equal messages to the same bytes.</description></item>
/// </list>
/// Note the deterministic serialization is NOT canonical across languages; it is also unstable
/// across different builds with schema changes due to unknown fields. Users who need canonical
/// serialization, e.g. persistent storage in a canonical form, fingerprinting, etc, should define
/// their own canonicalization specification and implement the serializer using reflection APIs
/// rather than relying on this API.
/// Once set, the serializer will: (Note this is an implementation detail and may subject to
/// change in the future)
/// <list type="bullet">
/// <item><description>Sort map entries by keys in lexicographical order or numerical order. Note: For string
/// keys, the order is based on comparing the UTF-16 code unit value of each character in the strings.
/// The order may be different from the deterministic serialization in other languages where
/// maps are sorted on the lexicographical order of the UTF8 encoded keys.</description></item>
/// </list>
/// </remarks>
public bool Deterministic { get; set; }
#region Writing of values (not including tags)
/// <summary>
@ -462,7 +489,7 @@ namespace Google.Protobuf
#endregion
#region Underlying writing primitives
/// <summary>
/// Writes a 32 bit value as a varint. The fast route is taken when
/// there's enough buffer space left to whizz through without checking

@ -327,7 +327,7 @@ namespace Google.Protobuf.Collections
/// Returns a hash code for this instance.
/// </summary>
/// <returns>
/// A hash code for this instance, suitable for use in hashing algorithms and data structures like a hash table.
/// A hash code for this instance, suitable for use in hashing algorithms and data structures like a hash table.
/// </returns>
public override int GetHashCode()
{
@ -432,7 +432,13 @@ namespace Google.Protobuf.Collections
WriteContext.Initialize(output, out WriteContext ctx);
try
{
WriteTo(ref ctx, codec);
IEnumerable<KeyValuePair<TKey, TValue>> listToWrite = list;
if (output.Deterministic)
{
listToWrite = GetSortedListCopy(list);
}
WriteTo(ref ctx, codec, listToWrite);
}
finally
{
@ -440,6 +446,23 @@ namespace Google.Protobuf.Collections
}
}
internal IEnumerable<KeyValuePair<TKey, TValue>> GetSortedListCopy(IEnumerable<KeyValuePair<TKey, TValue>> listToSort)
{
// We can't sort the list in place, as that would invalidate the linked list.
// Instead, we create a new list, sort that, and then write it out.
var listToWrite = new List<KeyValuePair<TKey, TValue>>(listToSort);
listToWrite.Sort((pair1, pair2) =>
{
if (typeof(TKey) == typeof(string))
{
// Use Ordinal, otherwise Comparer<string>.Default uses StringComparer.CurrentCulture
return StringComparer.Ordinal.Compare(pair1.Key.ToString(), pair2.Key.ToString());
}
return Comparer<TKey>.Default.Compare(pair1.Key, pair2.Key);
});
return listToWrite;
}
/// <summary>
/// Writes the contents of this map to the given write context, using the specified codec
/// to encode each entry.
@ -449,7 +472,18 @@ namespace Google.Protobuf.Collections
[SecuritySafeCritical]
public void WriteTo(ref WriteContext ctx, Codec codec)
{
foreach (var entry in list)
IEnumerable<KeyValuePair<TKey, TValue>> listToWrite = list;
if (ctx.state.CodedOutputStream?.Deterministic ?? false)
{
listToWrite = GetSortedListCopy(list);
}
WriteTo(ref ctx, codec, listToWrite);
}
[SecuritySafeCritical]
private void WriteTo(ref WriteContext ctx, Codec codec, IEnumerable<KeyValuePair<TKey, TValue>> listKvp)
{
foreach (var entry in listKvp)
{
ctx.WriteTag(codec.MapTag);
@ -631,7 +665,7 @@ namespace Google.Protobuf.Collections
this.containsCheck = containsCheck;
}
public int Count => parent.Count;
public int Count => parent.Count;
public bool IsReadOnly => true;

Loading…
Cancel
Save