From a4c9effec11aa7aba35a69c90ed9b3bb550aaa74 Mon Sep 17 00:00:00 2001 From: Darly Paredes Date: Tue, 22 Mar 2022 18:02:10 +0000 Subject: [PATCH] Sync from Piper @436517178 PROTOBUF_SYNC_PIPER --- CHANGES.txt | 15 + .../Google.Protobuf.Test.TestProtos.csproj | 2 +- .../Google.Protobuf.Test/ExtensionSetTest.cs | 61 ++- .../Google.Protobuf.Test.csproj | 2 +- .../RefStructCompatibilityTest.cs | 2 +- csharp/src/Google.Protobuf/Extension.cs | 2 +- csharp/src/Google.Protobuf/ExtensionSet.cs | 55 ++- csharp/src/Google.Protobuf/ExtensionValue.cs | 5 + docs/options.md | 5 + .../java/com/google/protobuf/TextFormat.java | 44 +-- .../com/google/protobuf/CheckUtf8Test.java | 3 +- .../google/protobuf/GeneratedMessageTest.java | 2 +- .../google/protobuf/MapForProto2LiteTest.java | 8 +- .../com/google/protobuf/MapForProto2Test.java | 8 +- .../java/com/google/protobuf/MapLiteTest.java | 8 +- .../java/com/google/protobuf/MapTest.java | 8 +- .../com/google/protobuf/ParserLiteTest.java | 2 +- .../java/com/google/protobuf/ParserTest.java | 5 +- .../protobuf/internal/unknown_fields_test.py | 17 +- .../protobuf/internal/well_known_types.py | 2 + python/google/protobuf/pyext/message.cc | 16 +- .../protobuf/pyext/unknown_field_set.cc | 353 ++++++++++++++++++ .../google/protobuf/pyext/unknown_field_set.h | 78 ++++ python/google/protobuf/unknown_fields.py | 13 +- .../protobuf/compiler/java/java_enum.cc | 5 + .../protobuf/compiler/java/java_enum_field.cc | 7 +- .../compiler/java/java_enum_field_lite.cc | 5 + .../protobuf/compiler/java/java_extension.cc | 5 + .../compiler/java/java_extension_lite.cc | 5 + .../protobuf/compiler/java/java_file.cc | 5 + .../protobuf/compiler/java/java_helpers.cc | 5 + .../protobuf/compiler/java/java_map_field.cc | 5 + .../compiler/java/java_map_field_lite.cc | 5 + .../protobuf/compiler/java/java_message.cc | 5 + .../compiler/java/java_message_builder.cc | 5 + .../java/java_message_builder_lite.cc | 5 + .../compiler/java/java_message_field.cc | 5 + .../compiler/java/java_message_field_lite.cc | 5 + .../compiler/java/java_message_lite.cc | 5 + .../compiler/java/java_name_resolver.cc | 5 + .../compiler/java/java_name_resolver.h | 5 + .../protobuf/compiler/java/java_service.cc | 5 + .../compiler/python/python_helpers.cc | 2 +- .../compiler/python/python_pyi_generator.cc | 201 +++++++--- .../compiler/python/python_pyi_generator.h | 28 +- .../generated_message_tctable_lite.cc | 125 +++---- src/google/protobuf/io/tokenizer.cc | 87 +++-- src/google/protobuf/io/tokenizer_unittest.cc | 112 +++++- src/google/protobuf/port_def.inc | 13 +- src/google/protobuf/port_undef.inc | 2 +- src/google/protobuf/repeated_ptr_field.h | 2 +- src/google/protobuf/text_format.cc | 2 +- 52 files changed, 1121 insertions(+), 261 deletions(-) create mode 100644 python/google/protobuf/pyext/unknown_field_set.cc create mode 100644 python/google/protobuf/pyext/unknown_field_set.h diff --git a/CHANGES.txt b/CHANGES.txt index 1cba2b7688..71b4927b1e 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,3 +1,18 @@ +Unreleased Changes + +C++ + * Refactor generated message class layout + * Optimize tokenizer ParseInteger by removing division + * Reserve exactly the right amount of capacity in ExtensionSet::MergeFrom + +Java + * 6x speedup in ArrayEncoder.writeUInt32NotTag + +Python + * Added UnknownFieldSet(message) for pure Python. The old + message.UnknownFields() will be deprecated after UnknownFieldSet(message) is + added for cpp extension. + 2022-03-04 version 3.20.0 (C++/Java/Python/PHP/Objective-C/C#/Ruby/JavaScript) Ruby diff --git a/csharp/src/Google.Protobuf.Test.TestProtos/Google.Protobuf.Test.TestProtos.csproj b/csharp/src/Google.Protobuf.Test.TestProtos/Google.Protobuf.Test.TestProtos.csproj index 5030043d76..ad8445b7bf 100644 --- a/csharp/src/Google.Protobuf.Test.TestProtos/Google.Protobuf.Test.TestProtos.csproj +++ b/csharp/src/Google.Protobuf.Test.TestProtos/Google.Protobuf.Test.TestProtos.csproj @@ -6,7 +6,7 @@ and without the internal visibility from the test project (all of which have caused issues in the past). --> - net45;netstandard1.1;netstandard2.0 + net462;netstandard1.1;netstandard2.0 3.0 ../../keys/Google.Protobuf.snk true diff --git a/csharp/src/Google.Protobuf.Test/ExtensionSetTest.cs b/csharp/src/Google.Protobuf.Test/ExtensionSetTest.cs index 951e856a59..d163810b6e 100644 --- a/csharp/src/Google.Protobuf.Test/ExtensionSetTest.cs +++ b/csharp/src/Google.Protobuf.Test/ExtensionSetTest.cs @@ -1,4 +1,6 @@ -using Google.Protobuf.TestProtos.Proto2; +using System; +using System.Collections; +using Google.Protobuf.TestProtos.Proto2; using NUnit.Framework; using static Google.Protobuf.TestProtos.Proto2.UnittestExtensions; @@ -79,6 +81,63 @@ namespace Google.Protobuf Assert.AreEqual("abcd", ExtensionSet.Get(ref extensionSet, OptionalStringExtension)); } + [Test] + public void GetSingle() + { + var extensionValue = new TestAllTypes.Types.NestedMessage() { Bb = 42 }; + var untypedExtension = new Extension(OptionalNestedMessageExtension.FieldNumber, codec: null); + var wrongTypedExtension = new Extension(OptionalNestedMessageExtension.FieldNumber, codec: null); + + var message = new TestAllExtensions(); + + var value1 = message.GetExtension(untypedExtension); + Assert.IsNull(value1); + + message.SetExtension(OptionalNestedMessageExtension, extensionValue); + var value2 = message.GetExtension(untypedExtension); + Assert.IsNotNull(value2); + + var valueBytes = ((IMessage)value2).ToByteArray(); + var parsedValue = TestProtos.Proto2.TestAllTypes.Types.NestedMessage.Parser.ParseFrom(valueBytes); + Assert.AreEqual(extensionValue, parsedValue); + + var ex = Assert.Throws(() => message.GetExtension(wrongTypedExtension)); + + var expectedMessage = "The stored extension value has a type of 'Google.Protobuf.TestProtos.Proto2.TestAllTypes+Types+NestedMessage, Google.Protobuf.Test.TestProtos, Version=1.0.0.0, Culture=neutral, PublicKeyToken=a7d26565bac4d604'. " + + "This a different from the requested type of 'Google.Protobuf.TestProtos.Proto2.TestAllTypes, Google.Protobuf.Test.TestProtos, Version=1.0.0.0, Culture=neutral, PublicKeyToken=a7d26565bac4d604'."; + Assert.AreEqual(expectedMessage, ex.Message); + } + + [Test] + public void GetRepeated() + { + var extensionValue = new TestAllTypes.Types.NestedMessage() { Bb = 42 }; + var untypedExtension = new Extension(RepeatedNestedMessageExtension.FieldNumber, codec: null); + var wrongTypedExtension = new RepeatedExtension(RepeatedNestedMessageExtension.FieldNumber, codec: null); + + var message = new TestAllExtensions(); + + var value1 = message.GetExtension(untypedExtension); + Assert.IsNull(value1); + + var repeatedField = message.GetOrInitializeExtension(RepeatedNestedMessageExtension); + repeatedField.Add(extensionValue); + + var value2 = message.GetExtension(untypedExtension); + Assert.IsNotNull(value2); + Assert.AreEqual(1, value2.Count); + + var valueBytes = ((IMessage)value2[0]).ToByteArray(); + var parsedValue = TestProtos.Proto2.TestAllTypes.Types.NestedMessage.Parser.ParseFrom(valueBytes); + Assert.AreEqual(extensionValue, parsedValue); + + var ex = Assert.Throws(() => message.GetExtension(wrongTypedExtension)); + + var expectedMessage = "The stored extension value has a type of 'Google.Protobuf.TestProtos.Proto2.TestAllTypes+Types+NestedMessage, Google.Protobuf.Test.TestProtos, Version=1.0.0.0, Culture=neutral, PublicKeyToken=a7d26565bac4d604'. " + + "This a different from the requested type of 'Google.Protobuf.TestProtos.Proto2.TestAllTypes, Google.Protobuf.Test.TestProtos, Version=1.0.0.0, Culture=neutral, PublicKeyToken=a7d26565bac4d604'."; + Assert.AreEqual(expectedMessage, ex.Message); + } + [Test] public void TestEquals() { diff --git a/csharp/src/Google.Protobuf.Test/Google.Protobuf.Test.csproj b/csharp/src/Google.Protobuf.Test/Google.Protobuf.Test.csproj index deb17e9f52..641cb0a088 100644 --- a/csharp/src/Google.Protobuf.Test/Google.Protobuf.Test.csproj +++ b/csharp/src/Google.Protobuf.Test/Google.Protobuf.Test.csproj @@ -1,7 +1,7 @@  - net451;netcoreapp3.1;net60 + net462;netcoreapp3.1;net60 ../../keys/Google.Protobuf.snk true False diff --git a/csharp/src/Google.Protobuf.Test/RefStructCompatibilityTest.cs b/csharp/src/Google.Protobuf.Test/RefStructCompatibilityTest.cs index f2d9a2d4e2..9dca501523 100644 --- a/csharp/src/Google.Protobuf.Test/RefStructCompatibilityTest.cs +++ b/csharp/src/Google.Protobuf.Test/RefStructCompatibilityTest.cs @@ -60,7 +60,7 @@ namespace Google.Protobuf var currentAssemblyDir = Path.GetDirectoryName(typeof(RefStructCompatibilityTest).GetTypeInfo().Assembly.Location); var testProtosProjectDir = Path.GetFullPath(Path.Combine(currentAssemblyDir, "..", "..", "..", "..", "Google.Protobuf.Test.TestProtos")); - var testProtosOutputDir = (currentAssemblyDir.Contains("bin/Debug/") || currentAssemblyDir.Contains("bin\\Debug\\")) ? "bin\\Debug\\net45" : "bin\\Release\\net45"; + var testProtosOutputDir = (currentAssemblyDir.Contains("bin/Debug/") || currentAssemblyDir.Contains("bin\\Debug\\")) ? "bin\\Debug\\net462" : "bin\\Release\\net462"; // If "ref struct" types are used in the generated code, compilation with an old compiler will fail with the following error: // "XYZ is obsolete: 'Types with embedded references are not supported in this version of your compiler.'" diff --git a/csharp/src/Google.Protobuf/Extension.cs b/csharp/src/Google.Protobuf/Extension.cs index 6dd1ceaa8e..d10a668452 100644 --- a/csharp/src/Google.Protobuf/Extension.cs +++ b/csharp/src/Google.Protobuf/Extension.cs @@ -77,7 +77,7 @@ namespace Google.Protobuf this.codec = codec; } - internal TValue DefaultValue => codec.DefaultValue; + internal TValue DefaultValue => codec != null ? codec.DefaultValue : default(TValue); internal override Type TargetType => typeof(TTarget); diff --git a/csharp/src/Google.Protobuf/ExtensionSet.cs b/csharp/src/Google.Protobuf/ExtensionSet.cs index 895b9ae6ea..4967ef646d 100644 --- a/csharp/src/Google.Protobuf/ExtensionSet.cs +++ b/csharp/src/Google.Protobuf/ExtensionSet.cs @@ -34,6 +34,7 @@ using Google.Protobuf.Collections; using System; using System.Collections.Generic; using System.Linq; +using System.Reflection; using System.Security; namespace Google.Protobuf @@ -63,7 +64,39 @@ namespace Google.Protobuf IExtensionValue value; if (TryGetValue(ref set, extension, out value)) { - return ((ExtensionValue)value).GetValue(); + // The stored ExtensionValue can be a different type to what is being requested. + // This happens when the same extension proto is compiled in different assemblies. + // To allow consuming assemblies to still get the value when the TValue type is + // different, this get method: + // 1. Attempts to cast the value to the expected ExtensionValue. + // This is the usual case. It is used first because it avoids possibly boxing the value. + // 2. Fallback to get the value as object from IExtensionValue then casting. + // This allows for someone to specify a TValue of object. They can then convert + // the values to bytes and reparse using expected value. + // 3. If neither of these work, throw a user friendly error that the types aren't compatible. + if (value is ExtensionValue extensionValue) + { + return extensionValue.GetValue(); + } + else if (value.GetValue() is TValue underlyingValue) + { + return underlyingValue; + } + else + { + var valueType = value.GetType().GetTypeInfo(); + if (valueType.IsGenericType && valueType.GetGenericTypeDefinition() == typeof(ExtensionValue<>)) + { + var storedType = valueType.GenericTypeArguments[0]; + throw new InvalidOperationException( + "The stored extension value has a type of '" + storedType.AssemblyQualifiedName + "'. " + + "This a different from the requested type of '" + typeof(TValue).AssemblyQualifiedName + "'."); + } + else + { + throw new InvalidOperationException("Unexpected extension value type: " + valueType.AssemblyQualifiedName); + } + } } else { @@ -79,7 +112,25 @@ namespace Google.Protobuf IExtensionValue value; if (TryGetValue(ref set, extension, out value)) { - return ((RepeatedExtensionValue)value).GetValue(); + if (value is RepeatedExtensionValue extensionValue) + { + return extensionValue.GetValue(); + } + else + { + var valueType = value.GetType().GetTypeInfo(); + if (valueType.IsGenericType && valueType.GetGenericTypeDefinition() == typeof(RepeatedExtensionValue<>)) + { + var storedType = valueType.GenericTypeArguments[0]; + throw new InvalidOperationException( + "The stored extension value has a type of '" + storedType.AssemblyQualifiedName + "'. " + + "This a different from the requested type of '" + typeof(TValue).AssemblyQualifiedName + "'."); + } + else + { + throw new InvalidOperationException("Unexpected extension value type: " + valueType.AssemblyQualifiedName); + } + } } else { diff --git a/csharp/src/Google.Protobuf/ExtensionValue.cs b/csharp/src/Google.Protobuf/ExtensionValue.cs index 5257c4c955..1329b2f4d5 100644 --- a/csharp/src/Google.Protobuf/ExtensionValue.cs +++ b/csharp/src/Google.Protobuf/ExtensionValue.cs @@ -44,6 +44,7 @@ namespace Google.Protobuf void WriteTo(ref WriteContext ctx); int CalculateSize(); bool IsInitialized(); + object GetValue(); } internal sealed class ExtensionValue : IExtensionValue @@ -118,6 +119,8 @@ namespace Google.Protobuf public T GetValue() => field; + object IExtensionValue.GetValue() => field; + public void SetValue(T value) { field = value; @@ -201,6 +204,8 @@ namespace Google.Protobuf public RepeatedField GetValue() => field; + object IExtensionValue.GetValue() => field; + public bool IsInitialized() { for (int i = 0; i < field.Count; i++) diff --git a/docs/options.md b/docs/options.md index dbb3563af6..e745730340 100644 --- a/docs/options.md +++ b/docs/options.md @@ -304,3 +304,8 @@ with info about your project (name and website) so we can add an entry for you. 1. Embedded Proto * Website: https://EmbeddedProto.com * Extension: 1141 + +1. Protoc-gen-fieldmask + * Website: https://github.com/yeqown/protoc-gen-fieldmask + * Extension: 1142 + diff --git a/java/core/src/main/java/com/google/protobuf/TextFormat.java b/java/core/src/main/java/com/google/protobuf/TextFormat.java index 11f89dce70..7337bc22dd 100644 --- a/java/core/src/main/java/com/google/protobuf/TextFormat.java +++ b/java/core/src/main/java/com/google/protobuf/TextFormat.java @@ -989,12 +989,12 @@ public final class TextFormat { } /** Are we at the end of the input? */ - public boolean atEnd() { + boolean atEnd() { return currentToken.length() == 0; } /** Advance to the next token. */ - public void nextToken() { + void nextToken() { previousLine = line; previousColumn = column; @@ -1040,7 +1040,7 @@ public final class TextFormat { * If the next token exactly matches {@code token}, consume it and return {@code true}. * Otherwise, return {@code false} without doing anything. */ - public boolean tryConsume(final String token) { + boolean tryConsume(final String token) { if (currentToken.equals(token)) { nextToken(); return true; @@ -1053,14 +1053,14 @@ public final class TextFormat { * If the next token exactly matches {@code token}, consume it. Otherwise, throw a {@link * ParseException}. */ - public void consume(final String token) throws ParseException { + void consume(final String token) throws ParseException { if (!tryConsume(token)) { throw parseException("Expected \"" + token + "\"."); } } /** Returns {@code true} if the next token is an integer, but does not consume it. */ - public boolean lookingAtInteger() { + boolean lookingAtInteger() { if (currentToken.length() == 0) { return false; } @@ -1070,7 +1070,7 @@ public final class TextFormat { } /** Returns {@code true} if the current token's text is equal to that specified. */ - public boolean lookingAt(String text) { + boolean lookingAt(String text) { return currentToken.equals(text); } @@ -1078,7 +1078,7 @@ public final class TextFormat { * If the next token is an identifier, consume it and return its value. Otherwise, throw a * {@link ParseException}. */ - public String consumeIdentifier() throws ParseException { + String consumeIdentifier() throws ParseException { for (int i = 0; i < currentToken.length(); i++) { final char c = currentToken.charAt(i); if (('a' <= c && c <= 'z') @@ -1101,7 +1101,7 @@ public final class TextFormat { * If the next token is an identifier, consume it and return {@code true}. Otherwise, return * {@code false} without doing anything. */ - public boolean tryConsumeIdentifier() { + boolean tryConsumeIdentifier() { try { consumeIdentifier(); return true; @@ -1114,7 +1114,7 @@ public final class TextFormat { * If the next token is a 32-bit signed integer, consume it and return its value. Otherwise, * throw a {@link ParseException}. */ - public int consumeInt32() throws ParseException { + int consumeInt32() throws ParseException { try { final int result = parseInt32(currentToken); nextToken(); @@ -1128,7 +1128,7 @@ public final class TextFormat { * If the next token is a 32-bit unsigned integer, consume it and return its value. Otherwise, * throw a {@link ParseException}. */ - public int consumeUInt32() throws ParseException { + int consumeUInt32() throws ParseException { try { final int result = parseUInt32(currentToken); nextToken(); @@ -1142,7 +1142,7 @@ public final class TextFormat { * If the next token is a 64-bit signed integer, consume it and return its value. Otherwise, * throw a {@link ParseException}. */ - public long consumeInt64() throws ParseException { + long consumeInt64() throws ParseException { try { final long result = parseInt64(currentToken); nextToken(); @@ -1156,7 +1156,7 @@ public final class TextFormat { * If the next token is a 64-bit signed integer, consume it and return {@code true}. Otherwise, * return {@code false} without doing anything. */ - public boolean tryConsumeInt64() { + boolean tryConsumeInt64() { try { consumeInt64(); return true; @@ -1169,7 +1169,7 @@ public final class TextFormat { * If the next token is a 64-bit unsigned integer, consume it and return its value. Otherwise, * throw a {@link ParseException}. */ - public long consumeUInt64() throws ParseException { + long consumeUInt64() throws ParseException { try { final long result = parseUInt64(currentToken); nextToken(); @@ -1299,7 +1299,7 @@ public final class TextFormat { } /** If the next token is a string, consume it and return true. Otherwise, return false. */ - public boolean tryConsumeString() { + boolean tryConsumeString() { try { consumeString(); return true; @@ -1312,7 +1312,7 @@ public final class TextFormat { * If the next token is a string, consume it, unescape it as a {@link ByteString}, and return * it. Otherwise, throw a {@link ParseException}. */ - public ByteString consumeByteString() throws ParseException { + ByteString consumeByteString() throws ParseException { List list = new ArrayList(); consumeByteString(list); while (currentToken.startsWith("'") || currentToken.startsWith("\"")) { @@ -1350,7 +1350,7 @@ public final class TextFormat { * Returns a {@link ParseException} with the current line and column numbers in the description, * suitable for throwing. */ - public ParseException parseException(final String description) { + ParseException parseException(final String description) { // Note: People generally prefer one-based line and column numbers. return new ParseException(line + 1, column + 1, description); } @@ -1359,7 +1359,7 @@ public final class TextFormat { * Returns a {@link ParseException} with the line and column numbers of the previous token in * the description, suitable for throwing. */ - public ParseException parseExceptionPreviousToken(final String description) { + ParseException parseExceptionPreviousToken(final String description) { // Note: People generally prefer one-based line and column numbers. return new ParseException(previousLine + 1, previousColumn + 1, description); } @@ -1380,16 +1380,6 @@ public final class TextFormat { return parseException("Couldn't parse number: " + e.getMessage()); } - /** - * Returns a {@link UnknownFieldParseException} with the line and column numbers of the previous - * token in the description, and the unknown field name, suitable for throwing. - */ - public UnknownFieldParseException unknownFieldParseExceptionPreviousToken( - final String unknownField, final String description) { - // Note: People generally prefer one-based line and column numbers. - return new UnknownFieldParseException( - previousLine + 1, previousColumn + 1, unknownField, description); - } } /** Thrown when parsing an invalid text format message. */ diff --git a/java/core/src/test/java/com/google/protobuf/CheckUtf8Test.java b/java/core/src/test/java/com/google/protobuf/CheckUtf8Test.java index 800623ac74..6518372c9a 100644 --- a/java/core/src/test/java/com/google/protobuf/CheckUtf8Test.java +++ b/java/core/src/test/java/com/google/protobuf/CheckUtf8Test.java @@ -64,8 +64,7 @@ public class CheckUtf8Test { public void testParseRequiredStringWithGoodUtf8() throws Exception { ByteString serialized = BytesWrapper.newBuilder().setReq(UTF8_BYTE_STRING).build().toByteString(); - assertThat(StringWrapper.parser().parseFrom(serialized).getReq()) - .isEqualTo(UTF8_BYTE_STRING_TEXT); + assertThat(StringWrapper.parseFrom(serialized).getReq()).isEqualTo(UTF8_BYTE_STRING_TEXT); } @Test diff --git a/java/core/src/test/java/com/google/protobuf/GeneratedMessageTest.java b/java/core/src/test/java/com/google/protobuf/GeneratedMessageTest.java index eb5b7396c9..26d0e4ff34 100644 --- a/java/core/src/test/java/com/google/protobuf/GeneratedMessageTest.java +++ b/java/core/src/test/java/com/google/protobuf/GeneratedMessageTest.java @@ -359,7 +359,7 @@ public class GeneratedMessageTest { @Test public void testParsedMessagesAreImmutable() throws Exception { - TestAllTypes value = TestAllTypes.parser().parseFrom(TestUtil.getAllSet().toByteString()); + TestAllTypes value = TestAllTypes.parseFrom(TestUtil.getAllSet().toByteString()); assertIsUnmodifiable(value.getRepeatedInt32List()); assertIsUnmodifiable(value.getRepeatedInt64List()); assertIsUnmodifiable(value.getRepeatedUint32List()); diff --git a/java/core/src/test/java/com/google/protobuf/MapForProto2LiteTest.java b/java/core/src/test/java/com/google/protobuf/MapForProto2LiteTest.java index 1a767c0dc7..e8b1503734 100644 --- a/java/core/src/test/java/com/google/protobuf/MapForProto2LiteTest.java +++ b/java/core/src/test/java/com/google/protobuf/MapForProto2LiteTest.java @@ -392,21 +392,21 @@ public final class MapForProto2LiteTest { setMapValues(builder); TestMap message = builder.build(); assertThat(message.toByteString().size()).isEqualTo(message.getSerializedSize()); - message = TestMap.parser().parseFrom(message.toByteString()); + message = TestMap.parseFrom(message.toByteString()); assertMapValuesSet(message); builder = message.toBuilder(); updateMapValues(builder); message = builder.build(); assertThat(message.toByteString().size()).isEqualTo(message.getSerializedSize()); - message = TestMap.parser().parseFrom(message.toByteString()); + message = TestMap.parseFrom(message.toByteString()); assertMapValuesUpdated(message); builder = message.toBuilder(); builder.clear(); message = builder.build(); assertThat(message.toByteString().size()).isEqualTo(message.getSerializedSize()); - message = TestMap.parser().parseFrom(message.toByteString()); + message = TestMap.parseFrom(message.toByteString()); assertMapValuesCleared(message); } @@ -415,7 +415,7 @@ public final class MapForProto2LiteTest { CodedOutputStream output = CodedOutputStream.newInstance(byteArrayOutputStream); bizarroMap.writeTo(output); output.flush(); - return TestMap.parser().parseFrom(ByteString.copyFrom(byteArrayOutputStream.toByteArray())); + return TestMap.parseFrom(ByteString.copyFrom(byteArrayOutputStream.toByteArray())); } @Test diff --git a/java/core/src/test/java/com/google/protobuf/MapForProto2Test.java b/java/core/src/test/java/com/google/protobuf/MapForProto2Test.java index 6681724c78..2f74f96ed2 100644 --- a/java/core/src/test/java/com/google/protobuf/MapForProto2Test.java +++ b/java/core/src/test/java/com/google/protobuf/MapForProto2Test.java @@ -534,21 +534,21 @@ public class MapForProto2Test { setMapValuesUsingAccessors(builder); TestMap message = builder.build(); assertThat(message.toByteString().size()).isEqualTo(message.getSerializedSize()); - message = TestMap.parser().parseFrom(message.toByteString()); + message = TestMap.parseFrom(message.toByteString()); assertMapValuesSet(message); builder = message.toBuilder(); updateMapValuesUsingAccessors(builder); message = builder.build(); assertThat(message.toByteString().size()).isEqualTo(message.getSerializedSize()); - message = TestMap.parser().parseFrom(message.toByteString()); + message = TestMap.parseFrom(message.toByteString()); assertMapValuesUpdated(message); builder = message.toBuilder(); builder.clear(); message = builder.build(); assertThat(message.toByteString().size()).isEqualTo(message.getSerializedSize()); - message = TestMap.parser().parseFrom(message.toByteString()); + message = TestMap.parseFrom(message.toByteString()); assertMapValuesCleared(message); } @@ -557,7 +557,7 @@ public class MapForProto2Test { CodedOutputStream output = CodedOutputStream.newInstance(byteArrayOutputStream); bizarroMap.writeTo(output); output.flush(); - return TestMap.parser().parseFrom(ByteString.copyFrom(byteArrayOutputStream.toByteArray())); + return TestMap.parseFrom(ByteString.copyFrom(byteArrayOutputStream.toByteArray())); } @Test diff --git a/java/core/src/test/java/com/google/protobuf/MapLiteTest.java b/java/core/src/test/java/com/google/protobuf/MapLiteTest.java index 349d576ff7..d763f9d512 100644 --- a/java/core/src/test/java/com/google/protobuf/MapLiteTest.java +++ b/java/core/src/test/java/com/google/protobuf/MapLiteTest.java @@ -425,21 +425,21 @@ public final class MapLiteTest { setMapValues(builder); TestMap message = builder.build(); assertThat(message.toByteString().size()).isEqualTo(message.getSerializedSize()); - message = TestMap.parser().parseFrom(message.toByteString()); + message = TestMap.parseFrom(message.toByteString()); assertMapValuesSet(message); builder = message.toBuilder(); updateMapValues(builder); message = builder.build(); assertThat(message.toByteString().size()).isEqualTo(message.getSerializedSize()); - message = TestMap.parser().parseFrom(message.toByteString()); + message = TestMap.parseFrom(message.toByteString()); assertMapValuesUpdated(message); builder = message.toBuilder(); builder.clear(); message = builder.build(); assertThat(message.toByteString().size()).isEqualTo(message.getSerializedSize()); - message = TestMap.parser().parseFrom(message.toByteString()); + message = TestMap.parseFrom(message.toByteString()); assertMapValuesCleared(message); } @@ -448,7 +448,7 @@ public final class MapLiteTest { CodedOutputStream output = CodedOutputStream.newInstance(byteArrayOutputStream); bizarroMap.writeTo(output); output.flush(); - return TestMap.parser().parseFrom(ByteString.copyFrom(byteArrayOutputStream.toByteArray())); + return TestMap.parseFrom(ByteString.copyFrom(byteArrayOutputStream.toByteArray())); } @Test diff --git a/java/core/src/test/java/com/google/protobuf/MapTest.java b/java/core/src/test/java/com/google/protobuf/MapTest.java index 587ebbb5cc..7884497ee4 100644 --- a/java/core/src/test/java/com/google/protobuf/MapTest.java +++ b/java/core/src/test/java/com/google/protobuf/MapTest.java @@ -580,21 +580,21 @@ public class MapTest { setMapValuesUsingAccessors(builder); TestMap message = builder.build(); assertThat(message.toByteString().size()).isEqualTo(message.getSerializedSize()); - message = TestMap.parser().parseFrom(message.toByteString()); + message = TestMap.parseFrom(message.toByteString()); assertMapValuesSet(message); builder = message.toBuilder(); updateMapValuesUsingAccessors(builder); message = builder.build(); assertThat(message.toByteString().size()).isEqualTo(message.getSerializedSize()); - message = TestMap.parser().parseFrom(message.toByteString()); + message = TestMap.parseFrom(message.toByteString()); assertMapValuesUpdated(message); builder = message.toBuilder(); builder.clear(); message = builder.build(); assertThat(message.toByteString().size()).isEqualTo(message.getSerializedSize()); - message = TestMap.parser().parseFrom(message.toByteString()); + message = TestMap.parseFrom(message.toByteString()); assertMapValuesCleared(message); } @@ -603,7 +603,7 @@ public class MapTest { CodedOutputStream output = CodedOutputStream.newInstance(byteArrayOutputStream); bizarroMap.writeTo(output); output.flush(); - return TestMap.parser().parseFrom(ByteString.copyFrom(byteArrayOutputStream.toByteArray())); + return TestMap.parseFrom(ByteString.copyFrom(byteArrayOutputStream.toByteArray())); } @Test diff --git a/java/core/src/test/java/com/google/protobuf/ParserLiteTest.java b/java/core/src/test/java/com/google/protobuf/ParserLiteTest.java index 6f6d26b1ca..dc31b38b5c 100644 --- a/java/core/src/test/java/com/google/protobuf/ParserLiteTest.java +++ b/java/core/src/test/java/com/google/protobuf/ParserLiteTest.java @@ -192,7 +192,7 @@ public class ParserLiteTest { // Parse TestParsingMergeLite. ExtensionRegistryLite registry = ExtensionRegistryLite.newInstance(); UnittestLite.registerAllExtensions(registry); - TestParsingMergeLite parsingMerge = TestParsingMergeLite.parser().parseFrom(data, registry); + TestParsingMergeLite parsingMerge = TestParsingMergeLite.parseFrom(data, registry); // Required and optional fields should be merged. assertMessageMerged(parsingMerge.getRequiredAllTypes()); diff --git a/java/core/src/test/java/com/google/protobuf/ParserTest.java b/java/core/src/test/java/com/google/protobuf/ParserTest.java index f4cf529e90..69c5795c1f 100644 --- a/java/core/src/test/java/com/google/protobuf/ParserTest.java +++ b/java/core/src/test/java/com/google/protobuf/ParserTest.java @@ -195,8 +195,7 @@ public class ParserTest { @Test public void testParseUnknownFields() throws Exception { // All fields will be treated as unknown fields in emptyMessage. - TestEmptyMessage emptyMessage = - TestEmptyMessage.parser().parseFrom(TestUtil.getAllSet().toByteString()); + TestEmptyMessage emptyMessage = TestEmptyMessage.parseFrom(TestUtil.getAllSet().toByteString()); assertThat(emptyMessage.toByteString()).isEqualTo(TestUtil.getAllSet().toByteString()); } @@ -278,7 +277,7 @@ public class ParserTest { // Parse TestParsingMerge. ExtensionRegistry registry = ExtensionRegistry.newInstance(); UnittestProto.registerAllExtensions(registry); - TestParsingMerge parsingMerge = TestParsingMerge.parser().parseFrom(data, registry); + TestParsingMerge parsingMerge = TestParsingMerge.parseFrom(data, registry); // Required and optional fields should be merged. assertMessageMerged(parsingMerge.getRequiredAllTypes()); diff --git a/python/google/protobuf/internal/unknown_fields_test.py b/python/google/protobuf/internal/unknown_fields_test.py index 7dcfff5383..b639088a3b 100644 --- a/python/google/protobuf/internal/unknown_fields_test.py +++ b/python/google/protobuf/internal/unknown_fields_test.py @@ -289,10 +289,6 @@ class UnknownFieldsAccessorsTest(unittest.TestCase): unknown_field_set = unknown_fields.UnknownFieldSet(destination) self.assertEqual(0, len(unknown_field_set)) destination.ParseFromString(message.SerializeToString()) - # TODO(jieluo): add this back after implement new cpp unknown fields - # b/217277954 - if api_implementation.Type() == 'cpp': - return self.assertEqual(0, len(unknown_field_set)) unknown_field_set = unknown_fields.UnknownFieldSet(destination) self.assertEqual(2, len(unknown_field_set)) @@ -310,10 +306,6 @@ class UnknownFieldsAccessorsTest(unittest.TestCase): self.empty_message.Clear() # All cleared, even unknown fields. self.assertEqual(self.empty_message.SerializeToString(), b'') - # TODO(jieluo): add this back after implement new cpp unknown fields - # b/217277954 - if api_implementation.Type() == 'cpp': - return self.assertEqual(len(unknown_field_set), 97) @unittest.skipIf((sys.version_info.major, sys.version_info.minor) < (3, 4), @@ -345,10 +337,6 @@ class UnknownFieldsAccessorsTest(unittest.TestCase): self.assertEqual(1, len(sub_unknown_fields)) self.assertEqual(sub_unknown_fields[0].data, 123) destination.Clear() - # TODO(jieluo): add this back after implement new cpp unknown fields - # b/217277954 - if api_implementation.Type() == 'cpp': - return self.assertEqual(1, len(sub_unknown_fields)) self.assertEqual(sub_unknown_fields[0].data, 123) message.Clear() @@ -372,10 +360,6 @@ class UnknownFieldsAccessorsTest(unittest.TestCase): destination.ParseFromString(message.SerializeToString()) unknown_field = unknown_fields.UnknownFieldSet(destination)[0] destination.Clear() - # TODO(jieluo): add this back after implement new cpp unknown fields - # b/217277954 - if api_implementation.Type() == 'cpp': - return self.assertEqual(unknown_field.data, 123) def testUnknownExtensions(self): @@ -416,6 +400,7 @@ class UnknownEnumValuesTest(unittest.TestCase): def CheckUnknownField(self, name, expected_value): field_descriptor = self.descriptor.fields_by_name[name] unknown_field_set = unknown_fields.UnknownFieldSet(self.missing_message) + self.assertIsInstance(unknown_field_set, unknown_fields.UnknownFieldSet) count = 0 for field in unknown_field_set: if field.field_number == field_descriptor.number: diff --git a/python/google/protobuf/internal/well_known_types.py b/python/google/protobuf/internal/well_known_types.py index b581ab750a..8881d758a5 100644 --- a/python/google/protobuf/internal/well_known_types.py +++ b/python/google/protobuf/internal/well_known_types.py @@ -868,6 +868,7 @@ class ListValue(object): collections.abc.MutableSequence.register(ListValue) +# LINT.IfChange(wktbases) WKTBASES = { 'google.protobuf.Any': Any, 'google.protobuf.Duration': Duration, @@ -876,3 +877,4 @@ WKTBASES = { 'google.protobuf.Struct': Struct, 'google.protobuf.Timestamp': Timestamp, } +# LINT.ThenChange(//depot/google.protobuf/compiler/python/pyi_generator.cc:wktbases) diff --git a/python/google/protobuf/pyext/message.cc b/python/google/protobuf/pyext/message.cc index 920c17d8d1..2c4a9573e7 100644 --- a/python/google/protobuf/pyext/message.cc +++ b/python/google/protobuf/pyext/message.cc @@ -68,6 +68,7 @@ #include #include #include +#include #include #include #include @@ -2424,7 +2425,7 @@ static PyObject* GetExtensionDict(CMessage* self, void *closure) { return reinterpret_cast(extension_dict); } -static PyObject* UnknownFieldSet(CMessage* self) { +static PyObject* GetUnknownFields(CMessage* self) { if (self->unknown_field_set == nullptr) { self->unknown_field_set = unknown_fields::NewPyUnknownFields(self); } else { @@ -2493,7 +2494,7 @@ static PyMethodDef Methods[] = { "Serializes the message to a string, only for initialized messages."}, {"SetInParent", (PyCFunction)SetInParent, METH_NOARGS, "Sets the has bit of the given field in its parent message."}, - {"UnknownFields", (PyCFunction)UnknownFieldSet, METH_NOARGS, + {"UnknownFields", (PyCFunction)GetUnknownFields, METH_NOARGS, "Parse unknown field set"}, {"WhichOneof", (PyCFunction)WhichOneof, METH_O, "Returns the name of the field set inside a oneof, " @@ -2970,15 +2971,20 @@ bool InitProto2MessageModule(PyObject *m) { return false; } + if (PyType_Ready(&PyUnknownFieldSet_Type) < 0) { + return false; + } + PyModule_AddObject(m, "UnknownFieldSet", - reinterpret_cast(&PyUnknownFields_Type)); + reinterpret_cast(&PyUnknownFieldSet_Type)); if (PyType_Ready(&PyUnknownFieldRef_Type) < 0) { return false; } - PyModule_AddObject(m, "UnknownField", - reinterpret_cast(&PyUnknownFieldRef_Type)); + if (PyType_Ready(&PyUnknownField_Type) < 0) { + return false; + } // Initialize Map container types. if (!InitMapContainers()) { diff --git a/python/google/protobuf/pyext/unknown_field_set.cc b/python/google/protobuf/pyext/unknown_field_set.cc new file mode 100644 index 0000000000..76be8fd7b4 --- /dev/null +++ b/python/google/protobuf/pyext/unknown_field_set.cc @@ -0,0 +1,353 @@ +// Protocol Buffers - Google's data interchange format +// Copyright 2008 Google Inc. All rights reserved. +// https://developers.google.com/protocol-buffers/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include + +#define PY_SSIZE_T_CLEAN +#include + +#include +#include + +#include +#include +#include +#include +#include + +namespace google { +namespace protobuf { +namespace python { + +namespace unknown_field_set { + +static Py_ssize_t Len(PyObject* pself) { + PyUnknownFieldSet* self = reinterpret_cast(pself); + if (self->fields == nullptr) { + PyErr_Format(PyExc_ValueError, "UnknownFieldSet does not exist. "); + return -1; + } + return self->fields->field_count(); +} + +PyObject* NewPyUnknownField(PyUnknownFieldSet* parent, Py_ssize_t index); + +static PyObject* Item(PyObject* pself, Py_ssize_t index) { + PyUnknownFieldSet* self = reinterpret_cast(pself); + if (self->fields == nullptr) { + PyErr_Format(PyExc_ValueError, "UnknownFieldSet does not exist. "); + return nullptr; + } + Py_ssize_t total_size = self->fields->field_count(); + if (index < 0) { + index = total_size + index; + } + if (index < 0 || index >= total_size) { + PyErr_Format(PyExc_IndexError, "index (%zd) out of range", index); + return nullptr; + } + return unknown_field_set::NewPyUnknownField(self, index); +} + +PyObject* New(PyTypeObject* type, PyObject* args, PyObject* kwargs) { + if (args == nullptr || PyTuple_Size(args) != 1) { + PyErr_SetString(PyExc_TypeError, + "Must provide a message to create UnknownFieldSet"); + return nullptr; + } + + PyObject* c_message; + if (!PyArg_ParseTuple(args, "O", &c_message)) { + PyErr_SetString(PyExc_TypeError, + "Must provide a message to create UnknownFieldSet"); + return nullptr; + } + + if (!PyObject_TypeCheck(c_message, CMessage_Type)) { + PyErr_Format(PyExc_TypeError, + "Parameter to UnknownFieldSet() must be a message " + "got %s.", + Py_TYPE(c_message)->tp_name); + return nullptr; + } + + PyUnknownFieldSet* self = reinterpret_cast( + PyType_GenericAlloc(&PyUnknownFieldSet_Type, 0)); + if (self == nullptr) { + return nullptr; + } + + // Top UnknownFieldSet should set parent nullptr. + self->parent = nullptr; + + // Copy c_message's UnknownFieldSet. + Message* message = reinterpret_cast(c_message)->message; + const Reflection* reflection = message->GetReflection(); + self->fields = new google::protobuf::UnknownFieldSet; + self->fields->MergeFrom(reflection->GetUnknownFields(*message)); + return reinterpret_cast(self); +} + +PyObject* NewPyUnknownField(PyUnknownFieldSet* parent, Py_ssize_t index) { + PyUnknownField* self = reinterpret_cast( + PyType_GenericAlloc(&PyUnknownField_Type, 0)); + if (self == nullptr) { + return nullptr; + } + + Py_INCREF(parent); + self->parent = parent; + self->index = index; + + return reinterpret_cast(self); +} + +static void Dealloc(PyObject* pself) { + PyUnknownFieldSet* self = reinterpret_cast(pself); + if (self->parent == nullptr) { + delete self->fields; + } + auto* py_type = Py_TYPE(pself); + self->~PyUnknownFieldSet(); + py_type->tp_free(pself); +} + +static PySequenceMethods SqMethods = { + Len, /* sq_length */ + nullptr, /* sq_concat */ + nullptr, /* sq_repeat */ + Item, /* sq_item */ + nullptr, /* sq_slice */ + nullptr, /* sq_ass_item */ +}; + +} // namespace unknown_field_set + +PyTypeObject PyUnknownFieldSet_Type = { + PyVarObject_HEAD_INIT(&PyType_Type, 0) FULL_MODULE_NAME + ".PyUnknownFieldSet", // tp_name + sizeof(PyUnknownFieldSet), // tp_basicsize + 0, // tp_itemsize + unknown_field_set::Dealloc, // tp_dealloc +#if PY_VERSION_HEX < 0x03080000 + nullptr, // tp_print +#else + 0, // tp_vectorcall_offset +#endif + nullptr, // tp_getattr + nullptr, // tp_setattr + nullptr, // tp_compare + nullptr, // tp_repr + nullptr, // tp_as_number + &unknown_field_set::SqMethods, // tp_as_sequence + nullptr, // tp_as_mapping + PyObject_HashNotImplemented, // tp_hash + nullptr, // tp_call + nullptr, // tp_str + nullptr, // tp_getattro + nullptr, // tp_setattro + nullptr, // tp_as_buffer + Py_TPFLAGS_DEFAULT, // tp_flags + "unknown field set", // tp_doc + nullptr, // tp_traverse + nullptr, // tp_clear + nullptr, // tp_richcompare + 0, // tp_weaklistoffset + nullptr, // tp_iter + nullptr, // tp_iternext + nullptr, // tp_methods + nullptr, // tp_members + nullptr, // tp_getset + nullptr, // tp_base + nullptr, // tp_dict + nullptr, // tp_descr_get + nullptr, // tp_descr_set + 0, // tp_dictoffset + nullptr, // tp_init + nullptr, // tp_alloc + unknown_field_set::New, // tp_new +}; + +namespace unknown_field { +static PyObject* PyUnknownFieldSet_FromUnknownFieldSet( + PyUnknownFieldSet* parent, const UnknownFieldSet& fields) { + PyUnknownFieldSet* self = reinterpret_cast( + PyType_GenericAlloc(&PyUnknownFieldSet_Type, 0)); + if (self == nullptr) { + return nullptr; + } + + Py_INCREF(parent); + self->parent = parent; + self->fields = const_cast(&fields); + + return reinterpret_cast(self); +} + +const UnknownField* GetUnknownField(PyUnknownField* self) { + const UnknownFieldSet* fields = self->parent->fields; + if (fields == nullptr) { + PyErr_Format(PyExc_ValueError, "UnknownField does not exist. "); + return nullptr; + } + Py_ssize_t total_size = fields->field_count(); + if (self->index >= total_size) { + PyErr_Format(PyExc_ValueError, "UnknownField does not exist. "); + return nullptr; + } + return &fields->field(self->index); +} + +static PyObject* GetFieldNumber(PyUnknownField* self, void* closure) { + const UnknownField* unknown_field = GetUnknownField(self); + if (unknown_field == nullptr) { + return nullptr; + } + return PyLong_FromLong(unknown_field->number()); +} + +using internal::WireFormatLite; +static PyObject* GetWireType(PyUnknownField* self, void* closure) { + const UnknownField* unknown_field = GetUnknownField(self); + if (unknown_field == nullptr) { + return nullptr; + } + + // Assign a default value to suppress may-uninitialized warnings (errors + // when built in some places). + WireFormatLite::WireType wire_type = WireFormatLite::WIRETYPE_VARINT; + switch (unknown_field->type()) { + case UnknownField::TYPE_VARINT: + wire_type = WireFormatLite::WIRETYPE_VARINT; + break; + case UnknownField::TYPE_FIXED32: + wire_type = WireFormatLite::WIRETYPE_FIXED32; + break; + case UnknownField::TYPE_FIXED64: + wire_type = WireFormatLite::WIRETYPE_FIXED64; + break; + case UnknownField::TYPE_LENGTH_DELIMITED: + wire_type = WireFormatLite::WIRETYPE_LENGTH_DELIMITED; + break; + case UnknownField::TYPE_GROUP: + wire_type = WireFormatLite::WIRETYPE_START_GROUP; + break; + } + return PyLong_FromLong(wire_type); +} + +static PyObject* GetData(PyUnknownField* self, void* closure) { + const UnknownField* field = GetUnknownField(self); + if (field == nullptr) { + return nullptr; + } + PyObject* data = nullptr; + switch (field->type()) { + case UnknownField::TYPE_VARINT: + data = PyLong_FromUnsignedLongLong(field->varint()); + break; + case UnknownField::TYPE_FIXED32: + data = PyLong_FromUnsignedLong(field->fixed32()); + break; + case UnknownField::TYPE_FIXED64: + data = PyLong_FromUnsignedLongLong(field->fixed64()); + break; + case UnknownField::TYPE_LENGTH_DELIMITED: + data = PyBytes_FromStringAndSize(field->length_delimited().data(), + field->GetLengthDelimitedSize()); + break; + case UnknownField::TYPE_GROUP: + data = + PyUnknownFieldSet_FromUnknownFieldSet(self->parent, field->group()); + break; + } + return data; +} + +static void Dealloc(PyObject* pself) { + PyUnknownField* self = reinterpret_cast(pself); + Py_CLEAR(self->parent); +} + +static PyGetSetDef Getters[] = { + {"field_number", (getter)GetFieldNumber, nullptr}, + {"wire_type", (getter)GetWireType, nullptr}, + {"data", (getter)GetData, nullptr}, + {nullptr}, +}; + +} // namespace unknown_field + +PyTypeObject PyUnknownField_Type = { + PyVarObject_HEAD_INIT(&PyType_Type, 0) FULL_MODULE_NAME + ".PyUnknownField", // tp_name + sizeof(PyUnknownField), // tp_basicsize + 0, // tp_itemsize + unknown_field::Dealloc, // tp_dealloc +#if PY_VERSION_HEX < 0x03080000 + nullptr, // tp_print +#else + 0, // tp_vectorcall_offset +#endif + nullptr, // tp_getattr + nullptr, // tp_setattr + nullptr, // tp_compare + nullptr, // tp_repr + nullptr, // tp_as_number + nullptr, // tp_as_sequence + nullptr, // tp_as_mapping + PyObject_HashNotImplemented, // tp_hash + nullptr, // tp_call + nullptr, // tp_str + nullptr, // tp_getattro + nullptr, // tp_setattro + nullptr, // tp_as_buffer + Py_TPFLAGS_DEFAULT, // tp_flags + "unknown field", // tp_doc + nullptr, // tp_traverse + nullptr, // tp_clear + nullptr, // tp_richcompare + 0, // tp_weaklistoffset + nullptr, // tp_iter + nullptr, // tp_iternext + nullptr, // tp_methods + nullptr, // tp_members + unknown_field::Getters, // tp_getset + nullptr, // tp_base + nullptr, // tp_dict + nullptr, // tp_descr_get + nullptr, // tp_descr_set + 0, // tp_dictoffset + nullptr, // tp_init +}; + +} // namespace python +} // namespace protobuf +} // namespace google diff --git a/python/google/protobuf/pyext/unknown_field_set.h b/python/google/protobuf/pyext/unknown_field_set.h new file mode 100644 index 0000000000..3fa764d01e --- /dev/null +++ b/python/google/protobuf/pyext/unknown_field_set.h @@ -0,0 +1,78 @@ +// Protocol Buffers - Google's data interchange format +// Copyright 2008 Google Inc. All rights reserved. +// https://developers.google.com/protocol-buffers/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#ifndef GOOGLE_PROTOBUF_PYTHON_CPP_UNKNOWN_FIELD_SET_H__ +#define GOOGLE_PROTOBUF_PYTHON_CPP_UNKNOWN_FIELD_SET_H__ + +#define PY_SSIZE_T_CLEAN +#include + +#include +#include + +#include + +namespace google { +namespace protobuf { + +class UnknownField; +class UnknownFieldSet; + +namespace python { +struct CMessage; + +struct PyUnknownFieldSet { + PyObject_HEAD; + // If parent is nullptr, it is a top UnknownFieldSet. + PyUnknownFieldSet* parent; + + // Top UnknownFieldSet owns fields pointer. Sub UnknownFieldSet + // does not own fields pointer. + UnknownFieldSet* fields; +}; + +struct PyUnknownField { + PyObject_HEAD; + // Every Python PyUnknownField holds a reference to its parent + // PyUnknownFieldSet in order to keep it alive. + PyUnknownFieldSet* parent; + + // The UnknownField index in UnknownFieldSet. + Py_ssize_t index; +}; + +extern PyTypeObject PyUnknownFieldSet_Type; +extern PyTypeObject PyUnknownField_Type; + +} // namespace python +} // namespace protobuf +} // namespace google + +#endif // GOOGLE_PROTOBUF_PYTHON_CPP_UNKNOWN_FIELD_SET_H__ diff --git a/python/google/protobuf/unknown_fields.py b/python/google/protobuf/unknown_fields.py index 147b40bff7..8ae3b7ea2e 100644 --- a/python/google/protobuf/unknown_fields.py +++ b/python/google/protobuf/unknown_fields.py @@ -40,16 +40,15 @@ Simple usage example: from google.protobuf.internal import api_implementation -from google.protobuf.internal import decoder -from google.protobuf.internal import wire_format +if api_implementation.Type() == 'cpp': + from google.protobuf.pyext import _message # pylint: disable=g-import-not-at-top +else: + from google.protobuf.internal import decoder # pylint: disable=g-import-not-at-top + from google.protobuf.internal import wire_format # pylint: disable=g-import-not-at-top if api_implementation.Type() == 'cpp': - def UnknownFieldSet(msg): - # New UnknownFieldSet in cpp extension has not implemented yet. Fall - # back to old API - # TODO(jieluo): Add UnknownFieldSet for cpp extension. - return msg.UnknownFields() + UnknownFieldSet = _message.UnknownFieldSet else: class UnknownField: diff --git a/src/google/protobuf/compiler/java/java_enum.cc b/src/google/protobuf/compiler/java/java_enum.cc index 0d9a71b99d..19722654ad 100644 --- a/src/google/protobuf/compiler/java/java_enum.cc +++ b/src/google/protobuf/compiler/java/java_enum.cc @@ -45,6 +45,9 @@ #include #include +// Must be last. +#include + namespace google { namespace protobuf { namespace compiler { @@ -390,3 +393,5 @@ bool EnumGenerator::CanUseEnumValues() { } // namespace compiler } // namespace protobuf } // namespace google + +#include diff --git a/src/google/protobuf/compiler/java/java_enum_field.cc b/src/google/protobuf/compiler/java/java_enum_field.cc index 0e6fb44d00..9ef97b3449 100644 --- a/src/google/protobuf/compiler/java/java_enum_field.cc +++ b/src/google/protobuf/compiler/java/java_enum_field.cc @@ -48,6 +48,9 @@ #include #include +// Must be last. +#include + namespace google { namespace protobuf { namespace compiler { @@ -859,7 +862,7 @@ void RepeatedImmutableEnumFieldGenerator::GenerateBuilderMembers( "}\n"); printer->Annotate("{", "}", descriptor_); WriteFieldEnumValueAccessorDocComment(printer, descriptor_, - LIST_INDEXED_GETTER, + LIST_INDEXED_SETTER, /* builder */ true); printer->Print( variables_, @@ -1174,3 +1177,5 @@ std::string RepeatedImmutableEnumFieldGenerator::GetBoxedType() const { } // namespace compiler } // namespace protobuf } // namespace google + +#include diff --git a/src/google/protobuf/compiler/java/java_enum_field_lite.cc b/src/google/protobuf/compiler/java/java_enum_field_lite.cc index 27b62ac523..db13d1d81d 100644 --- a/src/google/protobuf/compiler/java/java_enum_field_lite.cc +++ b/src/google/protobuf/compiler/java/java_enum_field_lite.cc @@ -48,6 +48,9 @@ #include #include +// Must be last. +#include + namespace google { namespace protobuf { namespace compiler { @@ -916,3 +919,5 @@ std::string RepeatedImmutableEnumFieldLiteGenerator::GetBoxedType() const { } // namespace compiler } // namespace protobuf } // namespace google + +#include diff --git a/src/google/protobuf/compiler/java/java_extension.cc b/src/google/protobuf/compiler/java/java_extension.cc index 6ebca41d47..b37e93874f 100644 --- a/src/google/protobuf/compiler/java/java_extension.cc +++ b/src/google/protobuf/compiler/java/java_extension.cc @@ -41,6 +41,9 @@ #include #include +// Must be last. +#include + namespace google { namespace protobuf { namespace compiler { @@ -170,3 +173,5 @@ int ImmutableExtensionGenerator::GenerateRegistrationCode( } // namespace compiler } // namespace protobuf } // namespace google + +#include diff --git a/src/google/protobuf/compiler/java/java_extension_lite.cc b/src/google/protobuf/compiler/java/java_extension_lite.cc index d84ee2754b..cac29f6692 100644 --- a/src/google/protobuf/compiler/java/java_extension_lite.cc +++ b/src/google/protobuf/compiler/java/java_extension_lite.cc @@ -37,6 +37,9 @@ #include #include +// Must be last. +#include + namespace google { namespace protobuf { namespace compiler { @@ -113,3 +116,5 @@ int ImmutableExtensionLiteGenerator::GenerateRegistrationCode( } // namespace compiler } // namespace protobuf } // namespace google + +#include diff --git a/src/google/protobuf/compiler/java/java_file.cc b/src/google/protobuf/compiler/java/java_file.cc index 7dbf64d817..d9c98f65aa 100644 --- a/src/google/protobuf/compiler/java/java_file.cc +++ b/src/google/protobuf/compiler/java/java_file.cc @@ -54,6 +54,9 @@ #include #include +// Must be last. +#include + namespace google { namespace protobuf { namespace compiler { @@ -732,3 +735,5 @@ bool FileGenerator::ShouldIncludeDependency(const FileDescriptor* descriptor, } // namespace compiler } // namespace protobuf } // namespace google + +#include diff --git a/src/google/protobuf/compiler/java/java_helpers.cc b/src/google/protobuf/compiler/java/java_helpers.cc index f06f30e5dc..d5433e7e3e 100644 --- a/src/google/protobuf/compiler/java/java_helpers.cc +++ b/src/google/protobuf/compiler/java/java_helpers.cc @@ -49,6 +49,9 @@ #include #include // for hash +// Must be last. +#include + namespace google { namespace protobuf { namespace compiler { @@ -1109,3 +1112,5 @@ void EscapeUtf16ToString(uint16_t code, std::string* output) { } // namespace compiler } // namespace protobuf } // namespace google + +#include diff --git a/src/google/protobuf/compiler/java/java_map_field.cc b/src/google/protobuf/compiler/java/java_map_field.cc index 606d26e93d..0daab8088a 100644 --- a/src/google/protobuf/compiler/java/java_map_field.cc +++ b/src/google/protobuf/compiler/java/java_map_field.cc @@ -36,6 +36,9 @@ #include #include +// Must be last. +#include + namespace google { namespace protobuf { namespace compiler { @@ -887,3 +890,5 @@ std::string ImmutableMapFieldGenerator::GetBoxedType() const { } // namespace compiler } // namespace protobuf } // namespace google + +#include diff --git a/src/google/protobuf/compiler/java/java_map_field_lite.cc b/src/google/protobuf/compiler/java/java_map_field_lite.cc index f624522406..d5d6fb43e1 100644 --- a/src/google/protobuf/compiler/java/java_map_field_lite.cc +++ b/src/google/protobuf/compiler/java/java_map_field_lite.cc @@ -38,6 +38,9 @@ #include #include +// Must be last. +#include + namespace google { namespace protobuf { namespace compiler { @@ -920,3 +923,5 @@ std::string ImmutableMapFieldLiteGenerator::GetBoxedType() const { } // namespace compiler } // namespace protobuf } // namespace google + +#include diff --git a/src/google/protobuf/compiler/java/java_message.cc b/src/google/protobuf/compiler/java/java_message.cc index 9918abc04f..3149d896f1 100644 --- a/src/google/protobuf/compiler/java/java_message.cc +++ b/src/google/protobuf/compiler/java/java_message.cc @@ -56,6 +56,9 @@ #include #include +// Must be last. +#include + namespace google { namespace protobuf { namespace compiler { @@ -1749,3 +1752,5 @@ void ImmutableMessageGenerator::GenerateAnyMethods(io::Printer* printer) { } // namespace compiler } // namespace protobuf } // namespace google + +#include diff --git a/src/google/protobuf/compiler/java/java_message_builder.cc b/src/google/protobuf/compiler/java/java_message_builder.cc index 4d46669a1f..7ed785038a 100644 --- a/src/google/protobuf/compiler/java/java_message_builder.cc +++ b/src/google/protobuf/compiler/java/java_message_builder.cc @@ -53,6 +53,9 @@ #include #include +// Must be last. +#include + namespace google { namespace protobuf { namespace compiler { @@ -710,3 +713,5 @@ void MessageBuilderGenerator::GenerateIsInitialized(io::Printer* printer) { } // namespace compiler } // namespace protobuf } // namespace google + +#include diff --git a/src/google/protobuf/compiler/java/java_message_builder_lite.cc b/src/google/protobuf/compiler/java/java_message_builder_lite.cc index b8136e3de9..4fcfca9861 100644 --- a/src/google/protobuf/compiler/java/java_message_builder_lite.cc +++ b/src/google/protobuf/compiler/java/java_message_builder_lite.cc @@ -53,6 +53,9 @@ #include #include +// Must be last. +#include + namespace google { namespace protobuf { namespace compiler { @@ -149,3 +152,5 @@ void MessageBuilderLiteGenerator::GenerateCommonBuilderMethods( } // namespace compiler } // namespace protobuf } // namespace google + +#include diff --git a/src/google/protobuf/compiler/java/java_message_field.cc b/src/google/protobuf/compiler/java/java_message_field.cc index 694218c1d4..b7e82fc3ce 100644 --- a/src/google/protobuf/compiler/java/java_message_field.cc +++ b/src/google/protobuf/compiler/java/java_message_field.cc @@ -45,6 +45,9 @@ #include #include +// Must be last. +#include + namespace google { namespace protobuf { namespace compiler { @@ -1504,3 +1507,5 @@ void RepeatedImmutableMessageFieldGenerator::GenerateKotlinDslMembers( } // namespace compiler } // namespace protobuf } // namespace google + +#include diff --git a/src/google/protobuf/compiler/java/java_message_field_lite.cc b/src/google/protobuf/compiler/java/java_message_field_lite.cc index 4f4265fdf8..fbae6fe475 100644 --- a/src/google/protobuf/compiler/java/java_message_field_lite.cc +++ b/src/google/protobuf/compiler/java/java_message_field_lite.cc @@ -46,6 +46,9 @@ #include #include +// Must be last. +#include + namespace google { namespace protobuf { namespace compiler { @@ -891,3 +894,5 @@ void RepeatedImmutableMessageFieldLiteGenerator::GenerateKotlinDslMembers( } // namespace compiler } // namespace protobuf } // namespace google + +#include diff --git a/src/google/protobuf/compiler/java/java_message_lite.cc b/src/google/protobuf/compiler/java/java_message_lite.cc index 8073bac090..fedf14ec1b 100644 --- a/src/google/protobuf/compiler/java/java_message_lite.cc +++ b/src/google/protobuf/compiler/java/java_message_lite.cc @@ -56,6 +56,9 @@ #include #include +// Must be last. +#include + namespace google { namespace protobuf { namespace compiler { @@ -996,3 +999,5 @@ void ImmutableMessageLiteGenerator::GenerateKotlinExtensions( } // namespace compiler } // namespace protobuf } // namespace google + +#include diff --git a/src/google/protobuf/compiler/java/java_name_resolver.cc b/src/google/protobuf/compiler/java/java_name_resolver.cc index 08c009b155..f088d6cfa0 100644 --- a/src/google/protobuf/compiler/java/java_name_resolver.cc +++ b/src/google/protobuf/compiler/java/java_name_resolver.cc @@ -38,6 +38,9 @@ #include #include +// Must be last. +#include + namespace google { namespace protobuf { namespace compiler { @@ -378,3 +381,5 @@ std::string ClassNameResolver::GetDowngradedClassName( } // namespace compiler } // namespace protobuf } // namespace google + +#include diff --git a/src/google/protobuf/compiler/java/java_name_resolver.h b/src/google/protobuf/compiler/java/java_name_resolver.h index eddf23b045..103cace43e 100644 --- a/src/google/protobuf/compiler/java/java_name_resolver.h +++ b/src/google/protobuf/compiler/java/java_name_resolver.h @@ -36,6 +36,9 @@ #include +// Must be last. +#include + namespace google { namespace protobuf { class Descriptor; @@ -151,4 +154,6 @@ class ClassNameResolver { } // namespace protobuf } // namespace google +#include + #endif // GOOGLE_PROTOBUF_COMPILER_JAVA_NAME_RESOLVER_H__ diff --git a/src/google/protobuf/compiler/java/java_service.cc b/src/google/protobuf/compiler/java/java_service.cc index 68b915bce3..f8cdfeb096 100644 --- a/src/google/protobuf/compiler/java/java_service.cc +++ b/src/google/protobuf/compiler/java/java_service.cc @@ -41,6 +41,9 @@ #include #include +// Must be last. +#include + namespace google { namespace protobuf { namespace compiler { @@ -472,3 +475,5 @@ void ImmutableServiceGenerator::GenerateBlockingMethodSignature( } // namespace compiler } // namespace protobuf } // namespace google + +#include diff --git a/src/google/protobuf/compiler/python/python_helpers.cc b/src/google/protobuf/compiler/python/python_helpers.cc index bdda888f29..a836eeab8c 100644 --- a/src/google/protobuf/compiler/python/python_helpers.cc +++ b/src/google/protobuf/compiler/python/python_helpers.cc @@ -62,7 +62,7 @@ const char* const kKeywords[] = { "del", "elif", "else", "except", "finally", "for", "from", "global", "if", "import", "in", "is", "lambda", "nonlocal", "not", "or", "pass", "raise", - "return", "try", "while", "with", "yield", "print", + "return", "try", "while", "with", "yield", }; const char* const* kKeywordsEnd = kKeywords + (sizeof(kKeywords) / sizeof(kKeywords[0])); diff --git a/src/google/protobuf/compiler/python/python_pyi_generator.cc b/src/google/protobuf/compiler/python/python_pyi_generator.cc index 3aea545f92..4efbff54c5 100644 --- a/src/google/protobuf/compiler/python/python_pyi_generator.cc +++ b/src/google/protobuf/compiler/python/python_pyi_generator.cc @@ -64,12 +64,21 @@ void PyiGenerator::PrintItemMap( } template -std::string PyiGenerator::ModuleLevelName(const DescriptorT& descriptor) const { +std::string PyiGenerator::ModuleLevelName( + const DescriptorT& descriptor, + const std::map& import_map) const { std::string name = NamePrefixedWithNestedTypes(descriptor, "."); if (descriptor.file() != file_) { - std::string module_name = ModuleName(descriptor.file()->name()); - std::vector tokens = Split(module_name, "."); - name = "_" + tokens.back() + "." + name; + std::string module_alias; + std::string filename = descriptor.file()->name(); + if (import_map.find(filename) == import_map.end()) { + std::string module_name = ModuleName(descriptor.file()->name()); + std::vector tokens = Split(module_name, "."); + module_alias = "_" + tokens.back(); + } else { + module_alias = import_map.at(filename); + } + name = module_alias + "." + name; } return name; } @@ -82,9 +91,22 @@ struct ImportModules { bool has_extendable = false; // _python_message bool has_mapping = false; // typing.Mapping bool has_optional = false; // typing.Optional - bool has_union = false; // typing.Uion + bool has_union = false; // typing.Union + bool has_well_known_type = false; }; +// Checks whether a descriptor name matches a well-known type. +bool IsWellKnownType(const std::string& name) { + // LINT.IfChange(wktbases) + return (name == "google.protobuf.Any" || + name == "google.protobuf.Duration" || + name == "google.protobuf.FieldMask" || + name == "google.protobuf.ListValue" || + name == "google.protobuf.Struct" || + name == "google.protobuf.Timestamp"); + // LINT.ThenChange(//depot/google3/net/proto2/python/internal/well_known_types.py:wktbases) +} + // Checks what modules should be imported for this message // descriptor. void CheckImportModules(const Descriptor* descriptor, @@ -95,6 +117,9 @@ void CheckImportModules(const Descriptor* descriptor, if (descriptor->enum_type_count() > 0) { import_modules->has_enums = true; } + if (IsWellKnownType(descriptor->full_name())) { + import_modules->has_well_known_type = true; + } for (int i = 0; i < descriptor->field_count(); ++i) { const FieldDescriptor* field = descriptor->field(i); if (IsPythonKeyword(field->name())) { @@ -129,23 +154,44 @@ void CheckImportModules(const Descriptor* descriptor, } } +void PyiGenerator::PrintImportForDescriptor( + const FileDescriptor& desc, + std::map* import_map, + std::set* seen_aliases) const { + const std::string& filename = desc.name(); + std::string module_name = StrippedModuleName(filename); + size_t last_dot_pos = module_name.rfind('.'); + std::string import_statement; + if (last_dot_pos == std::string::npos) { + import_statement = "import " + module_name; + } else { + import_statement = "from " + module_name.substr(0, last_dot_pos) + + " import " + module_name.substr(last_dot_pos + 1); + module_name = module_name.substr(last_dot_pos + 1); + } + std::string alias = "_" + module_name; + // Generate a unique alias by adding _1 suffixes until we get an unused alias. + while (seen_aliases->find(alias) != seen_aliases->end()) { + alias = alias + "_1"; + } + printer_->Print("$statement$ as $alias$\n", "statement", + import_statement, "alias", alias); + (*import_map)[filename] = alias; + seen_aliases->insert(alias); +} + void PyiGenerator::PrintImports( - std::map* item_map) const { + std::map* item_map, + std::map* import_map) const { // Prints imported dependent _pb2 files. + std::set seen_aliases; for (int i = 0; i < file_->dependency_count(); ++i) { - const std::string& filename = file_->dependency(i)->name(); - std::string module_name = StrippedModuleName(filename); - size_t last_dot_pos = module_name.rfind('.'); - std::string import_statement; - if (last_dot_pos == std::string::npos) { - import_statement = "import " + module_name; - } else { - import_statement = "from " + module_name.substr(0, last_dot_pos) + - " import " + module_name.substr(last_dot_pos + 1); - module_name = module_name.substr(last_dot_pos + 1); + const FileDescriptor* dep = file_->dependency(i); + PrintImportForDescriptor(*dep, import_map, &seen_aliases); + for (int j = 0; j < dep->public_dependency_count(); ++j) { + PrintImportForDescriptor( + *dep->public_dependency(j), import_map, &seen_aliases); } - printer_->Print("$statement$ as _$module_name$\n", "statement", - import_statement, "module_name", module_name); } // Checks what modules should be imported. @@ -177,6 +223,11 @@ void PyiGenerator::PrintImports( "from google.protobuf.internal import python_message" " as _python_message\n"); } + if (import_modules.has_well_known_type) { + printer_->Print( + "from google.protobuf.internal import well_known_types" + " as _well_known_types\n"); + } printer_->Print( "from google.protobuf import" " descriptor as _descriptor\n"); @@ -190,21 +241,18 @@ void PyiGenerator::PrintImports( " _service\n"); } printer_->Print("from typing import "); - printer_->Print("ClassVar"); + printer_->Print("ClassVar as _ClassVar"); if (import_modules.has_iterable) { - printer_->Print(", Iterable"); + printer_->Print(", Iterable as _Iterable"); } if (import_modules.has_mapping) { - printer_->Print(", Mapping"); + printer_->Print(", Mapping as _Mapping"); } if (import_modules.has_optional) { - printer_->Print(", Optional"); - } - if (file_->service_count() > 0) { - printer_->Print(", Text"); + printer_->Print(", Optional as _Optional"); } if (import_modules.has_union) { - printer_->Print(", Union"); + printer_->Print(", Union as _Union"); } printer_->Print("\n\n"); @@ -229,7 +277,7 @@ void PyiGenerator::PrintImports( const EnumDescriptor* enum_descriptor = public_dep->enum_type(i); for (int j = 0; j < enum_descriptor->value_count(); ++j) { (*item_map)[enum_descriptor->value(j)->name()] = - ModuleLevelName(*enum_descriptor); + ModuleLevelName(*enum_descriptor, *import_map); } } // Top level extensions for public imports @@ -248,9 +296,10 @@ void PyiGenerator::PrintEnum(const EnumDescriptor& enum_descriptor) const { // Adds enum value to item map which will be ordered and printed later. void PyiGenerator::AddEnumValue( const EnumDescriptor& enum_descriptor, - std::map* item_map) const { + std::map* item_map, + const std::map& import_map) const { // enum values - std::string module_enum_name = ModuleLevelName(enum_descriptor); + std::string module_enum_name = ModuleLevelName(enum_descriptor, import_map); for (int j = 0; j < enum_descriptor.value_count(); ++j) { const EnumValueDescriptor* value_descriptor = enum_descriptor.value(j); (*item_map)[value_descriptor->name()] = module_enum_name; @@ -275,13 +324,15 @@ void PyiGenerator::AddExtensions( const FieldDescriptor* extension_field = descriptor.extension(i); std::string constant_name = extension_field->name() + "_FIELD_NUMBER"; ToUpper(&constant_name); - (*item_map)[constant_name] = "ClassVar[int]"; + (*item_map)[constant_name] = "_ClassVar[int]"; (*item_map)[extension_field->name()] = "_descriptor.FieldDescriptor"; } } // Returns the string format of a field's cpp_type -std::string PyiGenerator::GetFieldType(const FieldDescriptor& field_des) const { +std::string PyiGenerator::GetFieldType( + const FieldDescriptor& field_des, const Descriptor& containing_des, + const std::map& import_map) const { switch (field_des.cpp_type()) { case FieldDescriptor::CPPTYPE_INT32: case FieldDescriptor::CPPTYPE_UINT32: @@ -294,29 +345,48 @@ std::string PyiGenerator::GetFieldType(const FieldDescriptor& field_des) const { case FieldDescriptor::CPPTYPE_BOOL: return "bool"; case FieldDescriptor::CPPTYPE_ENUM: - return ModuleLevelName(*field_des.enum_type()); + return ModuleLevelName(*field_des.enum_type(), import_map); case FieldDescriptor::CPPTYPE_STRING: if (field_des.type() == FieldDescriptor::TYPE_STRING) { return "str"; } else { return "bytes"; } - case FieldDescriptor::CPPTYPE_MESSAGE: - return ModuleLevelName(*field_des.message_type()); + case FieldDescriptor::CPPTYPE_MESSAGE: { + // If the field is inside a nested message and the nested message has the + // same name as a top-level message, then we need to prefix the field type + // with the module name for disambiguation. + std::string name = ModuleLevelName(*field_des.message_type(), import_map); + if ((containing_des.containing_type() != nullptr && + name == containing_des.name())) { + std::string module = ModuleName(field_des.file()->name()); + name = module + "." + name; + } + return name; + } default: - GOOGLE_LOG(FATAL) << "Unsuppoted field type."; + GOOGLE_LOG(FATAL) << "Unsupported field type."; } return ""; } -void PyiGenerator::PrintMessage(const Descriptor& message_descriptor, - bool is_nested) const { +void PyiGenerator::PrintMessage( + const Descriptor& message_descriptor, bool is_nested, + const std::map& import_map) const { if (!is_nested) { printer_->Print("\n"); } std::string class_name = message_descriptor.name(); - printer_->Print("class $class_name$(_message.Message):\n", "class_name", - class_name); + std::string extra_base; + // A well-known type needs to inherit from its corresponding base class in + // net/proto2/python/internal/well_known_types. + if (IsWellKnownType(message_descriptor.full_name())) { + extra_base = ", _well_known_types." + message_descriptor.name(); + } else { + extra_base = ""; + } + printer_->Print("class $class_name$(_message.Message$extra_base$):\n", + "class_name", class_name, "extra_base", extra_base); printer_->Indent(); printer_->Indent(); @@ -361,7 +431,7 @@ void PyiGenerator::PrintMessage(const Descriptor& message_descriptor, for (const auto& entry : nested_enums) { PrintEnum(*entry); // Adds enum value to item_map which will be ordered and printed later - AddEnumValue(*entry, &item_map); + AddEnumValue(*entry, &item_map, import_map); } // Prints nested messages @@ -374,7 +444,7 @@ void PyiGenerator::PrintMessage(const Descriptor& message_descriptor, SortByName()); for (const auto& entry : nested_messages) { - PrintMessage(*entry, true); + PrintMessage(*entry, true, import_map); } // Adds extensions to item_map which will be ordered and printed later @@ -384,7 +454,7 @@ void PyiGenerator::PrintMessage(const Descriptor& message_descriptor, for (int i = 0; i < message_descriptor.field_count(); ++i) { const FieldDescriptor& field_des = *message_descriptor.field(i); item_map[ToUpper(field_des.name()) + "_FIELD_NUMBER"] = - "ClassVar[int]"; + "_ClassVar[int]"; if (IsPythonKeyword(field_des.name())) { continue; } @@ -395,16 +465,16 @@ void PyiGenerator::PrintMessage(const Descriptor& message_descriptor, field_type = (value_des->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE ? "_containers.MessageMap[" : "_containers.ScalarMap["); - field_type += GetFieldType(*key_des); + field_type += GetFieldType(*key_des, message_descriptor, import_map); field_type += ", "; - field_type += GetFieldType(*value_des); + field_type += GetFieldType(*value_des, message_descriptor, import_map); } else { if (field_des.is_repeated()) { field_type = (field_des.cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE ? "_containers.RepeatedCompositeFieldContainer[" : "_containers.RepeatedScalarFieldContainer["); } - field_type += GetFieldType(field_des); + field_type += GetFieldType(field_des, message_descriptor, import_map); } if (field_des.is_repeated()) { @@ -437,26 +507,31 @@ void PyiGenerator::PrintMessage(const Descriptor& message_descriptor, printer_->Print(", $field_name$: ", "field_name", field_name); if (field_des->is_repeated() || field_des->cpp_type() != FieldDescriptor::CPPTYPE_BOOL) { - printer_->Print("Optional["); + printer_->Print("_Optional["); } if (field_des->is_map()) { const Descriptor* map_entry = field_des->message_type(); - printer_->Print("Mapping[$key_type$, $value_type$]", "key_type", - GetFieldType(*map_entry->field(0)), "value_type", - GetFieldType(*map_entry->field(1))); + printer_->Print( + "_Mapping[$key_type$, $value_type$]", "key_type", + GetFieldType(*map_entry->field(0), message_descriptor, import_map), + "value_type", + GetFieldType(*map_entry->field(1), message_descriptor, import_map)); } else { if (field_des->is_repeated()) { - printer_->Print("Iterable["); + printer_->Print("_Iterable["); } if (field_des->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) { - printer_->Print("Union[$type_name$, Mapping]", "type_name", - GetFieldType(*field_des)); + printer_->Print( + "_Union[$type_name$, _Mapping]", "type_name", + GetFieldType(*field_des, message_descriptor, import_map)); } else { if (field_des->cpp_type() == FieldDescriptor::CPPTYPE_ENUM) { - printer_->Print("Union[$type_name$, str]", "type_name", - ModuleLevelName(*field_des->enum_type())); + printer_->Print("_Union[$type_name$, str]", "type_name", + ModuleLevelName(*field_des->enum_type(), import_map)); } else { - printer_->Print("$type_name$", "type_name", GetFieldType(*field_des)); + printer_->Print( + "$type_name$", "type_name", + GetFieldType(*field_des, message_descriptor, import_map)); } } if (field_des->is_repeated()) { @@ -478,7 +553,8 @@ void PyiGenerator::PrintMessage(const Descriptor& message_descriptor, printer_->Outdent(); } -void PyiGenerator::PrintMessages() const { +void PyiGenerator::PrintMessages( + const std::map& import_map) const { // Order the descriptors by name to have same output with proto_to_pyi.py std::vector messages; messages.reserve(file_->message_type_count()); @@ -488,7 +564,7 @@ void PyiGenerator::PrintMessages() const { std::sort(messages.begin(), messages.end(), SortByName()); for (const auto& entry : messages) { - PrintMessage(*entry, false); + PrintMessage(*entry, false, import_map); } } @@ -534,17 +610,22 @@ bool PyiGenerator::Generate(const FileDescriptor* file, // Adds "DESCRIPTOR" into item_map. item_map["DESCRIPTOR"] = "_descriptor.FileDescriptor"; - PrintImports(&item_map); + + // import_map will be a mapping from filename to module alias, e.g. + // "google3/foo/bar.py" -> "_bar" + std::map import_map; + + PrintImports(&item_map, &import_map); // Adds top level enum values to item_map. for (int i = 0; i < file_->enum_type_count(); ++i) { - AddEnumValue(*file_->enum_type(i), &item_map); + AddEnumValue(*file_->enum_type(i), &item_map, import_map); } // Adds top level extensions to item_map. AddExtensions(*file_, &item_map); // Prints item map PrintItemMap(item_map); - PrintMessages(); + PrintMessages(import_map); PrintTopLevelEnums(); if (HasGenericServices(file)) { PrintServices(); diff --git a/src/google/protobuf/compiler/python/python_pyi_generator.h b/src/google/protobuf/compiler/python/python_pyi_generator.h index 49158d099d..9611ed43d1 100644 --- a/src/google/protobuf/compiler/python/python_pyi_generator.h +++ b/src/google/protobuf/compiler/python/python_pyi_generator.h @@ -36,6 +36,7 @@ #define GOOGLE_PROTOBUF_COMPILER_PYTHON_PYI_GENERATOR_H__ #include +#include #include #include @@ -65,26 +66,41 @@ class PROTOC_EXPORT PyiGenerator : public google::protobuf::compiler::CodeGenera ~PyiGenerator() override; // CodeGenerator methods. + uint64_t GetSupportedFeatures() const override { + // Code generators must explicitly support proto3 optional. + return CodeGenerator::FEATURE_PROTO3_OPTIONAL; + } bool Generate(const FileDescriptor* file, const std::string& parameter, GeneratorContext* generator_context, std::string* error) const override; private: - void PrintImports(std::map* item_map) const; + void PrintImportForDescriptor(const FileDescriptor& desc, + std::map* import_map, + std::set* seen_aliases) const; + void PrintImports(std::map* item_map, + std::map* import_map) const; void PrintEnum(const EnumDescriptor& enum_descriptor) const; void AddEnumValue(const EnumDescriptor& enum_descriptor, - std::map* item_map) const; + std::map* item_map, + const std::map& import_map) const; void PrintTopLevelEnums() const; template void AddExtensions(const DescriptorT& descriptor, std::map* item_map) const; - void PrintMessages() const; - void PrintMessage(const Descriptor& message_descriptor, bool is_nested) const; + void PrintMessages( + const std::map& import_map) const; + void PrintMessage(const Descriptor& message_descriptor, bool is_nested, + const std::map& import_map) const; void PrintServices() const; void PrintItemMap(const std::map& item_map) const; - std::string GetFieldType(const FieldDescriptor& field_des) const; + std::string GetFieldType( + const FieldDescriptor& field_des, const Descriptor& containing_des, + const std::map& import_map) const; template - std::string ModuleLevelName(const DescriptorT& descriptor) const; + std::string ModuleLevelName( + const DescriptorT& descriptor, + const std::map& import_map) const; // Very coarse-grained lock to ensure that Generate() is reentrant. // Guards file_ and printer_. diff --git a/src/google/protobuf/generated_message_tctable_lite.cc b/src/google/protobuf/generated_message_tctable_lite.cc index d56a3f0400..ff9f1496f3 100644 --- a/src/google/protobuf/generated_message_tctable_lite.cc +++ b/src/google/protobuf/generated_message_tctable_lite.cc @@ -580,6 +580,35 @@ const char* TcParser::FastF64P2(PROTOBUF_TC_PARAM_DECL) { namespace { +// Shift "byte" left by n * 7 bits, filling vacated bits with ones. +template +inline PROTOBUF_ALWAYS_INLINE uint64_t +shift_left_fill_with_ones(uint64_t byte, uint64_t ones) { + return (byte << (n * 7)) | (ones >> (64 - (n * 7))); +} + +// Shift "byte" left by n * 7 bits, filling vacated bits with ones, and +// put the new value in res. Return whether the result was negative. +template +inline PROTOBUF_ALWAYS_INLINE bool shift_left_fill_with_ones_was_negative( + uint64_t byte, uint64_t ones, int64_t& res) { +#if defined(__GCC_ASM_FLAG_OUTPUTS__) && defined(__x86_64__) + // For the first two rounds (ptr[1] and ptr[2]), micro benchmarks show a + // substantial improvement from capturing the sign from the condition code + // register on x86-64. + bool sign_bit; + asm("shldq %3, %2, %1" + : "=@ccs"(sign_bit), "+r"(byte) + : "r"(ones), "i"(n * 7)); + res = byte; + return sign_bit; +#else + // Generic fallback: + res = (byte << (n * 7)) | (ones >> (64 - (n * 7))); + return static_cast(res) < 0; +#endif +} + inline PROTOBUF_ALWAYS_INLINE std::pair Parse64FallbackPair(const char* p, int64_t res1) { auto ptr = reinterpret_cast(p); @@ -601,78 +630,42 @@ Parse64FallbackPair(const char* p, int64_t res1) { // has 57 high bits of ones, which is enough for the largest shift done. GOOGLE_DCHECK_EQ(res1 >> 7, -1); uint64_t ones = res1; // save the high 1 bits from res1 (input to SHLD) - uint64_t byte; // the "next" 7-bit chunk, shifted (result from SHLD) int64_t res2, res3; // accumulated result chunks -#define SHLD(n) byte = ((byte << (n * 7)) | (ones >> (64 - (n * 7)))) - - int sign_bit; -#if defined(__GCC_ASM_FLAG_OUTPUTS__) && defined(__x86_64__) - // For the first two rounds (ptr[1] and ptr[2]), micro benchmarks show a - // substantial improvement from capturing the sign from the condition code - // register on x86-64. -#define SHLD_SIGN(n) \ - asm("shldq %3, %2, %1" \ - : "=@ccs"(sign_bit), "+r"(byte) \ - : "r"(ones), "i"(n * 7)) -#else - // Generic fallback: -#define SHLD_SIGN(n) \ - do { \ - SHLD(n); \ - sign_bit = static_cast(byte) < 0; \ - } while (0) -#endif - byte = ptr[1]; - SHLD_SIGN(1); - res2 = byte; - if (!sign_bit) goto done2; - byte = ptr[2]; - SHLD_SIGN(2); - res3 = byte; - if (!sign_bit) goto done3; - -#undef SHLD_SIGN + if (!shift_left_fill_with_ones_was_negative<1>(ptr[1], ones, res2)) + goto done2; + if (!shift_left_fill_with_ones_was_negative<2>(ptr[2], ones, res3)) + goto done3; // For the remainder of the chunks, check the sign of the AND result. - byte = ptr[3]; - SHLD(3); - res1 &= byte; + res1 &= shift_left_fill_with_ones<3>(ptr[3], ones); if (res1 >= 0) goto done4; - byte = ptr[4]; - SHLD(4); - res2 &= byte; + res2 &= shift_left_fill_with_ones<4>(ptr[4], ones); if (res2 >= 0) goto done5; - byte = ptr[5]; - SHLD(5); - res3 &= byte; + res3 &= shift_left_fill_with_ones<5>(ptr[5], ones); if (res3 >= 0) goto done6; - byte = ptr[6]; - SHLD(6); - res1 &= byte; + res1 &= shift_left_fill_with_ones<6>(ptr[6], ones); if (res1 >= 0) goto done7; - byte = ptr[7]; - SHLD(7); - res2 &= byte; + res2 &= shift_left_fill_with_ones<7>(ptr[7], ones); if (res2 >= 0) goto done8; - byte = ptr[8]; - SHLD(8); - res3 &= byte; + res3 &= shift_left_fill_with_ones<8>(ptr[8], ones); if (res3 >= 0) goto done9; -#undef SHLD - // For valid 64bit varints, the 10th byte/ptr[9] should be exactly 1. In this // case, the continuation bit of ptr[8] already set the top bit of res3 // correctly, so all we have to do is check that the expected case is true. - byte = ptr[9]; - if (PROTOBUF_PREDICT_TRUE(byte == 1)) goto done10; + if (PROTOBUF_PREDICT_TRUE(ptr[9] == 1)) goto done10; // A value of 0, however, represents an over-serialized varint. This case // should not happen, but if does (say, due to a nonconforming serializer), // deassert the continuation bit that came from ptr[8]. - if (byte == 0) { + if (ptr[9] == 0) { +#if defined(__GCC_ASM_FLAG_OUTPUTS__) && defined(__x86_64__) + // Use a small instruction since this is an uncommon code path. + asm("btcq $63,%0" : "+r"(res3)); +#else res3 ^= static_cast(1) << 63; +#endif goto done10; } @@ -680,18 +673,24 @@ Parse64FallbackPair(const char* p, int64_t res1) { // fit in 64 bits. If the continue bit is set, it is an unterminated varint. return {nullptr, 0}; -#define DONE(n) done##n : return {p + n, res1 & res2 & res3}; done2: return {p + 2, res1 & res2}; - DONE(3) - DONE(4) - DONE(5) - DONE(6) - DONE(7) - DONE(8) - DONE(9) - DONE(10) -#undef DONE +done3: + return {p + 3, res1 & res2 & res3}; +done4: + return {p + 4, res1 & res2 & res3}; +done5: + return {p + 5, res1 & res2 & res3}; +done6: + return {p + 6, res1 & res2 & res3}; +done7: + return {p + 7, res1 & res2 & res3}; +done8: + return {p + 8, res1 & res2 & res3}; +done9: + return {p + 9, res1 & res2 & res3}; +done10: + return {p + 10, res1 & res2 & res3}; } inline PROTOBUF_ALWAYS_INLINE const char* ParseVarint(const char* p, diff --git a/src/google/protobuf/io/tokenizer.cc b/src/google/protobuf/io/tokenizer.cc index 9db384b59a..f9e07763e7 100644 --- a/src/google/protobuf/io/tokenizer.cc +++ b/src/google/protobuf/io/tokenizer.cc @@ -150,12 +150,32 @@ CHARACTER_CLASS(Escape, c == 'a' || c == 'b' || c == 'f' || c == 'n' || // Given a char, interpret it as a numeric digit and return its value. // This supports any number base up to 36. -inline int DigitValue(char digit) { - if ('0' <= digit && digit <= '9') return digit - '0'; - if ('a' <= digit && digit <= 'z') return digit - 'a' + 10; - if ('A' <= digit && digit <= 'Z') return digit - 'A' + 10; - return -1; -} +// Represents integer values of digits. +// Uses 36 to indicate an invalid character since we support +// bases up to 36. +static const int8_t kAsciiToInt[256] = { + 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, // 00-0F + 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, // 10-1F + 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, // ' '-'/' + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, // '0'-'9' + 36, 36, 36, 36, 36, 36, 36, // ':'-'@' + 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, // 'A'-'P' + 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, // 'Q'-'Z' + 36, 36, 36, 36, 36, 36, // '['-'`' + 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, // 'a'-'p' + 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, // 'q'-'z' + 36, 36, 36, 36, 36, // '{'-DEL + 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, // 80-8F + 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, // 90-9F + 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, // A0-AF + 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, // B0-BF + 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, // C0-CF + 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, // D0-DF + 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, // E0-EF + 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, // F0-FF +}; + +inline int DigitValue(char digit) { return kAsciiToInt[digit & 0xFF]; } // Inline because it's only used in one place. inline char TranslateEscape(char c) { @@ -914,25 +934,49 @@ bool Tokenizer::NextWithComments(std::string* prev_trailing_comments, bool Tokenizer::ParseInteger(const std::string& text, uint64_t max_value, uint64_t* output) { - // Sadly, we can't just use strtoul() since it is only 32-bit and strtoull() - // is non-standard. I hate the C standard library. :( - - // return strtoull(text.c_str(), NULL, 0); + // We can't just use strtoull() because (a) it accepts negative numbers, + // (b) We want additional range checks, (c) it reports overflows via errno. + +#if 0 + const char *str_begin = text.c_str(); + if (*str_begin == '-') return false; + char *str_end = nullptr; + errno = 0; + *output = std::strtoull(str_begin, &str_end, 0); + return (errno == 0 && str_end && *str_end == '\0' && *output <= max_value); +#endif const char* ptr = text.c_str(); int base = 10; + uint64_t overflow_if_mul_base = (kuint64max / 10) + 1; if (ptr[0] == '0') { if (ptr[1] == 'x' || ptr[1] == 'X') { // This is hex. base = 16; + overflow_if_mul_base = (kuint64max / 16) + 1; ptr += 2; } else { // This is octal. base = 8; + overflow_if_mul_base = (kuint64max / 8) + 1; } } uint64_t result = 0; + // For all the leading '0's, and also the first non-zero character, we + // don't need to multiply. + while (*ptr != '\0') { + int digit = DigitValue(*ptr++); + if (digit >= base) { + // The token provided by Tokenizer is invalid. i.e., 099 is an invalid + // token, but Tokenizer still think it's integer. + return false; + } + if (digit != 0) { + result = digit; + break; + } + } for (; *ptr != '\0'; ptr++) { int digit = DigitValue(*ptr); if (digit < 0 || digit >= base) { @@ -940,24 +984,18 @@ bool Tokenizer::ParseInteger(const std::string& text, uint64_t max_value, // token, but Tokenizer still think it's integer. return false; } - if (static_cast(digit) > max_value) return false; -#if PROTOBUF_HAS_BUILTIN_MUL_OVERFLOW - // If there is a uint64_t overflow, there is a result * base logical - // overflow. This is done to avoid division. - if (__builtin_mul_overflow(result, base, &result) || - result > (max_value - digit)) { - // Overflow. - return false; - } - result += digit; -#else - if (result > (max_value - digit) / base) { - // Overflow. + if (result >= overflow_if_mul_base) { + // We know the multiply we're about to do will overflow, so exit now. return false; } + // We know that result * base won't overflow, but adding digit might... result = result * base + digit; -#endif // PROTOBUF_HAS_BUILTIN_MUL_OVERFLOW + // C++ guarantees defined "wrap" semantics when unsigned integer + // operations overflow, making this a fast way to check if adding + // digit made result overflow, and thus, wrap around. + if (result < static_cast(base)) return false; } + if (result > max_value) return false; *output = result; return true; @@ -1199,4 +1237,3 @@ bool Tokenizer::IsIdentifier(const std::string& text) { } // namespace google #include - diff --git a/src/google/protobuf/io/tokenizer_unittest.cc b/src/google/protobuf/io/tokenizer_unittest.cc index 0b2e76c66b..1e6c95c893 100644 --- a/src/google/protobuf/io/tokenizer_unittest.cc +++ b/src/google/protobuf/io/tokenizer_unittest.cc @@ -178,9 +178,10 @@ const int kBlockSizes[] = {1, 2, 3, 5, 7, 13, 32, 1024}; class TokenizerTest : public testing::Test { protected: // For easy testing. - uint64 ParseInteger(const std::string& text) { - uint64 result; - EXPECT_TRUE(Tokenizer::ParseInteger(text, kuint64max, &result)); + uint64_t ParseInteger(const std::string& text) { + uint64_t result; + EXPECT_TRUE(Tokenizer::ParseInteger(text, kuint64max, &result)) + << "'" << text << "'"; return result; } }; @@ -809,8 +810,8 @@ TEST_2D(TokenizerTest, DocComments, kDocCommentCases, kBlockSizes) { // ------------------------------------------------------------------- -// Test parse helpers. It's not really worth setting up a full data-driven -// test here. +// Test parse helpers. +// TODO(b/225783758): Add a fuzz test for this. TEST_F(TokenizerTest, ParseInteger) { EXPECT_EQ(0, ParseInteger("0")); EXPECT_EQ(123, ParseInteger("123")); @@ -823,7 +824,7 @@ TEST_F(TokenizerTest, ParseInteger) { // Test invalid integers that may still be tokenized as integers. EXPECT_EQ(0, ParseInteger("0x")); - uint64 i; + uint64_t i; // Test invalid integers that will never be tokenized as integers. EXPECT_FALSE(Tokenizer::ParseInteger("zxy", kuint64max, &i)); @@ -840,6 +841,105 @@ TEST_F(TokenizerTest, ParseInteger) { EXPECT_FALSE(Tokenizer::ParseInteger("12346", 12345, &i)); EXPECT_TRUE(Tokenizer::ParseInteger("0xFFFFFFFFFFFFFFFF", kuint64max, &i)); EXPECT_FALSE(Tokenizer::ParseInteger("0x10000000000000000", kuint64max, &i)); + + // Test near the limits of signed parsing (values in kint64max +/- 1600) + for (int64_t offset = -1600; offset <= 1600; ++offset) { + uint64_t i = 0x7FFFFFFFFFFFFFFF + offset; + char decimal[32]; + snprintf(decimal, 32, "%llu", static_cast(i)); + if (offset > 0) { + uint64_t parsed = -1; + EXPECT_FALSE(Tokenizer::ParseInteger(decimal, kint64max, &parsed)) + << decimal << "=>" << parsed; + } else { + uint64_t parsed = -1; + EXPECT_TRUE(Tokenizer::ParseInteger(decimal, kint64max, &parsed)) + << decimal << "=>" << parsed; + EXPECT_EQ(parsed, i); + } + char octal[32]; + snprintf(octal, 32, "0%llo", static_cast(i)); + if (offset > 0) { + uint64_t parsed = -1; + EXPECT_FALSE(Tokenizer::ParseInteger(octal, kint64max, &parsed)) + << octal << "=>" << parsed; + } else { + uint64_t parsed = -1; + EXPECT_TRUE(Tokenizer::ParseInteger(octal, kint64max, &parsed)) + << octal << "=>" << parsed; + EXPECT_EQ(parsed, i); + } + char hex[32]; + snprintf(hex, 32, "0x%llx", static_cast(i)); + if (offset > 0) { + uint64_t parsed = -1; + EXPECT_FALSE(Tokenizer::ParseInteger(hex, kint64max, &parsed)) + << hex << "=>" << parsed; + } else { + uint64_t parsed = -1; + EXPECT_TRUE(Tokenizer::ParseInteger(hex, kint64max, &parsed)) << hex; + EXPECT_EQ(parsed, i); + } + // EXPECT_NE(offset, -237); + } + + // Test near the limits of unsigned parsing (values in kuint64max +/- 1600) + // By definition, values greater than kuint64max cannot be held in a uint64_t + // variable, so printing them is a little tricky; fortunately all but the + // last four digits are known, so we can hard-code them in the printf string, + // and we only need to format the last 4. + for (int64_t offset = -1600; offset <= 1600; ++offset) { + { + uint64_t i = 18446744073709551615u + offset; + char decimal[32]; + snprintf(decimal, 32, "1844674407370955%04llu", + static_cast(1615 + offset)); + if (offset > 0) { + uint64_t parsed = -1; + EXPECT_FALSE(Tokenizer::ParseInteger(decimal, kuint64max, &parsed)) + << decimal << "=>" << parsed; + } else { + uint64_t parsed = -1; + EXPECT_TRUE(Tokenizer::ParseInteger(decimal, kuint64max, &parsed)) + << decimal; + EXPECT_EQ(parsed, i); + } + } + { + uint64_t i = 01777777777777777777777u + offset; + if (offset > 0) { + char octal[32]; + snprintf(octal, 32, "0200000000000000000%04llo", + static_cast(offset - 1)); + uint64_t parsed = -1; + EXPECT_FALSE(Tokenizer::ParseInteger(octal, kuint64max, &parsed)) + << octal << "=>" << parsed; + } else { + char octal[32]; + snprintf(octal, 32, "0%llo", static_cast(i)); + uint64_t parsed = -1; + EXPECT_TRUE(Tokenizer::ParseInteger(octal, kuint64max, &parsed)) + << octal; + EXPECT_EQ(parsed, i); + } + } + { + uint64_t ui = 0xffffffffffffffffu + offset; + char hex[32]; + if (offset > 0) { + snprintf(hex, 32, "0x1000000000000%04llx", + static_cast(offset - 1)); + uint64_t parsed = -1; + EXPECT_FALSE(Tokenizer::ParseInteger(hex, kuint64max, &parsed)) + << hex << "=>" << parsed; + } else { + snprintf(hex, 32, "0x%llx", static_cast(ui)); + uint64_t parsed = -1; + EXPECT_TRUE(Tokenizer::ParseInteger(hex, kuint64max, &parsed)) << hex; + EXPECT_EQ(parsed, ui); + } + } + } } TEST_F(TokenizerTest, ParseFloat) { diff --git a/src/google/protobuf/port_def.inc b/src/google/protobuf/port_def.inc index f6a96bc644..2afc7b4f60 100644 --- a/src/google/protobuf/port_def.inc +++ b/src/google/protobuf/port_def.inc @@ -535,17 +535,6 @@ #define PROTOBUF_ASSUME(pred) GOOGLE_DCHECK(pred) #endif -// PROTOBUF_HAS_BUILTIN_MUL_OVERFLOW tells the compiler if it has -// __builtin_mul_overflow intrinsic to check for multiplication overflow. -#ifdef PROTOBUF_HAS_BUILTIN_MUL_OVERFLOW -#error PROTOBUF_HAS_BUILTIN_MUL_OVERFLOW was previously defined -#endif -#if __has_builtin(__builtin_mul_overflow) -#define PROTOBUF_HAS_BUILTIN_MUL_OVERFLOW 1 -#else -#define PROTOBUF_HAS_BUILTIN_MUL_OVERFLOW 0 -#endif - // Specify memory alignment for structs, classes, etc. // Use like: // class PROTOBUF_ALIGNAS(16) MyClass { ... } @@ -782,6 +771,8 @@ #undef ERROR_INSTALL_FAILED #pragma push_macro("ERROR_NOT_FOUND") #undef ERROR_NOT_FOUND +#pragma push_macro("GetClassName") +#undef GetClassName #pragma push_macro("GetMessage") #undef GetMessage #pragma push_macro("IGNORE") diff --git a/src/google/protobuf/port_undef.inc b/src/google/protobuf/port_undef.inc index 33f6c0340f..3a3700f884 100644 --- a/src/google/protobuf/port_undef.inc +++ b/src/google/protobuf/port_undef.inc @@ -72,7 +72,6 @@ #undef PROTOBUF_NAMESPACE_CLOSE #undef PROTOBUF_UNUSED #undef PROTOBUF_ASSUME -#undef PROTOBUF_HAS_BUILTIN_MUL_OVERFLOW #undef PROTOBUF_EXPORT_TEMPLATE_DECLARE #undef PROTOBUF_EXPORT_TEMPLATE_DEFINE #undef PROTOBUF_ALIGNAS @@ -112,6 +111,7 @@ #pragma pop_macro("ERROR_BUSY") #pragma pop_macro("ERROR_INSTALL_FAILED") #pragma pop_macro("ERROR_NOT_FOUND") +#pragma pop_macro("GetClassName") #pragma pop_macro("GetMessage") #pragma pop_macro("IGNORE") #pragma pop_macro("IN") diff --git a/src/google/protobuf/repeated_ptr_field.h b/src/google/protobuf/repeated_ptr_field.h index 519b1a2878..aa278e6b86 100644 --- a/src/google/protobuf/repeated_ptr_field.h +++ b/src/google/protobuf/repeated_ptr_field.h @@ -749,7 +749,7 @@ class GenericTypeHandler { static inline GenericType* New(Arena* arena, GenericType&& value) { return Arena::Create(arena, std::move(value)); } - static inline GenericType* NewFromPrototype(const GenericType* prototype, + static inline GenericType* NewFromPrototype(const GenericType* /*prototype*/, Arena* arena = nullptr) { return New(arena); } diff --git a/src/google/protobuf/text_format.cc b/src/google/protobuf/text_format.cc index 880b16dde6..04eee7324f 100644 --- a/src/google/protobuf/text_format.cc +++ b/src/google/protobuf/text_format.cc @@ -803,7 +803,7 @@ class TextFormat::Parser::ParserImpl { case FieldDescriptor::CPPTYPE_STRING: { std::string value; DO(ConsumeString(&value)); - SET_FIELD(String, value); + SET_FIELD(String, std::move(value)); break; }