diff --git a/CHANGES.txt b/CHANGES.txt index 4481b27b35..cb5bff5da6 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,3 +1,26 @@ +Unreleased Changes (C++/Java/Python/PHP/Objective-C/C#/Ruby/JavaScript) + Protocol Compiler + * Stop emitting boilerplate {Copy/Merge}From in each ProtoBuf class + * split the accessor annotations according to their operation + * introduce proto message injector + * let proto message injector decide whether to calculate address info based on field descriptor and access type. + * Disable LITE_RUNTIME injector annotations + * move callback and @protoc_insertion_point after internal set of enum fields + * Improve ExtractFieldInfo codegen for string fields with oneof or default value + * Rename MessageInjector to FieldAccessListener + * Change the API of FieldAccessListener to support callbacks for info extraction + * make field_access_injector private + +2021-05-07 version 3.17.1 (C++/Java/Python/PHP/Objective-C/C#/Ruby/JavaScript) + PHP + * Fixed JSON parser to allow multiple values from the same oneof as long as + all but one are null. + + Ruby + * Fixed JSON parser to allow multiple values from the same oneof as long as + all but one are null. + + 2021-05-07 version 3.17.0 (C++/Java/Python/PHP/Objective-C/C#/Ruby/JavaScript) Protocol Compiler diff --git a/README.md b/README.md index 118b896668..618dc2a775 100644 --- a/README.md +++ b/README.md @@ -52,18 +52,18 @@ Protobuf supports several different programming languages. 
For each programming language, you can find instructions in the corresponding source directory about how to install protobuf runtime for that specific language: -| Language | Source | Ubuntu | MacOS | Windows | -|--------------------------------------|-------------------------------------------------------------|--------|-------|---------| -| C++ (include C++ runtime and protoc) | [src](src) | [![Build status](https://storage.googleapis.com/protobuf-kokoro-badges/status-badge/linux-cpp_distcheck.png)](https://fusion.corp.google.com/projectanalysis/current/KOKORO/prod:protobuf%2Fgithub%2Fmaster%2Fubuntu%2Fcpp_distcheck%2Fcontinuous)
[![Build status](https://storage.googleapis.com/protobuf-kokoro-badges/status-badge/linux-bazel.png)](https://fusion.corp.google.com/projectanalysis/current/KOKORO/prod:protobuf%2Fgithub%2Fmaster%2Fubuntu%2Fbazel%2Fcontinuous)
[![Build status](https://storage.googleapis.com/protobuf-kokoro-badges/status-badge/linux-dist_install.png)](https://fusion.corp.google.com/projectanalysis/current/KOKORO/prod:protobuf%2Fgithub%2Fmaster%2Fubuntu%2Fdist_install%2Fcontinuous) | [![Build status](https://storage.googleapis.com/protobuf-kokoro-badges/status-badge/macos-cpp.png)](https://fusion.corp.google.com/projectanalysis/current/KOKORO/prod:protobuf%2Fgithub%2Fmaster%2Fmacos%2Fcpp%2Fcontinuous)
[![Build status](https://storage.googleapis.com/protobuf-kokoro-badges/status-badge/macos-cpp_distcheck.png)](https://fusion.corp.google.com/projectanalysis/current/KOKORO/prod:protobuf%2Fgithub%2Fmaster%2Fmacos%2Fcpp_distcheck%2Fcontinuous) | [![Build status](https://ci.appveyor.com/api/projects/status/73ctee6ua4w2ruin?svg=true)](https://ci.appveyor.com/project/protobuf/protobuf) | -| Java | [java](java) | [![Build status](https://storage.googleapis.com/protobuf-kokoro-badges/status-badge/linux-java_compatibility.png)](https://fusion.corp.google.com/projectanalysis/current/KOKORO/prod:protobuf%2Fgithub%2Fmaster%2Fubuntu%2Fjava_compatibility%2Fcontinuous)
[![Build status](https://storage.googleapis.com/protobuf-kokoro-badges/status-badge/linux-java_jdk7.png)](https://fusion.corp.google.com/projectanalysis/current/KOKORO/prod:protobuf%2Fgithub%2Fmaster%2Fubuntu%2Fjava_jdk7%2Fcontinuous)
[![Build status](https://storage.googleapis.com/protobuf-kokoro-badges/status-badge/linux-java_oracle7.png)](https://fusion.corp.google.com/projectanalysis/current/KOKORO/prod:protobuf%2Fgithub%2Fmaster%2Fubuntu%2Fjava_oracle7%2Fcontinuous)
[![Build status](https://storage.googleapis.com/protobuf-kokoro-badges/status-badge/linux-java_linkage_monitor.png)](https://fusion.corp.google.com/projectanalysis/current/KOKORO/prod:protobuf%2Fgithub%2Fmaster%2Fubuntu%2Fjava_linkage_monitor%2Fcontinuous) | | | -| Python | [python](python) | [![Build status](https://storage.googleapis.com/protobuf-kokoro-badges/status-badge/linux-python27.png)](https://fusion.corp.google.com/projectanalysis/current/KOKORO/prod:protobuf%2Fgithub%2Fmaster%2Fubuntu%2Fpython27%2Fcontinuous)
[![Build status](https://storage.googleapis.com/protobuf-kokoro-badges/status-badge/linux-python35.png)](https://fusion.corp.google.com/projectanalysis/current/KOKORO/prod:protobuf%2Fgithub%2Fmaster%2Fubuntu%2Fpython35%2Fcontinuous)
[![Build status](https://storage.googleapis.com/protobuf-kokoro-badges/status-badge/linux-python36.png)](https://fusion.corp.google.com/projectanalysis/current/KOKORO/prod:protobuf%2Fgithub%2Fmaster%2Fubuntu%2Fpython36%2Fcontinuous)
[![Build status](https://storage.googleapis.com/protobuf-kokoro-badges/status-badge/linux-python37.png)](https://fusion.corp.google.com/projectanalysis/current/KOKORO/prod:protobuf%2Fgithub%2Fmaster%2Fubuntu%2Fpython37%2Fcontinuous)
[![Build status](https://storage.googleapis.com/protobuf-kokoro-badges/status-badge/linux-python_compatibility.png)](https://fusion.corp.google.com/projectanalysis/current/KOKORO/prod:protobuf%2Fgithub%2Fmaster%2Fubuntu%2Fpython_compatibility%2Fcontinuous)
[![Build status](https://storage.googleapis.com/protobuf-kokoro-badges/status-badge/linux-python27_cpp.png)](https://fusion.corp.google.com/projectanalysis/current/KOKORO/prod:protobuf%2Fgithub%2Fmaster%2Fubuntu%2Fpython27_cpp%2Fcontinuous)
[![Build status](https://storage.googleapis.com/protobuf-kokoro-badges/status-badge/linux-python35_cpp.png)](https://fusion.corp.google.com/projectanalysis/current/KOKORO/prod:protobuf%2Fgithub%2Fmaster%2Fubuntu%2Fpython35_cpp%2Fcontinuous)
[![Build status](https://storage.googleapis.com/protobuf-kokoro-badges/status-badge/linux-python36_cpp.png)](https://fusion.corp.google.com/projectanalysis/current/KOKORO/prod:protobuf%2Fgithub%2Fmaster%2Fubuntu%2Fpython36_cpp%2Fcontinuous)
[![Build status](https://storage.googleapis.com/protobuf-kokoro-badges/status-badge/linux-python37_cpp.png)](https://fusion.corp.google.com/projectanalysis/current/KOKORO/prod:protobuf%2Fgithub%2Fmaster%2Fubuntu%2Fpython37_cpp%2Fcontinuous)
[![Build status](https://storage.googleapis.com/protobuf-kokoro-badges/status-badge/linux-python-release.png)](https://fusion.corp.google.com/projectanalysis/current/KOKORO/prod:protobuf%2Fgithub%2Fmaster%2Fubuntu%2Fpython_release%2Fcontinuous) | [![Build status](https://storage.googleapis.com/protobuf-kokoro-badges/status-badge/macos-python.png)](https://fusion.corp.google.com/projectanalysis/current/KOKORO/prod:protobuf%2Fgithub%2Fmaster%2Fmacos%2Fpython%2Fcontinuous)
[![Build status](https://storage.googleapis.com/protobuf-kokoro-badges/status-badge/macos-python_cpp.png)](https://fusion.corp.google.com/projectanalysis/current/KOKORO/prod:protobuf%2Fgithub%2Fmaster%2Fmacos%2Fpython_cpp%2Fcontinuous)
[![Build status](https://storage.googleapis.com/protobuf-kokoro-badges/status-badge/macos-python-release.png)](https://fusion.corp.google.com/projectanalysis/current/KOKORO/prod:protobuf%2Fgithub%2Fmaster%2Fmacos%2Fpython_release%2Fcontinuous) | [![Build status](https://storage.googleapis.com/protobuf-kokoro-badges/status-badge/windows-python-release.png)](https://fusion.corp.google.com/projectanalysis/current/KOKORO/prod:protobuf%2Fgithub%2Fmaster%2Fwindows%2Fpython_release%2Fcontinuous) | -| Objective-C | [objectivec](objectivec) | | [![Build status](https://storage.googleapis.com/protobuf-kokoro-badges/status-badge/macos-objectivec_cocoapods_integration.png)](https://fusion.corp.google.com/projectanalysis/current/KOKORO/prod:protobuf%2Fgithub%2Fmaster%2Fmacos%2Fobjectivec_cocoapods_integration%2Fcontinuous)
[![Build status](https://storage.googleapis.com/protobuf-kokoro-badges/status-badge/macos-objectivec_ios_debug.png)](https://fusion.corp.google.com/projectanalysis/current/KOKORO/prod:protobuf%2Fgithub%2Fmaster%2Fmacos%2Fobjectivec_ios_debug%2Fcontinuous)
[![Build status](https://storage.googleapis.com/protobuf-kokoro-badges/status-badge/macos-objectivec_ios_release.png)](https://fusion.corp.google.com/projectanalysis/current/KOKORO/prod:protobuf%2Fgithub%2Fmaster%2Fmacos%2Fobjectivec_ios_release%2Fcontinuous)
[![Build status](https://storage.googleapis.com/protobuf-kokoro-badges/status-badge/macos-objectivec_osx.png)](https://fusion.corp.google.com/projectanalysis/current/KOKORO/prod:protobuf%2Fgithub%2Fmaster%2Fmacos%2Fobjectivec_osx%2Fcontinuous) | | -| C# | [csharp](csharp) | [![Build status](https://storage.googleapis.com/protobuf-kokoro-badges/status-badge/linux-csharp.png)](https://fusion.corp.google.com/projectanalysis/current/KOKORO/prod:protobuf%2Fgithub%2Fmaster%2Fubuntu%2Fcsharp%2Fcontinuous) | | [![Build status](https://ci.appveyor.com/api/projects/status/73ctee6ua4w2ruin?svg=true)](https://ci.appveyor.com/project/protobuf/protobuf)
[![Build status](https://storage.googleapis.com/protobuf-kokoro-badges/status-badge/windows-csharp-release.png)](https://fusion.corp.google.com/projectanalysis/current/KOKORO/prod:protobuf%2Fgithub%2Fmaster%2Fwindows%2Fcsharp_release%2Fcontinuous) | -| JavaScript | [js](js) | [![Build status](https://storage.googleapis.com/protobuf-kokoro-badges/status-badge/linux-javascript.png)](https://fusion.corp.google.com/projectanalysis/current/KOKORO/prod:protobuf%2Fgithub%2Fmaster%2Fubuntu%2Fjavascript%2Fcontinuous) | [![Build status](https://storage.googleapis.com/protobuf-kokoro-badges/status-badge/macos-javascript.png)](https://fusion.corp.google.com/projectanalysis/current/KOKORO/prod:protobuf%2Fgithub%2Fmaster%2Fmacos%2Fjavascript%2Fcontinuous) | | -| Ruby | [ruby](ruby) | [![Build status](https://storage.googleapis.com/protobuf-kokoro-badges/status-badge/linux-ruby23.png)](https://fusion.corp.google.com/projectanalysis/current/KOKORO/prod:protobuf%2Fgithub%2Fmaster%2Fubuntu%2Fruby23%2Fcontinuous)
[![Build status](https://storage.googleapis.com/protobuf-kokoro-badges/status-badge/linux-ruby24.png)](https://fusion.corp.google.com/projectanalysis/current/KOKORO/prod:protobuf%2Fgithub%2Fmaster%2Fubuntu%2Fruby24%2Fcontinuous)
[![Build status](https://storage.googleapis.com/protobuf-kokoro-badges/status-badge/linux-ruby25.png)](https://fusion.corp.google.com/projectanalysis/current/KOKORO/prod:protobuf%2Fgithub%2Fmaster%2Fubuntu%2Fruby25%2Fcontinuous)
[![Build status](https://storage.googleapis.com/protobuf-kokoro-badges/status-badge/linux-ruby26.png)](https://fusion.corp.google.com/projectanalysis/current/KOKORO/prod:protobuf%2Fgithub%2Fmaster%2Fubuntu%2Fruby26%2Fcontinuous)
[![Build status](https://storage.googleapis.com/protobuf-kokoro-badges/status-badge/linux-ruby-release.png)](https://fusion.corp.google.com/projectanalysis/current/KOKORO/prod:protobuf%2Fgithub%2Fmaster%2Fubuntu%2Fruby_release%2Fcontinuous) | [![Build status](https://storage.googleapis.com/protobuf-kokoro-badges/status-badge/macos-ruby23.png)](https://fusion.corp.google.com/projectanalysis/current/KOKORO/prod:protobuf%2Fgithub%2Fmaster%2Fmacos%2Fruby23%2Fcontinuous)
[![Build status](https://storage.googleapis.com/protobuf-kokoro-badges/status-badge/macos-ruby24.png)](https://fusion.corp.google.com/projectanalysis/current/KOKORO/prod:protobuf%2Fgithub%2Fmaster%2Fmacos%2Fruby24%2Fcontinuous)
[![Build status](https://storage.googleapis.com/protobuf-kokoro-badges/status-badge/macos-ruby25.png)](https://fusion.corp.google.com/projectanalysis/current/KOKORO/prod:protobuf%2Fgithub%2Fmaster%2Fmacos%2Fruby25%2Fcontinuous)
[![Build status](https://storage.googleapis.com/protobuf-kokoro-badges/status-badge/macos-ruby26.png)](https://fusion.corp.google.com/projectanalysis/current/KOKORO/prod:protobuf%2Fgithub%2Fmaster%2Fmacos%2Fruby26%2Fcontinuous)
[![Build status](https://storage.googleapis.com/protobuf-kokoro-badges/status-badge/macos-ruby-release.png)](https://fusion.corp.google.com/projectanalysis/current/KOKORO/prod:protobuf%2Fgithub%2Fmaster%2Fmacos%2Fruby_release%2Fcontinuous) | | -| Go | [protocolbuffers/protobuf-go](https://github.com/protocolbuffers/protobuf-go) | | | | -| PHP | [php](php) | [![Build status](https://storage.googleapis.com/protobuf-kokoro-badges/status-badge/linux-php_all.png)](https://fusion.corp.google.com/projectanalysis/current/KOKORO/prod:protobuf%2Fgithub%2Fmaster%2Fubuntu%2Fphp_all%2Fcontinuous)
[![Build status](https://storage.googleapis.com/protobuf-kokoro-badges/status-badge/linux-32-bit.png)](https://fusion.corp.google.com/projectanalysis/current/KOKORO/prod:protobuf%2Fgithub%2Fmaster%2Fubuntu%2F32-bit%2Fcontinuous)
[![Build status](https://storage.googleapis.com/protobuf-kokoro-badges/status-badge/linux-php80.png)](https://fusion.corp.google.com/projectanalysis/current/KOKORO/prod:protobuf%2Fgithub%2Fmaster%2Fubuntu%2Fphp80%2Fcontinuous) | [![Build status](https://storage.googleapis.com/protobuf-kokoro-badges/status-badge/macos-php5.6_mac.png)](https://fusion.corp.google.com/projectanalysis/current/KOKORO/prod:protobuf%2Fgithub%2Fmaster%2Fmacos%2Fphp5.6_mac%2Fcontinuous)
[![Build status](https://storage.googleapis.com/protobuf-kokoro-badges/status-badge/macos-php7.0_mac.png)](https://fusion.corp.google.com/projectanalysis/current/KOKORO/prod:protobuf%2Fgithub%2Fmaster%2Fmacos%2Fphp7.0_mac%2Fcontinuous) | | -| Dart | [dart-lang/protobuf](https://github.com/dart-lang/protobuf) | [![Build Status](https://travis-ci.org/dart-lang/protobuf.svg?branch=master)](https://travis-ci.org/dart-lang/protobuf) | | | +| Language | Source | +|--------------------------------------|-------------------------------------------------------------| +| C++ (include C++ runtime and protoc) | [src](src) | +| Java | [java](java) | +| Python | [python](python) | +| Objective-C | [objectivec](objectivec) | +| C# | [csharp](csharp) | +| JavaScript | [js](js) | +| Ruby | [ruby](ruby) | +| Go | [protocolbuffers/protobuf-go](https://github.com/protocolbuffers/protobuf-go)| +| PHP | [php](php) | +| Dart | [dart-lang/protobuf](https://github.com/dart-lang/protobuf) | Quick Start ----------- diff --git a/cmake/libprotobuf.cmake b/cmake/libprotobuf.cmake index a5be494fb7..479e362c89 100644 --- a/cmake/libprotobuf.cmake +++ b/cmake/libprotobuf.cmake @@ -11,6 +11,7 @@ set(libprotobuf_files ${protobuf_source_dir}/src/google/protobuf/dynamic_message.cc ${protobuf_source_dir}/src/google/protobuf/empty.pb.cc ${protobuf_source_dir}/src/google/protobuf/extension_set_heavy.cc + ${protobuf_source_dir}/src/google/protobuf/field_access_listener.cc ${protobuf_source_dir}/src/google/protobuf/field_mask.pb.cc ${protobuf_source_dir}/src/google/protobuf/generated_message_reflection.cc ${protobuf_source_dir}/src/google/protobuf/generated_message_table_driven.cc @@ -65,6 +66,7 @@ set(libprotobuf_includes ${protobuf_source_dir}/src/google/protobuf/duration.pb.h ${protobuf_source_dir}/src/google/protobuf/dynamic_message.h ${protobuf_source_dir}/src/google/protobuf/empty.pb.h + ${protobuf_source_dir}/src/google/protobuf/field_access_listener.h 
${protobuf_source_dir}/src/google/protobuf/field_mask.pb.h ${protobuf_source_dir}/src/google/protobuf/generated_message_reflection.h ${protobuf_source_dir}/src/google/protobuf/io/gzip_stream.h diff --git a/conformance/failure_list_java.txt b/conformance/failure_list_java.txt index b29a63f5e4..808e230eba 100644 --- a/conformance/failure_list_java.txt +++ b/conformance/failure_list_java.txt @@ -42,7 +42,3 @@ Required.Proto3.JsonInput.Int32FieldPlusSign Required.Proto3.JsonInput.RepeatedFieldWrongElementTypeExpectingStringsGotBool Required.Proto3.JsonInput.RepeatedFieldWrongElementTypeExpectingStringsGotInt Required.Proto3.JsonInput.StringFieldNotAString -Required.Proto3.ProtobufInput.PrematureEofInDelimitedDataForKnownNonRepeatedValue.MESSAGE -Required.Proto3.ProtobufInput.PrematureEofInDelimitedDataForKnownRepeatedValue.MESSAGE -Required.Proto2.ProtobufInput.PrematureEofInDelimitedDataForKnownNonRepeatedValue.MESSAGE -Required.Proto2.ProtobufInput.PrematureEofInDelimitedDataForKnownRepeatedValue.MESSAGE diff --git a/conformance/failure_list_php_c.txt b/conformance/failure_list_php_c.txt index 1982029112..63c7e8a024 100644 --- a/conformance/failure_list_php_c.txt +++ b/conformance/failure_list_php_c.txt @@ -1,4 +1,2 @@ Recommended.Proto2.JsonInput.FieldNameExtension.Validator Required.Proto2.JsonInput.StoresDefaultPrimitive.Validator -Required.Proto3.JsonInput.OneofFieldNullSecond.JsonOutput -Required.Proto3.JsonInput.OneofFieldNullSecond.ProtobufOutput diff --git a/conformance/failure_list_ruby.txt b/conformance/failure_list_ruby.txt index ea5de36609..4938202ad7 100644 --- a/conformance/failure_list_ruby.txt +++ b/conformance/failure_list_ruby.txt @@ -56,5 +56,3 @@ Recommended.Proto3.ProtobufInput.ValidDataRepeated.UINT32.PackedInput.UnpackedOu Recommended.Proto3.ProtobufInput.ValidDataRepeated.UINT32.UnpackedInput.UnpackedOutput.ProtobufOutput Recommended.Proto3.ProtobufInput.ValidDataRepeated.UINT64.PackedInput.UnpackedOutput.ProtobufOutput 
Recommended.Proto3.ProtobufInput.ValidDataRepeated.UINT64.UnpackedInput.UnpackedOutput.ProtobufOutput -Required.Proto3.JsonInput.OneofFieldNullSecond.JsonOutput -Required.Proto3.JsonInput.OneofFieldNullSecond.ProtobufOutput diff --git a/csharp/src/Google.Protobuf.Test/CodedInputStreamTest.cs b/csharp/src/Google.Protobuf.Test/CodedInputStreamTest.cs index 0ad286f378..5e72525fc9 100644 --- a/csharp/src/Google.Protobuf.Test/CodedInputStreamTest.cs +++ b/csharp/src/Google.Protobuf.Test/CodedInputStreamTest.cs @@ -161,12 +161,21 @@ namespace Google.Protobuf private static void AssertReadFromParseContext(ReadOnlySequence input, ParseContextAssertAction assertAction, bool assertIsAtEnd) { + // Check as ReadOnlySequence ParseContext.Initialize(input, out ParseContext parseCtx); assertAction(ref parseCtx); if (assertIsAtEnd) { Assert.IsTrue(SegmentedBufferHelper.IsAtEnd(ref parseCtx.buffer, ref parseCtx.state)); } + + // Check as ReadOnlySpan + ParseContext.Initialize(input.ToArray().AsSpan(), out ParseContext spanParseContext); + assertAction(ref spanParseContext); + if (assertIsAtEnd) + { + Assert.IsTrue(SegmentedBufferHelper.IsAtEnd(ref spanParseContext.buffer, ref spanParseContext.state)); + } } [Test] diff --git a/csharp/src/Google.Protobuf.Test/MessageParsingHelpers.cs b/csharp/src/Google.Protobuf.Test/MessageParsingHelpers.cs index 65d2fe0395..05f1e36f96 100644 --- a/csharp/src/Google.Protobuf.Test/MessageParsingHelpers.cs +++ b/csharp/src/Google.Protobuf.Test/MessageParsingHelpers.cs @@ -41,32 +41,38 @@ namespace Google.Protobuf { public static void AssertReadingMessage(MessageParser parser, byte[] bytes, Action assert) where T : IMessage { - var parsedStream = parser.ParseFrom(bytes); + var parsedMsg = parser.ParseFrom(bytes); + assert(parsedMsg); // Load content as single segment - var parsedBuffer = parser.ParseFrom(new ReadOnlySequence(bytes)); - assert(parsedBuffer); + parsedMsg = parser.ParseFrom(new ReadOnlySequence(bytes)); + assert(parsedMsg); // Load 
content as multiple segments - parsedBuffer = parser.ParseFrom(ReadOnlySequenceFactory.CreateWithContent(bytes)); - assert(parsedBuffer); + parsedMsg = parser.ParseFrom(ReadOnlySequenceFactory.CreateWithContent(bytes)); + assert(parsedMsg); - assert(parsedStream); + // Load content as ReadOnlySpan + parsedMsg = parser.ParseFrom(new ReadOnlySpan(bytes)); + assert(parsedMsg); } public static void AssertReadingMessage(MessageParser parser, byte[] bytes, Action assert) { - var parsedStream = parser.ParseFrom(bytes); + var parsedMsg = parser.ParseFrom(bytes); + assert(parsedMsg); // Load content as single segment - var parsedBuffer = parser.ParseFrom(new ReadOnlySequence(bytes)); - assert(parsedBuffer); + parsedMsg = parser.ParseFrom(new ReadOnlySequence(bytes)); + assert(parsedMsg); // Load content as multiple segments - parsedBuffer = parser.ParseFrom(ReadOnlySequenceFactory.CreateWithContent(bytes)); - assert(parsedBuffer); + parsedMsg = parser.ParseFrom(ReadOnlySequenceFactory.CreateWithContent(bytes)); + assert(parsedMsg); - assert(parsedStream); + // Load content as ReadOnlySpan + parsedMsg = parser.ParseFrom(new ReadOnlySpan(bytes)); + assert(parsedMsg); } public static void AssertReadingMessageThrows(MessageParser parser, byte[] bytes) @@ -76,6 +82,8 @@ namespace Google.Protobuf Assert.Throws(() => parser.ParseFrom(bytes)); Assert.Throws(() => parser.ParseFrom(new ReadOnlySequence(bytes))); + + Assert.Throws(() => parser.ParseFrom(new ReadOnlySpan(bytes))); } public static void AssertRoundtrip(MessageParser parser, T message, Action additionalAssert = null) where T : IMessage @@ -87,20 +95,24 @@ namespace Google.Protobuf message.WriteTo(bufferWriter); Assert.AreEqual(bytes, bufferWriter.WrittenSpan.ToArray(), "Both serialization approaches need to result in the same data."); + var parsedMsg = parser.ParseFrom(bytes); + Assert.AreEqual(message, parsedMsg); + additionalAssert?.Invoke(parsedMsg); + // Load content as single segment - var parsedBuffer = 
parser.ParseFrom(new ReadOnlySequence(bytes)); - Assert.AreEqual(message, parsedBuffer); - additionalAssert?.Invoke(parsedBuffer); + parsedMsg = parser.ParseFrom(new ReadOnlySequence(bytes)); + Assert.AreEqual(message, parsedMsg); + additionalAssert?.Invoke(parsedMsg); // Load content as multiple segments - parsedBuffer = parser.ParseFrom(ReadOnlySequenceFactory.CreateWithContent(bytes)); - Assert.AreEqual(message, parsedBuffer); - additionalAssert?.Invoke(parsedBuffer); - - var parsedStream = parser.ParseFrom(bytes); - - Assert.AreEqual(message, parsedStream); - additionalAssert?.Invoke(parsedStream); + parsedMsg = parser.ParseFrom(ReadOnlySequenceFactory.CreateWithContent(bytes)); + Assert.AreEqual(message, parsedMsg); + additionalAssert?.Invoke(parsedMsg); + + // Load content as ReadOnlySpan + parsedMsg = parser.ParseFrom(new ReadOnlySpan(bytes)); + Assert.AreEqual(message, parsedMsg); + additionalAssert?.Invoke(parsedMsg); } public static void AssertWritingMessage(IMessage message) diff --git a/csharp/src/Google.Protobuf/CodedInputStream.cs b/csharp/src/Google.Protobuf/CodedInputStream.cs index b09f96ce28..27b23c0d9c 100644 --- a/csharp/src/Google.Protobuf/CodedInputStream.cs +++ b/csharp/src/Google.Protobuf/CodedInputStream.cs @@ -435,8 +435,7 @@ namespace Google.Protobuf // we will need to switch back again to CodedInputStream-based parsing (which involves copying and storing the state) to be able to // invoke the legacy MergeFrom(CodedInputStream) method. // For now, this inefficiency is fine, considering this is only a backward-compatibility scenario (and regenerating the code fixes it). 
- var span = new ReadOnlySpan(buffer); - ParseContext.Initialize(ref span, ref state, out ParseContext ctx); + ParseContext.Initialize(buffer.AsSpan(), ref state, out ParseContext ctx); try { ParsingPrimitivesMessages.ReadMessage(ref ctx, builder); diff --git a/csharp/src/Google.Protobuf/MessageExtensions.cs b/csharp/src/Google.Protobuf/MessageExtensions.cs index 36a9df7286..c4b3f82343 100644 --- a/csharp/src/Google.Protobuf/MessageExtensions.cs +++ b/csharp/src/Google.Protobuf/MessageExtensions.cs @@ -79,6 +79,15 @@ namespace Google.Protobuf public static void MergeFrom(this IMessage message, Stream input) => MergeFrom(message, input, false, null); + /// + /// Merges data from the given span into an existing message. + /// + /// The message to merge the data into. + /// Span containing the data to merge, which must be protobuf-encoded binary data. + [SecuritySafeCritical] + public static void MergeFrom(this IMessage message, ReadOnlySpan span) => + MergeFrom(message, span, false, null); + /// /// Merges length-delimited data from the given stream into an existing message. 
/// @@ -294,6 +303,16 @@ namespace Google.Protobuf ParsingPrimitivesMessages.CheckReadEndOfStreamTag(ref ctx.state); } + [SecuritySafeCritical] + internal static void MergeFrom(this IMessage message, ReadOnlySpan data, bool discardUnknownFields, ExtensionRegistry registry) + { + ParseContext.Initialize(data, out ParseContext ctx); + ctx.DiscardUnknownFields = discardUnknownFields; + ctx.ExtensionRegistry = registry; + ParsingPrimitivesMessages.ReadRawMessage(ref ctx, message); + ParsingPrimitivesMessages.CheckReadEndOfStreamTag(ref ctx.state); + } + internal static void MergeDelimitedFrom(this IMessage message, Stream input, bool discardUnknownFields, ExtensionRegistry registry) { ProtoPreconditions.CheckNotNull(message, "message"); diff --git a/csharp/src/Google.Protobuf/MessageParser.cs b/csharp/src/Google.Protobuf/MessageParser.cs index f8b26c2348..30a25a8698 100644 --- a/csharp/src/Google.Protobuf/MessageParser.cs +++ b/csharp/src/Google.Protobuf/MessageParser.cs @@ -128,6 +128,19 @@ namespace Google.Protobuf return message; } + /// + /// Parses a message from the given span. + /// + /// The data to parse. + /// The parsed message. + [SecuritySafeCritical] + public IMessage ParseFrom(ReadOnlySpan data) + { + IMessage message = factory(); + message.MergeFrom(data, DiscardUnknownFields, Extensions); + return message; + } + /// /// Parses a length-delimited message from the given stream. /// @@ -315,6 +328,19 @@ namespace Google.Protobuf return message; } + /// + /// Parses a message from the given span. + /// + /// The data to parse. + /// The parsed message. + [SecuritySafeCritical] + public new T ParseFrom(ReadOnlySpan data) + { + T message = factory(); + message.MergeFrom(data, DiscardUnknownFields, Extensions); + return message; + } + /// /// Parses a length-delimited message from the given stream. 
/// diff --git a/csharp/src/Google.Protobuf/ParseContext.cs b/csharp/src/Google.Protobuf/ParseContext.cs index bf46236565..7b278b5a92 100644 --- a/csharp/src/Google.Protobuf/ParseContext.cs +++ b/csharp/src/Google.Protobuf/ParseContext.cs @@ -58,8 +58,27 @@ namespace Google.Protobuf internal ReadOnlySpan buffer; internal ParserInternalState state; + /// + /// Initialize a , building all from defaults and + /// the given . + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + internal static void Initialize(ReadOnlySpan buffer, out ParseContext ctx) + { + ParserInternalState state = default; + state.sizeLimit = DefaultSizeLimit; + state.recursionLimit = DefaultRecursionLimit; + state.currentLimit = int.MaxValue; + state.bufferSize = buffer.Length; + + Initialize(buffer, ref state, out ctx); + } + + /// + /// Initialize a using existing , e.g. from . + /// [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal static void Initialize(ref ReadOnlySpan buffer, ref ParserInternalState state, out ParseContext ctx) + internal static void Initialize(ReadOnlySpan buffer, ref ParserInternalState state, out ParseContext ctx) { ctx.buffer = buffer; ctx.state = state; diff --git a/java/core/src/main/java/com/google/protobuf/CodedInputStream.java b/java/core/src/main/java/com/google/protobuf/CodedInputStream.java index 37b986d7a5..1060c5af2d 100644 --- a/java/core/src/main/java/com/google/protobuf/CodedInputStream.java +++ b/java/core/src/main/java/com/google/protobuf/CodedInputStream.java @@ -873,6 +873,9 @@ public abstract class CodedInputStream { builder.mergeFrom(this, extensionRegistry); checkLastTagWas(0); --recursionDepth; + if (getBytesUntilLimit() != 0) { + throw InvalidProtocolBufferException.truncatedMessage(); + } popLimit(oldLimit); } @@ -889,6 +892,9 @@ public abstract class CodedInputStream { T result = parser.parsePartialFrom(this, extensionRegistry); checkLastTagWas(0); --recursionDepth; + if (getBytesUntilLimit() != 0) { + throw 
InvalidProtocolBufferException.truncatedMessage(); + } popLimit(oldLimit); return result; } @@ -1595,6 +1601,9 @@ public abstract class CodedInputStream { builder.mergeFrom(this, extensionRegistry); checkLastTagWas(0); --recursionDepth; + if (getBytesUntilLimit() != 0) { + throw InvalidProtocolBufferException.truncatedMessage(); + } popLimit(oldLimit); } @@ -1611,6 +1620,9 @@ public abstract class CodedInputStream { T result = parser.parsePartialFrom(this, extensionRegistry); checkLastTagWas(0); --recursionDepth; + if (getBytesUntilLimit() != 0) { + throw InvalidProtocolBufferException.truncatedMessage(); + } popLimit(oldLimit); return result; } @@ -2392,6 +2404,9 @@ public abstract class CodedInputStream { builder.mergeFrom(this, extensionRegistry); checkLastTagWas(0); --recursionDepth; + if (getBytesUntilLimit() != 0) { + throw InvalidProtocolBufferException.truncatedMessage(); + } popLimit(oldLimit); } @@ -2408,6 +2423,9 @@ public abstract class CodedInputStream { T result = parser.parsePartialFrom(this, extensionRegistry); checkLastTagWas(0); --recursionDepth; + if (getBytesUntilLimit() != 0) { + throw InvalidProtocolBufferException.truncatedMessage(); + } popLimit(oldLimit); return result; } @@ -3489,6 +3507,9 @@ public abstract class CodedInputStream { builder.mergeFrom(this, extensionRegistry); checkLastTagWas(0); --recursionDepth; + if (getBytesUntilLimit() != 0) { + throw InvalidProtocolBufferException.truncatedMessage(); + } popLimit(oldLimit); } @@ -3505,6 +3526,9 @@ public abstract class CodedInputStream { T result = parser.parsePartialFrom(this, extensionRegistry); checkLastTagWas(0); --recursionDepth; + if (getBytesUntilLimit() != 0) { + throw InvalidProtocolBufferException.truncatedMessage(); + } popLimit(oldLimit); return result; } diff --git a/java/kotlin/pom.xml b/java/kotlin/pom.xml index e1c1c7f4ab..f5de8cd3bd 100644 --- a/java/kotlin/pom.xml +++ b/java/kotlin/pom.xml @@ -18,6 +18,7 @@ 1.5.0 + 1.4.32 @@ -234,7 +235,46 @@ + + org.jetbrains.dokka 
+ dokka-maven-plugin + ${dokka.version} + + + pre-site + + dokka + + + + + gcode/kotlin + ${project.basedir}/src/main/kotlin/com/google/protobuf + + + + https://developers.google.com/protocol-buffers/docs/reference/java/ + + + + + + org.jetbrains.dokka + gfm-plugin + ${dokka.version} + + + + + + + + jcenter + JCenter + https://jcenter.bintray.com/ + + diff --git a/js/README.md b/js/README.md index 2f5490b108..dcc9e2b698 100644 --- a/js/README.md +++ b/js/README.md @@ -39,9 +39,8 @@ If you want, you can compile `protoc` from source instead. To do this follow the instructions in [the top-level README](https://github.com/protocolbuffers/protobuf/blob/master/src/README.md). -Once you have `protoc` compiled, you can run the tests by typing: +Once you have `protoc` compiled, you can run the tests provided with the project to verify that everything works correctly. To do this, download the Protocol Buffers source code from the release page linked above, extract it, and navigate to the folder named `js`, which contains a `package.json` file and a series of test files. In this folder, run the commands below to run the tests automatically. - $ cd js $ npm install $ npm test diff --git a/kokoro/linux/aarch64/php_build_and_run_tests_with_qemu_aarch64.sh b/kokoro/linux/aarch64/php_build_and_run_tests_with_qemu_aarch64.sh new file mode 100755 index 0000000000..bcb3d9dd94 --- /dev/null +++ b/kokoro/linux/aarch64/php_build_and_run_tests_with_qemu_aarch64.sh @@ -0,0 +1,18 @@ +#!/bin/bash + +set -ex + +# Install composer +curl -sS https://getcomposer.org/installer | php +mkdir -p "$HOME/bin" +mv composer.phar "$HOME/bin/composer" +PATH="$HOME/bin:$PATH" + +# go to the repo root +cd $(dirname $0)/../../.. 
+ +cd php + +composer install +composer test +composer test_c diff --git a/kokoro/linux/aarch64/test_php_aarch64.sh b/kokoro/linux/aarch64/test_php_aarch64.sh new file mode 100755 index 0000000000..b11ef43071 --- /dev/null +++ b/kokoro/linux/aarch64/test_php_aarch64.sh @@ -0,0 +1,31 @@ +#!/bin/bash + +set -ex + +# go to the repo root +cd $(dirname $0)/../../.. + +# there is no php testing docker image readily available, so we build +# our own. It's an aarch64 image, but that's fine since qemu will +# automatically be used to run the commands in the dockerfile. +docker build -t testimage_protobuf_php_arm64v8 kokoro/linux/aarch64/testimage_protobuf_php_arm64v8 + +if [[ -t 0 ]]; then + DOCKER_TTY_ARGS="-it" +else + # The input device on kokoro is not a TTY, so -it does not work. + DOCKER_TTY_ARGS= +fi + +# crosscompile protoc as we will later need it for the php build. +# we build it under the dockcross/manylinux2014-aarch64 image so that the resulting protoc binary is compatible +# with a wide range of linux distros (including any docker images we will use later to build and test php) +kokoro/linux/aarch64/dockcross_helpers/run_dockcross_manylinux2014_aarch64.sh kokoro/linux/aarch64/protoc_crosscompile_aarch64.sh + +# use an actual aarch64 docker image (with a real aarch64 php) to run build & test protobuf php under an emulator +# * mount the protobuf root as /work to be able to access the crosscompiled files +# * to avoid running the process inside docker as root (which can pollute the workspace with files owned by root), we force +# running under current user's UID and GID. To be able to do that, we need to provide a home directory for the user +# otherwise the UID would be homeless under the docker container and composer install wouldn't work.
For simplicity, +# we just map the user's home to a throwaway temporary directory +docker run $DOCKER_TTY_ARGS --rm --user "$(id -u):$(id -g)" -e "HOME=/home/fake-user" -v "$(mktemp -d):/home/fake-user" -v "$(pwd)":/work -w /work testimage_protobuf_php_arm64v8 kokoro/linux/aarch64/php_build_and_run_tests_with_qemu_aarch64.sh diff --git a/kokoro/linux/aarch64/testimage_protobuf_php_arm64v8/Dockerfile b/kokoro/linux/aarch64/testimage_protobuf_php_arm64v8/Dockerfile new file mode 100644 index 0000000000..eae1bbc4bd --- /dev/null +++ b/kokoro/linux/aarch64/testimage_protobuf_php_arm64v8/Dockerfile @@ -0,0 +1,3 @@ +FROM arm64v8/debian:buster + +RUN apt-get update && apt-get install -y php7.3-cli php7.3-dev php7.3-bcmath composer phpunit curl git valgrind && apt-get clean diff --git a/kokoro/linux/php_aarch64/build.sh b/kokoro/linux/php_aarch64/build.sh new file mode 100755 index 0000000000..eba8b5538b --- /dev/null +++ b/kokoro/linux/php_aarch64/build.sh @@ -0,0 +1,16 @@ +#!/bin/bash +# +# This is the top-level script we give to Kokoro as the entry point for +# running the "continuous" and "presubmit" jobs. + +set -ex + +# Change to repo root +cd $(dirname $0)/../../.. + +# Initialize any submodules. 
+git submodule update --init --recursive + +kokoro/linux/aarch64/qemu_helpers/prepare_qemu.sh + +kokoro/linux/aarch64/test_php_aarch64.sh diff --git a/kokoro/linux/php_aarch64/continuous.cfg b/kokoro/linux/php_aarch64/continuous.cfg new file mode 100644 index 0000000000..ff20682dfb --- /dev/null +++ b/kokoro/linux/php_aarch64/continuous.cfg @@ -0,0 +1,11 @@ +# Config file for running tests in Kokoro + +# Location of the build script in repository +build_file: "protobuf/kokoro/linux/php_aarch64/build.sh" +timeout_mins: 120 + +action { + define_artifacts { + regex: "**/sponge_log.xml" + } +} diff --git a/kokoro/linux/php_aarch64/presubmit.cfg b/kokoro/linux/php_aarch64/presubmit.cfg new file mode 100644 index 0000000000..ff20682dfb --- /dev/null +++ b/kokoro/linux/php_aarch64/presubmit.cfg @@ -0,0 +1,11 @@ +# Config file for running tests in Kokoro + +# Location of the build script in repository +build_file: "protobuf/kokoro/linux/php_aarch64/build.sh" +timeout_mins: 120 + +action { + define_artifacts { + regex: "**/sponge_log.xml" + } +} diff --git a/objectivec/README.md b/objectivec/README.md index 2583779d38..bbe5726d45 100644 --- a/objectivec/README.md +++ b/objectivec/README.md @@ -194,4 +194,4 @@ Documentation The complete documentation for Protocol Buffers is available via the web at: - https://developers.google.com/protocol-buffers/ +https://developers.google.com/protocol-buffers/ diff --git a/objectivec/Tests/GPBDescriptorTests.m b/objectivec/Tests/GPBDescriptorTests.m index 6fa7202bc4..bdcc2e893c 100644 --- a/objectivec/Tests/GPBDescriptorTests.m +++ b/objectivec/Tests/GPBDescriptorTests.m @@ -305,7 +305,7 @@ GPBDescriptor *descriptor = [TestOneof2 descriptor]; // All fields should be listed. - XCTAssertEqual(descriptor.fields.count, 17U); + XCTAssertEqual(descriptor.fields.count, 21U); // There are two oneofs in there. 
XCTAssertEqual(descriptor.oneofs.count, 2U); @@ -330,7 +330,7 @@ GPBOneofDescriptor *oneofBar = [descriptor oneofWithName:@"bar"]; XCTAssertNotNil(oneofBar); - XCTAssertEqual(oneofBar.fields.count, 6U); + XCTAssertEqual(oneofBar.fields.count, 10U); // Pointer comparisons. XCTAssertEqual([oneofBar fieldWithNumber:TestOneof2_FieldNumber_BarString], diff --git a/php/ext/google/protobuf/message.c b/php/ext/google/protobuf/message.c index 2d9f9b4cc8..7cd7d23208 100644 --- a/php/ext/google/protobuf/message.c +++ b/php/ext/google/protobuf/message.c @@ -149,6 +149,9 @@ static bool Message_set(Message *intern, const upb_fielddef *f, zval *val) { } else if (upb_fielddef_isseq(f)) { msgval.array_val = RepeatedField_GetUpbArray(val, TypeInfo_Get(f), arena); if (!msgval.array_val) return false; + } else if (upb_fielddef_issubmsg(f) && Z_TYPE_P(val) == IS_NULL) { + upb_msg_clearfield(intern->msg, f); + return true; } else { if (!Convert_PhpToUpb(val, &msgval, TypeInfo_Get(f), arena)) return false; } @@ -198,8 +201,6 @@ static bool MessageEq(const upb_msg *m1, const upb_msg *m2, const upb_msgdef *m) !upb_msg_field_done(&i); upb_msg_field_next(&i)) { const upb_fielddef *f = upb_msg_iter_field(&i); - upb_msgval val1 = upb_msg_get(m1, f); - upb_msgval val2 = upb_msg_get(m2, f); if (upb_fielddef_haspresence(f)) { if (upb_msg_has(m1, f) != upb_msg_has(m2, f)) { @@ -208,6 +209,9 @@ static bool MessageEq(const upb_msg *m1, const upb_msg *m2, const upb_msgdef *m) if (!upb_msg_has(m1, f)) continue; } + upb_msgval val1 = upb_msg_get(m1, f); + upb_msgval val2 = upb_msg_get(m2, f); + if (upb_fielddef_ismap(f)) { if (!MapEq(val1.map_val, val2.map_val, MapType_Get(f))) return false; } else if (upb_fielddef_isseq(f)) { @@ -454,11 +458,6 @@ bool Message_GetUpbMessage(zval *val, const Descriptor *desc, upb_arena *arena, ZVAL_DEREF(val); } - if (Z_TYPE_P(val) == IS_NULL) { - *msg = NULL; - return true; - } - if (Z_TYPE_P(val) == IS_OBJECT && instanceof_function(Z_OBJCE_P(val), desc->class_entry)) 
{ Message *intern = (Message*)Z_OBJ_P(val); @@ -466,7 +465,8 @@ bool Message_GetUpbMessage(zval *val, const Descriptor *desc, upb_arena *arena, *msg = intern->msg; return true; } else { - zend_throw_exception_ex(NULL, 0, "Given value is not an instance of %s.", + zend_throw_exception_ex(zend_ce_type_error, 0, + "Given value is not an instance of %s.", ZSTR_VAL(desc->class_entry->name)); return false; } @@ -1051,7 +1051,10 @@ PHP_METHOD(Message, writeOneof) { f = upb_msgdef_itof(intern->desc->msgdef, field_num); - if (!Convert_PhpToUpb(val, &msgval, TypeInfo_Get(f), arena)) { + if (upb_fielddef_issubmsg(f) && Z_TYPE_P(val) == IS_NULL) { + upb_msg_clearfield(intern->msg, f); + return; + } else if (!Convert_PhpToUpb(val, &msgval, TypeInfo_Get(f), arena)) { return; } diff --git a/php/ext/google/protobuf/php-upb.c b/php/ext/google/protobuf/php-upb.c index 913dfad7e9..774c8d22eb 100644 --- a/php/ext/google/protobuf/php-upb.c +++ b/php/ext/google/protobuf/php-upb.c @@ -1,27 +1,54 @@ /* Amalgamated source file */ #include "php-upb.h" /* -* This is where we define macros used across upb. -* -* All of these macros are undef'd in port_undef.inc to avoid leaking them to -* users. -* -* The correct usage is: -* -* #include "upb/foobar.h" -* #include "upb/baz.h" -* -* // MUST be last included header. -* #include "upb/port_def.inc" -* -* // Code for this file. -* // <...> -* -* // Can be omitted for .c files, required for .h. -* #include "upb/port_undef.inc" -* -* This file is private and must not be included by users! -*/ + * Copyright (c) 2009-2021, Google LLC + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Google LLC nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * This is where we define macros used across upb. + * + * All of these macros are undef'd in port_undef.inc to avoid leaking them to + * users. + * + * The correct usage is: + * + * #include "upb/foobar.h" + * #include "upb/baz.h" + * + * // MUST be last included header. + * #include "upb/port_def.inc" + * + * // Code for this file. + * // <...> + * + * // Can be omitted for .c files, required for .h. + * #include "upb/port_undef.inc" + * + * This file is private and must not be included by users! 
+ */ #if !((defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L) || \ (defined(__cplusplus) && __cplusplus >= 201103L) || \ @@ -137,9 +164,40 @@ #define UPB_LONGJMP(buf, val) longjmp(buf, val) #endif +/* UPB_PTRADD(ptr, ofs): add pointer while avoiding "NULL + 0" UB */ +#define UPB_PTRADD(ptr, ofs) ((ofs) ? (ptr) + (ofs) : (ptr)) + /* Configure whether fasttable is switched on or not. *************************/ -#if defined(__x86_64__) && defined(__GNUC__) +#if defined(__has_attribute) +#define UPB_HAS_ATTRIBUTE(x) __has_attribute(x) +#else +#define UPB_HAS_ATTRIBUTE(x) 0 +#endif + +#if UPB_HAS_ATTRIBUTE(musttail) +#define UPB_MUSTTAIL __attribute__((musttail)) +#else +#define UPB_MUSTTAIL +#endif + +#undef UPB_HAS_ATTRIBUTE + +/* This check is not fully robust: it does not require that we have "musttail" + * support available. We need tail calls to avoid consuming arbitrary amounts + * of stack space. + * + * GCC/Clang can mostly be trusted to generate tail calls as long as + * optimization is enabled, but, debug builds will not generate tail calls + * unless "musttail" is available. + * + * We should probably either: + * 1. require that the compiler supports musttail. + * 2. add some fallback code for when musttail isn't available (ie. return + * instead of tail calling). This is safe and portable, but this comes at + * a CPU cost. + */ +#if (defined(__x86_64__) || defined(__aarch64__)) && defined(__GNUC__) #define UPB_FASTTABLE_SUPPORTED 1 #else #define UPB_FASTTABLE_SUPPORTED 0 @@ -150,7 +208,7 @@ * for example for testing or benchmarking. */ #if defined(UPB_ENABLE_FASTTABLE) #if !UPB_FASTTABLE_SUPPORTED -#error fasttable is x86-64 + Clang/GCC only +#error fasttable is x86-64/ARM64 only and requires GCC or Clang. #endif #define UPB_FASTTABLE 1 /* Define UPB_TRY_ENABLE_FASTTABLE to use fasttable if possible. 
@@ -194,8 +252,9 @@ void __asan_unpoison_memory_region(void const volatile *addr, size_t size); ((void)(addr), (void)(size)) #define UPB_UNPOISON_MEMORY_REGION(addr, size) \ ((void)(addr), (void)(size)) -#endif +#endif +/** upb/decode.c ************************************************************/ #include #include @@ -891,7 +950,7 @@ bool _upb_decode(const char *buf, size_t size, void *msg, state.end_group = DECODE_NOGROUP; state.arena.head = arena->head; state.arena.last_size = arena->last_size; - state.arena.cleanups = arena->cleanups; + state.arena.cleanup_metadata = arena->cleanup_metadata; state.arena.parent = arena; if (UPB_UNLIKELY(UPB_SETJMP(state.err))) { @@ -902,7 +961,7 @@ bool _upb_decode(const char *buf, size_t size, void *msg, arena->head.ptr = state.arena.head.ptr; arena->head.end = state.arena.head.end; - arena->cleanups = state.arena.cleanups; + arena->cleanup_metadata = state.arena.cleanup_metadata; return ok; } @@ -911,6 +970,8 @@ bool _upb_decode(const char *buf, size_t size, void *msg, #undef OP_VARPCK_LG2 #undef OP_STRING #undef OP_SUBMSG + +/** upb/encode.c ************************************************************/ /* We encode backwards, to avoid pre-computing lengths (one-pass encode). 
*/ @@ -1386,7 +1447,7 @@ char *upb_encode_ex(const void *msg, const upb_msglayout *l, int options, return ret; } - +/** upb/msg.c ************************************************************/ /** upb_msg *******************************************************************/ @@ -1517,7 +1578,7 @@ upb_map *_upb_map_new(upb_arena *a, size_t key_size, size_t value_size) { return NULL; } - upb_strtable_init2(&map->table, UPB_CTYPE_INT32, 4, upb_arena_alloc(a)); + upb_strtable_init(&map->table, 4, a); map->key_size = key_size; map->val_size = value_size; @@ -1638,11 +1699,13 @@ bool _upb_mapsorter_pushmap(_upb_mapsorter *s, upb_descriptortype_t key_type, qsort(&s->entries[sorted->start], map_size, sizeof(*s->entries), compar); return true; } + +/** upb/table.c ************************************************************/ /* -** upb_table Implementation -** -** Implementation is heavily inspired by Lua's ltable.c. -*/ + * upb_table Implementation + * + * Implementation is heavily inspired by Lua's ltable.c. + */ #include @@ -1663,9 +1726,15 @@ static const double MAX_LOAD = 0.85; * cache effects). The lower this is, the more memory we'll use. 
*/ static const double MIN_DENSITY = 0.1; -bool is_pow2(uint64_t v) { return v == 0 || (v & (v - 1)) == 0; } +static bool is_pow2(uint64_t v) { return v == 0 || (v & (v - 1)) == 0; } -int log2ceil(uint64_t v) { +static upb_value _upb_value_val(uint64_t val) { + upb_value ret; + _upb_value_setval(&ret, val); + return ret; +} + +static int log2ceil(uint64_t v) { int ret = 0; bool pow2 = is_pow2(v); while (v >>= 1) ret++; @@ -1673,11 +1742,7 @@ int log2ceil(uint64_t v) { return UPB_MIN(UPB_MAXARRSIZE, ret); } -char *upb_strdup(const char *s, upb_alloc *a) { - return upb_strdup2(s, strlen(s), a); -} - -char *upb_strdup2(const char *s, size_t len, upb_alloc *a) { +char *upb_strdup2(const char *s, size_t len, upb_arena *a) { size_t n; char *p; @@ -1686,7 +1751,7 @@ char *upb_strdup2(const char *s, size_t len, upb_alloc *a) { /* Always null-terminate, even if binary data; but don't rely on the input to * have a null-terminating byte since it may be a raw binary buffer. */ n = len + 1; - p = upb_malloc(a, n); + p = upb_arena_malloc(a, n); if (p) { memcpy(p, s, len); p[len] = 0; @@ -1721,16 +1786,24 @@ typedef bool eqlfunc_t(upb_tabkey k1, lookupkey_t k2); /* Base table (shared code) ***************************************************/ -/* For when we need to cast away const. 
*/ -static upb_tabent *mutable_entries(upb_table *t) { - return (upb_tabent*)t->entries; +static uint32_t upb_inthash(uintptr_t key) { + return (uint32_t)key; +} + +static const upb_tabent *upb_getentry(const upb_table *t, uint32_t hash) { + return t->entries + (hash & t->mask); +} + +static bool upb_arrhas(upb_tabval key) { + return key.val != (uint64_t)-1; } + static bool isfull(upb_table *t) { return t->count == t->max_count; } -static bool init(upb_table *t, uint8_t size_lg2, upb_alloc *a) { +static bool init(upb_table *t, uint8_t size_lg2, upb_arena *a) { size_t bytes; t->count = 0; @@ -1739,21 +1812,17 @@ static bool init(upb_table *t, uint8_t size_lg2, upb_alloc *a) { t->max_count = upb_table_size(t) * MAX_LOAD; bytes = upb_table_size(t) * sizeof(upb_tabent); if (bytes > 0) { - t->entries = upb_malloc(a, bytes); + t->entries = upb_arena_malloc(a, bytes); if (!t->entries) return false; - memset(mutable_entries(t), 0, bytes); + memset(t->entries, 0, bytes); } else { t->entries = NULL; } return true; } -static void uninit(upb_table *t, upb_alloc *a) { - upb_free(a, mutable_entries(t)); -} - static upb_tabent *emptyent(upb_table *t, upb_tabent *e) { - upb_tabent *begin = mutable_entries(t); + upb_tabent *begin = t->entries; upb_tabent *end = begin + upb_table_size(t); for (e = e + 1; e < end; e++) { if (upb_tabent_isempty(e)) return e; @@ -1903,9 +1972,9 @@ static size_t begin(const upb_table *t) { /* A simple "subclass" of upb_table that only adds a hash function for strings. 
*/ -static upb_tabkey strcopy(lookupkey_t k2, upb_alloc *a) { +static upb_tabkey strcopy(lookupkey_t k2, upb_arena *a) { uint32_t len = (uint32_t) k2.str.len; - char *str = upb_malloc(a, k2.str.len + sizeof(uint32_t) + 1); + char *str = upb_arena_malloc(a, k2.str.len + sizeof(uint32_t) + 1); if (str == NULL) return 0; memcpy(str, &len, sizeof(uint32_t)); if (k2.str.len) memcpy(str + sizeof(uint32_t), k2.str.str, k2.str.len); @@ -1929,9 +1998,7 @@ static bool streql(upb_tabkey k1, lookupkey_t k2) { return len == k2.str.len && (len == 0 || memcmp(str, k2.str.str, len) == 0); } -bool upb_strtable_init2(upb_strtable *t, upb_ctype_t ctype, - size_t expected_size, upb_alloc *a) { - UPB_UNUSED(ctype); /* TODO(haberman): rm */ +bool upb_strtable_init(upb_strtable *t, size_t expected_size, upb_arena *a) { // Multiply by approximate reciprocal of MAX_LOAD (0.85), with pow2 denominator. size_t need_entries = (expected_size + 1) * 1204 / 1024; UPB_ASSERT(need_entries >= expected_size * 0.85); @@ -1945,14 +2012,7 @@ void upb_strtable_clear(upb_strtable *t) { memset((char*)t->t.entries, 0, bytes); } -void upb_strtable_uninit2(upb_strtable *t, upb_alloc *a) { - size_t i; - for (i = 0; i < upb_table_size(&t->t); i++) - upb_free(a, (void*)t->t.entries[i].key); - uninit(&t->t, a); -} - -bool upb_strtable_resize(upb_strtable *t, size_t size_lg2, upb_alloc *a) { +bool upb_strtable_resize(upb_strtable *t, size_t size_lg2, upb_arena *a) { upb_strtable new_table; upb_strtable_iter i; @@ -1961,17 +2021,15 @@ bool upb_strtable_resize(upb_strtable *t, size_t size_lg2, upb_alloc *a) { upb_strtable_begin(&i, t); for ( ; !upb_strtable_done(&i); upb_strtable_next(&i)) { upb_strview key = upb_strtable_iter_key(&i); - upb_strtable_insert3( - &new_table, key.data, key.size, - upb_strtable_iter_value(&i), a); + upb_strtable_insert(&new_table, key.data, key.size, + upb_strtable_iter_value(&i), a); } - upb_strtable_uninit2(t, a); *t = new_table; return true; } -bool upb_strtable_insert3(upb_strtable 
*t, const char *k, size_t len, - upb_value v, upb_alloc *a) { +bool upb_strtable_insert(upb_strtable *t, const char *k, size_t len, + upb_value v, upb_arena *a) { lookupkey_t key; upb_tabkey tabkey; uint32_t hash; @@ -1998,19 +2056,11 @@ bool upb_strtable_lookup2(const upb_strtable *t, const char *key, size_t len, return lookup(&t->t, strkey2(key, len), v, hash, &streql); } -bool upb_strtable_remove3(upb_strtable *t, const char *key, size_t len, - upb_value *val, upb_alloc *alloc) { +bool upb_strtable_remove(upb_strtable *t, const char *key, size_t len, + upb_value *val) { uint32_t hash = table_hash(key, len); upb_tabkey tabkey; - if (rm(&t->t, strkey2(key, len), val, &tabkey, hash, &streql)) { - if (alloc) { - /* Arena-based allocs don't need to free and won't pass this. */ - upb_free(alloc, (void*)tabkey); - } - return true; - } else { - return false; - } + return rm(&t->t, strkey2(key, len), val, &tabkey, hash, &streql); } /* Iteration */ @@ -2108,7 +2158,7 @@ static void check(upb_inttable *t) { } bool upb_inttable_sizedinit(upb_inttable *t, size_t asize, int hsize_lg2, - upb_alloc *a) { + upb_arena *a) { size_t array_bytes; if (!init(&t->t, hsize_lg2, a)) return false; @@ -2117,9 +2167,8 @@ bool upb_inttable_sizedinit(upb_inttable *t, size_t asize, int hsize_lg2, t->array_size = UPB_MAX(1, asize); t->array_count = 0; array_bytes = t->array_size * sizeof(upb_value); - t->array = upb_malloc(a, array_bytes); + t->array = upb_arena_malloc(a, array_bytes); if (!t->array) { - uninit(&t->t, a); return false; } memset(mutable_array(t), 0xff, array_bytes); @@ -2127,18 +2176,12 @@ bool upb_inttable_sizedinit(upb_inttable *t, size_t asize, int hsize_lg2, return true; } -bool upb_inttable_init2(upb_inttable *t, upb_ctype_t ctype, upb_alloc *a) { - UPB_UNUSED(ctype); /* TODO(haberman): rm */ +bool upb_inttable_init(upb_inttable *t, upb_arena *a) { return upb_inttable_sizedinit(t, 0, 4, a); } -void upb_inttable_uninit2(upb_inttable *t, upb_alloc *a) { - uninit(&t->t, a); - 
upb_free(a, mutable_array(t)); -} - -bool upb_inttable_insert2(upb_inttable *t, uintptr_t key, upb_value val, - upb_alloc *a) { +bool upb_inttable_insert(upb_inttable *t, uintptr_t key, upb_value val, + upb_arena *a) { upb_tabval tabval; tabval.val = val.val; UPB_ASSERT(upb_arrhas(tabval)); /* This will reject (uint64_t)-1. Fix this. */ @@ -2169,7 +2212,6 @@ bool upb_inttable_insert2(upb_inttable *t, uintptr_t key, upb_value val, UPB_ASSERT(t->t.count == new_table.count); - uninit(&t->t, a); t->t = new_table; } insert(&t->t, intkey(key), key, val, upb_inthash(key), &inthash, &inteql); @@ -2213,21 +2255,7 @@ bool upb_inttable_remove(upb_inttable *t, uintptr_t key, upb_value *val) { return success; } -bool upb_inttable_insertptr2(upb_inttable *t, const void *key, upb_value val, - upb_alloc *a) { - return upb_inttable_insert2(t, (uintptr_t)key, val, a); -} - -bool upb_inttable_lookupptr(const upb_inttable *t, const void *key, - upb_value *v) { - return upb_inttable_lookup(t, (uintptr_t)key, v); -} - -bool upb_inttable_removeptr(upb_inttable *t, const void *key, upb_value *val) { - return upb_inttable_remove(t, (uintptr_t)key, val); -} - -void upb_inttable_compact2(upb_inttable *t, upb_alloc *a) { +void upb_inttable_compact(upb_inttable *t, upb_arena *a) { /* A power-of-two histogram of the table keys. 
*/ size_t counts[UPB_MAXARRSIZE + 1] = {0}; @@ -2275,12 +2303,11 @@ void upb_inttable_compact2(upb_inttable *t, upb_alloc *a) { upb_inttable_begin(&i, t); for (; !upb_inttable_done(&i); upb_inttable_next(&i)) { uintptr_t k = upb_inttable_iter_key(&i); - upb_inttable_insert2(&new_t, k, upb_inttable_iter_value(&i), a); + upb_inttable_insert(&new_t, k, upb_inttable_iter_value(&i), a); } UPB_ASSERT(new_t.array_size == arr_size); UPB_ASSERT(new_t.t.size_lg2 == hashsize_lg2); } - upb_inttable_uninit2(t, a); *t = new_t; } @@ -2354,6 +2381,7 @@ bool upb_inttable_iter_isequal(const upb_inttable_iter *i1, i1->array_part == i2->array_part; } +/** upb/upb.c ************************************************************/ #include #include @@ -2420,6 +2448,19 @@ static void *upb_global_allocfunc(upb_alloc *alloc, void *ptr, size_t oldsize, } } +static uint32_t *upb_cleanup_pointer(uintptr_t cleanup_metadata) { + return (uint32_t *)(cleanup_metadata & ~0x1); +} + +static bool upb_cleanup_has_initial_block(uintptr_t cleanup_metadata) { + return cleanup_metadata & 0x1; +} + +static uintptr_t upb_cleanup_metadata(uint32_t *cleanup, + bool has_initial_block) { + return (uintptr_t)cleanup | has_initial_block; +} + upb_alloc upb_alloc_global = {&upb_global_allocfunc}; /* upb_arena ******************************************************************/ @@ -2465,7 +2506,8 @@ static void upb_arena_addblock(upb_arena *a, upb_arena *root, void *ptr, a->head.ptr = UPB_PTR_AT(block, memblock_reserve, char); a->head.end = UPB_PTR_AT(block, size, char); - a->cleanups = &block->cleanups; + a->cleanup_metadata = upb_cleanup_metadata( + &block->cleanups, upb_cleanup_has_initial_block(a->cleanup_metadata)); UPB_POISON_MEMORY_REGION(a->head.ptr, a->head.end - a->head.ptr); } @@ -2513,6 +2555,7 @@ upb_arena *arena_initslow(void *mem, size_t n, upb_alloc *alloc) { a->refcount = 1; a->freelist = NULL; a->freelist_tail = NULL; + a->cleanup_metadata = upb_cleanup_metadata(NULL, false); upb_arena_addblock(a, a, 
mem, n); @@ -2540,7 +2583,7 @@ upb_arena *upb_arena_init(void *mem, size_t n, upb_alloc *alloc) { a->head.ptr = mem; a->head.end = UPB_PTR_AT(mem, n - sizeof(*a), char); a->freelist = NULL; - a->cleanups = NULL; + a->cleanup_metadata = upb_cleanup_metadata(NULL, true); return a; } @@ -2575,15 +2618,17 @@ void upb_arena_free(upb_arena *a) { bool upb_arena_addcleanup(upb_arena *a, void *ud, upb_cleanup_func *func) { cleanup_ent *ent; + uint32_t* cleanups = upb_cleanup_pointer(a->cleanup_metadata); - if (!a->cleanups || _upb_arenahas(a) < sizeof(cleanup_ent)) { + if (!cleanups || _upb_arenahas(a) < sizeof(cleanup_ent)) { if (!upb_arena_allocblock(a, 128)) return false; /* Out of memory. */ UPB_ASSERT(_upb_arenahas(a) >= sizeof(cleanup_ent)); + cleanups = upb_cleanup_pointer(a->cleanup_metadata); } a->head.end -= sizeof(cleanup_ent); ent = (cleanup_ent*)a->head.end; - (*a->cleanups)++; + (*cleanups)++; UPB_UNPOISON_MEMORY_REGION(ent, sizeof(cleanup_ent)); ent->cleanup = func; @@ -2592,11 +2637,18 @@ bool upb_arena_addcleanup(upb_arena *a, void *ud, upb_cleanup_func *func) { return true; } -void upb_arena_fuse(upb_arena *a1, upb_arena *a2) { +bool upb_arena_fuse(upb_arena *a1, upb_arena *a2) { upb_arena *r1 = arena_findroot(a1); upb_arena *r2 = arena_findroot(a2); - if (r1 == r2) return; /* Already fused. */ + if (r1 == r2) return true; /* Already fused. */ + + /* Do not fuse initial blocks since we cannot lifetime extend them. */ + if (upb_cleanup_has_initial_block(r1->cleanup_metadata)) return false; + if (upb_cleanup_has_initial_block(r2->cleanup_metadata)) return false; + + /* Only allow fuse with a common allocator */ + if (r1->block_alloc != r2->block_alloc) return false; /* We want to join the smaller tree to the larger tree. * So swap first if they are backwards. 
*/ @@ -2614,12 +2666,15 @@ void upb_arena_fuse(upb_arena *a1, upb_arena *a2) { r1->freelist = r2->freelist; } r2->parent = r1; + return true; } -// Fast decoder: ~3x the speed of decode.c, but x86-64 specific. + +/** upb/decode_fast.c ************************************************************/ +// Fast decoder: ~3x the speed of decode.c, but requires x86-64/ARM64. // Also the table size grows by 2x. // -// Could potentially be ported to ARM64 or other 64-bit archs that pass at -// least six arguments in registers. +// Could potentially be ported to other 64-bit archs that pass at least six +// arguments in registers and have 8 unused high bits in pointers. // // The overall design is to create specialized functions for every possible // field type (eg. oneof boolean field with a 1 byte tag) and then dispatch @@ -2639,8 +2694,10 @@ void upb_arena_fuse(upb_arena *a1, upb_arena *a2) { #define UPB_PARSE_ARGS d, ptr, msg, table, hasbits, data -#define RETURN_GENERIC(m) \ - /* fprintf(stderr, m); */ \ +#define RETURN_GENERIC(m) \ + /* Uncomment either of these for debugging purposes. 
*/ \ + /* fprintf(stderr, m); */ \ + /*__builtin_trap(); */ \ return fastdecode_generic(d, ptr, msg, table, hasbits, 0); typedef enum { @@ -2651,21 +2708,18 @@ typedef enum { } upb_card; UPB_NOINLINE -static const char *fastdecode_isdonefallback(upb_decstate *d, const char *ptr, - upb_msg *msg, intptr_t table, - uint64_t hasbits, int overrun) { +static const char *fastdecode_isdonefallback(UPB_PARSE_PARAMS) { + int overrun = data; ptr = decode_isdonefallback_inl(d, ptr, overrun); if (ptr == NULL) { return fastdecode_err(d); } - uint16_t tag = fastdecode_loadtag(ptr); - return fastdecode_tagdispatch(d, ptr, msg, table, hasbits, tag); + data = fastdecode_loadtag(ptr); + UPB_MUSTTAIL return fastdecode_tagdispatch(UPB_PARSE_ARGS); } UPB_FORCEINLINE -static const char *fastdecode_dispatch(upb_decstate *d, const char *ptr, - upb_msg *msg, intptr_t table, - uint64_t hasbits) { +static const char *fastdecode_dispatch(UPB_PARSE_PARAMS) { if (UPB_UNLIKELY(ptr >= d->limit_ptr)) { int overrun = ptr - d->end; if (UPB_LIKELY(overrun == d->limit)) { @@ -2673,21 +2727,22 @@ static const char *fastdecode_dispatch(upb_decstate *d, const char *ptr, *(uint32_t*)msg |= hasbits; // Sync hasbits. return ptr; } else { - return fastdecode_isdonefallback(d, ptr, msg, table, hasbits, overrun); + data = overrun; + UPB_MUSTTAIL return fastdecode_isdonefallback(UPB_PARSE_ARGS); } } // Read two bytes of tag data (for a one-byte tag, the high byte is junk). 
- uint16_t tag = fastdecode_loadtag(ptr); - return fastdecode_tagdispatch(d, ptr, msg, table, hasbits, tag); + data = fastdecode_loadtag(ptr); + UPB_MUSTTAIL return fastdecode_tagdispatch(UPB_PARSE_ARGS); } UPB_FORCEINLINE -static bool fastdecode_checktag(uint64_t data, int tagbytes) { +static bool fastdecode_checktag(uint16_t data, int tagbytes) { if (tagbytes == 1) { return (data & 0xff) == 0; } else { - return (data & 0xffff) == 0; + return data == 0; } } @@ -2911,6 +2966,14 @@ static bool fastdecode_flippacked(uint64_t *data, int tagbytes) { return fastdecode_checktag(*data, tagbytes); } +#define FASTDECODE_CHECKPACKED(tagbytes, card, func) \ + if (UPB_UNLIKELY(!fastdecode_checktag(data, tagbytes))) { \ + if (card == CARD_r && fastdecode_flippacked(&data, tagbytes)) { \ + UPB_MUSTTAIL return func(UPB_PARSE_ARGS); \ + } \ + RETURN_GENERIC("packed check tag mismatch\n"); \ + } + /* varint fields **************************************************************/ UPB_FORCEINLINE @@ -2953,57 +3016,50 @@ done: return ptr; } -UPB_FORCEINLINE -static const char *fastdecode_unpackedvarint(UPB_PARSE_PARAMS, int tagbytes, - int valbytes, upb_card card, - bool zigzag, - _upb_field_parser *packed) { - uint64_t val; - void *dst; - fastdecode_arr farr; - - if (UPB_UNLIKELY(!fastdecode_checktag(data, tagbytes))) { - if (card == CARD_r && fastdecode_flippacked(&data, tagbytes)) { - return packed(UPB_PARSE_ARGS); - } - RETURN_GENERIC("varint field tag mismatch\n"); - } - - dst = - fastdecode_getfield(d, ptr, msg, &data, &hasbits, &farr, valbytes, card); - if (card == CARD_r) { - if (UPB_UNLIKELY(!dst)) { - RETURN_GENERIC("need array resize\n"); - } - } - -again: - if (card == CARD_r) { - dst = fastdecode_resizearr(d, dst, &farr, valbytes); - } - - ptr += tagbytes; - ptr = fastdecode_varint64(ptr, &val); - if (ptr == NULL) return fastdecode_err(d); - val = fastdecode_munge(val, valbytes, zigzag); - memcpy(dst, &val, valbytes); - - if (card == CARD_r) { - fastdecode_nextret ret = - 
fastdecode_nextrepeated(d, dst, &ptr, &farr, data, tagbytes, valbytes); - switch (ret.next) { - case FD_NEXT_SAMEFIELD: - dst = ret.dst; - goto again; - case FD_NEXT_OTHERFIELD: - return fastdecode_tagdispatch(d, ptr, msg, table, hasbits, ret.tag); - case FD_NEXT_ATLIMIT: - return ptr; - } - } - - return fastdecode_dispatch(d, ptr, msg, table, hasbits); -} +#define FASTDECODE_UNPACKEDVARINT(d, ptr, msg, table, hasbits, data, tagbytes, \ + valbytes, card, zigzag, packed) \ + uint64_t val; \ + void *dst; \ + fastdecode_arr farr; \ + \ + FASTDECODE_CHECKPACKED(tagbytes, card, packed); \ + \ + dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, &farr, valbytes, \ + card); \ + if (card == CARD_r) { \ + if (UPB_UNLIKELY(!dst)) { \ + RETURN_GENERIC("need array resize\n"); \ + } \ + } \ + \ + again: \ + if (card == CARD_r) { \ + dst = fastdecode_resizearr(d, dst, &farr, valbytes); \ + } \ + \ + ptr += tagbytes; \ + ptr = fastdecode_varint64(ptr, &val); \ + if (ptr == NULL) \ + return fastdecode_err(d); \ + val = fastdecode_munge(val, valbytes, zigzag); \ + memcpy(dst, &val, valbytes); \ + \ + if (card == CARD_r) { \ + fastdecode_nextret ret = fastdecode_nextrepeated( \ + d, dst, &ptr, &farr, data, tagbytes, valbytes); \ + switch (ret.next) { \ + case FD_NEXT_SAMEFIELD: \ + dst = ret.dst; \ + goto again; \ + case FD_NEXT_OTHERFIELD: \ + data = ret.tag; \ + UPB_MUSTTAIL return fastdecode_tagdispatch(UPB_PARSE_ARGS); \ + case FD_NEXT_ATLIMIT: \ + return ptr; \ + } \ + } \ + \ + UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS); typedef struct { uint8_t valbytes; @@ -3032,50 +3088,37 @@ static const char *fastdecode_topackedvarint(upb_decstate *d, const char *ptr, return ptr; } -UPB_FORCEINLINE -static const char *fastdecode_packedvarint(UPB_PARSE_PARAMS, int tagbytes, - int valbytes, bool zigzag, - _upb_field_parser *unpacked) { - fastdecode_varintdata ctx = {valbytes, zigzag}; - - if (UPB_UNLIKELY(!fastdecode_checktag(data, tagbytes))) { - if 
(fastdecode_flippacked(&data, tagbytes)) { - return unpacked(UPB_PARSE_ARGS); - } else { - RETURN_GENERIC("varint field tag mismatch\n"); - } - } - - ctx.dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, &ctx.farr, - valbytes, CARD_r); - if (UPB_UNLIKELY(!ctx.dst)) { - RETURN_GENERIC("need array resize\n"); - } - - ptr += tagbytes; - ptr = fastdecode_delimited(d, ptr, &fastdecode_topackedvarint, &ctx); - - if (UPB_UNLIKELY(ptr == NULL)) { - return fastdecode_err(d); +#define FASTDECODE_PACKEDVARINT(d, ptr, msg, table, hasbits, data, tagbytes, \ + valbytes, zigzag, unpacked) \ + fastdecode_varintdata ctx = {valbytes, zigzag}; \ + \ + FASTDECODE_CHECKPACKED(tagbytes, CARD_r, unpacked); \ + \ + ctx.dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, &ctx.farr, \ + valbytes, CARD_r); \ + if (UPB_UNLIKELY(!ctx.dst)) { \ + RETURN_GENERIC("need array resize\n"); \ + } \ + \ + ptr += tagbytes; \ + ptr = fastdecode_delimited(d, ptr, &fastdecode_topackedvarint, &ctx); \ + \ + if (UPB_UNLIKELY(ptr == NULL)) { \ + return fastdecode_err(d); \ + } \ + \ + UPB_MUSTTAIL return fastdecode_dispatch(d, ptr, msg, table, hasbits, 0); + +#define FASTDECODE_VARINT(d, ptr, msg, table, hasbits, data, tagbytes, \ + valbytes, card, zigzag, unpacked, packed) \ + if (card == CARD_p) { \ + FASTDECODE_PACKEDVARINT(d, ptr, msg, table, hasbits, data, tagbytes, \ + valbytes, zigzag, unpacked); \ + } else { \ + FASTDECODE_UNPACKEDVARINT(d, ptr, msg, table, hasbits, data, tagbytes, \ + valbytes, card, zigzag, packed); \ } - return fastdecode_dispatch(d, ptr, msg, table, hasbits); -} - -UPB_FORCEINLINE -static const char *fastdecode_varint(UPB_PARSE_PARAMS, int tagbytes, - int valbytes, upb_card card, bool zigzag, - _upb_field_parser *unpacked, - _upb_field_parser *packed) { - if (card == CARD_p) { - return fastdecode_packedvarint(UPB_PARSE_ARGS, tagbytes, valbytes, zigzag, - unpacked); - } else { - return fastdecode_unpackedvarint(UPB_PARSE_ARGS, tagbytes, valbytes, card, - zigzag, packed); 
- } -} - #define z_ZZ true #define b_ZZ false #define v_ZZ false @@ -3086,10 +3129,10 @@ static const char *fastdecode_varint(UPB_PARSE_PARAMS, int tagbytes, #define F(card, type, valbytes, tagbytes) \ UPB_NOINLINE \ const char *upb_p##card##type##valbytes##_##tagbytes##bt(UPB_PARSE_PARAMS) { \ - return fastdecode_varint(UPB_PARSE_ARGS, tagbytes, valbytes, CARD_##card, \ - type##_ZZ, \ - &upb_pr##type##valbytes##_##tagbytes##bt, \ - &upb_pp##type##valbytes##_##tagbytes##bt); \ + FASTDECODE_VARINT(d, ptr, msg, table, hasbits, data, tagbytes, valbytes, \ + CARD_##card, type##_ZZ, \ + upb_pr##type##valbytes##_##tagbytes##bt, \ + upb_pp##type##valbytes##_##tagbytes##bt); \ } #define TYPES(card, tagbytes) \ @@ -3117,126 +3160,110 @@ TAGBYTES(p) #undef F #undef TYPES #undef TAGBYTES +#undef FASTDECODE_UNPACKEDVARINT +#undef FASTDECODE_PACKEDVARINT +#undef FASTDECODE_VARINT /* fixed fields ***************************************************************/ -UPB_FORCEINLINE -static const char *fastdecode_unpackedfixed(UPB_PARSE_PARAMS, int tagbytes, - int valbytes, upb_card card, - _upb_field_parser *packed) { - void *dst; - fastdecode_arr farr; - - if (UPB_UNLIKELY(!fastdecode_checktag(data, tagbytes))) { - if (card == CARD_r && fastdecode_flippacked(&data, tagbytes)) { - return packed(UPB_PARSE_ARGS); - } - RETURN_GENERIC("fixed field tag mismatch\n"); - } - - dst = - fastdecode_getfield(d, ptr, msg, &data, &hasbits, &farr, valbytes, card); - if (card == CARD_r) { - if (UPB_UNLIKELY(!dst)) { - RETURN_GENERIC("couldn't allocate array in arena\n"); - } - } - - -again: - if (card == CARD_r) { - dst = fastdecode_resizearr(d, dst, &farr, valbytes); - } - - ptr += tagbytes; - memcpy(dst, ptr, valbytes); - ptr += valbytes; - - if (card == CARD_r) { - fastdecode_nextret ret = - fastdecode_nextrepeated(d, dst, &ptr, &farr, data, tagbytes, valbytes); - switch (ret.next) { - case FD_NEXT_SAMEFIELD: - dst = ret.dst; - goto again; - case FD_NEXT_OTHERFIELD: - return 
fastdecode_tagdispatch(d, ptr, msg, table, hasbits, ret.tag); - case FD_NEXT_ATLIMIT: - return ptr; - } - } - - return fastdecode_dispatch(d, ptr, msg, table, hasbits); -} - -UPB_FORCEINLINE -static const char *fastdecode_packedfixed(UPB_PARSE_PARAMS, int tagbytes, - int valbytes, - _upb_field_parser *unpacked) { - if (UPB_UNLIKELY(!fastdecode_checktag(data, tagbytes))) { - if (fastdecode_flippacked(&data, tagbytes)) { - return unpacked(UPB_PARSE_ARGS); - } else { - RETURN_GENERIC("varint field tag mismatch\n"); - } - } - - ptr += tagbytes; - int size = (uint8_t)ptr[0]; - ptr++; - if (size & 0x80) { - ptr = fastdecode_longsize(ptr, &size); - } - - if (UPB_UNLIKELY(fastdecode_boundscheck(ptr, size, d->limit_ptr)) || - (size % valbytes) != 0) { - return fastdecode_err(d); +#define FASTDECODE_UNPACKEDFIXED(d, ptr, msg, table, hasbits, data, tagbytes, \ + valbytes, card, packed) \ + void *dst; \ + fastdecode_arr farr; \ + \ + FASTDECODE_CHECKPACKED(tagbytes, card, packed) \ + \ + dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, &farr, valbytes, \ + card); \ + if (card == CARD_r) { \ + if (UPB_UNLIKELY(!dst)) { \ + RETURN_GENERIC("couldn't allocate array in arena\n"); \ + } \ + } \ + \ + again: \ + if (card == CARD_r) { \ + dst = fastdecode_resizearr(d, dst, &farr, valbytes); \ + } \ + \ + ptr += tagbytes; \ + memcpy(dst, ptr, valbytes); \ + ptr += valbytes; \ + \ + if (card == CARD_r) { \ + fastdecode_nextret ret = fastdecode_nextrepeated( \ + d, dst, &ptr, &farr, data, tagbytes, valbytes); \ + switch (ret.next) { \ + case FD_NEXT_SAMEFIELD: \ + dst = ret.dst; \ + goto again; \ + case FD_NEXT_OTHERFIELD: \ + data = ret.tag; \ + UPB_MUSTTAIL return fastdecode_tagdispatch(UPB_PARSE_ARGS); \ + case FD_NEXT_ATLIMIT: \ + return ptr; \ + } \ + } \ + \ + UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS); + +#define FASTDECODE_PACKEDFIXED(d, ptr, msg, table, hasbits, data, tagbytes, \ + valbytes, unpacked) \ + FASTDECODE_CHECKPACKED(tagbytes, CARD_r, unpacked) \ + 
\ + ptr += tagbytes; \ + int size = (uint8_t)ptr[0]; \ + ptr++; \ + if (size & 0x80) { \ + ptr = fastdecode_longsize(ptr, &size); \ + } \ + \ + if (UPB_UNLIKELY(fastdecode_boundscheck(ptr, size, d->limit_ptr) || \ + (size % valbytes) != 0)) { \ + return fastdecode_err(d); \ + } \ + \ + upb_array **arr_p = fastdecode_fieldmem(msg, data); \ + upb_array *arr = *arr_p; \ + uint8_t elem_size_lg2 = __builtin_ctz(valbytes); \ + int elems = size / valbytes; \ + \ + if (UPB_LIKELY(!arr)) { \ + *arr_p = arr = _upb_array_new(&d->arena, elems, elem_size_lg2); \ + if (!arr) { \ + return fastdecode_err(d); \ + } \ + } else { \ + _upb_array_resize(arr, elems, &d->arena); \ + } \ + \ + char *dst = _upb_array_ptr(arr); \ + memcpy(dst, ptr, size); \ + arr->len = elems; \ + \ + ptr += size; \ + UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS); + +#define FASTDECODE_FIXED(d, ptr, msg, table, hasbits, data, tagbytes, \ + valbytes, card, unpacked, packed) \ + if (card == CARD_p) { \ + FASTDECODE_PACKEDFIXED(d, ptr, msg, table, hasbits, data, tagbytes, \ + valbytes, unpacked); \ + } else { \ + FASTDECODE_UNPACKEDFIXED(d, ptr, msg, table, hasbits, data, tagbytes, \ + valbytes, card, packed); \ } - upb_array **arr_p = fastdecode_fieldmem(msg, data); - upb_array *arr = *arr_p; - uint8_t elem_size_lg2 = __builtin_ctz(valbytes); - int elems = size / valbytes; - - if (UPB_LIKELY(!arr)) { - *arr_p = arr = _upb_array_new(&d->arena, elems, elem_size_lg2); - if (!arr) { - return fastdecode_err(d); - } - } else { - _upb_array_resize(arr, elems, &d->arena); - } - - char *dst = _upb_array_ptr(arr); - memcpy(dst, ptr, size); - arr->len = elems; - - return fastdecode_dispatch(d, ptr + size, msg, table, hasbits); -} - -UPB_FORCEINLINE -static const char *fastdecode_fixed(UPB_PARSE_PARAMS, int tagbytes, - int valbytes, upb_card card, - _upb_field_parser *unpacked, - _upb_field_parser *packed) { - if (card == CARD_p) { - return fastdecode_packedfixed(UPB_PARSE_ARGS, tagbytes, valbytes, unpacked); - 
} else { - return fastdecode_unpackedfixed(UPB_PARSE_ARGS, tagbytes, valbytes, card, - packed); - } -} - /* Generate all combinations: * {s,o,r,p} x {f4,f8} x {1bt,2bt} */ -#define F(card, valbytes, tagbytes) \ - UPB_NOINLINE \ - const char *upb_p##card##f##valbytes##_##tagbytes##bt(UPB_PARSE_PARAMS) { \ - return fastdecode_fixed(UPB_PARSE_ARGS, tagbytes, valbytes, CARD_##card, \ - &upb_ppf##valbytes##_##tagbytes##bt, \ - &upb_prf##valbytes##_##tagbytes##bt); \ +#define F(card, valbytes, tagbytes) \ + UPB_NOINLINE \ + const char *upb_p##card##f##valbytes##_##tagbytes##bt(UPB_PARSE_PARAMS) { \ + FASTDECODE_FIXED(d, ptr, msg, table, hasbits, data, tagbytes, valbytes, \ + CARD_##card, upb_ppf##valbytes##_##tagbytes##bt, \ + upb_prf##valbytes##_##tagbytes##bt); \ } #define TYPES(card, tagbytes) \ @@ -3255,6 +3282,8 @@ TAGBYTES(p) #undef F #undef TYPES #undef TAGBYTES +#undef FASTDECODE_UNPACKEDFIXED +#undef FASTDECODE_PACKEDFIXED /* string fields **************************************************************/ @@ -3266,56 +3295,54 @@ typedef const char *fastdecode_copystr_func(struct upb_decstate *d, UPB_NOINLINE static const char *fastdecode_verifyutf8(upb_decstate *d, const char *ptr, upb_msg *msg, intptr_t table, - uint64_t hasbits, upb_strview *dst) { + uint64_t hasbits, uint64_t data) { + upb_strview *dst = (upb_strview*)data; if (!decode_verifyutf8_inl(dst->data, dst->size)) { return fastdecode_err(d); } - return fastdecode_dispatch(d, ptr, msg, table, hasbits); -} - -UPB_FORCEINLINE -static const char *fastdecode_longstring(struct upb_decstate *d, - const char *ptr, upb_msg *msg, - intptr_t table, uint64_t hasbits, - upb_strview *dst, - bool validate_utf8) { - int size = (uint8_t)ptr[0]; // Could plumb through hasbits. 
- ptr++; - if (size & 0x80) { - ptr = fastdecode_longsize(ptr, &size); + UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS); +} + +#define FASTDECODE_LONGSTRING(d, ptr, msg, table, hasbits, dst, validate_utf8) \ + int size = (uint8_t)ptr[0]; /* Could plumb through hasbits. */ \ + ptr++; \ + if (size & 0x80) { \ + ptr = fastdecode_longsize(ptr, &size); \ + } \ + \ + if (UPB_UNLIKELY(fastdecode_boundscheck(ptr, size, d->limit_ptr))) { \ + dst->size = 0; \ + return fastdecode_err(d); \ + } \ + \ + if (d->alias) { \ + dst->data = ptr; \ + dst->size = size; \ + } else { \ + char *data = upb_arena_malloc(&d->arena, size); \ + if (!data) { \ + return fastdecode_err(d); \ + } \ + memcpy(data, ptr, size); \ + dst->data = data; \ + dst->size = size; \ + } \ + \ + ptr += size; \ + if (validate_utf8) { \ + data = (uint64_t)dst; \ + UPB_MUSTTAIL return fastdecode_verifyutf8(UPB_PARSE_ARGS); \ + } else { \ + UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS); \ } - if (UPB_UNLIKELY(fastdecode_boundscheck(ptr, size, d->limit_ptr))) { - dst->size = 0; - return fastdecode_err(d); - } - - if (d->alias) { - dst->data = ptr; - dst->size = size; - } else { - char *data = upb_arena_malloc(&d->arena, size); - if (!data) { - return fastdecode_err(d); - } - memcpy(data, ptr, size); - dst->data = data; - dst->size = size; - } - - if (validate_utf8) { - return fastdecode_verifyutf8(d, ptr + size, msg, table, hasbits, dst); - } else { - return fastdecode_dispatch(d, ptr + size, msg, table, hasbits); - } -} - UPB_NOINLINE static const char *fastdecode_longstring_utf8(struct upb_decstate *d, - const char *ptr, upb_msg *msg, - intptr_t table, uint64_t hasbits, - upb_strview *dst) { - return fastdecode_longstring(d, ptr, msg, table, hasbits, dst, true); + const char *ptr, upb_msg *msg, + intptr_t table, uint64_t hasbits, + uint64_t data) { + upb_strview *dst = (upb_strview*)data; + FASTDECODE_LONGSTRING(d, ptr, msg, table, hasbits, dst, true); } UPB_NOINLINE @@ -3323,8 +3350,9 @@ static 
const char *fastdecode_longstring_noutf8(struct upb_decstate *d, const char *ptr, upb_msg *msg, intptr_t table, uint64_t hasbits, - upb_strview *dst) { - return fastdecode_longstring(d, ptr, msg, table, hasbits, dst, false); + uint64_t data) { + upb_strview *dst = (upb_strview*)data; + FASTDECODE_LONGSTRING(d, ptr, msg, table, hasbits, dst, false); } UPB_FORCEINLINE @@ -3337,156 +3365,165 @@ static void fastdecode_docopy(upb_decstate *d, const char *ptr, uint32_t size, UPB_POISON_MEMORY_REGION(data + size, copy - size); } -UPB_FORCEINLINE -static const char *fastdecode_copystring(UPB_PARSE_PARAMS, int tagbytes, - upb_card card, bool validate_utf8) { - upb_strview *dst; - fastdecode_arr farr; - int64_t size; - size_t arena_has; - size_t common_has; - char *buf; - - UPB_ASSERT(!d->alias); - UPB_ASSERT(fastdecode_checktag(data, tagbytes)); - - dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, &farr, - sizeof(upb_strview), card); - -again: - if (card == CARD_r) { - dst = fastdecode_resizearr(d, dst, &farr, sizeof(upb_strview)); - } - - size = (uint8_t)ptr[tagbytes]; - ptr += tagbytes + 1; - dst->size = size; - - buf = d->arena.head.ptr; - arena_has = _upb_arenahas(&d->arena); - common_has = UPB_MIN(arena_has, (d->end - ptr) + 16); - - if (UPB_LIKELY(size <= 15 - tagbytes)) { - if (arena_has < 16) goto longstr; - d->arena.head.ptr += 16; - memcpy(buf, ptr - tagbytes - 1, 16); - dst->data = buf + tagbytes + 1; - } else if (UPB_LIKELY(size <= 32)) { - if (UPB_UNLIKELY(common_has < 32)) goto longstr; - fastdecode_docopy(d, ptr, size, 32, buf, dst); - } else if (UPB_LIKELY(size <= 64)) { - if (UPB_UNLIKELY(common_has < 64)) goto longstr; - fastdecode_docopy(d, ptr, size, 64, buf, dst); - } else if (UPB_LIKELY(size < 128)) { - if (UPB_UNLIKELY(common_has < 128)) goto longstr; - fastdecode_docopy(d, ptr, size, 128, buf, dst); - } else { - goto longstr; - } - - ptr += size; - - if (card == CARD_r) { - if (validate_utf8 && !decode_verifyutf8_inl(dst->data, dst->size)) { - 
return fastdecode_err(d); - } - fastdecode_nextret ret = fastdecode_nextrepeated( - d, dst, &ptr, &farr, data, tagbytes, sizeof(upb_strview)); - switch (ret.next) { - case FD_NEXT_SAMEFIELD: - dst = ret.dst; - goto again; - case FD_NEXT_OTHERFIELD: - return fastdecode_tagdispatch(d, ptr, msg, table, hasbits, ret.tag); - case FD_NEXT_ATLIMIT: - return ptr; - } - } - - if (card != CARD_r && validate_utf8) { - return fastdecode_verifyutf8(d, ptr, msg, table, hasbits, dst); - } - - return fastdecode_dispatch(d, ptr, msg, table, hasbits); - -longstr: - ptr--; - if (validate_utf8) { - return fastdecode_longstring_utf8(d, ptr, msg, table, hasbits, dst); - } else { - return fastdecode_longstring_noutf8(d, ptr, msg, table, hasbits, dst); - } -} - -UPB_FORCEINLINE -static const char *fastdecode_string(UPB_PARSE_PARAMS, int tagbytes, - upb_card card, _upb_field_parser *copyfunc, - bool validate_utf8) { - upb_strview *dst; - fastdecode_arr farr; - int64_t size; - - if (UPB_UNLIKELY(!fastdecode_checktag(data, tagbytes))) { - RETURN_GENERIC("string field tag mismatch\n"); - } - - if (UPB_UNLIKELY(!d->alias)) { - return copyfunc(UPB_PARSE_ARGS); - } - - dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, &farr, - sizeof(upb_strview), card); - -again: - if (card == CARD_r) { - dst = fastdecode_resizearr(d, dst, &farr, sizeof(upb_strview)); - } - - size = (int8_t)ptr[tagbytes]; - ptr += tagbytes + 1; - dst->data = ptr; - dst->size = size; - - if (UPB_UNLIKELY(fastdecode_boundscheck(ptr, size, d->end))) { - ptr--; - if (validate_utf8) { - return fastdecode_longstring_utf8(d, ptr, msg, table, hasbits, dst); - } else { - return fastdecode_longstring_noutf8(d, ptr, msg, table, hasbits, dst); - } - } - - ptr += size; - - if (card == CARD_r) { - if (validate_utf8 && !decode_verifyutf8_inl(dst->data, dst->size)) { - return fastdecode_err(d); - } - fastdecode_nextret ret = fastdecode_nextrepeated( - d, dst, &ptr, &farr, data, tagbytes, sizeof(upb_strview)); - switch (ret.next) { - case 
FD_NEXT_SAMEFIELD: - dst = ret.dst; - if (UPB_UNLIKELY(!d->alias)) { - // Buffer flipped and we can't alias any more. Bounce to copyfunc(), - // but via dispatch since we need to reload table data also. - fastdecode_commitarr(dst, &farr, sizeof(upb_strview)); - return fastdecode_tagdispatch(d, ptr, msg, table, hasbits, ret.tag); - } - goto again; - case FD_NEXT_OTHERFIELD: - return fastdecode_tagdispatch(d, ptr, msg, table, hasbits, ret.tag); - case FD_NEXT_ATLIMIT: - return ptr; - } - } - - if (card != CARD_r && validate_utf8) { - return fastdecode_verifyutf8(d, ptr, msg, table, hasbits, dst); - } - - return fastdecode_dispatch(d, ptr, msg, table, hasbits); -} +#define FASTDECODE_COPYSTRING(d, ptr, msg, table, hasbits, data, tagbytes, \ + card, validate_utf8) \ + upb_strview *dst; \ + fastdecode_arr farr; \ + int64_t size; \ + size_t arena_has; \ + size_t common_has; \ + char *buf; \ + \ + UPB_ASSERT(!d->alias); \ + UPB_ASSERT(fastdecode_checktag(data, tagbytes)); \ + \ + dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, &farr, \ + sizeof(upb_strview), card); \ + \ + again: \ + if (card == CARD_r) { \ + dst = fastdecode_resizearr(d, dst, &farr, sizeof(upb_strview)); \ + } \ + \ + size = (uint8_t)ptr[tagbytes]; \ + ptr += tagbytes + 1; \ + dst->size = size; \ + \ + buf = d->arena.head.ptr; \ + arena_has = _upb_arenahas(&d->arena); \ + common_has = UPB_MIN(arena_has, (d->end - ptr) + 16); \ + \ + if (UPB_LIKELY(size <= 15 - tagbytes)) { \ + if (arena_has < 16) \ + goto longstr; \ + d->arena.head.ptr += 16; \ + memcpy(buf, ptr - tagbytes - 1, 16); \ + dst->data = buf + tagbytes + 1; \ + } else if (UPB_LIKELY(size <= 32)) { \ + if (UPB_UNLIKELY(common_has < 32)) \ + goto longstr; \ + fastdecode_docopy(d, ptr, size, 32, buf, dst); \ + } else if (UPB_LIKELY(size <= 64)) { \ + if (UPB_UNLIKELY(common_has < 64)) \ + goto longstr; \ + fastdecode_docopy(d, ptr, size, 64, buf, dst); \ + } else if (UPB_LIKELY(size < 128)) { \ + if (UPB_UNLIKELY(common_has < 128)) \ + 
goto longstr; \ + fastdecode_docopy(d, ptr, size, 128, buf, dst); \ + } else { \ + goto longstr; \ + } \ + \ + ptr += size; \ + \ + if (card == CARD_r) { \ + if (validate_utf8 && !decode_verifyutf8_inl(dst->data, dst->size)) { \ + return fastdecode_err(d); \ + } \ + fastdecode_nextret ret = fastdecode_nextrepeated( \ + d, dst, &ptr, &farr, data, tagbytes, sizeof(upb_strview)); \ + switch (ret.next) { \ + case FD_NEXT_SAMEFIELD: \ + dst = ret.dst; \ + goto again; \ + case FD_NEXT_OTHERFIELD: \ + data = ret.tag; \ + UPB_MUSTTAIL return fastdecode_tagdispatch(UPB_PARSE_ARGS); \ + case FD_NEXT_ATLIMIT: \ + return ptr; \ + } \ + } \ + \ + if (card != CARD_r && validate_utf8) { \ + data = (uint64_t)dst; \ + UPB_MUSTTAIL return fastdecode_verifyutf8(UPB_PARSE_ARGS); \ + } \ + \ + UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS); \ + \ + longstr: \ + ptr--; \ + if (validate_utf8) { \ + UPB_MUSTTAIL return fastdecode_longstring_utf8(d, ptr, msg, table, \ + hasbits, (uint64_t)dst); \ + } else { \ + UPB_MUSTTAIL return fastdecode_longstring_noutf8(d, ptr, msg, table, \ + hasbits, (uint64_t)dst); \ + } + +#define FASTDECODE_STRING(d, ptr, msg, table, hasbits, data, tagbytes, card, \ + copyfunc, validate_utf8) \ + upb_strview *dst; \ + fastdecode_arr farr; \ + int64_t size; \ + \ + if (UPB_UNLIKELY(!fastdecode_checktag(data, tagbytes))) { \ + RETURN_GENERIC("string field tag mismatch\n"); \ + } \ + \ + if (UPB_UNLIKELY(!d->alias)) { \ + UPB_MUSTTAIL return copyfunc(UPB_PARSE_ARGS); \ + } \ + \ + dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, &farr, \ + sizeof(upb_strview), card); \ + \ + again: \ + if (card == CARD_r) { \ + dst = fastdecode_resizearr(d, dst, &farr, sizeof(upb_strview)); \ + } \ + \ + size = (int8_t)ptr[tagbytes]; \ + ptr += tagbytes + 1; \ + dst->data = ptr; \ + dst->size = size; \ + \ + if (UPB_UNLIKELY(fastdecode_boundscheck(ptr, size, d->end))) { \ + ptr--; \ + if (validate_utf8) { \ + return fastdecode_longstring_utf8(d, ptr, msg, table, 
hasbits, \ + (uint64_t)dst); \ + } else { \ + return fastdecode_longstring_noutf8(d, ptr, msg, table, hasbits, \ + (uint64_t)dst); \ + } \ + } \ + \ + ptr += size; \ + \ + if (card == CARD_r) { \ + if (validate_utf8 && !decode_verifyutf8_inl(dst->data, dst->size)) { \ + return fastdecode_err(d); \ + } \ + fastdecode_nextret ret = fastdecode_nextrepeated( \ + d, dst, &ptr, &farr, data, tagbytes, sizeof(upb_strview)); \ + switch (ret.next) { \ + case FD_NEXT_SAMEFIELD: \ + dst = ret.dst; \ + if (UPB_UNLIKELY(!d->alias)) { \ + /* Buffer flipped and we can't alias any more. Bounce to */ \ + /* copyfunc(), but via dispatch since we need to reload table */ \ + /* data also. */ \ + fastdecode_commitarr(dst, &farr, sizeof(upb_strview)); \ + data = ret.tag; \ + UPB_MUSTTAIL return fastdecode_tagdispatch(UPB_PARSE_ARGS); \ + } \ + goto again; \ + case FD_NEXT_OTHERFIELD: \ + data = ret.tag; \ + UPB_MUSTTAIL return fastdecode_tagdispatch(UPB_PARSE_ARGS); \ + case FD_NEXT_ATLIMIT: \ + return ptr; \ + } \ + } \ + \ + if (card != CARD_r && validate_utf8) { \ + data = (uint64_t)dst; \ + UPB_MUSTTAIL return fastdecode_verifyutf8(UPB_PARSE_ARGS); \ + } \ + \ + UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS); /* Generate all combinations: * {p,c} x {s,o,r} x {s, b} x {1bt,2bt} */ @@ -3494,16 +3531,16 @@ again: #define s_VALIDATE true #define b_VALIDATE false -#define F(card, tagbytes, type) \ - UPB_NOINLINE \ - const char *upb_c##card##type##_##tagbytes##bt(UPB_PARSE_PARAMS) { \ - return fastdecode_copystring(UPB_PARSE_ARGS, tagbytes, CARD_##card, \ - type##_VALIDATE); \ - } \ - const char *upb_p##card##type##_##tagbytes##bt(UPB_PARSE_PARAMS) { \ - return fastdecode_string(UPB_PARSE_ARGS, tagbytes, CARD_##card, \ - &upb_c##card##type##_##tagbytes##bt, \ - type##_VALIDATE); \ +#define F(card, tagbytes, type) \ + UPB_NOINLINE \ + const char *upb_c##card##type##_##tagbytes##bt(UPB_PARSE_PARAMS) { \ + FASTDECODE_COPYSTRING(d, ptr, msg, table, hasbits, data, tagbytes, \ + 
CARD_##card, type##_VALIDATE); \ + } \ + const char *upb_p##card##type##_##tagbytes##bt(UPB_PARSE_PARAMS) { \ + FASTDECODE_STRING(d, ptr, msg, table, hasbits, data, tagbytes, \ + CARD_##card, upb_c##card##type##_##tagbytes##bt, \ + type##_VALIDATE); \ } #define UTF8(card, tagbytes) \ @@ -3522,6 +3559,9 @@ TAGBYTES(r) #undef b_VALIDATE #undef F #undef TAGBYTES +#undef FASTDECODE_LONGSTRING +#undef FASTDECODE_COPYSTRING +#undef FASTDECODE_STRING /* message fields *************************************************************/ @@ -3554,82 +3594,82 @@ UPB_FORCEINLINE static const char *fastdecode_tosubmsg(upb_decstate *d, const char *ptr, void *ctx) { fastdecode_submsgdata *submsg = ctx; - ptr = fastdecode_dispatch(d, ptr, submsg->msg, submsg->table, 0); + ptr = fastdecode_dispatch(d, ptr, submsg->msg, submsg->table, 0, 0); UPB_ASSUME(ptr != NULL); return ptr; } -UPB_FORCEINLINE -static const char *fastdecode_submsg(UPB_PARSE_PARAMS, int tagbytes, - int msg_ceil_bytes, upb_card card) { - - if (UPB_UNLIKELY(!fastdecode_checktag(data, tagbytes))) { - RETURN_GENERIC("submessage field tag mismatch\n"); - } - - if (--d->depth == 0) return fastdecode_err(d); - - upb_msg **dst; - uint32_t submsg_idx = (data >> 16) & 0xff; - const upb_msglayout *tablep = decode_totablep(table); - const upb_msglayout *subtablep = tablep->submsgs[submsg_idx]; - fastdecode_submsgdata submsg = {decode_totable(subtablep)}; - fastdecode_arr farr; - - if (subtablep->table_mask == (uint8_t)-1) { - RETURN_GENERIC("submessage doesn't have fast tables."); - } - - dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, &farr, - sizeof(upb_msg *), card); - - if (card == CARD_s) { - *(uint32_t*)msg |= hasbits; - hasbits = 0; - } - -again: - if (card == CARD_r) { - dst = fastdecode_resizearr(d, dst, &farr, sizeof(upb_msg*)); - } - - submsg.msg = *dst; - - if (card == CARD_r || UPB_LIKELY(!submsg.msg)) { - *dst = submsg.msg = decode_newmsg_ceil(d, subtablep, msg_ceil_bytes); - } - - ptr += tagbytes; - ptr = 
fastdecode_delimited(d, ptr, fastdecode_tosubmsg, &submsg); - - if (UPB_UNLIKELY(ptr == NULL || d->end_group != DECODE_NOGROUP)) { - return fastdecode_err(d); - } - - if (card == CARD_r) { - fastdecode_nextret ret = fastdecode_nextrepeated( - d, dst, &ptr, &farr, data, tagbytes, sizeof(upb_msg *)); - switch (ret.next) { - case FD_NEXT_SAMEFIELD: - dst = ret.dst; - goto again; - case FD_NEXT_OTHERFIELD: - d->depth++; - return fastdecode_tagdispatch(d, ptr, msg, table, hasbits, ret.tag); - case FD_NEXT_ATLIMIT: - d->depth++; - return ptr; - } - } - - d->depth++; - return fastdecode_dispatch(d, ptr, msg, table, hasbits); -} - -#define F(card, tagbytes, size_ceil, ceil_arg) \ - const char *upb_p##card##m_##tagbytes##bt_max##size_ceil##b( \ - UPB_PARSE_PARAMS) { \ - return fastdecode_submsg(UPB_PARSE_ARGS, tagbytes, ceil_arg, CARD_##card); \ +#define FASTDECODE_SUBMSG(d, ptr, msg, table, hasbits, data, tagbytes, \ + msg_ceil_bytes, card) \ + \ + if (UPB_UNLIKELY(!fastdecode_checktag(data, tagbytes))) { \ + RETURN_GENERIC("submessage field tag mismatch\n"); \ + } \ + \ + if (--d->depth == 0) return fastdecode_err(d); \ + \ + upb_msg **dst; \ + uint32_t submsg_idx = (data >> 16) & 0xff; \ + const upb_msglayout *tablep = decode_totablep(table); \ + const upb_msglayout *subtablep = tablep->submsgs[submsg_idx]; \ + fastdecode_submsgdata submsg = {decode_totable(subtablep)}; \ + fastdecode_arr farr; \ + \ + if (subtablep->table_mask == (uint8_t)-1) { \ + RETURN_GENERIC("submessage doesn't have fast tables."); \ + } \ + \ + dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, &farr, \ + sizeof(upb_msg *), card); \ + \ + if (card == CARD_s) { \ + *(uint32_t *)msg |= hasbits; \ + hasbits = 0; \ + } \ + \ + again: \ + if (card == CARD_r) { \ + dst = fastdecode_resizearr(d, dst, &farr, sizeof(upb_msg *)); \ + } \ + \ + submsg.msg = *dst; \ + \ + if (card == CARD_r || UPB_LIKELY(!submsg.msg)) { \ + *dst = submsg.msg = decode_newmsg_ceil(d, subtablep, msg_ceil_bytes); \ + } \ + \ 
+ ptr += tagbytes; \ + ptr = fastdecode_delimited(d, ptr, fastdecode_tosubmsg, &submsg); \ + \ + if (UPB_UNLIKELY(ptr == NULL || d->end_group != DECODE_NOGROUP)) { \ + return fastdecode_err(d); \ + } \ + \ + if (card == CARD_r) { \ + fastdecode_nextret ret = fastdecode_nextrepeated( \ + d, dst, &ptr, &farr, data, tagbytes, sizeof(upb_msg *)); \ + switch (ret.next) { \ + case FD_NEXT_SAMEFIELD: \ + dst = ret.dst; \ + goto again; \ + case FD_NEXT_OTHERFIELD: \ + d->depth++; \ + data = ret.tag; \ + UPB_MUSTTAIL return fastdecode_tagdispatch(UPB_PARSE_ARGS); \ + case FD_NEXT_ATLIMIT: \ + d->depth++; \ + return ptr; \ + } \ + } \ + \ + d->depth++; \ + UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS); + +#define F(card, tagbytes, size_ceil, ceil_arg) \ + const char *upb_p##card##m_##tagbytes##bt_max##size_ceil##b( \ + UPB_PARSE_PARAMS) { \ + FASTDECODE_SUBMSG(d, ptr, msg, table, hasbits, data, tagbytes, ceil_arg, \ + CARD_##card); \ } #define SIZES(card, tagbytes) \ @@ -3650,9 +3690,11 @@ TAGBYTES(r) #undef TAGBYTES #undef SIZES #undef F +#undef FASTDECODE_SUBMSG #endif /* UPB_FASTTABLE */ -/* This file was generated by upbc (the upb compiler) from the input + +/** bazel-out/k8-fastbuild/bin/external/com_google_protobuf/google/protobuf/descriptor.upb.c ************************************************************//* This file was generated by upbc (the upb compiler) from the input * file: * * google/protobuf/descriptor.proto @@ -4134,7 +4176,8 @@ const upb_msglayout google_protobuf_GeneratedCodeInfo_Annotation_msginit = { }; -/* This file was generated by upbc (the upb compiler) from the input + +/** bazel-out/k8-fastbuild/bin/external/com_google_protobuf/google/protobuf/descriptor.upbdefs.c ************************************************************//* This file was generated by upbc (the upb compiler) from the input * file: * * google/protobuf/descriptor.proto @@ -4519,6 +4562,7 @@ upb_def_init google_protobuf_descriptor_proto_upbdefinit = { 
UPB_STRVIEW_INIT(descriptor, 7601) }; +/** upb/def.c ************************************************************/ #include #include @@ -4556,7 +4600,6 @@ struct upb_fielddef { uint32_t number_; uint16_t index_; uint16_t layout_index; - uint32_t selector_base; /* Used to index into a upb::Handlers table. */ bool is_extension_; bool lazy_; bool packed_; @@ -4569,8 +4612,6 @@ struct upb_msgdef { const upb_msglayout *layout; const upb_filedef *file; const char *full_name; - uint32_t selector_count; - uint32_t submsg_field_count; /* Tables for looking up fields by number and name. */ upb_inttable itof; @@ -4700,30 +4741,6 @@ int cmp_fields(const void *p1, const void *p2) { return field_rank(f1) - field_rank(f2); } -/* A few implementation details of handlers. We put these here to avoid - * a def -> handlers dependency. */ - -#define UPB_STATIC_SELECTOR_COUNT 3 /* Warning: also in upb/handlers.h. */ - -static uint32_t upb_handlers_selectorbaseoffset(const upb_fielddef *f) { - return upb_fielddef_isseq(f) ? 2 : 0; -} - -static uint32_t upb_handlers_selectorcount(const upb_fielddef *f) { - uint32_t ret = 1; - if (upb_fielddef_isseq(f)) ret += 2; /* STARTSEQ/ENDSEQ */ - if (upb_fielddef_isstring(f)) ret += 2; /* [STRING]/STARTSTR/ENDSTR */ - if (upb_fielddef_issubmsg(f)) { - /* ENDSUBMSG (STARTSUBMSG is at table beginning) */ - ret += 0; - if (upb_fielddef_lazy(f)) { - /* STARTSTR/ENDSTR/STRING (for lazy) */ - ret += 3; - } - } - return ret; -} - static void upb_status_setoom(upb_status *status) { upb_status_seterrmsg(status, "out of memory"); } @@ -4815,8 +4832,7 @@ bool upb_enumdef_ntoi(const upb_enumdef *def, const char *name, const char *upb_enumdef_iton(const upb_enumdef *def, int32_t num) { upb_value v; - return upb_inttable_lookup32(&def->iton, num, &v) ? - upb_value_getcstr(v) : NULL; + return upb_inttable_lookup(&def->iton, num, &v) ? 
upb_value_getcstr(v) : NULL; } const char *upb_enum_iter_name(upb_enum_iter *iter) { @@ -4905,10 +4921,6 @@ const char *upb_fielddef_jsonname(const upb_fielddef *f) { return f->json_name; } -uint32_t upb_fielddef_selectorbase(const upb_fielddef *f) { - return f->selector_base; -} - const upb_filedef *upb_fielddef_file(const upb_fielddef *f) { return f->file; } @@ -5071,18 +5083,10 @@ upb_syntax_t upb_msgdef_syntax(const upb_msgdef *m) { return m->file->syntax; } -size_t upb_msgdef_selectorcount(const upb_msgdef *m) { - return m->selector_count; -} - -uint32_t upb_msgdef_submsgfieldcount(const upb_msgdef *m) { - return m->submsg_field_count; -} - const upb_fielddef *upb_msgdef_itof(const upb_msgdef *m, uint32_t i) { upb_value val; - return upb_inttable_lookup32(&m->itof, i, &val) ? - upb_value_getconstptr(val) : NULL; + return upb_inttable_lookup(&m->itof, i, &val) ? upb_value_getconstptr(val) + : NULL; } const upb_fielddef *upb_msgdef_ntof(const upb_msgdef *m, const char *name, @@ -5290,8 +5294,8 @@ const upb_fielddef *upb_oneofdef_ntof(const upb_oneofdef *o, const upb_fielddef *upb_oneofdef_itof(const upb_oneofdef *o, uint32_t num) { upb_value val; - return upb_inttable_lookup32(&o->itof, num, &val) ? - upb_value_getptr(val) : NULL; + return upb_inttable_lookup(&o->itof, num, &val) ? 
upb_value_getptr(val) + : NULL; } void upb_oneof_begin(upb_oneof_iter *iter, const upb_oneofdef *o) { @@ -5371,7 +5375,6 @@ void upb_symtab_free(upb_symtab *s) { upb_symtab *upb_symtab_new(void) { upb_symtab *s = upb_gmalloc(sizeof(*s)); - upb_alloc *alloc; if (!s) { return NULL; @@ -5379,10 +5382,9 @@ upb_symtab *upb_symtab_new(void) { s->arena = upb_arena_new(); s->bytes_loaded = 0; - alloc = upb_arena_alloc(s->arena); - if (!upb_strtable_init2(&s->syms, UPB_CTYPE_CONSTPTR, 32, alloc) || - !upb_strtable_init2(&s->files, UPB_CTYPE_CONSTPTR, 4, alloc)) { + if (!upb_strtable_init(&s->syms, 32, s->arena) || + !upb_strtable_init(&s->files, 4, s->arena)) { upb_arena_free(s->arena); upb_gfree(s); s = NULL; @@ -5438,8 +5440,7 @@ int upb_symtab_filecount(const upb_symtab *s) { typedef struct { upb_symtab *symtab; upb_filedef *file; /* File we are building. */ - upb_arena *file_arena; /* Allocate defs here. */ - upb_alloc *alloc; /* Alloc of file_arena, for tables. */ + upb_arena *arena; /* Allocate defs here. */ const upb_msglayout **layouts; /* NULL if we should build layouts. */ upb_status *status; /* Record errors here. */ jmp_buf err; /* longjmp() on error. */ @@ -5461,7 +5462,7 @@ static void symtab_oomerr(symtab_addctx *ctx) { } void *symtab_alloc(symtab_addctx *ctx, size_t bytes) { - void *ret = upb_arena_malloc(ctx->file_arena, bytes); + void *ret = upb_arena_malloc(ctx->arena, bytes); if (!ret) symtab_oomerr(ctx); return ret; } @@ -5568,13 +5569,21 @@ static void make_layout(symtab_addctx *ctx, const upb_msgdef *m) { upb_msg_field_iter it; upb_msg_oneof_iter oit; size_t hasbit; - size_t submsg_count = m->submsg_field_count; + size_t field_count = upb_msgdef_numfields(m); + size_t submsg_count = 0; const upb_msglayout **submsgs; upb_msglayout_field *fields; memset(l, 0, sizeof(*l) + sizeof(_upb_fasttable_entry)); - fields = symtab_alloc(ctx, upb_msgdef_numfields(m) * sizeof(*fields)); + /* Count sub-messages. 
*/ + for (size_t i = 0; i < field_count; i++) { + if (upb_fielddef_issubmsg(&m->fields[i])) { + submsg_count++; + } + } + + fields = symtab_alloc(ctx, field_count * sizeof(*fields)); submsgs = symtab_alloc(ctx, submsg_count * sizeof(*submsgs)); l->field_count = upb_msgdef_numfields(m); @@ -5725,51 +5734,8 @@ static void make_layout(symtab_addctx *ctx, const upb_msgdef *m) { assign_layout_indices(m, fields); } -static void assign_msg_indices(symtab_addctx *ctx, upb_msgdef *m) { - /* Sort fields. upb internally relies on UPB_TYPE_MESSAGE fields having the - * lowest indexes, but we do not publicly guarantee this. */ - upb_msg_field_iter j; - int i; - uint32_t selector; - int n = upb_msgdef_numfields(m); - upb_fielddef **fields; - - if (n == 0) { - m->selector_count = UPB_STATIC_SELECTOR_COUNT; - m->submsg_field_count = 0; - return; - } - - fields = upb_gmalloc(n * sizeof(*fields)); - - m->submsg_field_count = 0; - for(i = 0, upb_msg_field_begin(&j, m); - !upb_msg_field_done(&j); - upb_msg_field_next(&j), i++) { - upb_fielddef *f = upb_msg_iter_field(&j); - UPB_ASSERT(f->msgdef == m); - if (upb_fielddef_issubmsg(f)) { - m->submsg_field_count++; - } - fields[i] = f; - } - - qsort(fields, n, sizeof(*fields), cmp_fields); - - selector = UPB_STATIC_SELECTOR_COUNT + m->submsg_field_count; - for (i = 0; i < n; i++) { - upb_fielddef *f = fields[i]; - f->index_ = i; - f->selector_base = selector + upb_handlers_selectorbaseoffset(f); - selector += upb_handlers_selectorcount(f); - } - m->selector_count = selector; - - upb_gfree(fields); -} - static char *strviewdup(symtab_addctx *ctx, upb_strview view) { - return upb_strdup2(view.data, view.size, ctx->alloc); + return upb_strdup2(view.data, view.size, ctx->arena); } static bool streql2(const char *a, size_t n, const char *b) { @@ -5880,9 +5846,9 @@ static void symtab_add(symtab_addctx *ctx, const char *name, upb_value v) { if (upb_strtable_lookup(&ctx->symtab->syms, name, NULL)) { symtab_errf(ctx, "duplicate symbol '%s'", 
name); } - upb_alloc *alloc = upb_arena_alloc(ctx->symtab->arena); size_t len = strlen(name); - CHK_OOM(upb_strtable_insert3(&ctx->symtab->syms, name, len, v, alloc)); + CHK_OOM(upb_strtable_insert(&ctx->symtab->syms, name, len, v, + ctx->symtab->arena)); } /* Given a symbol and the base symbol inside which it is defined, find the @@ -5915,7 +5881,8 @@ static const void *symtab_resolve(symtab_addctx *ctx, const upb_fielddef *f, } notfound: - symtab_errf(ctx, "couldn't resolve name '%s'", sym.data); + symtab_errf(ctx, "couldn't resolve name '" UPB_STRVIEW_FORMAT "'", + UPB_STRVIEW_ARGS(sym)); } static void create_oneofdef( @@ -5933,10 +5900,10 @@ static void create_oneofdef( v = pack_def(o, UPB_DEFTYPE_ONEOF); symtab_add(ctx, o->full_name, v); - CHK_OOM(upb_strtable_insert3(&m->ntof, name.data, name.size, v, ctx->alloc)); + CHK_OOM(upb_strtable_insert(&m->ntof, name.data, name.size, v, ctx->arena)); - CHK_OOM(upb_inttable_init2(&o->itof, UPB_CTYPE_CONSTPTR, ctx->alloc)); - CHK_OOM(upb_strtable_init2(&o->ntof, UPB_CTYPE_CONSTPTR, 4, ctx->alloc)); + CHK_OOM(upb_inttable_init(&o->itof, ctx->arena)); + CHK_OOM(upb_strtable_init(&o->ntof, 4, ctx->arena)); } static str_t *newstr(symtab_addctx *ctx, const char *data, size_t len) { @@ -5992,8 +5959,7 @@ static void parse_default(symtab_addctx *ctx, const char *str, size_t len, break; } case UPB_TYPE_INT64: { - /* XXX: Need to write our own strtoll, since it's not available in c89. */ - int64_t val = strtol(str, &end, 0); + long long val = strtoll(str, &end, 0); if (val > INT64_MAX || val < INT64_MIN || errno == ERANGE || *end) { goto invalid; } @@ -6009,8 +5975,7 @@ static void parse_default(symtab_addctx *ctx, const char *str, size_t len, break; } case UPB_TYPE_UINT64: { - /* XXX: Need to write our own strtoull, since it's not available in c89. 
*/ - uint64_t val = strtoul(str, &end, 0); + unsigned long long val = strtoull(str, &end, 0); if (val > UINT64_MAX || errno == ERANGE || *end) { goto invalid; } @@ -6026,8 +5991,7 @@ static void parse_default(symtab_addctx *ctx, const char *str, size_t len, break; } case UPB_TYPE_FLOAT: { - /* XXX: Need to write our own strtof, since it's not available in c89. */ - float val = strtod(str, &end); + float val = strtof(str, &end); if (errno == ERANGE || *end) { goto invalid; } @@ -6093,7 +6057,6 @@ static void set_default_default(symtab_addctx *ctx, upb_fielddef *f) { static void create_fielddef( symtab_addctx *ctx, const char *prefix, upb_msgdef *m, const google_protobuf_FieldDescriptorProto *field_proto) { - upb_alloc *alloc = ctx->alloc; upb_fielddef *f; const google_protobuf_FieldOptions *options; upb_strview name; @@ -6129,7 +6092,8 @@ static void create_fielddef( upb_value v, field_v, json_v; size_t json_size; - f = (upb_fielddef*)&m->fields[m->field_count++]; + f = (upb_fielddef*)&m->fields[m->field_count]; + f->index_ = m->field_count++; f->msgdef = m; f->is_extension_ = false; @@ -6150,12 +6114,12 @@ static void create_fielddef( v = upb_value_constptr(f); json_size = strlen(json_name); - CHK_OOM( - upb_strtable_insert3(&m->ntof, name.data, name.size, field_v, alloc)); - CHK_OOM(upb_inttable_insert2(&m->itof, field_number, v, alloc)); + CHK_OOM(upb_strtable_insert(&m->ntof, name.data, name.size, field_v, + ctx->arena)); + CHK_OOM(upb_inttable_insert(&m->itof, field_number, v, ctx->arena)); if (strcmp(shortname, json_name) != 0) { - upb_strtable_insert3(&m->ntof, json_name, json_size, json_v, alloc); + upb_strtable_insert(&m->ntof, json_name, json_size, json_v, ctx->arena); } if (ctx->layouts) { @@ -6218,15 +6182,16 @@ static void create_fielddef( symtab_errf(ctx, "oneof_index out of range (%s)", f->full_name); } - oneof = (upb_oneofdef*)&m->oneofs[oneof_index]; + oneof = (upb_oneofdef *)&m->oneofs[oneof_index]; f->oneof = oneof; oneof->field_count++; if 
(f->proto3_optional_) { oneof->synthetic = true; } - CHK_OOM(upb_inttable_insert2(&oneof->itof, f->number_, v, alloc)); - CHK_OOM(upb_strtable_insert3(&oneof->ntof, name.data, name.size, v, alloc)); + CHK_OOM(upb_inttable_insert(&oneof->itof, f->number_, v, ctx->arena)); + CHK_OOM( + upb_strtable_insert(&oneof->ntof, name.data, name.size, v, ctx->arena)); } else { f->oneof = NULL; if (f->proto3_optional_) { @@ -6269,8 +6234,8 @@ static void create_enumdef( symtab_add(ctx, e->full_name, pack_def(e, UPB_DEFTYPE_ENUM)); values = google_protobuf_EnumDescriptorProto_value(enum_proto, &n); - CHK_OOM(upb_strtable_init2(&e->ntoi, UPB_CTYPE_INT32, n, ctx->alloc)); - CHK_OOM(upb_inttable_init2(&e->iton, UPB_CTYPE_CSTR, ctx->alloc)); + CHK_OOM(upb_strtable_init(&e->ntoi, n, ctx->arena)); + CHK_OOM(upb_inttable_init(&e->iton, ctx->arena)); e->file = ctx->file; e->defaultval = 0; @@ -6297,16 +6262,15 @@ static void create_enumdef( } CHK_OOM(name2) - CHK_OOM( - upb_strtable_insert3(&e->ntoi, name2, strlen(name2), v, ctx->alloc)); + CHK_OOM(upb_strtable_insert(&e->ntoi, name2, strlen(name2), v, ctx->arena)); if (!upb_inttable_lookup(&e->iton, num, NULL)) { upb_value v = upb_value_cstr(name2); - CHK_OOM(upb_inttable_insert2(&e->iton, num, v, ctx->alloc)); + CHK_OOM(upb_inttable_insert(&e->iton, num, v, ctx->arena)); } } - upb_inttable_compact2(&e->iton, ctx->alloc); + upb_inttable_compact(&e->iton, ctx->arena); } static void create_msgdef(symtab_addctx *ctx, const char *prefix, @@ -6330,9 +6294,8 @@ static void create_msgdef(symtab_addctx *ctx, const char *prefix, oneofs = google_protobuf_DescriptorProto_oneof_decl(msg_proto, &n_oneof); fields = google_protobuf_DescriptorProto_field(msg_proto, &n_field); - CHK_OOM(upb_inttable_init2(&m->itof, UPB_CTYPE_CONSTPTR, ctx->alloc)); - CHK_OOM(upb_strtable_init2(&m->ntof, UPB_CTYPE_CONSTPTR, n_oneof + n_field, - ctx->alloc)); + CHK_OOM(upb_inttable_init(&m->itof, ctx->arena)); + CHK_OOM(upb_strtable_init(&m->ntof, n_oneof + n_field, 
ctx->arena)); m->file = ctx->file; m->map_entry = false; @@ -6364,10 +6327,9 @@ static void create_msgdef(symtab_addctx *ctx, const char *prefix, create_fielddef(ctx, m->full_name, m, fields[i]); } - assign_msg_indices(ctx, m); finalize_oneofs(ctx, m); assign_msg_wellknowntype(m); - upb_inttable_compact2(&m->itof, ctx->alloc); + upb_inttable_compact(&m->itof, ctx->arena); /* This message is built. Now build nested messages and enums. */ @@ -6596,19 +6558,18 @@ static void build_filedef( } static void remove_filedef(upb_symtab *s, upb_filedef *file) { - upb_alloc *alloc = upb_arena_alloc(s->arena); int i; for (i = 0; i < file->msg_count; i++) { const char *name = file->msgs[i].full_name; - upb_strtable_remove3(&s->syms, name, strlen(name), NULL, alloc); + upb_strtable_remove(&s->syms, name, strlen(name), NULL); } for (i = 0; i < file->enum_count; i++) { const char *name = file->enums[i].full_name; - upb_strtable_remove3(&s->syms, name, strlen(name), NULL, alloc); + upb_strtable_remove(&s->syms, name, strlen(name), NULL); } for (i = 0; i < file->ext_count; i++) { const char *name = file->exts[i].full_name; - upb_strtable_remove3(&s->syms, name, strlen(name), NULL, alloc); + upb_strtable_remove(&s->syms, name, strlen(name), NULL); } } @@ -6626,8 +6587,7 @@ static const upb_filedef *_upb_symtab_addfile( ctx.file = file; ctx.symtab = s; - ctx.file_arena = file_arena; - ctx.alloc = upb_arena_alloc(file_arena); + ctx.arena = file_arena; ctx.layouts = layouts; ctx.status = status; @@ -6642,8 +6602,8 @@ static const upb_filedef *_upb_symtab_addfile( file = NULL; } else { build_filedef(&ctx, file, file_proto); - upb_strtable_insert3(&s->files, file->name, strlen(file->name), - upb_value_constptr(file), ctx.alloc); + upb_strtable_insert(&s->files, file->name, strlen(file->name), + upb_value_constptr(file), ctx.arena); UPB_ASSERT(upb_ok(status)); upb_arena_fuse(s->arena, file_arena); } @@ -6717,6 +6677,7 @@ upb_arena *_upb_symtab_arena(const upb_symtab *s) { #undef CHK_OOM 
+/** upb/reflection.c ************************************************************/ #include @@ -6827,40 +6788,7 @@ upb_msgval upb_msg_get(const upb_msg *msg, const upb_fielddef *f) { if (!upb_fielddef_haspresence(f) || upb_msg_has(msg, f)) { return _upb_msg_getraw(msg, f); } else { - /* TODO(haberman): change upb_fielddef to not require this switch(). */ - upb_msgval val = {0}; - switch (upb_fielddef_type(f)) { - case UPB_TYPE_INT32: - case UPB_TYPE_ENUM: - val.int32_val = upb_fielddef_defaultint32(f); - break; - case UPB_TYPE_INT64: - val.int64_val = upb_fielddef_defaultint64(f); - break; - case UPB_TYPE_UINT32: - val.uint32_val = upb_fielddef_defaultuint32(f); - break; - case UPB_TYPE_UINT64: - val.uint64_val = upb_fielddef_defaultuint64(f); - break; - case UPB_TYPE_FLOAT: - val.float_val = upb_fielddef_defaultfloat(f); - break; - case UPB_TYPE_DOUBLE: - val.double_val = upb_fielddef_defaultdouble(f); - break; - case UPB_TYPE_BOOL: - val.bool_val = upb_fielddef_defaultbool(f); - break; - case UPB_TYPE_STRING: - case UPB_TYPE_BYTES: - val.str_val.data = upb_fielddef_defaultstr(f, &val.str_val.size); - break; - case UPB_TYPE_MESSAGE: - val.msg_val = NULL; - break; - } - return val; + return upb_fielddef_default(f); } } @@ -7120,6 +7048,7 @@ upb_msgval upb_mapiter_value(const upb_map *map, size_t iter) { /* void upb_mapiter_setvalue(upb_map *map, size_t iter, upb_msgval value); */ +/** upb/json_decode.c ************************************************************/ #include #include @@ -8030,17 +7959,17 @@ static void jsondec_field(jsondec *d, upb_msg *msg, const upb_msgdef *m) { return; } - if (upb_fielddef_realcontainingoneof(f) && - upb_msg_whichoneof(msg, upb_fielddef_containingoneof(f))) { - jsondec_err(d, "More than one field for this oneof."); - } - if (jsondec_peek(d) == JD_NULL && !jsondec_isvalue(f)) { /* JSON "null" indicates a default value, so no need to set anything. 
*/ jsondec_null(d); return; } + if (upb_fielddef_realcontainingoneof(f) && + upb_msg_whichoneof(msg, upb_fielddef_containingoneof(f))) { + jsondec_err(d, "More than one field for this oneof."); + } + preserved = d->debug_field; d->debug_field = f; @@ -8544,6 +8473,9 @@ bool upb_json_decode(const char *buf, size_t size, upb_msg *msg, const upb_msgdef *m, const upb_symtab *any_pool, int options, upb_arena *arena, upb_status *status) { jsondec d; + + if (size == 0) return true; + d.ptr = buf; d.end = buf + size; d.arena = arena; @@ -8562,6 +8494,7 @@ bool upb_json_decode(const char *buf, size_t size, upb_msg *msg, return true; } +/** upb/json_encode.c ************************************************************/ #include #include @@ -8591,7 +8524,7 @@ static void jsonenc_scalar(jsonenc *e, upb_msgval val, const upb_fielddef *f); static void jsonenc_msgfield(jsonenc *e, const upb_msg *msg, const upb_msgdef *m); static void jsonenc_msgfields(jsonenc *e, const upb_msg *msg, - const upb_msgdef *m); + const upb_msgdef *m, bool first); static void jsonenc_value(jsonenc *e, const upb_msg *msg, const upb_msgdef *m); UPB_NORETURN static void jsonenc_err(jsonenc *e, const char *msg) { @@ -8622,8 +8555,10 @@ static void jsonenc_putbytes(jsonenc *e, const void *data, size_t len) { memcpy(e->ptr, data, len); e->ptr += len; } else { - if (have) memcpy(e->ptr, data, have); - e->ptr += have; + if (have) { + memcpy(e->ptr, data, have); + e->ptr += have; + } e->overflow += (len - have); } } @@ -8645,7 +8580,7 @@ static void jsonenc_printf(jsonenc *e, const char *fmt, ...) 
{ if (UPB_LIKELY(have > n)) { e->ptr += n; } else { - e->ptr += have; + e->ptr = UPB_PTRADD(e->ptr, have); e->overflow += (n - have); } } @@ -8749,7 +8684,7 @@ static void jsonenc_bytes(jsonenc *e, upb_strview str) { static const char base64[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; const unsigned char *ptr = (unsigned char*)str.data; - const unsigned char *end = ptr + str.size; + const unsigned char *end = UPB_PTRADD(ptr, str.size); char buf[4]; jsonenc_putstr(e, "\""); @@ -8785,7 +8720,7 @@ static void jsonenc_bytes(jsonenc *e, upb_strview str) { static void jsonenc_stringbody(jsonenc *e, upb_strview str) { const char *ptr = str.data; - const char *end = ptr + str.size; + const char *end = UPB_PTRADD(ptr, str.size); while (ptr < end) { switch (*ptr) { @@ -8901,14 +8836,13 @@ static void jsonenc_any(jsonenc *e, const upb_msg *msg, const upb_msgdef *m) { jsonenc_putstr(e, "{\"@type\":"); jsonenc_string(e, type_url); - jsonenc_putstr(e, ","); if (upb_msgdef_wellknowntype(any_m) == UPB_WELLKNOWN_UNSPECIFIED) { /* Regular messages: {"@type": "...","foo": 1, "bar": 2} */ - jsonenc_msgfields(e, any, any_m); + jsonenc_msgfields(e, any, any_m, false); } else { /* Well-known type: {"@type": "...","value": } */ - jsonenc_putstr(e, "\"value\":"); + jsonenc_putstr(e, ",\"value\":"); jsonenc_msgfield(e, any, any_m); } @@ -9211,10 +9145,9 @@ static void jsonenc_fieldval(jsonenc *e, const upb_fielddef *f, } static void jsonenc_msgfields(jsonenc *e, const upb_msg *msg, - const upb_msgdef *m) { + const upb_msgdef *m, bool first) { upb_msgval val; const upb_fielddef *f; - bool first = true; if (e->options & UPB_JSONENC_EMITDEFAULTS) { /* Iterate over all fields. 
*/ @@ -9237,7 +9170,7 @@ static void jsonenc_msgfields(jsonenc *e, const upb_msg *msg, static void jsonenc_msg(jsonenc *e, const upb_msg *msg, const upb_msgdef *m) { jsonenc_putstr(e, "{"); - jsonenc_msgfields(e, msg, m); + jsonenc_msgfields(e, msg, m, true); jsonenc_putstr(e, "}"); } @@ -9259,7 +9192,7 @@ size_t upb_json_encode(const upb_msg *msg, const upb_msgdef *m, e.buf = buf; e.ptr = buf; - e.end = buf + size; + e.end = UPB_PTRADD(buf, size); e.overflow = 0; e.options = options; e.ext_pool = ext_pool; @@ -9272,27 +9205,39 @@ size_t upb_json_encode(const upb_msg *msg, const upb_msgdef *m, if (e.arena) upb_arena_free(e.arena); return jsonenc_nullz(&e, size); } + +/** upb/port_undef.inc ************************************************************/ /* See port_def.inc. This should #undef all macros #defined there. */ -#undef UPB_MAPTYPE_STRING #undef UPB_SIZE #undef UPB_PTR_AT #undef UPB_READ_ONEOF #undef UPB_WRITE_ONEOF +#undef UPB_MAPTYPE_STRING #undef UPB_INLINE #undef UPB_ALIGN_UP #undef UPB_ALIGN_DOWN #undef UPB_ALIGN_MALLOC #undef UPB_ALIGN_OF +#undef UPB_LIKELY +#undef UPB_UNLIKELY #undef UPB_FORCEINLINE #undef UPB_NOINLINE #undef UPB_NORETURN +#undef UPB_PRINTF #undef UPB_MAX #undef UPB_MIN #undef UPB_UNUSED #undef UPB_ASSUME #undef UPB_ASSERT #undef UPB_UNREACHABLE +#undef UPB_SETJMP +#undef UPB_LONGJMP +#undef UPB_PTRADD +#undef UPB_MUSTTAIL +#undef UPB_FASTTABLE_SUPPORTED +#undef UPB_FASTTABLE +#undef UPB_FASTTABLE_INIT #undef UPB_POISON_MEMORY_REGION #undef UPB_UNPOISON_MEMORY_REGION #undef UPB_ASAN diff --git a/php/ext/google/protobuf/php-upb.h b/php/ext/google/protobuf/php-upb.h index bd72cd9c08..77a87c7691 100644 --- a/php/ext/google/protobuf/php-upb.h +++ b/php/ext/google/protobuf/php-upb.h @@ -1,26 +1,53 @@ /* Amalgamated source file */ -#include /* -* This is where we define macros used across upb. -* -* All of these macros are undef'd in port_undef.inc to avoid leaking them to -* users. 
-* -* The correct usage is: -* -* #include "upb/foobar.h" -* #include "upb/baz.h" -* -* // MUST be last included header. -* #include "upb/port_def.inc" -* -* // Code for this file. -* // <...> -* -* // Can be omitted for .c files, required for .h. -* #include "upb/port_undef.inc" -* -* This file is private and must not be included by users! -*/ +/* + * Copyright (c) 2009-2021, Google LLC + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Google LLC nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * This is where we define macros used across upb. 
+ * + * All of these macros are undef'd in port_undef.inc to avoid leaking them to + * users. + * + * The correct usage is: + * + * #include "upb/foobar.h" + * #include "upb/baz.h" + * + * // MUST be last included header. + * #include "upb/port_def.inc" + * + * // Code for this file. + * // <...> + * + * // Can be omitted for .c files, required for .h. + * #include "upb/port_undef.inc" + * + * This file is private and must not be included by users! + */ #if !((defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L) || \ (defined(__cplusplus) && __cplusplus >= 201103L) || \ @@ -136,9 +163,40 @@ #define UPB_LONGJMP(buf, val) longjmp(buf, val) #endif +/* UPB_PTRADD(ptr, ofs): add pointer while avoiding "NULL + 0" UB */ +#define UPB_PTRADD(ptr, ofs) ((ofs) ? (ptr) + (ofs) : (ptr)) + /* Configure whether fasttable is switched on or not. *************************/ -#if defined(__x86_64__) && defined(__GNUC__) +#if defined(__has_attribute) +#define UPB_HAS_ATTRIBUTE(x) __has_attribute(x) +#else +#define UPB_HAS_ATTRIBUTE(x) 0 +#endif + +#if UPB_HAS_ATTRIBUTE(musttail) +#define UPB_MUSTTAIL __attribute__((musttail)) +#else +#define UPB_MUSTTAIL +#endif + +#undef UPB_HAS_ATTRIBUTE + +/* This check is not fully robust: it does not require that we have "musttail" + * support available. We need tail calls to avoid consuming arbitrary amounts + * of stack space. + * + * GCC/Clang can mostly be trusted to generate tail calls as long as + * optimization is enabled, but, debug builds will not generate tail calls + * unless "musttail" is available. + * + * We should probably either: + * 1. require that the compiler supports musttail. + * 2. add some fallback code for when musttail isn't available (ie. return + * instead of tail calling). This is safe and portable, but this comes at + * a CPU cost. 
+ */ +#if (defined(__x86_64__) || defined(__aarch64__)) && defined(__GNUC__) #define UPB_FASTTABLE_SUPPORTED 1 #else #define UPB_FASTTABLE_SUPPORTED 0 @@ -149,7 +207,7 @@ * for example for testing or benchmarking. */ #if defined(UPB_ENABLE_FASTTABLE) #if !UPB_FASTTABLE_SUPPORTED -#error fasttable is x86-64 + Clang/GCC only +#error fasttable is x86-64/ARM64 only and requires GCC or Clang. #endif #define UPB_FASTTABLE 1 /* Define UPB_TRY_ENABLE_FASTTABLE to use fasttable if possible. @@ -193,55 +251,36 @@ void __asan_unpoison_memory_region(void const volatile *addr, size_t size); ((void)(addr), (void)(size)) #define UPB_UNPOISON_MEMORY_REGION(addr, size) \ ((void)(addr), (void)(size)) -#endif +#endif + +/** upb/decode.h ************************************************************/ /* -** upb_decode: parsing into a upb_msg using a upb_msglayout. -*/ + * upb_decode: parsing into a upb_msg using a upb_msglayout. + */ #ifndef UPB_DECODE_H_ #define UPB_DECODE_H_ + +/** upb/msg.h ************************************************************/ /* -** Our memory representation for parsing tables and messages themselves. -** Functions in this file are used by generated code and possibly reflection. -** -** The definitions in this file are internal to upb. -**/ + * Public APIs for message operations that do not require descriptors. + * These functions can be used even in build that does not want to depend on + * reflection or descriptors. + * + * Descriptor-based reflection functionality lives in reflection.h. + */ #ifndef UPB_MSG_H_ #define UPB_MSG_H_ -#include -#include -#include - -/* -** upb_table -** -** This header is INTERNAL-ONLY! Its interfaces are not public or stable! -** This file defines very fast int->upb_value (inttable) and string->upb_value -** (strtable) hash tables. -** -** The table uses chained scatter with Brent's variation (inspired by the Lua -** implementation of hash tables). The hash function for strings is Austin -** Appleby's "MurmurHash." 
-** -** The inttable uses uintptr_t as its key, which guarantees it can be used to -** store pointers or integers of at least 32 bits (upb isn't really useful on -** systems where sizeof(void*) < 4). -** -** The table must be homogeneous (all values of the same type). In debug -** mode, we check this on insert and lookup. -*/ +#include -#ifndef UPB_TABLE_H_ -#define UPB_TABLE_H_ -#include -#include +/** upb/upb.h ************************************************************/ /* -** This file contains shared definitions that are widely used across upb. -*/ + * This file contains shared definitions that are widely used across upb. + */ #ifndef UPB_H_ #define UPB_H_ @@ -399,7 +438,7 @@ typedef struct { upb_arena *upb_arena_init(void *mem, size_t n, upb_alloc *alloc); void upb_arena_free(upb_arena *a); bool upb_arena_addcleanup(upb_arena *a, void *ud, upb_cleanup_func *func); -void upb_arena_fuse(upb_arena *a, upb_arena *b); +bool upb_arena_fuse(upb_arena *a, upb_arena *b); void *_upb_arena_slowmalloc(upb_arena *a, size_t size); UPB_INLINE upb_alloc *upb_arena_alloc(upb_arena *a) { return (upb_alloc*)a; } @@ -578,55 +617,134 @@ UPB_INLINE int _upb_lg2ceilsize(int x) { #endif /* UPB_H_ */ +#ifdef __cplusplus +extern "C" { +#endif + +typedef void upb_msg; + +/* For users these are opaque. They can be obtained from upb_msgdef_layout() + * but users cannot access any of the members. */ +struct upb_msglayout; +typedef struct upb_msglayout upb_msglayout; + +/* Adds unknown data (serialized protobuf data) to the given message. The data + * is copied into the message instance. */ +void upb_msg_addunknown(upb_msg *msg, const char *data, size_t len, + upb_arena *arena); + +/* Returns a reference to the message's unknown data. */ +const char *upb_msg_getunknown(const upb_msg *msg, size_t *len); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* UPB_MSG_INT_H_ */ + +/* Must be last. 
*/ #ifdef __cplusplus extern "C" { #endif +enum { + /* If set, strings will alias the input buffer instead of copying into the + * arena. */ + UPB_DECODE_ALIAS = 1, +}; -/* upb_value ******************************************************************/ +#define UPB_DECODE_MAXDEPTH(depth) ((depth) << 16) -/* A tagged union (stored untagged inside the table) so that we can check that - * clients calling table accessors are correctly typed without having to have - * an explosion of accessors. */ -typedef enum { - UPB_CTYPE_INT32 = 1, - UPB_CTYPE_INT64 = 2, - UPB_CTYPE_UINT32 = 3, - UPB_CTYPE_UINT64 = 4, - UPB_CTYPE_BOOL = 5, - UPB_CTYPE_CSTR = 6, - UPB_CTYPE_PTR = 7, - UPB_CTYPE_CONSTPTR = 8, - UPB_CTYPE_FPTR = 9, - UPB_CTYPE_FLOAT = 10, - UPB_CTYPE_DOUBLE = 11 -} upb_ctype_t; +bool _upb_decode(const char *buf, size_t size, upb_msg *msg, + const upb_msglayout *l, upb_arena *arena, int options); + +UPB_INLINE +bool upb_decode(const char *buf, size_t size, upb_msg *msg, + const upb_msglayout *l, upb_arena *arena) { + return _upb_decode(buf, size, msg, l, arena, 0); +} + +#ifdef __cplusplus +} /* extern "C" */ +#endif + + +#endif /* UPB_DECODE_H_ */ + +/** upb/decode_internal.h ************************************************************/ +/* + * Internal implementation details of the decoder that are shared between + * decode.c and decode_fast.c. + */ + +#ifndef UPB_DECODE_INT_H_ +#define UPB_DECODE_INT_H_ + +#include + + +/** upb/msg_internal.h ************************************************************//* +** Our memory representation for parsing tables and messages themselves. +** Functions in this file are used by generated code and possibly reflection. +** +** The definitions in this file are internal to upb. +**/ + +#ifndef UPB_MSG_INT_H_ +#define UPB_MSG_INT_H_ + +#include +#include +#include + + +/** upb/table_internal.h ************************************************************/ +/* + * upb_table + * + * This header is INTERNAL-ONLY! 
Its interfaces are not public or stable! + * This file defines very fast int->upb_value (inttable) and string->upb_value + * (strtable) hash tables. + * + * The table uses chained scatter with Brent's variation (inspired by the Lua + * implementation of hash tables). The hash function for strings is Austin + * Appleby's "MurmurHash." + * + * The inttable uses uintptr_t as its key, which guarantees it can be used to + * store pointers or integers of at least 32 bits (upb isn't really useful on + * systems where sizeof(void*) < 4). + * + * The table must be homogeneous (all values of the same type). In debug + * mode, we check this on insert and lookup. + */ + +#ifndef UPB_TABLE_H_ +#define UPB_TABLE_H_ + +#include +#include + + +#ifdef __cplusplus +extern "C" { +#endif + + +/* upb_value ******************************************************************/ typedef struct { uint64_t val; } upb_value; -/* Like strdup(), which isn't always available since it's not ANSI C. */ -char *upb_strdup(const char *s, upb_alloc *a); /* Variant that works with a length-delimited rather than NULL-delimited string, * as supported by strtable. */ -char *upb_strdup2(const char *s, size_t len, upb_alloc *a); - -UPB_INLINE char *upb_gstrdup(const char *s) { - return upb_strdup(s, &upb_alloc_global); -} +char *upb_strdup2(const char *s, size_t len, upb_arena *a); UPB_INLINE void _upb_value_setval(upb_value *v, uint64_t val) { v->val = val; } -UPB_INLINE upb_value _upb_value_val(uint64_t val) { - upb_value ret; - _upb_value_setval(&ret, val); - return ret; -} - /* For each value ctype, define the following set of functions: * * // Get/set an int32 from a upb_value. @@ -734,14 +852,7 @@ typedef struct { uint32_t mask; /* Mask to turn hash value -> bucket. */ uint32_t max_count; /* Max count before we hit our load limit. */ uint8_t size_lg2; /* Size of the hashtable part is 2^size_lg2 entries. */ - - /* Hash table entries. 
- * Making this const isn't entirely accurate; what we really want is for it to - * have the same const-ness as the table it's inside. But there's no way to - * declare that in C. So we have to make it const so that we can statically - * initialize const hash tables. Then we cast away const when we have to. - */ - const upb_tabent *entries; + upb_tabent *entries; } upb_table; typedef struct { @@ -755,8 +866,6 @@ typedef struct { size_t array_count; /* Array part number of elements. */ } upb_inttable; -#define UPB_ARRAY_EMPTYENT -1 - UPB_INLINE size_t upb_table_size(const upb_table *t) { if (t->size_lg2 == 0) return 0; @@ -769,48 +878,10 @@ UPB_INLINE bool upb_tabent_isempty(const upb_tabent *e) { return e->key == 0; } -/* Used by some of the unit tests for generic hashing functionality. */ -uint32_t upb_murmur_hash2(const void * key, size_t len, uint32_t seed); - -UPB_INLINE uintptr_t upb_intkey(uintptr_t key) { - return key; -} - -UPB_INLINE uint32_t upb_inthash(uintptr_t key) { - return (uint32_t)key; -} - -static const upb_tabent *upb_getentry(const upb_table *t, uint32_t hash) { - return t->entries + (hash & t->mask); -} - -UPB_INLINE bool upb_arrhas(upb_tabval key) { - return key.val != (uint64_t)-1; -} - /* Initialize and uninitialize a table, respectively. If memory allocation * failed, false is returned that the table is uninitialized. 
*/ -bool upb_inttable_init2(upb_inttable *table, upb_ctype_t ctype, upb_alloc *a); -bool upb_strtable_init2(upb_strtable *table, upb_ctype_t ctype, - size_t expected_size, upb_alloc *a); -void upb_inttable_uninit2(upb_inttable *table, upb_alloc *a); -void upb_strtable_uninit2(upb_strtable *table, upb_alloc *a); - -UPB_INLINE bool upb_inttable_init(upb_inttable *table, upb_ctype_t ctype) { - return upb_inttable_init2(table, ctype, &upb_alloc_global); -} - -UPB_INLINE bool upb_strtable_init(upb_strtable *table, upb_ctype_t ctype) { - return upb_strtable_init2(table, ctype, 4, &upb_alloc_global); -} - -UPB_INLINE void upb_inttable_uninit(upb_inttable *table) { - upb_inttable_uninit2(table, &upb_alloc_global); -} - -UPB_INLINE void upb_strtable_uninit(upb_strtable *table) { - upb_strtable_uninit2(table, &upb_alloc_global); -} +bool upb_inttable_init(upb_inttable *table, upb_arena *a); +bool upb_strtable_init(upb_strtable *table, size_t expected_size, upb_arena *a); /* Returns the number of values in the table. */ size_t upb_inttable_count(const upb_inttable *t); @@ -818,12 +889,6 @@ UPB_INLINE size_t upb_strtable_count(const upb_strtable *t) { return t->t.count; } -void upb_inttable_packedsize(const upb_inttable *t, size_t *size); -void upb_strtable_packedsize(const upb_strtable *t, size_t *size); -upb_inttable *upb_inttable_pack(const upb_inttable *t, void *p, size_t *ofs, - size_t size); -upb_strtable *upb_strtable_pack(const upb_strtable *t, void *p, size_t *ofs, - size_t size); void upb_strtable_clear(upb_strtable *t); /* Inserts the given key into the hashtable with the given value. The key must @@ -833,26 +898,10 @@ void upb_strtable_clear(upb_strtable *t); * * If a table resize was required but memory allocation failed, false is * returned and the table is unchanged. 
*/ -bool upb_inttable_insert2(upb_inttable *t, uintptr_t key, upb_value val, - upb_alloc *a); -bool upb_strtable_insert3(upb_strtable *t, const char *key, size_t len, - upb_value val, upb_alloc *a); - -UPB_INLINE bool upb_inttable_insert(upb_inttable *t, uintptr_t key, - upb_value val) { - return upb_inttable_insert2(t, key, val, &upb_alloc_global); -} - -UPB_INLINE bool upb_strtable_insert2(upb_strtable *t, const char *key, - size_t len, upb_value val) { - return upb_strtable_insert3(t, key, len, val, &upb_alloc_global); -} - -/* For NULL-terminated strings. */ -UPB_INLINE bool upb_strtable_insert(upb_strtable *t, const char *key, - upb_value val) { - return upb_strtable_insert2(t, key, strlen(key), val); -} +bool upb_inttable_insert(upb_inttable *t, uintptr_t key, upb_value val, + upb_arena *a); +bool upb_strtable_insert(upb_strtable *t, const char *key, size_t len, + upb_value val, upb_arena *a); /* Looks up key in this table, returning "true" if the key was found. * If v is non-NULL, copies the value for this key into *v. */ @@ -869,74 +918,21 @@ UPB_INLINE bool upb_strtable_lookup(const upb_strtable *t, const char *key, /* Removes an item from the table. Returns true if the remove was successful, * and stores the removed item in *val if non-NULL. */ bool upb_inttable_remove(upb_inttable *t, uintptr_t key, upb_value *val); -bool upb_strtable_remove3(upb_strtable *t, const char *key, size_t len, - upb_value *val, upb_alloc *alloc); - -UPB_INLINE bool upb_strtable_remove2(upb_strtable *t, const char *key, - size_t len, upb_value *val) { - return upb_strtable_remove3(t, key, len, val, &upb_alloc_global); -} - -/* For NULL-terminated strings. */ -UPB_INLINE bool upb_strtable_remove(upb_strtable *t, const char *key, - upb_value *v) { - return upb_strtable_remove2(t, key, strlen(key), v); -} +bool upb_strtable_remove(upb_strtable *t, const char *key, size_t len, + upb_value *val); /* Updates an existing entry in an inttable. 
If the entry does not exist, * returns false and does nothing. Unlike insert/remove, this does not * invalidate iterators. */ bool upb_inttable_replace(upb_inttable *t, uintptr_t key, upb_value val); -/* Convenience routines for inttables with pointer keys. */ -bool upb_inttable_insertptr2(upb_inttable *t, const void *key, upb_value val, - upb_alloc *a); -bool upb_inttable_removeptr(upb_inttable *t, const void *key, upb_value *val); -bool upb_inttable_lookupptr( - const upb_inttable *t, const void *key, upb_value *val); - -UPB_INLINE bool upb_inttable_insertptr(upb_inttable *t, const void *key, - upb_value val) { - return upb_inttable_insertptr2(t, key, val, &upb_alloc_global); -} - /* Optimizes the table for the current set of entries, for both memory use and * lookup time. Client should call this after all entries have been inserted; * inserting more entries is legal, but will likely require a table resize. */ -void upb_inttable_compact2(upb_inttable *t, upb_alloc *a); - -UPB_INLINE void upb_inttable_compact(upb_inttable *t) { - upb_inttable_compact2(t, &upb_alloc_global); -} - -/* A special-case inlinable version of the lookup routine for 32-bit - * integers. */ -UPB_INLINE bool upb_inttable_lookup32(const upb_inttable *t, uint32_t key, - upb_value *v) { - *v = upb_value_int32(0); /* Silence compiler warnings. */ - if (key < t->array_size) { - upb_tabval arrval = t->array[key]; - if (upb_arrhas(arrval)) { - _upb_value_setval(v, arrval.val); - return true; - } else { - return false; - } - } else { - const upb_tabent *e; - if (t->t.entries == NULL) return false; - for (e = upb_getentry(&t->t, upb_inthash(key)); true; e = e->next) { - if ((uint32_t)e->key == key) { - _upb_value_setval(v, e->val.val); - return true; - } - if (e->next == NULL) return false; - } - } -} +void upb_inttable_compact(upb_inttable *t, upb_arena *a); /* Exposed for testing only. 
*/ -bool upb_strtable_resize(upb_strtable *t, size_t size_lg2, upb_alloc *a); +bool upb_strtable_resize(upb_strtable *t, size_t size_lg2, upb_arena *a); /* Iterators ******************************************************************/ @@ -1032,10 +1028,6 @@ bool upb_inttable_iter_isequal(const upb_inttable_iter *i1, extern "C" { #endif -#define PTR_AT(msg, ofs, type) (type*)((const char*)msg + ofs) - -typedef void upb_msg; - /** upb_msglayout *************************************************************/ /* upb_msglayout represents the memory layout of a given upb_msgdef. The @@ -1070,7 +1062,7 @@ typedef struct { _upb_field_parser *field_parser; } _upb_fasttable_entry; -typedef struct upb_msglayout { +struct upb_msglayout { const struct upb_msglayout *const* submsgs; const upb_msglayout_field *fields; /* Must be aligned to sizeof(void*). Doesn't include internal members like @@ -1082,7 +1074,7 @@ typedef struct upb_msglayout { /* To constant-initialize the tables of variable length, we need a flexible * array member, and we need to compile in C99 mode. */ _upb_fasttable_entry fasttable[]; -} upb_msglayout; +}; /** upb_msg *******************************************************************/ @@ -1137,21 +1129,18 @@ void _upb_msg_discardunknown_shallow(upb_msg *msg); bool _upb_msg_addunknown(upb_msg *msg, const char *data, size_t len, upb_arena *arena); -/* Returns a reference to the message's unknown data. 
*/ -const char *upb_msg_getunknown(const upb_msg *msg, size_t *len); - /** Hasbit access *************************************************************/ UPB_INLINE bool _upb_hasbit(const upb_msg *msg, size_t idx) { - return (*PTR_AT(msg, idx / 8, const char) & (1 << (idx % 8))) != 0; + return (*UPB_PTR_AT(msg, idx / 8, const char) & (1 << (idx % 8))) != 0; } UPB_INLINE void _upb_sethas(const upb_msg *msg, size_t idx) { - (*PTR_AT(msg, idx / 8, char)) |= (char)(1 << (idx % 8)); + (*UPB_PTR_AT(msg, idx / 8, char)) |= (char)(1 << (idx % 8)); } UPB_INLINE void _upb_clearhas(const upb_msg *msg, size_t idx) { - (*PTR_AT(msg, idx / 8, char)) &= (char)(~(1 << (idx % 8))); + (*UPB_PTR_AT(msg, idx / 8, char)) &= (char)(~(1 << (idx % 8))); } UPB_INLINE size_t _upb_msg_hasidx(const upb_msglayout_field *f) { @@ -1177,11 +1166,11 @@ UPB_INLINE void _upb_clearhas_field(const upb_msg *msg, /** Oneof case access *********************************************************/ UPB_INLINE uint32_t *_upb_oneofcase(upb_msg *msg, size_t case_ofs) { - return PTR_AT(msg, case_ofs, uint32_t); + return UPB_PTR_AT(msg, case_ofs, uint32_t); } UPB_INLINE uint32_t _upb_getoneofcase(const void *msg, size_t case_ofs) { - return *PTR_AT(msg, case_ofs, uint32_t); + return *UPB_PTR_AT(msg, case_ofs, uint32_t); } UPB_INLINE size_t _upb_oneofcase_ofs(const upb_msglayout_field *f) { @@ -1200,7 +1189,7 @@ UPB_INLINE uint32_t _upb_getoneofcase_field(const upb_msg *msg, } UPB_INLINE bool _upb_has_submsg_nohasbit(const upb_msg *msg, size_t ofs) { - return *PTR_AT(msg, ofs, const upb_msg*) != NULL; + return *UPB_PTR_AT(msg, ofs, const upb_msg*) != NULL; } UPB_INLINE bool _upb_isrepeated(const upb_msglayout_field *field) { @@ -1277,7 +1266,7 @@ UPB_INLINE bool _upb_array_resize(upb_array *arr, size_t size, UPB_INLINE const void *_upb_array_accessor(const void *msg, size_t ofs, size_t *size) { - const upb_array *arr = *PTR_AT(msg, ofs, const upb_array*); + const upb_array *arr = *UPB_PTR_AT(msg, ofs, const 
upb_array*); if (arr) { if (size) *size = arr->len; return _upb_array_constptr(arr); @@ -1289,7 +1278,7 @@ UPB_INLINE const void *_upb_array_accessor(const void *msg, size_t ofs, UPB_INLINE void *_upb_array_mutable_accessor(void *msg, size_t ofs, size_t *size) { - upb_array *arr = *PTR_AT(msg, ofs, upb_array*); + upb_array *arr = *UPB_PTR_AT(msg, ofs, upb_array*); if (arr) { if (size) *size = arr->len; return _upb_array_ptr(arr); @@ -1302,7 +1291,7 @@ UPB_INLINE void *_upb_array_mutable_accessor(void *msg, size_t ofs, UPB_INLINE void *_upb_array_resize_accessor2(void *msg, size_t ofs, size_t size, int elem_size_lg2, upb_arena *arena) { - upb_array **arr_ptr = PTR_AT(msg, ofs, upb_array *); + upb_array **arr_ptr = UPB_PTR_AT(msg, ofs, upb_array *); upb_array *arr = *arr_ptr; if (!arr || arr->size < size) { return _upb_array_resize_fallback(arr_ptr, size, elem_size_lg2, arena); @@ -1315,7 +1304,7 @@ UPB_INLINE bool _upb_array_append_accessor2(void *msg, size_t ofs, int elem_size_lg2, const void *value, upb_arena *arena) { - upb_array **arr_ptr = PTR_AT(msg, ofs, upb_array *); + upb_array **arr_ptr = UPB_PTR_AT(msg, ofs, upb_array *); size_t elem_size = 1 << elem_size_lg2; upb_array *arr = *arr_ptr; void *ptr; @@ -1323,7 +1312,7 @@ UPB_INLINE bool _upb_array_append_accessor2(void *msg, size_t ofs, return _upb_array_append_fallback(arr_ptr, value, elem_size_lg2, arena); } ptr = _upb_array_ptr(arr); - memcpy(PTR_AT(ptr, arr->len * elem_size, char), value, elem_size); + memcpy(UPB_PTR_AT(ptr, arr->len * elem_size, char), value, elem_size); arr->len++; return true; } @@ -1470,20 +1459,19 @@ UPB_INLINE void* _upb_map_next(const upb_map *map, size_t *iter) { } UPB_INLINE bool _upb_map_set(upb_map *map, const void *key, size_t key_size, - void *val, size_t val_size, upb_arena *arena) { + void *val, size_t val_size, upb_arena *a) { upb_strview strkey = _upb_map_tokey(key, key_size); upb_value tabval = {0}; - if (!_upb_map_tovalue(val, val_size, &tabval, arena)) return false; 
- upb_alloc *a = upb_arena_alloc(arena); + if (!_upb_map_tovalue(val, val_size, &tabval, a)) return false; /* TODO(haberman): add overwrite operation to minimize number of lookups. */ - upb_strtable_remove3(&map->table, strkey.data, strkey.size, NULL, a); - return upb_strtable_insert3(&map->table, strkey.data, strkey.size, tabval, a); + upb_strtable_remove(&map->table, strkey.data, strkey.size, NULL); + return upb_strtable_insert(&map->table, strkey.data, strkey.size, tabval, a); } UPB_INLINE bool _upb_map_delete(upb_map *map, const void *key, size_t key_size) { upb_strview k = _upb_map_tokey(key, key_size); - return upb_strtable_remove3(&map->table, k.data, k.size, NULL, NULL); + return upb_strtable_remove(&map->table, k.data, k.size, NULL); } UPB_INLINE void _upb_map_clear(upb_map *map) { @@ -1515,7 +1503,7 @@ UPB_INLINE void *_upb_msg_map_next(const upb_msg *msg, size_t ofs, UPB_INLINE bool _upb_msg_map_set(upb_msg *msg, size_t ofs, const void *key, size_t key_size, void *val, size_t val_size, upb_arena *arena) { - upb_map **map = PTR_AT(msg, ofs, upb_map *); + upb_map **map = UPB_PTR_AT(msg, ofs, upb_map *); if (!*map) { *map = _upb_map_new(arena, key_size, val_size); } @@ -1548,8 +1536,7 @@ UPB_INLINE void _upb_msg_map_key(const void* msg, void* key, size_t size) { UPB_INLINE void _upb_msg_map_value(const void* msg, void* val, size_t size) { const upb_tabent *ent = (const upb_tabent*)msg; - upb_value v; - _upb_value_setval(&v, ent->val.val); + upb_value v = {ent->val.val}; _upb_map_fromvalue(v, val, size); } @@ -1612,55 +1599,14 @@ UPB_INLINE bool _upb_sortedmap_next(_upb_mapsorter *s, const upb_map *map, return true; } -#undef PTR_AT - -#ifdef __cplusplus -} /* extern "C" */ -#endif - - -#endif /* UPB_MSG_H_ */ - -/* Must be last. */ - -#ifdef __cplusplus -extern "C" { -#endif - -enum { - /* If set, strings will alias the input buffer instead of copying into the - * arena. 
*/ - UPB_DECODE_ALIAS = 1, -}; - -#define UPB_DECODE_MAXDEPTH(depth) ((depth) << 16) - -bool _upb_decode(const char *buf, size_t size, upb_msg *msg, - const upb_msglayout *l, upb_arena *arena, int options); - -UPB_INLINE -bool upb_decode(const char *buf, size_t size, upb_msg *msg, - const upb_msglayout *l, upb_arena *arena) { - return _upb_decode(buf, size, msg, l, arena, 0); -} - #ifdef __cplusplus } /* extern "C" */ #endif -#endif /* UPB_DECODE_H_ */ -/* -** Internal implementation details of the decoder that are shared between -** decode.c and decode_fast.c. -*/ - -#ifndef UPB_DECODE_INT_H_ -#define UPB_DECODE_INT_H_ - -#include - +#endif /* UPB_MSG_INT_H_ */ +/** upb/upb_internal.h ************************************************************/ #ifndef UPB_INT_H_ #define UPB_INT_H_ @@ -1670,7 +1616,10 @@ typedef struct mem_block mem_block; struct upb_arena { _upb_arena_head head; - uint32_t *cleanups; + /* Stores cleanup metadata for this arena. + * - a pointer to the current cleanup counter. + * - a boolean indicating if there is an unowned initial block. */ + uintptr_t cleanup_metadata; /* Allocator to allocate arena blocks. We are responsible for freeing these * when we are destroyed. 
*/ @@ -1792,10 +1741,11 @@ bool decode_isdone(upb_decstate *d, const char **ptr) { } } +#if UPB_FASTTABLE UPB_INLINE const char *fastdecode_tagdispatch(upb_decstate *d, const char *ptr, upb_msg *msg, intptr_t table, - uint64_t hasbits, uint32_t tag) { + uint64_t hasbits, uint64_t tag) { const upb_msglayout *table_p = decode_totablep(table); uint8_t mask = table; uint64_t data; @@ -1803,8 +1753,10 @@ const char *fastdecode_tagdispatch(upb_decstate *d, const char *ptr, UPB_ASSUME((idx & 7) == 0); idx >>= 3; data = table_p->fasttable[idx].field_data ^ tag; - return table_p->fasttable[idx].field_parser(d, ptr, msg, table, hasbits, data); + UPB_MUSTTAIL return table_p->fasttable[idx].field_parser(d, ptr, msg, table, + hasbits, data); } +#endif UPB_INLINE uint32_t fastdecode_loadtag(const char* ptr) { uint16_t tag; @@ -1837,9 +1789,11 @@ UPB_INLINE void decode_poplimit(upb_decstate *d, const char *ptr, #endif /* UPB_DECODE_INT_H_ */ + +/** upb/encode.h ************************************************************/ /* -** upb_encode: parsing into a upb_msg using a upb_msglayout. -*/ + * upb_encode: parsing into a upb_msg using a upb_msglayout. + */ #ifndef UPB_ENCODE_H_ #define UPB_ENCODE_H_ @@ -1880,6 +1834,8 @@ UPB_INLINE char *upb_encode(const void *msg, const upb_msglayout *l, #endif #endif /* UPB_ENCODE_H_ */ + +/** upb/decode_fast.h ************************************************************/ // These are the specialized field parser functions for the fast parser. // Generated tables will refer to these by name. 
// @@ -2005,7 +1961,8 @@ TAGBYTES(r) #undef UPB_PARSE_PARAMS #endif /* UPB_DECODE_FAST_H_ */ -/* This file was generated by upbc (the upb compiler) from the input + +/** google/protobuf/descriptor.upb.h ************************************************************//* This file was generated by upbc (the upb compiler) from the input * file: * * google/protobuf/descriptor.proto @@ -3884,18 +3841,20 @@ UPB_INLINE void google_protobuf_GeneratedCodeInfo_Annotation_set_end(google_prot #endif /* GOOGLE_PROTOBUF_DESCRIPTOR_PROTO_UPB_H_ */ + +/** upb/def.h ************************************************************/ /* -** Defs are upb's internal representation of the constructs that can appear -** in a .proto file: -** -** - upb_msgdef: describes a "message" construct. -** - upb_fielddef: describes a message field. -** - upb_filedef: describes a .proto file and its defs. -** - upb_enumdef: describes an enum. -** - upb_oneofdef: describes a oneof. -** -** TODO: definitions of services. -*/ + * Defs are upb's internal representation of the constructs that can appear + * in a .proto file: + * + * - upb_msgdef: describes a "message" construct. + * - upb_fielddef: describes a message field. + * - upb_filedef: describes a .proto file and its defs. + * - upb_enumdef: describes an enum. + * - upb_oneofdef: describes a oneof. + * + * TODO: definitions of services. + */ #ifndef UPB_DEF_H_ #define UPB_DEF_H_ @@ -3991,9 +3950,6 @@ const upb_msgdef *upb_fielddef_msgsubdef(const upb_fielddef *f); const upb_enumdef *upb_fielddef_enumsubdef(const upb_fielddef *f); const upb_msglayout_field *upb_fielddef_layout(const upb_fielddef *f); -/* Internal only. 
*/ -uint32_t upb_fielddef_selectorbase(const upb_fielddef *f); - /* upb_oneofdef ***************************************************************/ typedef upb_inttable_iter upb_oneof_iter; @@ -4078,10 +4034,6 @@ UPB_INLINE const upb_fielddef *upb_msgdef_ntofz(const upb_msgdef *m, return upb_msgdef_ntof(m, name, strlen(name)); } -/* Internal-only. */ -size_t upb_msgdef_selectorcount(const upb_msgdef *m); -uint32_t upb_msgdef_submsgfieldcount(const upb_msgdef *m); - /* Lookup of either field or oneof by name. Returns whether either was found. * If the return is true, then the found def will be set, and the non-found * one set to NULL. */ @@ -4196,7 +4148,8 @@ bool _upb_symtab_loaddefinit(upb_symtab *s, const upb_def_init *init); #endif /* __cplusplus */ #endif /* UPB_DEF_H_ */ -/* This file was generated by upbc (the upb compiler) from the input + +/** google/protobuf/descriptor.upbdefs.h ************************************************************//* This file was generated by upbc (the upb compiler) from the input * file: * * google/protobuf/descriptor.proto @@ -4357,6 +4310,7 @@ UPB_INLINE const upb_msgdef *google_protobuf_GeneratedCodeInfo_Annotation_getmsg #endif /* GOOGLE_PROTOBUF_DESCRIPTOR_PROTO_UPBDEFS_H_ */ +/** upb/reflection.h ************************************************************/ #ifndef UPB_REFLECTION_H_ #define UPB_REFLECTION_H_ @@ -4438,17 +4392,9 @@ bool upb_msg_next(const upb_msg *msg, const upb_msgdef *m, const upb_symtab *ext_pool, const upb_fielddef **f, upb_msgval *val, size_t *iter); -/* Adds unknown data (serialized protobuf data) to the given message. The data - * is copied into the message instance. */ -void upb_msg_addunknown(upb_msg *msg, const char *data, size_t len, - upb_arena *arena); - /* Clears all unknown field data from this message and all submessages. */ bool upb_msg_discardunknown(upb_msg *msg, const upb_msgdef *m, int maxdepth); -/* Returns a reference to the message's unknown data. 
*/ -const char *upb_msg_getunknown(const upb_msg *msg, size_t *len); - /** upb_array *****************************************************************/ /* Creates a new array on the given arena that holds elements of this type. */ @@ -4530,6 +4476,7 @@ void upb_mapiter_setvalue(upb_map *map, size_t iter, upb_msgval value); #endif /* UPB_REFLECTION_H_ */ +/** upb/json_decode.h ************************************************************/ #ifndef UPB_JSONDECODE_H_ #define UPB_JSONDECODE_H_ @@ -4552,6 +4499,7 @@ bool upb_json_decode(const char *buf, size_t size, upb_msg *msg, #endif /* UPB_JSONDECODE_H_ */ +/** upb/json_encode.h ************************************************************/ #ifndef UPB_JSONENCODE_H_ #define UPB_JSONENCODE_H_ @@ -4586,27 +4534,39 @@ size_t upb_json_encode(const upb_msg *msg, const upb_msgdef *m, #endif #endif /* UPB_JSONENCODE_H_ */ + +/** upb/port_undef.inc ************************************************************/ /* See port_def.inc. This should #undef all macros #defined there. 
*/ -#undef UPB_MAPTYPE_STRING #undef UPB_SIZE #undef UPB_PTR_AT #undef UPB_READ_ONEOF #undef UPB_WRITE_ONEOF +#undef UPB_MAPTYPE_STRING #undef UPB_INLINE #undef UPB_ALIGN_UP #undef UPB_ALIGN_DOWN #undef UPB_ALIGN_MALLOC #undef UPB_ALIGN_OF +#undef UPB_LIKELY +#undef UPB_UNLIKELY #undef UPB_FORCEINLINE #undef UPB_NOINLINE #undef UPB_NORETURN +#undef UPB_PRINTF #undef UPB_MAX #undef UPB_MIN #undef UPB_UNUSED #undef UPB_ASSUME #undef UPB_ASSERT #undef UPB_UNREACHABLE +#undef UPB_SETJMP +#undef UPB_LONGJMP +#undef UPB_PTRADD +#undef UPB_MUSTTAIL +#undef UPB_FASTTABLE_SUPPORTED +#undef UPB_FASTTABLE +#undef UPB_FASTTABLE_INIT #undef UPB_POISON_MEMORY_REGION #undef UPB_UNPOISON_MEMORY_REGION #undef UPB_ASAN diff --git a/php/src/Google/Protobuf/Internal/Message.php b/php/src/Google/Protobuf/Internal/Message.php index e74943c1ab..19b48f0b50 100644 --- a/php/src/Google/Protobuf/Internal/Message.php +++ b/php/src/Google/Protobuf/Internal/Message.php @@ -240,10 +240,14 @@ class Message $field = $this->desc->getFieldByNumber($number); $oneof = $this->desc->getOneofDecl()[$field->getOneofIndex()]; $oneof_name = $oneof->getName(); - $oneof_field = $this->$oneof_name; - $oneof_field->setValue($value); - $oneof_field->setFieldName($field->getName()); - $oneof_field->setNumber($number); + if ($value === null) { + $this->$oneof_name = new OneofField($oneof); + } else { + $oneof_field = $this->$oneof_name; + $oneof_field->setValue($value); + $oneof_field->setFieldName($field->getName()); + $oneof_field->setNumber($number); + } } protected function whichOneof($oneof_name) diff --git a/php/src/Google/Protobuf/Internal/RepeatedField.php b/php/src/Google/Protobuf/Internal/RepeatedField.php index 350bbb592e..c0331ff38e 100644 --- a/php/src/Google/Protobuf/Internal/RepeatedField.php +++ b/php/src/Google/Protobuf/Internal/RepeatedField.php @@ -177,8 +177,7 @@ class RepeatedField implements \ArrayAccess, \IteratorAggregate, \Countable break; case GPBType::MESSAGE: if (is_null($value)) { - 
trigger_error("RepeatedField element cannot be null.", - E_USER_ERROR); + throw new \TypeError("RepeatedField element cannot be null."); } GPBUtil::checkMessage($value, $this->klass); break; diff --git a/php/tests/ArrayTest.php b/php/tests/ArrayTest.php index b687085299..9e8fcb8bea 100644 --- a/php/tests/ArrayTest.php +++ b/php/tests/ArrayTest.php @@ -602,6 +602,17 @@ class ArrayTest extends TestBase $this->assertLessThan($start, $end); } + ######################################################### + # Test incorrect types + ######################################################### + + public function testAppendNull() + { + $this->expectException(TypeError::class); + $arr = new RepeatedField(GPBType::MESSAGE, TestMessage::class); + $arr[] = null; + } + ######################################################### # Test equality ######################################################### diff --git a/php/tests/EncodeDecodeTest.php b/php/tests/EncodeDecodeTest.php index 273010e2ed..ac01ca17a3 100644 --- a/php/tests/EncodeDecodeTest.php +++ b/php/tests/EncodeDecodeTest.php @@ -940,6 +940,14 @@ class EncodeDecodeTest extends TestBase $this->expectFields($to); } + public function testJsonEncodeNullSubMessage() + { + $from = new TestMessage(); + $from->setOptionalMessage(null); + $data = $from->serializeToJsonString(); + $this->assertEquals("{}", $data); + } + public function testDecodeDuration() { $m = new Google\Protobuf\Duration(); diff --git a/php/tests/GeneratedClassTest.php b/php/tests/GeneratedClassTest.php index 5c0f0c70d0..9e176e8341 100644 --- a/php/tests/GeneratedClassTest.php +++ b/php/tests/GeneratedClassTest.php @@ -476,10 +476,12 @@ class GeneratedClassTest extends TestBase $sub_m->setA(1); $m->setOptionalMessage($sub_m); $this->assertSame(1, $m->getOptionalMessage()->getA()); + $this->assertTrue($m->hasOptionalMessage()); $null = null; $m->setOptionalMessage($null); $this->assertNull($m->getOptionalMessage()); + $this->assertFalse($m->hasOptionalMessage()); } 
public function testLegacyMessageField() @@ -1748,6 +1750,13 @@ class GeneratedClassTest extends TestBase $m->clear(); $this->assertFalse($m->hasOneofInt32()); $this->assertFalse($m->hasOneofString()); + + $sub_m = new Sub(); + $sub_m->setA(1); + $m->setOneofMessage($sub_m); + $this->assertTrue($m->hasOneofMessage()); + $m->setOneofMessage(null); + $this->assertFalse($m->hasOneofMessage()); } ######################################################### diff --git a/protobuf.bzl b/protobuf.bzl index a47711c7f4..9716128121 100644 --- a/protobuf.bzl +++ b/protobuf.bzl @@ -190,13 +190,13 @@ proto_gen = rule( "deps": attr.label_list(providers = ["proto"]), "includes": attr.string_list(), "protoc": attr.label( - cfg = "host", + cfg = "exec", executable = True, allow_single_file = True, mandatory = True, ), "plugin": attr.label( - cfg = "host", + cfg = "exec", allow_files = True, executable = True, ), @@ -378,7 +378,7 @@ internal_gen_well_known_protos_java = rule( ), "_protoc": attr.label( executable = True, - cfg = "host", + cfg = "exec", default = "@com_google_protobuf//:protoc", ), }, diff --git a/python/google/protobuf/pyext/message.cc b/python/google/protobuf/pyext/message.cc index 55a5f741e0..125df32b9e 100644 --- a/python/google/protobuf/pyext/message.cc +++ b/python/google/protobuf/pyext/message.cc @@ -1564,12 +1564,17 @@ static int InternalReparentFields( to_release); } - GOOGLE_CHECK_EQ(self->message->GetArena(), new_message->message->GetArena()); - - MessageReflectionFriend::UnsafeShallowSwapFields( - self->message, new_message->message, - std::vector(fields_to_swap.begin(), - fields_to_swap.end())); + if (self->message->GetArena() == new_message->message->GetArena()) { + MessageReflectionFriend::UnsafeShallowSwapFields( + self->message, new_message->message, + std::vector(fields_to_swap.begin(), + fields_to_swap.end())); + } else { + self->message->GetReflection()->SwapFields( + self->message, new_message->message, + std::vector(fields_to_swap.begin(), + 
fields_to_swap.end())); + } // This might delete the Python message completely if all children were moved. Py_DECREF(self); diff --git a/ruby/Rakefile b/ruby/Rakefile index 11397b0eb9..221e9b507f 100644 --- a/ruby/Rakefile +++ b/ruby/Rakefile @@ -18,6 +18,18 @@ well_known_protos = %w[ google/protobuf/wrappers.proto ] +test_protos = %w[ + tests/basic_test.proto + tests/basic_test_proto2.proto + tests/generated_code.proto + tests/generated_code_proto2.proto + tests/multi_level_nesting_test.proto + tests/test_import.proto + tests/test_import_proto2.proto + tests/test_ruby_package.proto + tests/test_ruby_package_proto2.proto +] + # These are omitted for now because we don't support proto2. proto2_protos = %w[ google/protobuf/descriptor.proto @@ -43,6 +55,14 @@ unless ENV['IN_DOCKER'] == 'true' sh "#{protoc_command} -I../src --ruby_out=lib #{input_file}" end end + + test_protos.each do |proto_file| + output_file = proto_file.sub(/\.proto$/, "_pb.rb") + genproto_output << output_file + file output_file => proto_file do |file_task| + sh "#{protoc_command} -I../src -I. --ruby_out=. #{proto_file}" + end + end end if RUBY_PLATFORM == "java" @@ -100,59 +120,6 @@ else end end - -# Proto for tests. -genproto_output << "tests/generated_code.rb" -genproto_output << "tests/generated_code_proto2.rb" -genproto_output << "tests/test_import.rb" -genproto_output << "tests/test_import_proto2.rb" -genproto_output << "tests/test_ruby_package.rb" -genproto_output << "tests/test_ruby_package_proto2.rb" -genproto_output << "tests/basic_test.rb" -genproto_output << "tests/basic_test_proto2.rb" -genproto_output << "tests/multi_level_nesting_test.rb" -genproto_output << "tests/wrappers.rb" - -file "tests/generated_code.rb" => "tests/generated_code.proto" do |file_task| - sh "#{protoc_command} --ruby_out=. tests/generated_code.proto" -end - -file "tests/generated_code_proto2.rb" => "tests/generated_code_proto2.proto" do |file_task| - sh "#{protoc_command} --ruby_out=. 
tests/generated_code_proto2.proto" -end - -file "tests/test_import.rb" => "tests/test_import.proto" do |file_task| - sh "#{protoc_command} --ruby_out=. tests/test_import.proto" -end - -file "tests/test_import_proto2.rb" => "tests/test_import_proto2.proto" do |file_task| - sh "#{protoc_command} --ruby_out=. tests/test_import_proto2.proto" -end - -file "tests/test_ruby_package.rb" => "tests/test_ruby_package.proto" do |file_task| - sh "#{protoc_command} --ruby_out=. tests/test_ruby_package.proto" -end - -file "tests/test_ruby_package_proto2.rb" => "tests/test_ruby_package_proto2.proto" do |file_task| - sh "#{protoc_command} --ruby_out=. tests/test_ruby_package_proto2.proto" -end - -file "tests/basic_test.rb" => "tests/basic_test.proto" do |file_task| - sh "#{protoc_command} --experimental_allow_proto3_optional -I../src -I. --ruby_out=. tests/basic_test.proto" -end - -file "tests/basic_test_proto2.rb" => "tests/basic_test_proto2.proto" do |file_task| - sh "#{protoc_command} -I../src -I. --ruby_out=. tests/basic_test_proto2.proto" -end - -file "tests/multi_level_nesting_test.rb" => "tests/multi_level_nesting_test.proto" do |file_task| - sh "#{protoc_command} -I../src -I. --ruby_out=. tests/multi_level_nesting_test.proto" -end - -file "tests/wrappers.rb" => "../src/google/protobuf/wrappers.proto" do |file_task| - sh "#{protoc_command} -I../src -I. 
--ruby_out=tests ../src/google/protobuf/wrappers.proto" -end - task :genproto => genproto_output task :clean do @@ -162,7 +129,7 @@ end Gem::PackageTask.new(spec) do |pkg| end -Rake::TestTask.new(:test => :build) do |t| +Rake::TestTask.new(:test => [:build, :genproto]) do |t| t.test_files = FileList["tests/*.rb"].exclude("tests/gc_test.rb", "tests/common_tests.rb") end @@ -172,7 +139,7 @@ Rake::TestTask.new(:gc_test => :build) do |t| t.test_files = FileList["tests/gc_test.rb"] end -task :build => [:clean, :compile, :genproto] +task :build => [:clean, :genproto, :compile] task :default => [:build] # vim:sw=2:et diff --git a/ruby/ext/google/protobuf_c/message.c b/ruby/ext/google/protobuf_c/message.c index ffdae6a401..c1b9b86330 100644 --- a/ruby/ext/google/protobuf_c/message.c +++ b/ruby/ext/google/protobuf_c/message.c @@ -794,6 +794,14 @@ static VALUE Message_CreateHash(const upb_msg *msg, const upb_msgdef *m) { VALUE msg_value; VALUE msg_key; + if (!is_proto2 && upb_fielddef_issubmsg(field) && + !upb_fielddef_isseq(field) && !upb_msg_has(msg, field)) { + // TODO: Legacy behavior, remove when we fix the is_proto2 differences. + msg_key = ID2SYM(rb_intern(upb_fielddef_name(field))); + rb_hash_aset(hash, msg_key, Qnil); + continue; + } + // Do not include fields that are not present (oneof or optional fields). if (is_proto2 && upb_fielddef_haspresence(field) && !upb_msg_has(msg, field)) { diff --git a/ruby/ext/google/protobuf_c/ruby-upb.c b/ruby/ext/google/protobuf_c/ruby-upb.c index 61762fcd99..b1b701b4ee 100755 --- a/ruby/ext/google/protobuf_c/ruby-upb.c +++ b/ruby/ext/google/protobuf_c/ruby-upb.c @@ -1,27 +1,54 @@ /* Amalgamated source file */ #include "ruby-upb.h" /* -* This is where we define macros used across upb. -* -* All of these macros are undef'd in port_undef.inc to avoid leaking them to -* users. -* -* The correct usage is: -* -* #include "upb/foobar.h" -* #include "upb/baz.h" -* -* // MUST be last included header. 
-* #include "upb/port_def.inc" -* -* // Code for this file. -* // <...> -* -* // Can be omitted for .c files, required for .h. -* #include "upb/port_undef.inc" -* -* This file is private and must not be included by users! -*/ + * Copyright (c) 2009-2021, Google LLC + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Google LLC nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * This is where we define macros used across upb. + * + * All of these macros are undef'd in port_undef.inc to avoid leaking them to + * users. 
+ * + * The correct usage is: + * + * #include "upb/foobar.h" + * #include "upb/baz.h" + * + * // MUST be last included header. + * #include "upb/port_def.inc" + * + * // Code for this file. + * // <...> + * + * // Can be omitted for .c files, required for .h. + * #include "upb/port_undef.inc" + * + * This file is private and must not be included by users! + */ #if !((defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L) || \ (defined(__cplusplus) && __cplusplus >= 201103L) || \ @@ -137,9 +164,40 @@ #define UPB_LONGJMP(buf, val) longjmp(buf, val) #endif +/* UPB_PTRADD(ptr, ofs): add pointer while avoiding "NULL + 0" UB */ +#define UPB_PTRADD(ptr, ofs) ((ofs) ? (ptr) + (ofs) : (ptr)) + /* Configure whether fasttable is switched on or not. *************************/ -#if defined(__x86_64__) && defined(__GNUC__) +#ifdef __has_attribute +#define UPB_HAS_ATTRIBUTE(x) __has_attribute(x) +#else +#define UPB_HAS_ATTRIBUTE(x) 0 +#endif + +#if UPB_HAS_ATTRIBUTE(musttail) +#define UPB_MUSTTAIL __attribute__((musttail)) +#else +#define UPB_MUSTTAIL +#endif + +#undef UPB_HAS_ATTRIBUTE + +/* This check is not fully robust: it does not require that we have "musttail" + * support available. We need tail calls to avoid consuming arbitrary amounts + * of stack space. + * + * GCC/Clang can mostly be trusted to generate tail calls as long as + * optimization is enabled, but, debug builds will not generate tail calls + * unless "musttail" is available. + * + * We should probably either: + * 1. require that the compiler supports musttail. + * 2. add some fallback code for when musttail isn't available (ie. return + * instead of tail calling). This is safe and portable, but this comes at + * a CPU cost. + */ +#if (defined(__x86_64__) || defined(__aarch64__)) && defined(__GNUC__) #define UPB_FASTTABLE_SUPPORTED 1 #else #define UPB_FASTTABLE_SUPPORTED 0 @@ -150,7 +208,7 @@ * for example for testing or benchmarking. 
*/ #if defined(UPB_ENABLE_FASTTABLE) #if !UPB_FASTTABLE_SUPPORTED -#error fasttable is x86-64 + Clang/GCC only +#error fasttable is x86-64/ARM64 only and requires GCC or Clang. #endif #define UPB_FASTTABLE 1 /* Define UPB_TRY_ENABLE_FASTTABLE to use fasttable if possible. @@ -194,8 +252,9 @@ void __asan_unpoison_memory_region(void const volatile *addr, size_t size); ((void)(addr), (void)(size)) #define UPB_UNPOISON_MEMORY_REGION(addr, size) \ ((void)(addr), (void)(size)) -#endif +#endif +/** upb/decode.c ************************************************************/ #include #include @@ -891,7 +950,7 @@ bool _upb_decode(const char *buf, size_t size, void *msg, state.end_group = DECODE_NOGROUP; state.arena.head = arena->head; state.arena.last_size = arena->last_size; - state.arena.cleanups = arena->cleanups; + state.arena.cleanup_metadata = arena->cleanup_metadata; state.arena.parent = arena; if (UPB_UNLIKELY(UPB_SETJMP(state.err))) { @@ -902,7 +961,7 @@ bool _upb_decode(const char *buf, size_t size, void *msg, arena->head.ptr = state.arena.head.ptr; arena->head.end = state.arena.head.end; - arena->cleanups = state.arena.cleanups; + arena->cleanup_metadata = state.arena.cleanup_metadata; return ok; } @@ -911,6 +970,8 @@ bool _upb_decode(const char *buf, size_t size, void *msg, #undef OP_VARPCK_LG2 #undef OP_STRING #undef OP_SUBMSG + +/** upb/encode.c ************************************************************/ /* We encode backwards, to avoid pre-computing lengths (one-pass encode). 
*/ @@ -1386,7 +1447,7 @@ char *upb_encode_ex(const void *msg, const upb_msglayout *l, int options, return ret; } - +/** upb/msg.c ************************************************************/ /** upb_msg *******************************************************************/ @@ -1517,7 +1578,7 @@ upb_map *_upb_map_new(upb_arena *a, size_t key_size, size_t value_size) { return NULL; } - upb_strtable_init2(&map->table, UPB_CTYPE_INT32, 4, upb_arena_alloc(a)); + upb_strtable_init(&map->table, 4, a); map->key_size = key_size; map->val_size = value_size; @@ -1638,11 +1699,13 @@ bool _upb_mapsorter_pushmap(_upb_mapsorter *s, upb_descriptortype_t key_type, qsort(&s->entries[sorted->start], map_size, sizeof(*s->entries), compar); return true; } + +/** upb/table.c ************************************************************/ /* -** upb_table Implementation -** -** Implementation is heavily inspired by Lua's ltable.c. -*/ + * upb_table Implementation + * + * Implementation is heavily inspired by Lua's ltable.c. + */ #include @@ -1663,9 +1726,15 @@ static const double MAX_LOAD = 0.85; * cache effects). The lower this is, the more memory we'll use. 
*/ static const double MIN_DENSITY = 0.1; -bool is_pow2(uint64_t v) { return v == 0 || (v & (v - 1)) == 0; } +static bool is_pow2(uint64_t v) { return v == 0 || (v & (v - 1)) == 0; } -int log2ceil(uint64_t v) { +static upb_value _upb_value_val(uint64_t val) { + upb_value ret; + _upb_value_setval(&ret, val); + return ret; +} + +static int log2ceil(uint64_t v) { int ret = 0; bool pow2 = is_pow2(v); while (v >>= 1) ret++; @@ -1673,11 +1742,7 @@ int log2ceil(uint64_t v) { return UPB_MIN(UPB_MAXARRSIZE, ret); } -char *upb_strdup(const char *s, upb_alloc *a) { - return upb_strdup2(s, strlen(s), a); -} - -char *upb_strdup2(const char *s, size_t len, upb_alloc *a) { +char *upb_strdup2(const char *s, size_t len, upb_arena *a) { size_t n; char *p; @@ -1686,7 +1751,7 @@ char *upb_strdup2(const char *s, size_t len, upb_alloc *a) { /* Always null-terminate, even if binary data; but don't rely on the input to * have a null-terminating byte since it may be a raw binary buffer. */ n = len + 1; - p = upb_malloc(a, n); + p = upb_arena_malloc(a, n); if (p) { memcpy(p, s, len); p[len] = 0; @@ -1721,16 +1786,24 @@ typedef bool eqlfunc_t(upb_tabkey k1, lookupkey_t k2); /* Base table (shared code) ***************************************************/ -/* For when we need to cast away const. 
*/ -static upb_tabent *mutable_entries(upb_table *t) { - return (upb_tabent*)t->entries; +static uint32_t upb_inthash(uintptr_t key) { + return (uint32_t)key; +} + +static const upb_tabent *upb_getentry(const upb_table *t, uint32_t hash) { + return t->entries + (hash & t->mask); +} + +static bool upb_arrhas(upb_tabval key) { + return key.val != (uint64_t)-1; } + static bool isfull(upb_table *t) { return t->count == t->max_count; } -static bool init(upb_table *t, uint8_t size_lg2, upb_alloc *a) { +static bool init(upb_table *t, uint8_t size_lg2, upb_arena *a) { size_t bytes; t->count = 0; @@ -1739,21 +1812,17 @@ static bool init(upb_table *t, uint8_t size_lg2, upb_alloc *a) { t->max_count = upb_table_size(t) * MAX_LOAD; bytes = upb_table_size(t) * sizeof(upb_tabent); if (bytes > 0) { - t->entries = upb_malloc(a, bytes); + t->entries = upb_arena_malloc(a, bytes); if (!t->entries) return false; - memset(mutable_entries(t), 0, bytes); + memset(t->entries, 0, bytes); } else { t->entries = NULL; } return true; } -static void uninit(upb_table *t, upb_alloc *a) { - upb_free(a, mutable_entries(t)); -} - static upb_tabent *emptyent(upb_table *t, upb_tabent *e) { - upb_tabent *begin = mutable_entries(t); + upb_tabent *begin = t->entries; upb_tabent *end = begin + upb_table_size(t); for (e = e + 1; e < end; e++) { if (upb_tabent_isempty(e)) return e; @@ -1903,9 +1972,9 @@ static size_t begin(const upb_table *t) { /* A simple "subclass" of upb_table that only adds a hash function for strings. 
*/ -static upb_tabkey strcopy(lookupkey_t k2, upb_alloc *a) { +static upb_tabkey strcopy(lookupkey_t k2, upb_arena *a) { uint32_t len = (uint32_t) k2.str.len; - char *str = upb_malloc(a, k2.str.len + sizeof(uint32_t) + 1); + char *str = upb_arena_malloc(a, k2.str.len + sizeof(uint32_t) + 1); if (str == NULL) return 0; memcpy(str, &len, sizeof(uint32_t)); if (k2.str.len) memcpy(str + sizeof(uint32_t), k2.str.str, k2.str.len); @@ -1929,9 +1998,7 @@ static bool streql(upb_tabkey k1, lookupkey_t k2) { return len == k2.str.len && (len == 0 || memcmp(str, k2.str.str, len) == 0); } -bool upb_strtable_init2(upb_strtable *t, upb_ctype_t ctype, - size_t expected_size, upb_alloc *a) { - UPB_UNUSED(ctype); /* TODO(haberman): rm */ +bool upb_strtable_init(upb_strtable *t, size_t expected_size, upb_arena *a) { // Multiply by approximate reciprocal of MAX_LOAD (0.85), with pow2 denominator. size_t need_entries = (expected_size + 1) * 1204 / 1024; UPB_ASSERT(need_entries >= expected_size * 0.85); @@ -1945,14 +2012,7 @@ void upb_strtable_clear(upb_strtable *t) { memset((char*)t->t.entries, 0, bytes); } -void upb_strtable_uninit2(upb_strtable *t, upb_alloc *a) { - size_t i; - for (i = 0; i < upb_table_size(&t->t); i++) - upb_free(a, (void*)t->t.entries[i].key); - uninit(&t->t, a); -} - -bool upb_strtable_resize(upb_strtable *t, size_t size_lg2, upb_alloc *a) { +bool upb_strtable_resize(upb_strtable *t, size_t size_lg2, upb_arena *a) { upb_strtable new_table; upb_strtable_iter i; @@ -1961,17 +2021,15 @@ bool upb_strtable_resize(upb_strtable *t, size_t size_lg2, upb_alloc *a) { upb_strtable_begin(&i, t); for ( ; !upb_strtable_done(&i); upb_strtable_next(&i)) { upb_strview key = upb_strtable_iter_key(&i); - upb_strtable_insert3( - &new_table, key.data, key.size, - upb_strtable_iter_value(&i), a); + upb_strtable_insert(&new_table, key.data, key.size, + upb_strtable_iter_value(&i), a); } - upb_strtable_uninit2(t, a); *t = new_table; return true; } -bool upb_strtable_insert3(upb_strtable 
*t, const char *k, size_t len, - upb_value v, upb_alloc *a) { +bool upb_strtable_insert(upb_strtable *t, const char *k, size_t len, + upb_value v, upb_arena *a) { lookupkey_t key; upb_tabkey tabkey; uint32_t hash; @@ -1998,19 +2056,11 @@ bool upb_strtable_lookup2(const upb_strtable *t, const char *key, size_t len, return lookup(&t->t, strkey2(key, len), v, hash, &streql); } -bool upb_strtable_remove3(upb_strtable *t, const char *key, size_t len, - upb_value *val, upb_alloc *alloc) { +bool upb_strtable_remove(upb_strtable *t, const char *key, size_t len, + upb_value *val) { uint32_t hash = table_hash(key, len); upb_tabkey tabkey; - if (rm(&t->t, strkey2(key, len), val, &tabkey, hash, &streql)) { - if (alloc) { - /* Arena-based allocs don't need to free and won't pass this. */ - upb_free(alloc, (void*)tabkey); - } - return true; - } else { - return false; - } + return rm(&t->t, strkey2(key, len), val, &tabkey, hash, &streql); } /* Iteration */ @@ -2108,7 +2158,7 @@ static void check(upb_inttable *t) { } bool upb_inttable_sizedinit(upb_inttable *t, size_t asize, int hsize_lg2, - upb_alloc *a) { + upb_arena *a) { size_t array_bytes; if (!init(&t->t, hsize_lg2, a)) return false; @@ -2117,9 +2167,8 @@ bool upb_inttable_sizedinit(upb_inttable *t, size_t asize, int hsize_lg2, t->array_size = UPB_MAX(1, asize); t->array_count = 0; array_bytes = t->array_size * sizeof(upb_value); - t->array = upb_malloc(a, array_bytes); + t->array = upb_arena_malloc(a, array_bytes); if (!t->array) { - uninit(&t->t, a); return false; } memset(mutable_array(t), 0xff, array_bytes); @@ -2127,18 +2176,12 @@ bool upb_inttable_sizedinit(upb_inttable *t, size_t asize, int hsize_lg2, return true; } -bool upb_inttable_init2(upb_inttable *t, upb_ctype_t ctype, upb_alloc *a) { - UPB_UNUSED(ctype); /* TODO(haberman): rm */ +bool upb_inttable_init(upb_inttable *t, upb_arena *a) { return upb_inttable_sizedinit(t, 0, 4, a); } -void upb_inttable_uninit2(upb_inttable *t, upb_alloc *a) { - uninit(&t->t, a); - 
upb_free(a, mutable_array(t)); -} - -bool upb_inttable_insert2(upb_inttable *t, uintptr_t key, upb_value val, - upb_alloc *a) { +bool upb_inttable_insert(upb_inttable *t, uintptr_t key, upb_value val, + upb_arena *a) { upb_tabval tabval; tabval.val = val.val; UPB_ASSERT(upb_arrhas(tabval)); /* This will reject (uint64_t)-1. Fix this. */ @@ -2169,7 +2212,6 @@ bool upb_inttable_insert2(upb_inttable *t, uintptr_t key, upb_value val, UPB_ASSERT(t->t.count == new_table.count); - uninit(&t->t, a); t->t = new_table; } insert(&t->t, intkey(key), key, val, upb_inthash(key), &inthash, &inteql); @@ -2213,21 +2255,7 @@ bool upb_inttable_remove(upb_inttable *t, uintptr_t key, upb_value *val) { return success; } -bool upb_inttable_insertptr2(upb_inttable *t, const void *key, upb_value val, - upb_alloc *a) { - return upb_inttable_insert2(t, (uintptr_t)key, val, a); -} - -bool upb_inttable_lookupptr(const upb_inttable *t, const void *key, - upb_value *v) { - return upb_inttable_lookup(t, (uintptr_t)key, v); -} - -bool upb_inttable_removeptr(upb_inttable *t, const void *key, upb_value *val) { - return upb_inttable_remove(t, (uintptr_t)key, val); -} - -void upb_inttable_compact2(upb_inttable *t, upb_alloc *a) { +void upb_inttable_compact(upb_inttable *t, upb_arena *a) { /* A power-of-two histogram of the table keys. 
*/ size_t counts[UPB_MAXARRSIZE + 1] = {0}; @@ -2275,12 +2303,11 @@ void upb_inttable_compact2(upb_inttable *t, upb_alloc *a) { upb_inttable_begin(&i, t); for (; !upb_inttable_done(&i); upb_inttable_next(&i)) { uintptr_t k = upb_inttable_iter_key(&i); - upb_inttable_insert2(&new_t, k, upb_inttable_iter_value(&i), a); + upb_inttable_insert(&new_t, k, upb_inttable_iter_value(&i), a); } UPB_ASSERT(new_t.array_size == arr_size); UPB_ASSERT(new_t.t.size_lg2 == hashsize_lg2); } - upb_inttable_uninit2(t, a); *t = new_t; } @@ -2354,6 +2381,7 @@ bool upb_inttable_iter_isequal(const upb_inttable_iter *i1, i1->array_part == i2->array_part; } +/** upb/upb.c ************************************************************/ #include #include @@ -2420,6 +2448,19 @@ static void *upb_global_allocfunc(upb_alloc *alloc, void *ptr, size_t oldsize, } } +static uint32_t *upb_cleanup_pointer(uintptr_t cleanup_metadata) { + return (uint32_t *)(cleanup_metadata & ~0x1); +} + +static bool upb_cleanup_has_initial_block(uintptr_t cleanup_metadata) { + return cleanup_metadata & 0x1; +} + +static uintptr_t upb_cleanup_metadata(uint32_t *cleanup, + bool has_initial_block) { + return (uintptr_t)cleanup | has_initial_block; +} + upb_alloc upb_alloc_global = {&upb_global_allocfunc}; /* upb_arena ******************************************************************/ @@ -2465,7 +2506,8 @@ static void upb_arena_addblock(upb_arena *a, upb_arena *root, void *ptr, a->head.ptr = UPB_PTR_AT(block, memblock_reserve, char); a->head.end = UPB_PTR_AT(block, size, char); - a->cleanups = &block->cleanups; + a->cleanup_metadata = upb_cleanup_metadata( + &block->cleanups, upb_cleanup_has_initial_block(a->cleanup_metadata)); UPB_POISON_MEMORY_REGION(a->head.ptr, a->head.end - a->head.ptr); } @@ -2513,6 +2555,7 @@ upb_arena *arena_initslow(void *mem, size_t n, upb_alloc *alloc) { a->refcount = 1; a->freelist = NULL; a->freelist_tail = NULL; + a->cleanup_metadata = upb_cleanup_metadata(NULL, false); upb_arena_addblock(a, a, 
mem, n); @@ -2540,7 +2583,7 @@ upb_arena *upb_arena_init(void *mem, size_t n, upb_alloc *alloc) { a->head.ptr = mem; a->head.end = UPB_PTR_AT(mem, n - sizeof(*a), char); a->freelist = NULL; - a->cleanups = NULL; + a->cleanup_metadata = upb_cleanup_metadata(NULL, true); return a; } @@ -2575,15 +2618,17 @@ void upb_arena_free(upb_arena *a) { bool upb_arena_addcleanup(upb_arena *a, void *ud, upb_cleanup_func *func) { cleanup_ent *ent; + uint32_t* cleanups = upb_cleanup_pointer(a->cleanup_metadata); - if (!a->cleanups || _upb_arenahas(a) < sizeof(cleanup_ent)) { + if (!cleanups || _upb_arenahas(a) < sizeof(cleanup_ent)) { if (!upb_arena_allocblock(a, 128)) return false; /* Out of memory. */ UPB_ASSERT(_upb_arenahas(a) >= sizeof(cleanup_ent)); + cleanups = upb_cleanup_pointer(a->cleanup_metadata); } a->head.end -= sizeof(cleanup_ent); ent = (cleanup_ent*)a->head.end; - (*a->cleanups)++; + (*cleanups)++; UPB_UNPOISON_MEMORY_REGION(ent, sizeof(cleanup_ent)); ent->cleanup = func; @@ -2592,11 +2637,18 @@ bool upb_arena_addcleanup(upb_arena *a, void *ud, upb_cleanup_func *func) { return true; } -void upb_arena_fuse(upb_arena *a1, upb_arena *a2) { +bool upb_arena_fuse(upb_arena *a1, upb_arena *a2) { upb_arena *r1 = arena_findroot(a1); upb_arena *r2 = arena_findroot(a2); - if (r1 == r2) return; /* Already fused. */ + if (r1 == r2) return true; /* Already fused. */ + + /* Do not fuse initial blocks since we cannot lifetime extend them. */ + if (upb_cleanup_has_initial_block(r1->cleanup_metadata)) return false; + if (upb_cleanup_has_initial_block(r2->cleanup_metadata)) return false; + + /* Only allow fuse with a common allocator */ + if (r1->block_alloc != r2->block_alloc) return false; /* We want to join the smaller tree to the larger tree. * So swap first if they are backwards. 
*/ @@ -2614,12 +2666,15 @@ void upb_arena_fuse(upb_arena *a1, upb_arena *a2) { r1->freelist = r2->freelist; } r2->parent = r1; + return true; } -// Fast decoder: ~3x the speed of decode.c, but x86-64 specific. + +/** upb/decode_fast.c ************************************************************/ +// Fast decoder: ~3x the speed of decode.c, but requires x86-64/ARM64. // Also the table size grows by 2x. // -// Could potentially be ported to ARM64 or other 64-bit archs that pass at -// least six arguments in registers. +// Could potentially be ported to other 64-bit archs that pass at least six +// arguments in registers and have 8 unused high bits in pointers. // // The overall design is to create specialized functions for every possible // field type (eg. oneof boolean field with a 1 byte tag) and then dispatch @@ -2639,8 +2694,10 @@ void upb_arena_fuse(upb_arena *a1, upb_arena *a2) { #define UPB_PARSE_ARGS d, ptr, msg, table, hasbits, data -#define RETURN_GENERIC(m) \ - /* fprintf(stderr, m); */ \ +#define RETURN_GENERIC(m) \ + /* Uncomment either of these for debugging purposes. 
*/ \ + /* fprintf(stderr, m); */ \ + /*__builtin_trap(); */ \ return fastdecode_generic(d, ptr, msg, table, hasbits, 0); typedef enum { @@ -2651,21 +2708,18 @@ typedef enum { } upb_card; UPB_NOINLINE -static const char *fastdecode_isdonefallback(upb_decstate *d, const char *ptr, - upb_msg *msg, intptr_t table, - uint64_t hasbits, int overrun) { +static const char *fastdecode_isdonefallback(UPB_PARSE_PARAMS) { + int overrun = data; ptr = decode_isdonefallback_inl(d, ptr, overrun); if (ptr == NULL) { return fastdecode_err(d); } - uint16_t tag = fastdecode_loadtag(ptr); - return fastdecode_tagdispatch(d, ptr, msg, table, hasbits, tag); + data = fastdecode_loadtag(ptr); + UPB_MUSTTAIL return fastdecode_tagdispatch(UPB_PARSE_ARGS); } UPB_FORCEINLINE -static const char *fastdecode_dispatch(upb_decstate *d, const char *ptr, - upb_msg *msg, intptr_t table, - uint64_t hasbits) { +static const char *fastdecode_dispatch(UPB_PARSE_PARAMS) { if (UPB_UNLIKELY(ptr >= d->limit_ptr)) { int overrun = ptr - d->end; if (UPB_LIKELY(overrun == d->limit)) { @@ -2673,21 +2727,22 @@ static const char *fastdecode_dispatch(upb_decstate *d, const char *ptr, *(uint32_t*)msg |= hasbits; // Sync hasbits. return ptr; } else { - return fastdecode_isdonefallback(d, ptr, msg, table, hasbits, overrun); + data = overrun; + UPB_MUSTTAIL return fastdecode_isdonefallback(UPB_PARSE_ARGS); } } // Read two bytes of tag data (for a one-byte tag, the high byte is junk). 
- uint16_t tag = fastdecode_loadtag(ptr); - return fastdecode_tagdispatch(d, ptr, msg, table, hasbits, tag); + data = fastdecode_loadtag(ptr); + UPB_MUSTTAIL return fastdecode_tagdispatch(UPB_PARSE_ARGS); } UPB_FORCEINLINE -static bool fastdecode_checktag(uint64_t data, int tagbytes) { +static bool fastdecode_checktag(uint16_t data, int tagbytes) { if (tagbytes == 1) { return (data & 0xff) == 0; } else { - return (data & 0xffff) == 0; + return data == 0; } } @@ -2911,6 +2966,14 @@ static bool fastdecode_flippacked(uint64_t *data, int tagbytes) { return fastdecode_checktag(*data, tagbytes); } +#define FASTDECODE_CHECKPACKED(tagbytes, card, func) \ + if (UPB_UNLIKELY(!fastdecode_checktag(data, tagbytes))) { \ + if (card == CARD_r && fastdecode_flippacked(&data, tagbytes)) { \ + UPB_MUSTTAIL return func(UPB_PARSE_ARGS); \ + } \ + RETURN_GENERIC("packed check tag mismatch\n"); \ + } + /* varint fields **************************************************************/ UPB_FORCEINLINE @@ -2953,57 +3016,50 @@ done: return ptr; } -UPB_FORCEINLINE -static const char *fastdecode_unpackedvarint(UPB_PARSE_PARAMS, int tagbytes, - int valbytes, upb_card card, - bool zigzag, - _upb_field_parser *packed) { - uint64_t val; - void *dst; - fastdecode_arr farr; - - if (UPB_UNLIKELY(!fastdecode_checktag(data, tagbytes))) { - if (card == CARD_r && fastdecode_flippacked(&data, tagbytes)) { - return packed(UPB_PARSE_ARGS); - } - RETURN_GENERIC("varint field tag mismatch\n"); - } - - dst = - fastdecode_getfield(d, ptr, msg, &data, &hasbits, &farr, valbytes, card); - if (card == CARD_r) { - if (UPB_UNLIKELY(!dst)) { - RETURN_GENERIC("need array resize\n"); - } - } - -again: - if (card == CARD_r) { - dst = fastdecode_resizearr(d, dst, &farr, valbytes); - } - - ptr += tagbytes; - ptr = fastdecode_varint64(ptr, &val); - if (ptr == NULL) return fastdecode_err(d); - val = fastdecode_munge(val, valbytes, zigzag); - memcpy(dst, &val, valbytes); - - if (card == CARD_r) { - fastdecode_nextret ret = - 
fastdecode_nextrepeated(d, dst, &ptr, &farr, data, tagbytes, valbytes); - switch (ret.next) { - case FD_NEXT_SAMEFIELD: - dst = ret.dst; - goto again; - case FD_NEXT_OTHERFIELD: - return fastdecode_tagdispatch(d, ptr, msg, table, hasbits, ret.tag); - case FD_NEXT_ATLIMIT: - return ptr; - } - } - - return fastdecode_dispatch(d, ptr, msg, table, hasbits); -} +#define FASTDECODE_UNPACKEDVARINT(d, ptr, msg, table, hasbits, data, tagbytes, \ + valbytes, card, zigzag, packed) \ + uint64_t val; \ + void *dst; \ + fastdecode_arr farr; \ + \ + FASTDECODE_CHECKPACKED(tagbytes, card, packed); \ + \ + dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, &farr, valbytes, \ + card); \ + if (card == CARD_r) { \ + if (UPB_UNLIKELY(!dst)) { \ + RETURN_GENERIC("need array resize\n"); \ + } \ + } \ + \ + again: \ + if (card == CARD_r) { \ + dst = fastdecode_resizearr(d, dst, &farr, valbytes); \ + } \ + \ + ptr += tagbytes; \ + ptr = fastdecode_varint64(ptr, &val); \ + if (ptr == NULL) \ + return fastdecode_err(d); \ + val = fastdecode_munge(val, valbytes, zigzag); \ + memcpy(dst, &val, valbytes); \ + \ + if (card == CARD_r) { \ + fastdecode_nextret ret = fastdecode_nextrepeated( \ + d, dst, &ptr, &farr, data, tagbytes, valbytes); \ + switch (ret.next) { \ + case FD_NEXT_SAMEFIELD: \ + dst = ret.dst; \ + goto again; \ + case FD_NEXT_OTHERFIELD: \ + data = ret.tag; \ + UPB_MUSTTAIL return fastdecode_tagdispatch(UPB_PARSE_ARGS); \ + case FD_NEXT_ATLIMIT: \ + return ptr; \ + } \ + } \ + \ + UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS); typedef struct { uint8_t valbytes; @@ -3032,50 +3088,37 @@ static const char *fastdecode_topackedvarint(upb_decstate *d, const char *ptr, return ptr; } -UPB_FORCEINLINE -static const char *fastdecode_packedvarint(UPB_PARSE_PARAMS, int tagbytes, - int valbytes, bool zigzag, - _upb_field_parser *unpacked) { - fastdecode_varintdata ctx = {valbytes, zigzag}; - - if (UPB_UNLIKELY(!fastdecode_checktag(data, tagbytes))) { - if 
(fastdecode_flippacked(&data, tagbytes)) { - return unpacked(UPB_PARSE_ARGS); - } else { - RETURN_GENERIC("varint field tag mismatch\n"); - } - } - - ctx.dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, &ctx.farr, - valbytes, CARD_r); - if (UPB_UNLIKELY(!ctx.dst)) { - RETURN_GENERIC("need array resize\n"); - } - - ptr += tagbytes; - ptr = fastdecode_delimited(d, ptr, &fastdecode_topackedvarint, &ctx); - - if (UPB_UNLIKELY(ptr == NULL)) { - return fastdecode_err(d); +#define FASTDECODE_PACKEDVARINT(d, ptr, msg, table, hasbits, data, tagbytes, \ + valbytes, zigzag, unpacked) \ + fastdecode_varintdata ctx = {valbytes, zigzag}; \ + \ + FASTDECODE_CHECKPACKED(tagbytes, CARD_r, unpacked); \ + \ + ctx.dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, &ctx.farr, \ + valbytes, CARD_r); \ + if (UPB_UNLIKELY(!ctx.dst)) { \ + RETURN_GENERIC("need array resize\n"); \ + } \ + \ + ptr += tagbytes; \ + ptr = fastdecode_delimited(d, ptr, &fastdecode_topackedvarint, &ctx); \ + \ + if (UPB_UNLIKELY(ptr == NULL)) { \ + return fastdecode_err(d); \ + } \ + \ + UPB_MUSTTAIL return fastdecode_dispatch(d, ptr, msg, table, hasbits, 0); + +#define FASTDECODE_VARINT(d, ptr, msg, table, hasbits, data, tagbytes, \ + valbytes, card, zigzag, unpacked, packed) \ + if (card == CARD_p) { \ + FASTDECODE_PACKEDVARINT(d, ptr, msg, table, hasbits, data, tagbytes, \ + valbytes, zigzag, unpacked); \ + } else { \ + FASTDECODE_UNPACKEDVARINT(d, ptr, msg, table, hasbits, data, tagbytes, \ + valbytes, card, zigzag, packed); \ } - return fastdecode_dispatch(d, ptr, msg, table, hasbits); -} - -UPB_FORCEINLINE -static const char *fastdecode_varint(UPB_PARSE_PARAMS, int tagbytes, - int valbytes, upb_card card, bool zigzag, - _upb_field_parser *unpacked, - _upb_field_parser *packed) { - if (card == CARD_p) { - return fastdecode_packedvarint(UPB_PARSE_ARGS, tagbytes, valbytes, zigzag, - unpacked); - } else { - return fastdecode_unpackedvarint(UPB_PARSE_ARGS, tagbytes, valbytes, card, - zigzag, packed); 
- } -} - #define z_ZZ true #define b_ZZ false #define v_ZZ false @@ -3086,10 +3129,10 @@ static const char *fastdecode_varint(UPB_PARSE_PARAMS, int tagbytes, #define F(card, type, valbytes, tagbytes) \ UPB_NOINLINE \ const char *upb_p##card##type##valbytes##_##tagbytes##bt(UPB_PARSE_PARAMS) { \ - return fastdecode_varint(UPB_PARSE_ARGS, tagbytes, valbytes, CARD_##card, \ - type##_ZZ, \ - &upb_pr##type##valbytes##_##tagbytes##bt, \ - &upb_pp##type##valbytes##_##tagbytes##bt); \ + FASTDECODE_VARINT(d, ptr, msg, table, hasbits, data, tagbytes, valbytes, \ + CARD_##card, type##_ZZ, \ + upb_pr##type##valbytes##_##tagbytes##bt, \ + upb_pp##type##valbytes##_##tagbytes##bt); \ } #define TYPES(card, tagbytes) \ @@ -3117,126 +3160,110 @@ TAGBYTES(p) #undef F #undef TYPES #undef TAGBYTES +#undef FASTDECODE_UNPACKEDVARINT +#undef FASTDECODE_PACKEDVARINT +#undef FASTDECODE_VARINT /* fixed fields ***************************************************************/ -UPB_FORCEINLINE -static const char *fastdecode_unpackedfixed(UPB_PARSE_PARAMS, int tagbytes, - int valbytes, upb_card card, - _upb_field_parser *packed) { - void *dst; - fastdecode_arr farr; - - if (UPB_UNLIKELY(!fastdecode_checktag(data, tagbytes))) { - if (card == CARD_r && fastdecode_flippacked(&data, tagbytes)) { - return packed(UPB_PARSE_ARGS); - } - RETURN_GENERIC("fixed field tag mismatch\n"); - } - - dst = - fastdecode_getfield(d, ptr, msg, &data, &hasbits, &farr, valbytes, card); - if (card == CARD_r) { - if (UPB_UNLIKELY(!dst)) { - RETURN_GENERIC("couldn't allocate array in arena\n"); - } +#define FASTDECODE_UNPACKEDFIXED(d, ptr, msg, table, hasbits, data, tagbytes, \ + valbytes, card, packed) \ + void *dst; \ + fastdecode_arr farr; \ + \ + FASTDECODE_CHECKPACKED(tagbytes, card, packed) \ + \ + dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, &farr, valbytes, \ + card); \ + if (card == CARD_r) { \ + if (UPB_UNLIKELY(!dst)) { \ + RETURN_GENERIC("couldn't allocate array in arena\n"); \ + } \ + } \ + \ + 
again: \ + if (card == CARD_r) { \ + dst = fastdecode_resizearr(d, dst, &farr, valbytes); \ + } \ + \ + ptr += tagbytes; \ + memcpy(dst, ptr, valbytes); \ + ptr += valbytes; \ + \ + if (card == CARD_r) { \ + fastdecode_nextret ret = fastdecode_nextrepeated( \ + d, dst, &ptr, &farr, data, tagbytes, valbytes); \ + switch (ret.next) { \ + case FD_NEXT_SAMEFIELD: \ + dst = ret.dst; \ + goto again; \ + case FD_NEXT_OTHERFIELD: \ + data = ret.tag; \ + UPB_MUSTTAIL return fastdecode_tagdispatch(UPB_PARSE_ARGS); \ + case FD_NEXT_ATLIMIT: \ + return ptr; \ + } \ + } \ + \ + UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS); + +#define FASTDECODE_PACKEDFIXED(d, ptr, msg, table, hasbits, data, tagbytes, \ + valbytes, unpacked) \ + FASTDECODE_CHECKPACKED(tagbytes, CARD_r, unpacked) \ + \ + ptr += tagbytes; \ + int size = (uint8_t)ptr[0]; \ + ptr++; \ + if (size & 0x80) { \ + ptr = fastdecode_longsize(ptr, &size); \ + } \ + \ + if (UPB_UNLIKELY(fastdecode_boundscheck(ptr, size, d->limit_ptr) || \ + (size % valbytes) != 0)) { \ + return fastdecode_err(d); \ + } \ + \ + upb_array **arr_p = fastdecode_fieldmem(msg, data); \ + upb_array *arr = *arr_p; \ + uint8_t elem_size_lg2 = __builtin_ctz(valbytes); \ + int elems = size / valbytes; \ + \ + if (UPB_LIKELY(!arr)) { \ + *arr_p = arr = _upb_array_new(&d->arena, elems, elem_size_lg2); \ + if (!arr) { \ + return fastdecode_err(d); \ + } \ + } else { \ + _upb_array_resize(arr, elems, &d->arena); \ + } \ + \ + char *dst = _upb_array_ptr(arr); \ + memcpy(dst, ptr, size); \ + arr->len = elems; \ + \ + ptr += size; \ + UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS); + +#define FASTDECODE_FIXED(d, ptr, msg, table, hasbits, data, tagbytes, \ + valbytes, card, unpacked, packed) \ + if (card == CARD_p) { \ + FASTDECODE_PACKEDFIXED(d, ptr, msg, table, hasbits, data, tagbytes, \ + valbytes, unpacked); \ + } else { \ + FASTDECODE_UNPACKEDFIXED(d, ptr, msg, table, hasbits, data, tagbytes, \ + valbytes, card, packed); \ } - -again: 
- if (card == CARD_r) { - dst = fastdecode_resizearr(d, dst, &farr, valbytes); - } - - ptr += tagbytes; - memcpy(dst, ptr, valbytes); - ptr += valbytes; - - if (card == CARD_r) { - fastdecode_nextret ret = - fastdecode_nextrepeated(d, dst, &ptr, &farr, data, tagbytes, valbytes); - switch (ret.next) { - case FD_NEXT_SAMEFIELD: - dst = ret.dst; - goto again; - case FD_NEXT_OTHERFIELD: - return fastdecode_tagdispatch(d, ptr, msg, table, hasbits, ret.tag); - case FD_NEXT_ATLIMIT: - return ptr; - } - } - - return fastdecode_dispatch(d, ptr, msg, table, hasbits); -} - -UPB_FORCEINLINE -static const char *fastdecode_packedfixed(UPB_PARSE_PARAMS, int tagbytes, - int valbytes, - _upb_field_parser *unpacked) { - if (UPB_UNLIKELY(!fastdecode_checktag(data, tagbytes))) { - if (fastdecode_flippacked(&data, tagbytes)) { - return unpacked(UPB_PARSE_ARGS); - } else { - RETURN_GENERIC("varint field tag mismatch\n"); - } - } - - ptr += tagbytes; - int size = (uint8_t)ptr[0]; - ptr++; - if (size & 0x80) { - ptr = fastdecode_longsize(ptr, &size); - } - - if (UPB_UNLIKELY(fastdecode_boundscheck(ptr, size, d->limit_ptr)) || - (size % valbytes) != 0) { - return fastdecode_err(d); - } - - upb_array **arr_p = fastdecode_fieldmem(msg, data); - upb_array *arr = *arr_p; - uint8_t elem_size_lg2 = __builtin_ctz(valbytes); - int elems = size / valbytes; - - if (UPB_LIKELY(!arr)) { - *arr_p = arr = _upb_array_new(&d->arena, elems, elem_size_lg2); - if (!arr) { - return fastdecode_err(d); - } - } else { - _upb_array_resize(arr, elems, &d->arena); - } - - char *dst = _upb_array_ptr(arr); - memcpy(dst, ptr, size); - arr->len = elems; - - return fastdecode_dispatch(d, ptr + size, msg, table, hasbits); -} - -UPB_FORCEINLINE -static const char *fastdecode_fixed(UPB_PARSE_PARAMS, int tagbytes, - int valbytes, upb_card card, - _upb_field_parser *unpacked, - _upb_field_parser *packed) { - if (card == CARD_p) { - return fastdecode_packedfixed(UPB_PARSE_ARGS, tagbytes, valbytes, unpacked); - } else { - 
return fastdecode_unpackedfixed(UPB_PARSE_ARGS, tagbytes, valbytes, card, - packed); - } -} - /* Generate all combinations: * {s,o,r,p} x {f4,f8} x {1bt,2bt} */ -#define F(card, valbytes, tagbytes) \ - UPB_NOINLINE \ - const char *upb_p##card##f##valbytes##_##tagbytes##bt(UPB_PARSE_PARAMS) { \ - return fastdecode_fixed(UPB_PARSE_ARGS, tagbytes, valbytes, CARD_##card, \ - &upb_ppf##valbytes##_##tagbytes##bt, \ - &upb_prf##valbytes##_##tagbytes##bt); \ +#define F(card, valbytes, tagbytes) \ + UPB_NOINLINE \ + const char *upb_p##card##f##valbytes##_##tagbytes##bt(UPB_PARSE_PARAMS) { \ + FASTDECODE_FIXED(d, ptr, msg, table, hasbits, data, tagbytes, valbytes, \ + CARD_##card, upb_ppf##valbytes##_##tagbytes##bt, \ + upb_prf##valbytes##_##tagbytes##bt); \ } #define TYPES(card, tagbytes) \ @@ -3255,6 +3282,8 @@ TAGBYTES(p) #undef F #undef TYPES #undef TAGBYTES +#undef FASTDECODE_UNPACKEDFIXED +#undef FASTDECODE_PACKEDFIXED /* string fields **************************************************************/ @@ -3266,56 +3295,54 @@ typedef const char *fastdecode_copystr_func(struct upb_decstate *d, UPB_NOINLINE static const char *fastdecode_verifyutf8(upb_decstate *d, const char *ptr, upb_msg *msg, intptr_t table, - uint64_t hasbits, upb_strview *dst) { + uint64_t hasbits, uint64_t data) { + upb_strview *dst = (upb_strview*)data; if (!decode_verifyutf8_inl(dst->data, dst->size)) { return fastdecode_err(d); } - return fastdecode_dispatch(d, ptr, msg, table, hasbits); -} - -UPB_FORCEINLINE -static const char *fastdecode_longstring(struct upb_decstate *d, - const char *ptr, upb_msg *msg, - intptr_t table, uint64_t hasbits, - upb_strview *dst, - bool validate_utf8) { - int size = (uint8_t)ptr[0]; // Could plumb through hasbits. 
- ptr++; - if (size & 0x80) { - ptr = fastdecode_longsize(ptr, &size); - } - - if (UPB_UNLIKELY(fastdecode_boundscheck(ptr, size, d->limit_ptr))) { - dst->size = 0; - return fastdecode_err(d); - } - - if (d->alias) { - dst->data = ptr; - dst->size = size; - } else { - char *data = upb_arena_malloc(&d->arena, size); - if (!data) { - return fastdecode_err(d); - } - memcpy(data, ptr, size); - dst->data = data; - dst->size = size; + UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS); +} + +#define FASTDECODE_LONGSTRING(d, ptr, msg, table, hasbits, dst, validate_utf8) \ + int size = (uint8_t)ptr[0]; /* Could plumb through hasbits. */ \ + ptr++; \ + if (size & 0x80) { \ + ptr = fastdecode_longsize(ptr, &size); \ + } \ + \ + if (UPB_UNLIKELY(fastdecode_boundscheck(ptr, size, d->limit_ptr))) { \ + dst->size = 0; \ + return fastdecode_err(d); \ + } \ + \ + if (d->alias) { \ + dst->data = ptr; \ + dst->size = size; \ + } else { \ + char *data = upb_arena_malloc(&d->arena, size); \ + if (!data) { \ + return fastdecode_err(d); \ + } \ + memcpy(data, ptr, size); \ + dst->data = data; \ + dst->size = size; \ + } \ + \ + ptr += size; \ + if (validate_utf8) { \ + data = (uint64_t)dst; \ + UPB_MUSTTAIL return fastdecode_verifyutf8(UPB_PARSE_ARGS); \ + } else { \ + UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS); \ } - if (validate_utf8) { - return fastdecode_verifyutf8(d, ptr + size, msg, table, hasbits, dst); - } else { - return fastdecode_dispatch(d, ptr + size, msg, table, hasbits); - } -} - UPB_NOINLINE static const char *fastdecode_longstring_utf8(struct upb_decstate *d, - const char *ptr, upb_msg *msg, - intptr_t table, uint64_t hasbits, - upb_strview *dst) { - return fastdecode_longstring(d, ptr, msg, table, hasbits, dst, true); + const char *ptr, upb_msg *msg, + intptr_t table, uint64_t hasbits, + uint64_t data) { + upb_strview *dst = (upb_strview*)data; + FASTDECODE_LONGSTRING(d, ptr, msg, table, hasbits, dst, true); } UPB_NOINLINE @@ -3323,8 +3350,9 @@ static 
const char *fastdecode_longstring_noutf8(struct upb_decstate *d, const char *ptr, upb_msg *msg, intptr_t table, uint64_t hasbits, - upb_strview *dst) { - return fastdecode_longstring(d, ptr, msg, table, hasbits, dst, false); + uint64_t data) { + upb_strview *dst = (upb_strview*)data; + FASTDECODE_LONGSTRING(d, ptr, msg, table, hasbits, dst, false); } UPB_FORCEINLINE @@ -3337,156 +3365,165 @@ static void fastdecode_docopy(upb_decstate *d, const char *ptr, uint32_t size, UPB_POISON_MEMORY_REGION(data + size, copy - size); } -UPB_FORCEINLINE -static const char *fastdecode_copystring(UPB_PARSE_PARAMS, int tagbytes, - upb_card card, bool validate_utf8) { - upb_strview *dst; - fastdecode_arr farr; - int64_t size; - size_t arena_has; - size_t common_has; - char *buf; - - UPB_ASSERT(!d->alias); - UPB_ASSERT(fastdecode_checktag(data, tagbytes)); - - dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, &farr, - sizeof(upb_strview), card); - -again: - if (card == CARD_r) { - dst = fastdecode_resizearr(d, dst, &farr, sizeof(upb_strview)); - } - - size = (uint8_t)ptr[tagbytes]; - ptr += tagbytes + 1; - dst->size = size; - - buf = d->arena.head.ptr; - arena_has = _upb_arenahas(&d->arena); - common_has = UPB_MIN(arena_has, (d->end - ptr) + 16); - - if (UPB_LIKELY(size <= 15 - tagbytes)) { - if (arena_has < 16) goto longstr; - d->arena.head.ptr += 16; - memcpy(buf, ptr - tagbytes - 1, 16); - dst->data = buf + tagbytes + 1; - } else if (UPB_LIKELY(size <= 32)) { - if (UPB_UNLIKELY(common_has < 32)) goto longstr; - fastdecode_docopy(d, ptr, size, 32, buf, dst); - } else if (UPB_LIKELY(size <= 64)) { - if (UPB_UNLIKELY(common_has < 64)) goto longstr; - fastdecode_docopy(d, ptr, size, 64, buf, dst); - } else if (UPB_LIKELY(size < 128)) { - if (UPB_UNLIKELY(common_has < 128)) goto longstr; - fastdecode_docopy(d, ptr, size, 128, buf, dst); - } else { - goto longstr; - } - - ptr += size; - - if (card == CARD_r) { - if (validate_utf8 && !decode_verifyutf8_inl(dst->data, dst->size)) { - 
return fastdecode_err(d); - } - fastdecode_nextret ret = fastdecode_nextrepeated( - d, dst, &ptr, &farr, data, tagbytes, sizeof(upb_strview)); - switch (ret.next) { - case FD_NEXT_SAMEFIELD: - dst = ret.dst; - goto again; - case FD_NEXT_OTHERFIELD: - return fastdecode_tagdispatch(d, ptr, msg, table, hasbits, ret.tag); - case FD_NEXT_ATLIMIT: - return ptr; - } - } - - if (card != CARD_r && validate_utf8) { - return fastdecode_verifyutf8(d, ptr, msg, table, hasbits, dst); - } - - return fastdecode_dispatch(d, ptr, msg, table, hasbits); - -longstr: - ptr--; - if (validate_utf8) { - return fastdecode_longstring_utf8(d, ptr, msg, table, hasbits, dst); - } else { - return fastdecode_longstring_noutf8(d, ptr, msg, table, hasbits, dst); - } -} - -UPB_FORCEINLINE -static const char *fastdecode_string(UPB_PARSE_PARAMS, int tagbytes, - upb_card card, _upb_field_parser *copyfunc, - bool validate_utf8) { - upb_strview *dst; - fastdecode_arr farr; - int64_t size; - - if (UPB_UNLIKELY(!fastdecode_checktag(data, tagbytes))) { - RETURN_GENERIC("string field tag mismatch\n"); - } - - if (UPB_UNLIKELY(!d->alias)) { - return copyfunc(UPB_PARSE_ARGS); - } - - dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, &farr, - sizeof(upb_strview), card); - -again: - if (card == CARD_r) { - dst = fastdecode_resizearr(d, dst, &farr, sizeof(upb_strview)); - } - - size = (int8_t)ptr[tagbytes]; - ptr += tagbytes + 1; - dst->data = ptr; - dst->size = size; - - if (UPB_UNLIKELY(fastdecode_boundscheck(ptr, size, d->end))) { - ptr--; - if (validate_utf8) { - return fastdecode_longstring_utf8(d, ptr, msg, table, hasbits, dst); - } else { - return fastdecode_longstring_noutf8(d, ptr, msg, table, hasbits, dst); - } - } - - ptr += size; - - if (card == CARD_r) { - if (validate_utf8 && !decode_verifyutf8_inl(dst->data, dst->size)) { - return fastdecode_err(d); - } - fastdecode_nextret ret = fastdecode_nextrepeated( - d, dst, &ptr, &farr, data, tagbytes, sizeof(upb_strview)); - switch (ret.next) { - case 
FD_NEXT_SAMEFIELD: - dst = ret.dst; - if (UPB_UNLIKELY(!d->alias)) { - // Buffer flipped and we can't alias any more. Bounce to copyfunc(), - // but via dispatch since we need to reload table data also. - fastdecode_commitarr(dst, &farr, sizeof(upb_strview)); - return fastdecode_tagdispatch(d, ptr, msg, table, hasbits, ret.tag); - } - goto again; - case FD_NEXT_OTHERFIELD: - return fastdecode_tagdispatch(d, ptr, msg, table, hasbits, ret.tag); - case FD_NEXT_ATLIMIT: - return ptr; - } - } - - if (card != CARD_r && validate_utf8) { - return fastdecode_verifyutf8(d, ptr, msg, table, hasbits, dst); - } - - return fastdecode_dispatch(d, ptr, msg, table, hasbits); -} +#define FASTDECODE_COPYSTRING(d, ptr, msg, table, hasbits, data, tagbytes, \ + card, validate_utf8) \ + upb_strview *dst; \ + fastdecode_arr farr; \ + int64_t size; \ + size_t arena_has; \ + size_t common_has; \ + char *buf; \ + \ + UPB_ASSERT(!d->alias); \ + UPB_ASSERT(fastdecode_checktag(data, tagbytes)); \ + \ + dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, &farr, \ + sizeof(upb_strview), card); \ + \ + again: \ + if (card == CARD_r) { \ + dst = fastdecode_resizearr(d, dst, &farr, sizeof(upb_strview)); \ + } \ + \ + size = (uint8_t)ptr[tagbytes]; \ + ptr += tagbytes + 1; \ + dst->size = size; \ + \ + buf = d->arena.head.ptr; \ + arena_has = _upb_arenahas(&d->arena); \ + common_has = UPB_MIN(arena_has, (d->end - ptr) + 16); \ + \ + if (UPB_LIKELY(size <= 15 - tagbytes)) { \ + if (arena_has < 16) \ + goto longstr; \ + d->arena.head.ptr += 16; \ + memcpy(buf, ptr - tagbytes - 1, 16); \ + dst->data = buf + tagbytes + 1; \ + } else if (UPB_LIKELY(size <= 32)) { \ + if (UPB_UNLIKELY(common_has < 32)) \ + goto longstr; \ + fastdecode_docopy(d, ptr, size, 32, buf, dst); \ + } else if (UPB_LIKELY(size <= 64)) { \ + if (UPB_UNLIKELY(common_has < 64)) \ + goto longstr; \ + fastdecode_docopy(d, ptr, size, 64, buf, dst); \ + } else if (UPB_LIKELY(size < 128)) { \ + if (UPB_UNLIKELY(common_has < 128)) \ + 
goto longstr; \ + fastdecode_docopy(d, ptr, size, 128, buf, dst); \ + } else { \ + goto longstr; \ + } \ + \ + ptr += size; \ + \ + if (card == CARD_r) { \ + if (validate_utf8 && !decode_verifyutf8_inl(dst->data, dst->size)) { \ + return fastdecode_err(d); \ + } \ + fastdecode_nextret ret = fastdecode_nextrepeated( \ + d, dst, &ptr, &farr, data, tagbytes, sizeof(upb_strview)); \ + switch (ret.next) { \ + case FD_NEXT_SAMEFIELD: \ + dst = ret.dst; \ + goto again; \ + case FD_NEXT_OTHERFIELD: \ + data = ret.tag; \ + UPB_MUSTTAIL return fastdecode_tagdispatch(UPB_PARSE_ARGS); \ + case FD_NEXT_ATLIMIT: \ + return ptr; \ + } \ + } \ + \ + if (card != CARD_r && validate_utf8) { \ + data = (uint64_t)dst; \ + UPB_MUSTTAIL return fastdecode_verifyutf8(UPB_PARSE_ARGS); \ + } \ + \ + UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS); \ + \ + longstr: \ + ptr--; \ + if (validate_utf8) { \ + UPB_MUSTTAIL return fastdecode_longstring_utf8(d, ptr, msg, table, \ + hasbits, (uint64_t)dst); \ + } else { \ + UPB_MUSTTAIL return fastdecode_longstring_noutf8(d, ptr, msg, table, \ + hasbits, (uint64_t)dst); \ + } + +#define FASTDECODE_STRING(d, ptr, msg, table, hasbits, data, tagbytes, card, \ + copyfunc, validate_utf8) \ + upb_strview *dst; \ + fastdecode_arr farr; \ + int64_t size; \ + \ + if (UPB_UNLIKELY(!fastdecode_checktag(data, tagbytes))) { \ + RETURN_GENERIC("string field tag mismatch\n"); \ + } \ + \ + if (UPB_UNLIKELY(!d->alias)) { \ + UPB_MUSTTAIL return copyfunc(UPB_PARSE_ARGS); \ + } \ + \ + dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, &farr, \ + sizeof(upb_strview), card); \ + \ + again: \ + if (card == CARD_r) { \ + dst = fastdecode_resizearr(d, dst, &farr, sizeof(upb_strview)); \ + } \ + \ + size = (int8_t)ptr[tagbytes]; \ + ptr += tagbytes + 1; \ + dst->data = ptr; \ + dst->size = size; \ + \ + if (UPB_UNLIKELY(fastdecode_boundscheck(ptr, size, d->end))) { \ + ptr--; \ + if (validate_utf8) { \ + return fastdecode_longstring_utf8(d, ptr, msg, table, 
hasbits, \ + (uint64_t)dst); \ + } else { \ + return fastdecode_longstring_noutf8(d, ptr, msg, table, hasbits, \ + (uint64_t)dst); \ + } \ + } \ + \ + ptr += size; \ + \ + if (card == CARD_r) { \ + if (validate_utf8 && !decode_verifyutf8_inl(dst->data, dst->size)) { \ + return fastdecode_err(d); \ + } \ + fastdecode_nextret ret = fastdecode_nextrepeated( \ + d, dst, &ptr, &farr, data, tagbytes, sizeof(upb_strview)); \ + switch (ret.next) { \ + case FD_NEXT_SAMEFIELD: \ + dst = ret.dst; \ + if (UPB_UNLIKELY(!d->alias)) { \ + /* Buffer flipped and we can't alias any more. Bounce to */ \ + /* copyfunc(), but via dispatch since we need to reload table */ \ + /* data also. */ \ + fastdecode_commitarr(dst, &farr, sizeof(upb_strview)); \ + data = ret.tag; \ + UPB_MUSTTAIL return fastdecode_tagdispatch(UPB_PARSE_ARGS); \ + } \ + goto again; \ + case FD_NEXT_OTHERFIELD: \ + data = ret.tag; \ + UPB_MUSTTAIL return fastdecode_tagdispatch(UPB_PARSE_ARGS); \ + case FD_NEXT_ATLIMIT: \ + return ptr; \ + } \ + } \ + \ + if (card != CARD_r && validate_utf8) { \ + data = (uint64_t)dst; \ + UPB_MUSTTAIL return fastdecode_verifyutf8(UPB_PARSE_ARGS); \ + } \ + \ + UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS); /* Generate all combinations: * {p,c} x {s,o,r} x {s, b} x {1bt,2bt} */ @@ -3494,16 +3531,16 @@ again: #define s_VALIDATE true #define b_VALIDATE false -#define F(card, tagbytes, type) \ - UPB_NOINLINE \ - const char *upb_c##card##type##_##tagbytes##bt(UPB_PARSE_PARAMS) { \ - return fastdecode_copystring(UPB_PARSE_ARGS, tagbytes, CARD_##card, \ - type##_VALIDATE); \ - } \ - const char *upb_p##card##type##_##tagbytes##bt(UPB_PARSE_PARAMS) { \ - return fastdecode_string(UPB_PARSE_ARGS, tagbytes, CARD_##card, \ - &upb_c##card##type##_##tagbytes##bt, \ - type##_VALIDATE); \ +#define F(card, tagbytes, type) \ + UPB_NOINLINE \ + const char *upb_c##card##type##_##tagbytes##bt(UPB_PARSE_PARAMS) { \ + FASTDECODE_COPYSTRING(d, ptr, msg, table, hasbits, data, tagbytes, \ + 
CARD_##card, type##_VALIDATE); \ + } \ + const char *upb_p##card##type##_##tagbytes##bt(UPB_PARSE_PARAMS) { \ + FASTDECODE_STRING(d, ptr, msg, table, hasbits, data, tagbytes, \ + CARD_##card, upb_c##card##type##_##tagbytes##bt, \ + type##_VALIDATE); \ } #define UTF8(card, tagbytes) \ @@ -3522,6 +3559,9 @@ TAGBYTES(r) #undef b_VALIDATE #undef F #undef TAGBYTES +#undef FASTDECODE_LONGSTRING +#undef FASTDECODE_COPYSTRING +#undef FASTDECODE_STRING /* message fields *************************************************************/ @@ -3554,82 +3594,82 @@ UPB_FORCEINLINE static const char *fastdecode_tosubmsg(upb_decstate *d, const char *ptr, void *ctx) { fastdecode_submsgdata *submsg = ctx; - ptr = fastdecode_dispatch(d, ptr, submsg->msg, submsg->table, 0); + ptr = fastdecode_dispatch(d, ptr, submsg->msg, submsg->table, 0, 0); UPB_ASSUME(ptr != NULL); return ptr; } -UPB_FORCEINLINE -static const char *fastdecode_submsg(UPB_PARSE_PARAMS, int tagbytes, - int msg_ceil_bytes, upb_card card) { - - if (UPB_UNLIKELY(!fastdecode_checktag(data, tagbytes))) { - RETURN_GENERIC("submessage field tag mismatch\n"); - } - - if (--d->depth == 0) return fastdecode_err(d); - - upb_msg **dst; - uint32_t submsg_idx = (data >> 16) & 0xff; - const upb_msglayout *tablep = decode_totablep(table); - const upb_msglayout *subtablep = tablep->submsgs[submsg_idx]; - fastdecode_submsgdata submsg = {decode_totable(subtablep)}; - fastdecode_arr farr; - - if (subtablep->table_mask == (uint8_t)-1) { - RETURN_GENERIC("submessage doesn't have fast tables."); - } - - dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, &farr, - sizeof(upb_msg *), card); - - if (card == CARD_s) { - *(uint32_t*)msg |= hasbits; - hasbits = 0; - } - -again: - if (card == CARD_r) { - dst = fastdecode_resizearr(d, dst, &farr, sizeof(upb_msg*)); - } - - submsg.msg = *dst; - - if (card == CARD_r || UPB_LIKELY(!submsg.msg)) { - *dst = submsg.msg = decode_newmsg_ceil(d, subtablep, msg_ceil_bytes); - } - - ptr += tagbytes; - ptr = 
fastdecode_delimited(d, ptr, fastdecode_tosubmsg, &submsg); - - if (UPB_UNLIKELY(ptr == NULL || d->end_group != DECODE_NOGROUP)) { - return fastdecode_err(d); - } - - if (card == CARD_r) { - fastdecode_nextret ret = fastdecode_nextrepeated( - d, dst, &ptr, &farr, data, tagbytes, sizeof(upb_msg *)); - switch (ret.next) { - case FD_NEXT_SAMEFIELD: - dst = ret.dst; - goto again; - case FD_NEXT_OTHERFIELD: - d->depth++; - return fastdecode_tagdispatch(d, ptr, msg, table, hasbits, ret.tag); - case FD_NEXT_ATLIMIT: - d->depth++; - return ptr; - } - } - - d->depth++; - return fastdecode_dispatch(d, ptr, msg, table, hasbits); -} - -#define F(card, tagbytes, size_ceil, ceil_arg) \ - const char *upb_p##card##m_##tagbytes##bt_max##size_ceil##b( \ - UPB_PARSE_PARAMS) { \ - return fastdecode_submsg(UPB_PARSE_ARGS, tagbytes, ceil_arg, CARD_##card); \ +#define FASTDECODE_SUBMSG(d, ptr, msg, table, hasbits, data, tagbytes, \ + msg_ceil_bytes, card) \ + \ + if (UPB_UNLIKELY(!fastdecode_checktag(data, tagbytes))) { \ + RETURN_GENERIC("submessage field tag mismatch\n"); \ + } \ + \ + if (--d->depth == 0) return fastdecode_err(d); \ + \ + upb_msg **dst; \ + uint32_t submsg_idx = (data >> 16) & 0xff; \ + const upb_msglayout *tablep = decode_totablep(table); \ + const upb_msglayout *subtablep = tablep->submsgs[submsg_idx]; \ + fastdecode_submsgdata submsg = {decode_totable(subtablep)}; \ + fastdecode_arr farr; \ + \ + if (subtablep->table_mask == (uint8_t)-1) { \ + RETURN_GENERIC("submessage doesn't have fast tables."); \ + } \ + \ + dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, &farr, \ + sizeof(upb_msg *), card); \ + \ + if (card == CARD_s) { \ + *(uint32_t *)msg |= hasbits; \ + hasbits = 0; \ + } \ + \ + again: \ + if (card == CARD_r) { \ + dst = fastdecode_resizearr(d, dst, &farr, sizeof(upb_msg *)); \ + } \ + \ + submsg.msg = *dst; \ + \ + if (card == CARD_r || UPB_LIKELY(!submsg.msg)) { \ + *dst = submsg.msg = decode_newmsg_ceil(d, subtablep, msg_ceil_bytes); \ + } \ + \ 
+ ptr += tagbytes; \ + ptr = fastdecode_delimited(d, ptr, fastdecode_tosubmsg, &submsg); \ + \ + if (UPB_UNLIKELY(ptr == NULL || d->end_group != DECODE_NOGROUP)) { \ + return fastdecode_err(d); \ + } \ + \ + if (card == CARD_r) { \ + fastdecode_nextret ret = fastdecode_nextrepeated( \ + d, dst, &ptr, &farr, data, tagbytes, sizeof(upb_msg *)); \ + switch (ret.next) { \ + case FD_NEXT_SAMEFIELD: \ + dst = ret.dst; \ + goto again; \ + case FD_NEXT_OTHERFIELD: \ + d->depth++; \ + data = ret.tag; \ + UPB_MUSTTAIL return fastdecode_tagdispatch(UPB_PARSE_ARGS); \ + case FD_NEXT_ATLIMIT: \ + d->depth++; \ + return ptr; \ + } \ + } \ + \ + d->depth++; \ + UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS); + +#define F(card, tagbytes, size_ceil, ceil_arg) \ + const char *upb_p##card##m_##tagbytes##bt_max##size_ceil##b( \ + UPB_PARSE_PARAMS) { \ + FASTDECODE_SUBMSG(d, ptr, msg, table, hasbits, data, tagbytes, ceil_arg, \ + CARD_##card); \ } #define SIZES(card, tagbytes) \ @@ -3650,9 +3690,11 @@ TAGBYTES(r) #undef TAGBYTES #undef SIZES #undef F +#undef FASTDECODE_SUBMSG #endif /* UPB_FASTTABLE */ -/* This file was generated by upbc (the upb compiler) from the input + +/** bazel-out/k8-fastbuild/bin/external/com_google_protobuf/google/protobuf/descriptor.upb.c ************************************************************//* This file was generated by upbc (the upb compiler) from the input * file: * * google/protobuf/descriptor.proto @@ -4135,6 +4177,7 @@ const upb_msglayout google_protobuf_GeneratedCodeInfo_Annotation_msginit = { +/** upb/def.c ************************************************************/ #include #include @@ -4172,7 +4215,6 @@ struct upb_fielddef { uint32_t number_; uint16_t index_; uint16_t layout_index; - uint32_t selector_base; /* Used to index into a upb::Handlers table. 
*/ bool is_extension_; bool lazy_; bool packed_; @@ -4185,8 +4227,6 @@ struct upb_msgdef { const upb_msglayout *layout; const upb_filedef *file; const char *full_name; - uint32_t selector_count; - uint32_t submsg_field_count; /* Tables for looking up fields by number and name. */ upb_inttable itof; @@ -4316,30 +4356,6 @@ int cmp_fields(const void *p1, const void *p2) { return field_rank(f1) - field_rank(f2); } -/* A few implementation details of handlers. We put these here to avoid - * a def -> handlers dependency. */ - -#define UPB_STATIC_SELECTOR_COUNT 3 /* Warning: also in upb/handlers.h. */ - -static uint32_t upb_handlers_selectorbaseoffset(const upb_fielddef *f) { - return upb_fielddef_isseq(f) ? 2 : 0; -} - -static uint32_t upb_handlers_selectorcount(const upb_fielddef *f) { - uint32_t ret = 1; - if (upb_fielddef_isseq(f)) ret += 2; /* STARTSEQ/ENDSEQ */ - if (upb_fielddef_isstring(f)) ret += 2; /* [STRING]/STARTSTR/ENDSTR */ - if (upb_fielddef_issubmsg(f)) { - /* ENDSUBMSG (STARTSUBMSG is at table beginning) */ - ret += 0; - if (upb_fielddef_lazy(f)) { - /* STARTSTR/ENDSTR/STRING (for lazy) */ - ret += 3; - } - } - return ret; -} - static void upb_status_setoom(upb_status *status) { upb_status_seterrmsg(status, "out of memory"); } @@ -4431,8 +4447,7 @@ bool upb_enumdef_ntoi(const upb_enumdef *def, const char *name, const char *upb_enumdef_iton(const upb_enumdef *def, int32_t num) { upb_value v; - return upb_inttable_lookup32(&def->iton, num, &v) ? - upb_value_getcstr(v) : NULL; + return upb_inttable_lookup(&def->iton, num, &v) ? 
upb_value_getcstr(v) : NULL; } const char *upb_enum_iter_name(upb_enum_iter *iter) { @@ -4521,10 +4536,6 @@ const char *upb_fielddef_jsonname(const upb_fielddef *f) { return f->json_name; } -uint32_t upb_fielddef_selectorbase(const upb_fielddef *f) { - return f->selector_base; -} - const upb_filedef *upb_fielddef_file(const upb_fielddef *f) { return f->file; } @@ -4687,18 +4698,10 @@ upb_syntax_t upb_msgdef_syntax(const upb_msgdef *m) { return m->file->syntax; } -size_t upb_msgdef_selectorcount(const upb_msgdef *m) { - return m->selector_count; -} - -uint32_t upb_msgdef_submsgfieldcount(const upb_msgdef *m) { - return m->submsg_field_count; -} - const upb_fielddef *upb_msgdef_itof(const upb_msgdef *m, uint32_t i) { upb_value val; - return upb_inttable_lookup32(&m->itof, i, &val) ? - upb_value_getconstptr(val) : NULL; + return upb_inttable_lookup(&m->itof, i, &val) ? upb_value_getconstptr(val) + : NULL; } const upb_fielddef *upb_msgdef_ntof(const upb_msgdef *m, const char *name, @@ -4906,8 +4909,8 @@ const upb_fielddef *upb_oneofdef_ntof(const upb_oneofdef *o, const upb_fielddef *upb_oneofdef_itof(const upb_oneofdef *o, uint32_t num) { upb_value val; - return upb_inttable_lookup32(&o->itof, num, &val) ? - upb_value_getptr(val) : NULL; + return upb_inttable_lookup(&o->itof, num, &val) ? 
upb_value_getptr(val) + : NULL; } void upb_oneof_begin(upb_oneof_iter *iter, const upb_oneofdef *o) { @@ -4987,7 +4990,6 @@ void upb_symtab_free(upb_symtab *s) { upb_symtab *upb_symtab_new(void) { upb_symtab *s = upb_gmalloc(sizeof(*s)); - upb_alloc *alloc; if (!s) { return NULL; @@ -4995,10 +4997,9 @@ upb_symtab *upb_symtab_new(void) { s->arena = upb_arena_new(); s->bytes_loaded = 0; - alloc = upb_arena_alloc(s->arena); - if (!upb_strtable_init2(&s->syms, UPB_CTYPE_CONSTPTR, 32, alloc) || - !upb_strtable_init2(&s->files, UPB_CTYPE_CONSTPTR, 4, alloc)) { + if (!upb_strtable_init(&s->syms, 32, s->arena) || + !upb_strtable_init(&s->files, 4, s->arena)) { upb_arena_free(s->arena); upb_gfree(s); s = NULL; @@ -5054,8 +5055,7 @@ int upb_symtab_filecount(const upb_symtab *s) { typedef struct { upb_symtab *symtab; upb_filedef *file; /* File we are building. */ - upb_arena *file_arena; /* Allocate defs here. */ - upb_alloc *alloc; /* Alloc of file_arena, for tables. */ + upb_arena *arena; /* Allocate defs here. */ const upb_msglayout **layouts; /* NULL if we should build layouts. */ upb_status *status; /* Record errors here. */ jmp_buf err; /* longjmp() on error. */ @@ -5077,7 +5077,7 @@ static void symtab_oomerr(symtab_addctx *ctx) { } void *symtab_alloc(symtab_addctx *ctx, size_t bytes) { - void *ret = upb_arena_malloc(ctx->file_arena, bytes); + void *ret = upb_arena_malloc(ctx->arena, bytes); if (!ret) symtab_oomerr(ctx); return ret; } @@ -5184,13 +5184,21 @@ static void make_layout(symtab_addctx *ctx, const upb_msgdef *m) { upb_msg_field_iter it; upb_msg_oneof_iter oit; size_t hasbit; - size_t submsg_count = m->submsg_field_count; + size_t field_count = upb_msgdef_numfields(m); + size_t submsg_count = 0; const upb_msglayout **submsgs; upb_msglayout_field *fields; memset(l, 0, sizeof(*l) + sizeof(_upb_fasttable_entry)); - fields = symtab_alloc(ctx, upb_msgdef_numfields(m) * sizeof(*fields)); + /* Count sub-messages. 
*/ + for (size_t i = 0; i < field_count; i++) { + if (upb_fielddef_issubmsg(&m->fields[i])) { + submsg_count++; + } + } + + fields = symtab_alloc(ctx, field_count * sizeof(*fields)); submsgs = symtab_alloc(ctx, submsg_count * sizeof(*submsgs)); l->field_count = upb_msgdef_numfields(m); @@ -5341,51 +5349,8 @@ static void make_layout(symtab_addctx *ctx, const upb_msgdef *m) { assign_layout_indices(m, fields); } -static void assign_msg_indices(symtab_addctx *ctx, upb_msgdef *m) { - /* Sort fields. upb internally relies on UPB_TYPE_MESSAGE fields having the - * lowest indexes, but we do not publicly guarantee this. */ - upb_msg_field_iter j; - int i; - uint32_t selector; - int n = upb_msgdef_numfields(m); - upb_fielddef **fields; - - if (n == 0) { - m->selector_count = UPB_STATIC_SELECTOR_COUNT; - m->submsg_field_count = 0; - return; - } - - fields = upb_gmalloc(n * sizeof(*fields)); - - m->submsg_field_count = 0; - for(i = 0, upb_msg_field_begin(&j, m); - !upb_msg_field_done(&j); - upb_msg_field_next(&j), i++) { - upb_fielddef *f = upb_msg_iter_field(&j); - UPB_ASSERT(f->msgdef == m); - if (upb_fielddef_issubmsg(f)) { - m->submsg_field_count++; - } - fields[i] = f; - } - - qsort(fields, n, sizeof(*fields), cmp_fields); - - selector = UPB_STATIC_SELECTOR_COUNT + m->submsg_field_count; - for (i = 0; i < n; i++) { - upb_fielddef *f = fields[i]; - f->index_ = i; - f->selector_base = selector + upb_handlers_selectorbaseoffset(f); - selector += upb_handlers_selectorcount(f); - } - m->selector_count = selector; - - upb_gfree(fields); -} - static char *strviewdup(symtab_addctx *ctx, upb_strview view) { - return upb_strdup2(view.data, view.size, ctx->alloc); + return upb_strdup2(view.data, view.size, ctx->arena); } static bool streql2(const char *a, size_t n, const char *b) { @@ -5496,9 +5461,9 @@ static void symtab_add(symtab_addctx *ctx, const char *name, upb_value v) { if (upb_strtable_lookup(&ctx->symtab->syms, name, NULL)) { symtab_errf(ctx, "duplicate symbol '%s'", 
name); } - upb_alloc *alloc = upb_arena_alloc(ctx->symtab->arena); size_t len = strlen(name); - CHK_OOM(upb_strtable_insert3(&ctx->symtab->syms, name, len, v, alloc)); + CHK_OOM(upb_strtable_insert(&ctx->symtab->syms, name, len, v, + ctx->symtab->arena)); } /* Given a symbol and the base symbol inside which it is defined, find the @@ -5531,7 +5496,8 @@ static const void *symtab_resolve(symtab_addctx *ctx, const upb_fielddef *f, } notfound: - symtab_errf(ctx, "couldn't resolve name '%s'", sym.data); + symtab_errf(ctx, "couldn't resolve name '" UPB_STRVIEW_FORMAT "'", + UPB_STRVIEW_ARGS(sym)); } static void create_oneofdef( @@ -5549,10 +5515,10 @@ static void create_oneofdef( v = pack_def(o, UPB_DEFTYPE_ONEOF); symtab_add(ctx, o->full_name, v); - CHK_OOM(upb_strtable_insert3(&m->ntof, name.data, name.size, v, ctx->alloc)); + CHK_OOM(upb_strtable_insert(&m->ntof, name.data, name.size, v, ctx->arena)); - CHK_OOM(upb_inttable_init2(&o->itof, UPB_CTYPE_CONSTPTR, ctx->alloc)); - CHK_OOM(upb_strtable_init2(&o->ntof, UPB_CTYPE_CONSTPTR, 4, ctx->alloc)); + CHK_OOM(upb_inttable_init(&o->itof, ctx->arena)); + CHK_OOM(upb_strtable_init(&o->ntof, 4, ctx->arena)); } static str_t *newstr(symtab_addctx *ctx, const char *data, size_t len) { @@ -5608,8 +5574,7 @@ static void parse_default(symtab_addctx *ctx, const char *str, size_t len, break; } case UPB_TYPE_INT64: { - /* XXX: Need to write our own strtoll, since it's not available in c89. */ - int64_t val = strtol(str, &end, 0); + long long val = strtoll(str, &end, 0); if (val > INT64_MAX || val < INT64_MIN || errno == ERANGE || *end) { goto invalid; } @@ -5625,8 +5590,7 @@ static void parse_default(symtab_addctx *ctx, const char *str, size_t len, break; } case UPB_TYPE_UINT64: { - /* XXX: Need to write our own strtoull, since it's not available in c89. 
*/ - uint64_t val = strtoul(str, &end, 0); + unsigned long long val = strtoull(str, &end, 0); if (val > UINT64_MAX || errno == ERANGE || *end) { goto invalid; } @@ -5642,8 +5606,7 @@ static void parse_default(symtab_addctx *ctx, const char *str, size_t len, break; } case UPB_TYPE_FLOAT: { - /* XXX: Need to write our own strtof, since it's not available in c89. */ - float val = strtod(str, &end); + float val = strtof(str, &end); if (errno == ERANGE || *end) { goto invalid; } @@ -5709,7 +5672,6 @@ static void set_default_default(symtab_addctx *ctx, upb_fielddef *f) { static void create_fielddef( symtab_addctx *ctx, const char *prefix, upb_msgdef *m, const google_protobuf_FieldDescriptorProto *field_proto) { - upb_alloc *alloc = ctx->alloc; upb_fielddef *f; const google_protobuf_FieldOptions *options; upb_strview name; @@ -5745,7 +5707,8 @@ static void create_fielddef( upb_value v, field_v, json_v; size_t json_size; - f = (upb_fielddef*)&m->fields[m->field_count++]; + f = (upb_fielddef*)&m->fields[m->field_count]; + f->index_ = m->field_count++; f->msgdef = m; f->is_extension_ = false; @@ -5766,12 +5729,12 @@ static void create_fielddef( v = upb_value_constptr(f); json_size = strlen(json_name); - CHK_OOM( - upb_strtable_insert3(&m->ntof, name.data, name.size, field_v, alloc)); - CHK_OOM(upb_inttable_insert2(&m->itof, field_number, v, alloc)); + CHK_OOM(upb_strtable_insert(&m->ntof, name.data, name.size, field_v, + ctx->arena)); + CHK_OOM(upb_inttable_insert(&m->itof, field_number, v, ctx->arena)); if (strcmp(shortname, json_name) != 0) { - upb_strtable_insert3(&m->ntof, json_name, json_size, json_v, alloc); + upb_strtable_insert(&m->ntof, json_name, json_size, json_v, ctx->arena); } if (ctx->layouts) { @@ -5834,15 +5797,16 @@ static void create_fielddef( symtab_errf(ctx, "oneof_index out of range (%s)", f->full_name); } - oneof = (upb_oneofdef*)&m->oneofs[oneof_index]; + oneof = (upb_oneofdef *)&m->oneofs[oneof_index]; f->oneof = oneof; oneof->field_count++; if 
(f->proto3_optional_) { oneof->synthetic = true; } - CHK_OOM(upb_inttable_insert2(&oneof->itof, f->number_, v, alloc)); - CHK_OOM(upb_strtable_insert3(&oneof->ntof, name.data, name.size, v, alloc)); + CHK_OOM(upb_inttable_insert(&oneof->itof, f->number_, v, ctx->arena)); + CHK_OOM( + upb_strtable_insert(&oneof->ntof, name.data, name.size, v, ctx->arena)); } else { f->oneof = NULL; if (f->proto3_optional_) { @@ -5885,8 +5849,8 @@ static void create_enumdef( symtab_add(ctx, e->full_name, pack_def(e, UPB_DEFTYPE_ENUM)); values = google_protobuf_EnumDescriptorProto_value(enum_proto, &n); - CHK_OOM(upb_strtable_init2(&e->ntoi, UPB_CTYPE_INT32, n, ctx->alloc)); - CHK_OOM(upb_inttable_init2(&e->iton, UPB_CTYPE_CSTR, ctx->alloc)); + CHK_OOM(upb_strtable_init(&e->ntoi, n, ctx->arena)); + CHK_OOM(upb_inttable_init(&e->iton, ctx->arena)); e->file = ctx->file; e->defaultval = 0; @@ -5913,16 +5877,15 @@ static void create_enumdef( } CHK_OOM(name2) - CHK_OOM( - upb_strtable_insert3(&e->ntoi, name2, strlen(name2), v, ctx->alloc)); + CHK_OOM(upb_strtable_insert(&e->ntoi, name2, strlen(name2), v, ctx->arena)); if (!upb_inttable_lookup(&e->iton, num, NULL)) { upb_value v = upb_value_cstr(name2); - CHK_OOM(upb_inttable_insert2(&e->iton, num, v, ctx->alloc)); + CHK_OOM(upb_inttable_insert(&e->iton, num, v, ctx->arena)); } } - upb_inttable_compact2(&e->iton, ctx->alloc); + upb_inttable_compact(&e->iton, ctx->arena); } static void create_msgdef(symtab_addctx *ctx, const char *prefix, @@ -5946,9 +5909,8 @@ static void create_msgdef(symtab_addctx *ctx, const char *prefix, oneofs = google_protobuf_DescriptorProto_oneof_decl(msg_proto, &n_oneof); fields = google_protobuf_DescriptorProto_field(msg_proto, &n_field); - CHK_OOM(upb_inttable_init2(&m->itof, UPB_CTYPE_CONSTPTR, ctx->alloc)); - CHK_OOM(upb_strtable_init2(&m->ntof, UPB_CTYPE_CONSTPTR, n_oneof + n_field, - ctx->alloc)); + CHK_OOM(upb_inttable_init(&m->itof, ctx->arena)); + CHK_OOM(upb_strtable_init(&m->ntof, n_oneof + n_field, 
ctx->arena)); m->file = ctx->file; m->map_entry = false; @@ -5980,10 +5942,9 @@ static void create_msgdef(symtab_addctx *ctx, const char *prefix, create_fielddef(ctx, m->full_name, m, fields[i]); } - assign_msg_indices(ctx, m); finalize_oneofs(ctx, m); assign_msg_wellknowntype(m); - upb_inttable_compact2(&m->itof, ctx->alloc); + upb_inttable_compact(&m->itof, ctx->arena); /* This message is built. Now build nested messages and enums. */ @@ -6212,19 +6173,18 @@ static void build_filedef( } static void remove_filedef(upb_symtab *s, upb_filedef *file) { - upb_alloc *alloc = upb_arena_alloc(s->arena); int i; for (i = 0; i < file->msg_count; i++) { const char *name = file->msgs[i].full_name; - upb_strtable_remove3(&s->syms, name, strlen(name), NULL, alloc); + upb_strtable_remove(&s->syms, name, strlen(name), NULL); } for (i = 0; i < file->enum_count; i++) { const char *name = file->enums[i].full_name; - upb_strtable_remove3(&s->syms, name, strlen(name), NULL, alloc); + upb_strtable_remove(&s->syms, name, strlen(name), NULL); } for (i = 0; i < file->ext_count; i++) { const char *name = file->exts[i].full_name; - upb_strtable_remove3(&s->syms, name, strlen(name), NULL, alloc); + upb_strtable_remove(&s->syms, name, strlen(name), NULL); } } @@ -6242,8 +6202,7 @@ static const upb_filedef *_upb_symtab_addfile( ctx.file = file; ctx.symtab = s; - ctx.file_arena = file_arena; - ctx.alloc = upb_arena_alloc(file_arena); + ctx.arena = file_arena; ctx.layouts = layouts; ctx.status = status; @@ -6258,8 +6217,8 @@ static const upb_filedef *_upb_symtab_addfile( file = NULL; } else { build_filedef(&ctx, file, file_proto); - upb_strtable_insert3(&s->files, file->name, strlen(file->name), - upb_value_constptr(file), ctx.alloc); + upb_strtable_insert(&s->files, file->name, strlen(file->name), + upb_value_constptr(file), ctx.arena); UPB_ASSERT(upb_ok(status)); upb_arena_fuse(s->arena, file_arena); } @@ -6333,6 +6292,7 @@ upb_arena *_upb_symtab_arena(const upb_symtab *s) { #undef CHK_OOM 
+/** upb/reflection.c ************************************************************/ #include @@ -6443,40 +6403,7 @@ upb_msgval upb_msg_get(const upb_msg *msg, const upb_fielddef *f) { if (!upb_fielddef_haspresence(f) || upb_msg_has(msg, f)) { return _upb_msg_getraw(msg, f); } else { - /* TODO(haberman): change upb_fielddef to not require this switch(). */ - upb_msgval val = {0}; - switch (upb_fielddef_type(f)) { - case UPB_TYPE_INT32: - case UPB_TYPE_ENUM: - val.int32_val = upb_fielddef_defaultint32(f); - break; - case UPB_TYPE_INT64: - val.int64_val = upb_fielddef_defaultint64(f); - break; - case UPB_TYPE_UINT32: - val.uint32_val = upb_fielddef_defaultuint32(f); - break; - case UPB_TYPE_UINT64: - val.uint64_val = upb_fielddef_defaultuint64(f); - break; - case UPB_TYPE_FLOAT: - val.float_val = upb_fielddef_defaultfloat(f); - break; - case UPB_TYPE_DOUBLE: - val.double_val = upb_fielddef_defaultdouble(f); - break; - case UPB_TYPE_BOOL: - val.bool_val = upb_fielddef_defaultbool(f); - break; - case UPB_TYPE_STRING: - case UPB_TYPE_BYTES: - val.str_val.data = upb_fielddef_defaultstr(f, &val.str_val.size); - break; - case UPB_TYPE_MESSAGE: - val.msg_val = NULL; - break; - } - return val; + return upb_fielddef_default(f); } } @@ -6736,6 +6663,7 @@ upb_msgval upb_mapiter_value(const upb_map *map, size_t iter) { /* void upb_mapiter_setvalue(upb_map *map, size_t iter, upb_msgval value); */ +/** upb/json_decode.c ************************************************************/ #include #include @@ -7646,17 +7574,17 @@ static void jsondec_field(jsondec *d, upb_msg *msg, const upb_msgdef *m) { return; } - if (upb_fielddef_realcontainingoneof(f) && - upb_msg_whichoneof(msg, upb_fielddef_containingoneof(f))) { - jsondec_err(d, "More than one field for this oneof."); - } - if (jsondec_peek(d) == JD_NULL && !jsondec_isvalue(f)) { /* JSON "null" indicates a default value, so no need to set anything. 
*/ jsondec_null(d); return; } + if (upb_fielddef_realcontainingoneof(f) && + upb_msg_whichoneof(msg, upb_fielddef_containingoneof(f))) { + jsondec_err(d, "More than one field for this oneof."); + } + preserved = d->debug_field; d->debug_field = f; @@ -8160,6 +8088,9 @@ bool upb_json_decode(const char *buf, size_t size, upb_msg *msg, const upb_msgdef *m, const upb_symtab *any_pool, int options, upb_arena *arena, upb_status *status) { jsondec d; + + if (size == 0) return true; + d.ptr = buf; d.end = buf + size; d.arena = arena; @@ -8178,6 +8109,7 @@ bool upb_json_decode(const char *buf, size_t size, upb_msg *msg, return true; } +/** upb/json_encode.c ************************************************************/ #include #include @@ -8207,7 +8139,7 @@ static void jsonenc_scalar(jsonenc *e, upb_msgval val, const upb_fielddef *f); static void jsonenc_msgfield(jsonenc *e, const upb_msg *msg, const upb_msgdef *m); static void jsonenc_msgfields(jsonenc *e, const upb_msg *msg, - const upb_msgdef *m); + const upb_msgdef *m, bool first); static void jsonenc_value(jsonenc *e, const upb_msg *msg, const upb_msgdef *m); UPB_NORETURN static void jsonenc_err(jsonenc *e, const char *msg) { @@ -8238,8 +8170,10 @@ static void jsonenc_putbytes(jsonenc *e, const void *data, size_t len) { memcpy(e->ptr, data, len); e->ptr += len; } else { - if (have) memcpy(e->ptr, data, have); - e->ptr += have; + if (have) { + memcpy(e->ptr, data, have); + e->ptr += have; + } e->overflow += (len - have); } } @@ -8261,7 +8195,7 @@ static void jsonenc_printf(jsonenc *e, const char *fmt, ...) 
{ if (UPB_LIKELY(have > n)) { e->ptr += n; } else { - e->ptr += have; + e->ptr = UPB_PTRADD(e->ptr, have); e->overflow += (n - have); } } @@ -8365,7 +8299,7 @@ static void jsonenc_bytes(jsonenc *e, upb_strview str) { static const char base64[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; const unsigned char *ptr = (unsigned char*)str.data; - const unsigned char *end = ptr + str.size; + const unsigned char *end = UPB_PTRADD(ptr, str.size); char buf[4]; jsonenc_putstr(e, "\""); @@ -8401,7 +8335,7 @@ static void jsonenc_bytes(jsonenc *e, upb_strview str) { static void jsonenc_stringbody(jsonenc *e, upb_strview str) { const char *ptr = str.data; - const char *end = ptr + str.size; + const char *end = UPB_PTRADD(ptr, str.size); while (ptr < end) { switch (*ptr) { @@ -8517,14 +8451,13 @@ static void jsonenc_any(jsonenc *e, const upb_msg *msg, const upb_msgdef *m) { jsonenc_putstr(e, "{\"@type\":"); jsonenc_string(e, type_url); - jsonenc_putstr(e, ","); if (upb_msgdef_wellknowntype(any_m) == UPB_WELLKNOWN_UNSPECIFIED) { /* Regular messages: {"@type": "...","foo": 1, "bar": 2} */ - jsonenc_msgfields(e, any, any_m); + jsonenc_msgfields(e, any, any_m, false); } else { /* Well-known type: {"@type": "...","value": } */ - jsonenc_putstr(e, "\"value\":"); + jsonenc_putstr(e, ",\"value\":"); jsonenc_msgfield(e, any, any_m); } @@ -8827,10 +8760,9 @@ static void jsonenc_fieldval(jsonenc *e, const upb_fielddef *f, } static void jsonenc_msgfields(jsonenc *e, const upb_msg *msg, - const upb_msgdef *m) { + const upb_msgdef *m, bool first) { upb_msgval val; const upb_fielddef *f; - bool first = true; if (e->options & UPB_JSONENC_EMITDEFAULTS) { /* Iterate over all fields. 
*/ @@ -8853,7 +8785,7 @@ static void jsonenc_msgfields(jsonenc *e, const upb_msg *msg, static void jsonenc_msg(jsonenc *e, const upb_msg *msg, const upb_msgdef *m) { jsonenc_putstr(e, "{"); - jsonenc_msgfields(e, msg, m); + jsonenc_msgfields(e, msg, m, true); jsonenc_putstr(e, "}"); } @@ -8875,7 +8807,7 @@ size_t upb_json_encode(const upb_msg *msg, const upb_msgdef *m, e.buf = buf; e.ptr = buf; - e.end = buf + size; + e.end = UPB_PTRADD(buf, size); e.overflow = 0; e.options = options; e.ext_pool = ext_pool; @@ -8888,27 +8820,39 @@ size_t upb_json_encode(const upb_msg *msg, const upb_msgdef *m, if (e.arena) upb_arena_free(e.arena); return jsonenc_nullz(&e, size); } + +/** upb/port_undef.inc ************************************************************/ /* See port_def.inc. This should #undef all macros #defined there. */ -#undef UPB_MAPTYPE_STRING #undef UPB_SIZE #undef UPB_PTR_AT #undef UPB_READ_ONEOF #undef UPB_WRITE_ONEOF +#undef UPB_MAPTYPE_STRING #undef UPB_INLINE #undef UPB_ALIGN_UP #undef UPB_ALIGN_DOWN #undef UPB_ALIGN_MALLOC #undef UPB_ALIGN_OF +#undef UPB_LIKELY +#undef UPB_UNLIKELY #undef UPB_FORCEINLINE #undef UPB_NOINLINE #undef UPB_NORETURN +#undef UPB_PRINTF #undef UPB_MAX #undef UPB_MIN #undef UPB_UNUSED #undef UPB_ASSUME #undef UPB_ASSERT #undef UPB_UNREACHABLE +#undef UPB_SETJMP +#undef UPB_LONGJMP +#undef UPB_PTRADD +#undef UPB_MUSTTAIL +#undef UPB_FASTTABLE_SUPPORTED +#undef UPB_FASTTABLE +#undef UPB_FASTTABLE_INIT #undef UPB_POISON_MEMORY_REGION #undef UPB_UNPOISON_MEMORY_REGION #undef UPB_ASAN diff --git a/ruby/ext/google/protobuf_c/ruby-upb.h b/ruby/ext/google/protobuf_c/ruby-upb.h index fa04393558..68d6345fc5 100755 --- a/ruby/ext/google/protobuf_c/ruby-upb.h +++ b/ruby/ext/google/protobuf_c/ruby-upb.h @@ -1,26 +1,53 @@ /* Amalgamated source file */ -#include /* -* This is where we define macros used across upb. -* -* All of these macros are undef'd in port_undef.inc to avoid leaking them to -* users. 
-* -* The correct usage is: -* -* #include "upb/foobar.h" -* #include "upb/baz.h" -* -* // MUST be last included header. -* #include "upb/port_def.inc" -* -* // Code for this file. -* // <...> -* -* // Can be omitted for .c files, required for .h. -* #include "upb/port_undef.inc" -* -* This file is private and must not be included by users! -*/ +/* + * Copyright (c) 2009-2021, Google LLC + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Google LLC nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * This is where we define macros used across upb. 
+ * + * All of these macros are undef'd in port_undef.inc to avoid leaking them to + * users. + * + * The correct usage is: + * + * #include "upb/foobar.h" + * #include "upb/baz.h" + * + * // MUST be last included header. + * #include "upb/port_def.inc" + * + * // Code for this file. + * // <...> + * + * // Can be omitted for .c files, required for .h. + * #include "upb/port_undef.inc" + * + * This file is private and must not be included by users! + */ #if !((defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L) || \ (defined(__cplusplus) && __cplusplus >= 201103L) || \ @@ -136,9 +163,40 @@ #define UPB_LONGJMP(buf, val) longjmp(buf, val) #endif +/* UPB_PTRADD(ptr, ofs): add pointer while avoiding "NULL + 0" UB */ +#define UPB_PTRADD(ptr, ofs) ((ofs) ? (ptr) + (ofs) : (ptr)) + /* Configure whether fasttable is switched on or not. *************************/ -#if defined(__x86_64__) && defined(__GNUC__) +#ifdef __has_attribute +#define UPB_HAS_ATTRIBUTE(x) __has_attribute(x) +#else +#define UPB_HAS_ATTRIBUTE(x) 0 +#endif + +#if UPB_HAS_ATTRIBUTE(musttail) +#define UPB_MUSTTAIL __attribute__((musttail)) +#else +#define UPB_MUSTTAIL +#endif + +#undef UPB_HAS_ATTRIBUTE + +/* This check is not fully robust: it does not require that we have "musttail" + * support available. We need tail calls to avoid consuming arbitrary amounts + * of stack space. + * + * GCC/Clang can mostly be trusted to generate tail calls as long as + * optimization is enabled, but, debug builds will not generate tail calls + * unless "musttail" is available. + * + * We should probably either: + * 1. require that the compiler supports musttail. + * 2. add some fallback code for when musttail isn't available (ie. return + * instead of tail calling). This is safe and portable, but this comes at + * a CPU cost. 
+ */ +#if (defined(__x86_64__) || defined(__aarch64__)) && defined(__GNUC__) #define UPB_FASTTABLE_SUPPORTED 1 #else #define UPB_FASTTABLE_SUPPORTED 0 @@ -149,7 +207,7 @@ * for example for testing or benchmarking. */ #if defined(UPB_ENABLE_FASTTABLE) #if !UPB_FASTTABLE_SUPPORTED -#error fasttable is x86-64 + Clang/GCC only +#error fasttable is x86-64/ARM64 only and requires GCC or Clang. #endif #define UPB_FASTTABLE 1 /* Define UPB_TRY_ENABLE_FASTTABLE to use fasttable if possible. @@ -193,55 +251,36 @@ void __asan_unpoison_memory_region(void const volatile *addr, size_t size); ((void)(addr), (void)(size)) #define UPB_UNPOISON_MEMORY_REGION(addr, size) \ ((void)(addr), (void)(size)) -#endif +#endif + +/** upb/decode.h ************************************************************/ /* -** upb_decode: parsing into a upb_msg using a upb_msglayout. -*/ + * upb_decode: parsing into a upb_msg using a upb_msglayout. + */ #ifndef UPB_DECODE_H_ #define UPB_DECODE_H_ + +/** upb/msg.h ************************************************************/ /* -** Our memory representation for parsing tables and messages themselves. -** Functions in this file are used by generated code and possibly reflection. -** -** The definitions in this file are internal to upb. -**/ + * Public APIs for message operations that do not require descriptors. + * These functions can be used even in build that does not want to depend on + * reflection or descriptors. + * + * Descriptor-based reflection functionality lives in reflection.h. + */ #ifndef UPB_MSG_H_ #define UPB_MSG_H_ -#include -#include -#include - -/* -** upb_table -** -** This header is INTERNAL-ONLY! Its interfaces are not public or stable! -** This file defines very fast int->upb_value (inttable) and string->upb_value -** (strtable) hash tables. -** -** The table uses chained scatter with Brent's variation (inspired by the Lua -** implementation of hash tables). The hash function for strings is Austin -** Appleby's "MurmurHash." 
-** -** The inttable uses uintptr_t as its key, which guarantees it can be used to -** store pointers or integers of at least 32 bits (upb isn't really useful on -** systems where sizeof(void*) < 4). -** -** The table must be homogeneous (all values of the same type). In debug -** mode, we check this on insert and lookup. -*/ +#include -#ifndef UPB_TABLE_H_ -#define UPB_TABLE_H_ -#include -#include +/** upb/upb.h ************************************************************/ /* -** This file contains shared definitions that are widely used across upb. -*/ + * This file contains shared definitions that are widely used across upb. + */ #ifndef UPB_H_ #define UPB_H_ @@ -399,7 +438,7 @@ typedef struct { upb_arena *upb_arena_init(void *mem, size_t n, upb_alloc *alloc); void upb_arena_free(upb_arena *a); bool upb_arena_addcleanup(upb_arena *a, void *ud, upb_cleanup_func *func); -void upb_arena_fuse(upb_arena *a, upb_arena *b); +bool upb_arena_fuse(upb_arena *a, upb_arena *b); void *_upb_arena_slowmalloc(upb_arena *a, size_t size); UPB_INLINE upb_alloc *upb_arena_alloc(upb_arena *a) { return (upb_alloc*)a; } @@ -578,55 +617,134 @@ UPB_INLINE int _upb_lg2ceilsize(int x) { #endif /* UPB_H_ */ +#ifdef __cplusplus +extern "C" { +#endif + +typedef void upb_msg; + +/* For users these are opaque. They can be obtained from upb_msgdef_layout() + * but users cannot access any of the members. */ +struct upb_msglayout; +typedef struct upb_msglayout upb_msglayout; + +/* Adds unknown data (serialized protobuf data) to the given message. The data + * is copied into the message instance. */ +void upb_msg_addunknown(upb_msg *msg, const char *data, size_t len, + upb_arena *arena); + +/* Returns a reference to the message's unknown data. */ +const char *upb_msg_getunknown(const upb_msg *msg, size_t *len); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* UPB_MSG_INT_H_ */ + +/* Must be last. 
*/ #ifdef __cplusplus extern "C" { #endif +enum { + /* If set, strings will alias the input buffer instead of copying into the + * arena. */ + UPB_DECODE_ALIAS = 1, +}; -/* upb_value ******************************************************************/ +#define UPB_DECODE_MAXDEPTH(depth) ((depth) << 16) -/* A tagged union (stored untagged inside the table) so that we can check that - * clients calling table accessors are correctly typed without having to have - * an explosion of accessors. */ -typedef enum { - UPB_CTYPE_INT32 = 1, - UPB_CTYPE_INT64 = 2, - UPB_CTYPE_UINT32 = 3, - UPB_CTYPE_UINT64 = 4, - UPB_CTYPE_BOOL = 5, - UPB_CTYPE_CSTR = 6, - UPB_CTYPE_PTR = 7, - UPB_CTYPE_CONSTPTR = 8, - UPB_CTYPE_FPTR = 9, - UPB_CTYPE_FLOAT = 10, - UPB_CTYPE_DOUBLE = 11 -} upb_ctype_t; +bool _upb_decode(const char *buf, size_t size, upb_msg *msg, + const upb_msglayout *l, upb_arena *arena, int options); + +UPB_INLINE +bool upb_decode(const char *buf, size_t size, upb_msg *msg, + const upb_msglayout *l, upb_arena *arena) { + return _upb_decode(buf, size, msg, l, arena, 0); +} + +#ifdef __cplusplus +} /* extern "C" */ +#endif + + +#endif /* UPB_DECODE_H_ */ + +/** upb/decode_internal.h ************************************************************/ +/* + * Internal implementation details of the decoder that are shared between + * decode.c and decode_fast.c. + */ + +#ifndef UPB_DECODE_INT_H_ +#define UPB_DECODE_INT_H_ + +#include + + +/** upb/msg_internal.h ************************************************************//* +** Our memory representation for parsing tables and messages themselves. +** Functions in this file are used by generated code and possibly reflection. +** +** The definitions in this file are internal to upb. +**/ + +#ifndef UPB_MSG_INT_H_ +#define UPB_MSG_INT_H_ + +#include +#include +#include + + +/** upb/table_internal.h ************************************************************/ +/* + * upb_table + * + * This header is INTERNAL-ONLY! 
Its interfaces are not public or stable! + * This file defines very fast int->upb_value (inttable) and string->upb_value + * (strtable) hash tables. + * + * The table uses chained scatter with Brent's variation (inspired by the Lua + * implementation of hash tables). The hash function for strings is Austin + * Appleby's "MurmurHash." + * + * The inttable uses uintptr_t as its key, which guarantees it can be used to + * store pointers or integers of at least 32 bits (upb isn't really useful on + * systems where sizeof(void*) < 4). + * + * The table must be homogeneous (all values of the same type). In debug + * mode, we check this on insert and lookup. + */ + +#ifndef UPB_TABLE_H_ +#define UPB_TABLE_H_ + +#include +#include + + +#ifdef __cplusplus +extern "C" { +#endif + + +/* upb_value ******************************************************************/ typedef struct { uint64_t val; } upb_value; -/* Like strdup(), which isn't always available since it's not ANSI C. */ -char *upb_strdup(const char *s, upb_alloc *a); /* Variant that works with a length-delimited rather than NULL-delimited string, * as supported by strtable. */ -char *upb_strdup2(const char *s, size_t len, upb_alloc *a); - -UPB_INLINE char *upb_gstrdup(const char *s) { - return upb_strdup(s, &upb_alloc_global); -} +char *upb_strdup2(const char *s, size_t len, upb_arena *a); UPB_INLINE void _upb_value_setval(upb_value *v, uint64_t val) { v->val = val; } -UPB_INLINE upb_value _upb_value_val(uint64_t val) { - upb_value ret; - _upb_value_setval(&ret, val); - return ret; -} - /* For each value ctype, define the following set of functions: * * // Get/set an int32 from a upb_value. @@ -734,14 +852,7 @@ typedef struct { uint32_t mask; /* Mask to turn hash value -> bucket. */ uint32_t max_count; /* Max count before we hit our load limit. */ uint8_t size_lg2; /* Size of the hashtable part is 2^size_lg2 entries. */ - - /* Hash table entries. 
- * Making this const isn't entirely accurate; what we really want is for it to - * have the same const-ness as the table it's inside. But there's no way to - * declare that in C. So we have to make it const so that we can statically - * initialize const hash tables. Then we cast away const when we have to. - */ - const upb_tabent *entries; + upb_tabent *entries; } upb_table; typedef struct { @@ -755,8 +866,6 @@ typedef struct { size_t array_count; /* Array part number of elements. */ } upb_inttable; -#define UPB_ARRAY_EMPTYENT -1 - UPB_INLINE size_t upb_table_size(const upb_table *t) { if (t->size_lg2 == 0) return 0; @@ -769,48 +878,10 @@ UPB_INLINE bool upb_tabent_isempty(const upb_tabent *e) { return e->key == 0; } -/* Used by some of the unit tests for generic hashing functionality. */ -uint32_t upb_murmur_hash2(const void * key, size_t len, uint32_t seed); - -UPB_INLINE uintptr_t upb_intkey(uintptr_t key) { - return key; -} - -UPB_INLINE uint32_t upb_inthash(uintptr_t key) { - return (uint32_t)key; -} - -static const upb_tabent *upb_getentry(const upb_table *t, uint32_t hash) { - return t->entries + (hash & t->mask); -} - -UPB_INLINE bool upb_arrhas(upb_tabval key) { - return key.val != (uint64_t)-1; -} - /* Initialize and uninitialize a table, respectively. If memory allocation * failed, false is returned that the table is uninitialized. 
*/ -bool upb_inttable_init2(upb_inttable *table, upb_ctype_t ctype, upb_alloc *a); -bool upb_strtable_init2(upb_strtable *table, upb_ctype_t ctype, - size_t expected_size, upb_alloc *a); -void upb_inttable_uninit2(upb_inttable *table, upb_alloc *a); -void upb_strtable_uninit2(upb_strtable *table, upb_alloc *a); - -UPB_INLINE bool upb_inttable_init(upb_inttable *table, upb_ctype_t ctype) { - return upb_inttable_init2(table, ctype, &upb_alloc_global); -} - -UPB_INLINE bool upb_strtable_init(upb_strtable *table, upb_ctype_t ctype) { - return upb_strtable_init2(table, ctype, 4, &upb_alloc_global); -} - -UPB_INLINE void upb_inttable_uninit(upb_inttable *table) { - upb_inttable_uninit2(table, &upb_alloc_global); -} - -UPB_INLINE void upb_strtable_uninit(upb_strtable *table) { - upb_strtable_uninit2(table, &upb_alloc_global); -} +bool upb_inttable_init(upb_inttable *table, upb_arena *a); +bool upb_strtable_init(upb_strtable *table, size_t expected_size, upb_arena *a); /* Returns the number of values in the table. */ size_t upb_inttable_count(const upb_inttable *t); @@ -818,12 +889,6 @@ UPB_INLINE size_t upb_strtable_count(const upb_strtable *t) { return t->t.count; } -void upb_inttable_packedsize(const upb_inttable *t, size_t *size); -void upb_strtable_packedsize(const upb_strtable *t, size_t *size); -upb_inttable *upb_inttable_pack(const upb_inttable *t, void *p, size_t *ofs, - size_t size); -upb_strtable *upb_strtable_pack(const upb_strtable *t, void *p, size_t *ofs, - size_t size); void upb_strtable_clear(upb_strtable *t); /* Inserts the given key into the hashtable with the given value. The key must @@ -833,26 +898,10 @@ void upb_strtable_clear(upb_strtable *t); * * If a table resize was required but memory allocation failed, false is * returned and the table is unchanged. 
*/ -bool upb_inttable_insert2(upb_inttable *t, uintptr_t key, upb_value val, - upb_alloc *a); -bool upb_strtable_insert3(upb_strtable *t, const char *key, size_t len, - upb_value val, upb_alloc *a); - -UPB_INLINE bool upb_inttable_insert(upb_inttable *t, uintptr_t key, - upb_value val) { - return upb_inttable_insert2(t, key, val, &upb_alloc_global); -} - -UPB_INLINE bool upb_strtable_insert2(upb_strtable *t, const char *key, - size_t len, upb_value val) { - return upb_strtable_insert3(t, key, len, val, &upb_alloc_global); -} - -/* For NULL-terminated strings. */ -UPB_INLINE bool upb_strtable_insert(upb_strtable *t, const char *key, - upb_value val) { - return upb_strtable_insert2(t, key, strlen(key), val); -} +bool upb_inttable_insert(upb_inttable *t, uintptr_t key, upb_value val, + upb_arena *a); +bool upb_strtable_insert(upb_strtable *t, const char *key, size_t len, + upb_value val, upb_arena *a); /* Looks up key in this table, returning "true" if the key was found. * If v is non-NULL, copies the value for this key into *v. */ @@ -869,74 +918,21 @@ UPB_INLINE bool upb_strtable_lookup(const upb_strtable *t, const char *key, /* Removes an item from the table. Returns true if the remove was successful, * and stores the removed item in *val if non-NULL. */ bool upb_inttable_remove(upb_inttable *t, uintptr_t key, upb_value *val); -bool upb_strtable_remove3(upb_strtable *t, const char *key, size_t len, - upb_value *val, upb_alloc *alloc); - -UPB_INLINE bool upb_strtable_remove2(upb_strtable *t, const char *key, - size_t len, upb_value *val) { - return upb_strtable_remove3(t, key, len, val, &upb_alloc_global); -} - -/* For NULL-terminated strings. */ -UPB_INLINE bool upb_strtable_remove(upb_strtable *t, const char *key, - upb_value *v) { - return upb_strtable_remove2(t, key, strlen(key), v); -} +bool upb_strtable_remove(upb_strtable *t, const char *key, size_t len, + upb_value *val); /* Updates an existing entry in an inttable. 
If the entry does not exist, * returns false and does nothing. Unlike insert/remove, this does not * invalidate iterators. */ bool upb_inttable_replace(upb_inttable *t, uintptr_t key, upb_value val); -/* Convenience routines for inttables with pointer keys. */ -bool upb_inttable_insertptr2(upb_inttable *t, const void *key, upb_value val, - upb_alloc *a); -bool upb_inttable_removeptr(upb_inttable *t, const void *key, upb_value *val); -bool upb_inttable_lookupptr( - const upb_inttable *t, const void *key, upb_value *val); - -UPB_INLINE bool upb_inttable_insertptr(upb_inttable *t, const void *key, - upb_value val) { - return upb_inttable_insertptr2(t, key, val, &upb_alloc_global); -} - /* Optimizes the table for the current set of entries, for both memory use and * lookup time. Client should call this after all entries have been inserted; * inserting more entries is legal, but will likely require a table resize. */ -void upb_inttable_compact2(upb_inttable *t, upb_alloc *a); - -UPB_INLINE void upb_inttable_compact(upb_inttable *t) { - upb_inttable_compact2(t, &upb_alloc_global); -} - -/* A special-case inlinable version of the lookup routine for 32-bit - * integers. */ -UPB_INLINE bool upb_inttable_lookup32(const upb_inttable *t, uint32_t key, - upb_value *v) { - *v = upb_value_int32(0); /* Silence compiler warnings. */ - if (key < t->array_size) { - upb_tabval arrval = t->array[key]; - if (upb_arrhas(arrval)) { - _upb_value_setval(v, arrval.val); - return true; - } else { - return false; - } - } else { - const upb_tabent *e; - if (t->t.entries == NULL) return false; - for (e = upb_getentry(&t->t, upb_inthash(key)); true; e = e->next) { - if ((uint32_t)e->key == key) { - _upb_value_setval(v, e->val.val); - return true; - } - if (e->next == NULL) return false; - } - } -} +void upb_inttable_compact(upb_inttable *t, upb_arena *a); /* Exposed for testing only. 
*/ -bool upb_strtable_resize(upb_strtable *t, size_t size_lg2, upb_alloc *a); +bool upb_strtable_resize(upb_strtable *t, size_t size_lg2, upb_arena *a); /* Iterators ******************************************************************/ @@ -1032,10 +1028,6 @@ bool upb_inttable_iter_isequal(const upb_inttable_iter *i1, extern "C" { #endif -#define PTR_AT(msg, ofs, type) (type*)((const char*)msg + ofs) - -typedef void upb_msg; - /** upb_msglayout *************************************************************/ /* upb_msglayout represents the memory layout of a given upb_msgdef. The @@ -1070,7 +1062,7 @@ typedef struct { _upb_field_parser *field_parser; } _upb_fasttable_entry; -typedef struct upb_msglayout { +struct upb_msglayout { const struct upb_msglayout *const* submsgs; const upb_msglayout_field *fields; /* Must be aligned to sizeof(void*). Doesn't include internal members like @@ -1082,7 +1074,7 @@ typedef struct upb_msglayout { /* To constant-initialize the tables of variable length, we need a flexible * array member, and we need to compile in C99 mode. */ _upb_fasttable_entry fasttable[]; -} upb_msglayout; +}; /** upb_msg *******************************************************************/ @@ -1137,21 +1129,18 @@ void _upb_msg_discardunknown_shallow(upb_msg *msg); bool _upb_msg_addunknown(upb_msg *msg, const char *data, size_t len, upb_arena *arena); -/* Returns a reference to the message's unknown data. 
*/ -const char *upb_msg_getunknown(const upb_msg *msg, size_t *len); - /** Hasbit access *************************************************************/ UPB_INLINE bool _upb_hasbit(const upb_msg *msg, size_t idx) { - return (*PTR_AT(msg, idx / 8, const char) & (1 << (idx % 8))) != 0; + return (*UPB_PTR_AT(msg, idx / 8, const char) & (1 << (idx % 8))) != 0; } UPB_INLINE void _upb_sethas(const upb_msg *msg, size_t idx) { - (*PTR_AT(msg, idx / 8, char)) |= (char)(1 << (idx % 8)); + (*UPB_PTR_AT(msg, idx / 8, char)) |= (char)(1 << (idx % 8)); } UPB_INLINE void _upb_clearhas(const upb_msg *msg, size_t idx) { - (*PTR_AT(msg, idx / 8, char)) &= (char)(~(1 << (idx % 8))); + (*UPB_PTR_AT(msg, idx / 8, char)) &= (char)(~(1 << (idx % 8))); } UPB_INLINE size_t _upb_msg_hasidx(const upb_msglayout_field *f) { @@ -1177,11 +1166,11 @@ UPB_INLINE void _upb_clearhas_field(const upb_msg *msg, /** Oneof case access *********************************************************/ UPB_INLINE uint32_t *_upb_oneofcase(upb_msg *msg, size_t case_ofs) { - return PTR_AT(msg, case_ofs, uint32_t); + return UPB_PTR_AT(msg, case_ofs, uint32_t); } UPB_INLINE uint32_t _upb_getoneofcase(const void *msg, size_t case_ofs) { - return *PTR_AT(msg, case_ofs, uint32_t); + return *UPB_PTR_AT(msg, case_ofs, uint32_t); } UPB_INLINE size_t _upb_oneofcase_ofs(const upb_msglayout_field *f) { @@ -1200,7 +1189,7 @@ UPB_INLINE uint32_t _upb_getoneofcase_field(const upb_msg *msg, } UPB_INLINE bool _upb_has_submsg_nohasbit(const upb_msg *msg, size_t ofs) { - return *PTR_AT(msg, ofs, const upb_msg*) != NULL; + return *UPB_PTR_AT(msg, ofs, const upb_msg*) != NULL; } UPB_INLINE bool _upb_isrepeated(const upb_msglayout_field *field) { @@ -1277,7 +1266,7 @@ UPB_INLINE bool _upb_array_resize(upb_array *arr, size_t size, UPB_INLINE const void *_upb_array_accessor(const void *msg, size_t ofs, size_t *size) { - const upb_array *arr = *PTR_AT(msg, ofs, const upb_array*); + const upb_array *arr = *UPB_PTR_AT(msg, ofs, const 
upb_array*); if (arr) { if (size) *size = arr->len; return _upb_array_constptr(arr); @@ -1289,7 +1278,7 @@ UPB_INLINE const void *_upb_array_accessor(const void *msg, size_t ofs, UPB_INLINE void *_upb_array_mutable_accessor(void *msg, size_t ofs, size_t *size) { - upb_array *arr = *PTR_AT(msg, ofs, upb_array*); + upb_array *arr = *UPB_PTR_AT(msg, ofs, upb_array*); if (arr) { if (size) *size = arr->len; return _upb_array_ptr(arr); @@ -1302,7 +1291,7 @@ UPB_INLINE void *_upb_array_mutable_accessor(void *msg, size_t ofs, UPB_INLINE void *_upb_array_resize_accessor2(void *msg, size_t ofs, size_t size, int elem_size_lg2, upb_arena *arena) { - upb_array **arr_ptr = PTR_AT(msg, ofs, upb_array *); + upb_array **arr_ptr = UPB_PTR_AT(msg, ofs, upb_array *); upb_array *arr = *arr_ptr; if (!arr || arr->size < size) { return _upb_array_resize_fallback(arr_ptr, size, elem_size_lg2, arena); @@ -1315,7 +1304,7 @@ UPB_INLINE bool _upb_array_append_accessor2(void *msg, size_t ofs, int elem_size_lg2, const void *value, upb_arena *arena) { - upb_array **arr_ptr = PTR_AT(msg, ofs, upb_array *); + upb_array **arr_ptr = UPB_PTR_AT(msg, ofs, upb_array *); size_t elem_size = 1 << elem_size_lg2; upb_array *arr = *arr_ptr; void *ptr; @@ -1323,7 +1312,7 @@ UPB_INLINE bool _upb_array_append_accessor2(void *msg, size_t ofs, return _upb_array_append_fallback(arr_ptr, value, elem_size_lg2, arena); } ptr = _upb_array_ptr(arr); - memcpy(PTR_AT(ptr, arr->len * elem_size, char), value, elem_size); + memcpy(UPB_PTR_AT(ptr, arr->len * elem_size, char), value, elem_size); arr->len++; return true; } @@ -1470,20 +1459,19 @@ UPB_INLINE void* _upb_map_next(const upb_map *map, size_t *iter) { } UPB_INLINE bool _upb_map_set(upb_map *map, const void *key, size_t key_size, - void *val, size_t val_size, upb_arena *arena) { + void *val, size_t val_size, upb_arena *a) { upb_strview strkey = _upb_map_tokey(key, key_size); upb_value tabval = {0}; - if (!_upb_map_tovalue(val, val_size, &tabval, arena)) return false; 
- upb_alloc *a = upb_arena_alloc(arena); + if (!_upb_map_tovalue(val, val_size, &tabval, a)) return false; /* TODO(haberman): add overwrite operation to minimize number of lookups. */ - upb_strtable_remove3(&map->table, strkey.data, strkey.size, NULL, a); - return upb_strtable_insert3(&map->table, strkey.data, strkey.size, tabval, a); + upb_strtable_remove(&map->table, strkey.data, strkey.size, NULL); + return upb_strtable_insert(&map->table, strkey.data, strkey.size, tabval, a); } UPB_INLINE bool _upb_map_delete(upb_map *map, const void *key, size_t key_size) { upb_strview k = _upb_map_tokey(key, key_size); - return upb_strtable_remove3(&map->table, k.data, k.size, NULL, NULL); + return upb_strtable_remove(&map->table, k.data, k.size, NULL); } UPB_INLINE void _upb_map_clear(upb_map *map) { @@ -1515,7 +1503,7 @@ UPB_INLINE void *_upb_msg_map_next(const upb_msg *msg, size_t ofs, UPB_INLINE bool _upb_msg_map_set(upb_msg *msg, size_t ofs, const void *key, size_t key_size, void *val, size_t val_size, upb_arena *arena) { - upb_map **map = PTR_AT(msg, ofs, upb_map *); + upb_map **map = UPB_PTR_AT(msg, ofs, upb_map *); if (!*map) { *map = _upb_map_new(arena, key_size, val_size); } @@ -1548,8 +1536,7 @@ UPB_INLINE void _upb_msg_map_key(const void* msg, void* key, size_t size) { UPB_INLINE void _upb_msg_map_value(const void* msg, void* val, size_t size) { const upb_tabent *ent = (const upb_tabent*)msg; - upb_value v; - _upb_value_setval(&v, ent->val.val); + upb_value v = {ent->val.val}; _upb_map_fromvalue(v, val, size); } @@ -1612,55 +1599,14 @@ UPB_INLINE bool _upb_sortedmap_next(_upb_mapsorter *s, const upb_map *map, return true; } -#undef PTR_AT - -#ifdef __cplusplus -} /* extern "C" */ -#endif - - -#endif /* UPB_MSG_H_ */ - -/* Must be last. */ - -#ifdef __cplusplus -extern "C" { -#endif - -enum { - /* If set, strings will alias the input buffer instead of copying into the - * arena. 
*/ - UPB_DECODE_ALIAS = 1, -}; - -#define UPB_DECODE_MAXDEPTH(depth) ((depth) << 16) - -bool _upb_decode(const char *buf, size_t size, upb_msg *msg, - const upb_msglayout *l, upb_arena *arena, int options); - -UPB_INLINE -bool upb_decode(const char *buf, size_t size, upb_msg *msg, - const upb_msglayout *l, upb_arena *arena) { - return _upb_decode(buf, size, msg, l, arena, 0); -} - #ifdef __cplusplus } /* extern "C" */ #endif -#endif /* UPB_DECODE_H_ */ -/* -** Internal implementation details of the decoder that are shared between -** decode.c and decode_fast.c. -*/ - -#ifndef UPB_DECODE_INT_H_ -#define UPB_DECODE_INT_H_ - -#include - +#endif /* UPB_MSG_INT_H_ */ +/** upb/upb_internal.h ************************************************************/ #ifndef UPB_INT_H_ #define UPB_INT_H_ @@ -1670,7 +1616,10 @@ typedef struct mem_block mem_block; struct upb_arena { _upb_arena_head head; - uint32_t *cleanups; + /* Stores cleanup metadata for this arena. + * - a pointer to the current cleanup counter. + * - a boolean indicating if there is an unowned initial block. */ + uintptr_t cleanup_metadata; /* Allocator to allocate arena blocks. We are responsible for freeing these * when we are destroyed. 
*/ @@ -1792,10 +1741,11 @@ bool decode_isdone(upb_decstate *d, const char **ptr) { } } +#if UPB_FASTTABLE UPB_INLINE const char *fastdecode_tagdispatch(upb_decstate *d, const char *ptr, upb_msg *msg, intptr_t table, - uint64_t hasbits, uint32_t tag) { + uint64_t hasbits, uint64_t tag) { const upb_msglayout *table_p = decode_totablep(table); uint8_t mask = table; uint64_t data; @@ -1803,8 +1753,10 @@ const char *fastdecode_tagdispatch(upb_decstate *d, const char *ptr, UPB_ASSUME((idx & 7) == 0); idx >>= 3; data = table_p->fasttable[idx].field_data ^ tag; - return table_p->fasttable[idx].field_parser(d, ptr, msg, table, hasbits, data); + UPB_MUSTTAIL return table_p->fasttable[idx].field_parser(d, ptr, msg, table, + hasbits, data); } +#endif UPB_INLINE uint32_t fastdecode_loadtag(const char* ptr) { uint16_t tag; @@ -1837,9 +1789,11 @@ UPB_INLINE void decode_poplimit(upb_decstate *d, const char *ptr, #endif /* UPB_DECODE_INT_H_ */ + +/** upb/encode.h ************************************************************/ /* -** upb_encode: parsing into a upb_msg using a upb_msglayout. -*/ + * upb_encode: parsing into a upb_msg using a upb_msglayout. + */ #ifndef UPB_ENCODE_H_ #define UPB_ENCODE_H_ @@ -1880,6 +1834,8 @@ UPB_INLINE char *upb_encode(const void *msg, const upb_msglayout *l, #endif #endif /* UPB_ENCODE_H_ */ + +/** upb/decode_fast.h ************************************************************/ // These are the specialized field parser functions for the fast parser. // Generated tables will refer to these by name. 
// @@ -2005,7 +1961,8 @@ TAGBYTES(r) #undef UPB_PARSE_PARAMS #endif /* UPB_DECODE_FAST_H_ */ -/* This file was generated by upbc (the upb compiler) from the input + +/** google/protobuf/descriptor.upb.h ************************************************************//* This file was generated by upbc (the upb compiler) from the input * file: * * google/protobuf/descriptor.proto @@ -3884,18 +3841,20 @@ UPB_INLINE void google_protobuf_GeneratedCodeInfo_Annotation_set_end(google_prot #endif /* GOOGLE_PROTOBUF_DESCRIPTOR_PROTO_UPB_H_ */ + +/** upb/def.h ************************************************************/ /* -** Defs are upb's internal representation of the constructs that can appear -** in a .proto file: -** -** - upb_msgdef: describes a "message" construct. -** - upb_fielddef: describes a message field. -** - upb_filedef: describes a .proto file and its defs. -** - upb_enumdef: describes an enum. -** - upb_oneofdef: describes a oneof. -** -** TODO: definitions of services. -*/ + * Defs are upb's internal representation of the constructs that can appear + * in a .proto file: + * + * - upb_msgdef: describes a "message" construct. + * - upb_fielddef: describes a message field. + * - upb_filedef: describes a .proto file and its defs. + * - upb_enumdef: describes an enum. + * - upb_oneofdef: describes a oneof. + * + * TODO: definitions of services. + */ #ifndef UPB_DEF_H_ #define UPB_DEF_H_ @@ -3991,9 +3950,6 @@ const upb_msgdef *upb_fielddef_msgsubdef(const upb_fielddef *f); const upb_enumdef *upb_fielddef_enumsubdef(const upb_fielddef *f); const upb_msglayout_field *upb_fielddef_layout(const upb_fielddef *f); -/* Internal only. 
*/ -uint32_t upb_fielddef_selectorbase(const upb_fielddef *f); - /* upb_oneofdef ***************************************************************/ typedef upb_inttable_iter upb_oneof_iter; @@ -4078,10 +4034,6 @@ UPB_INLINE const upb_fielddef *upb_msgdef_ntofz(const upb_msgdef *m, return upb_msgdef_ntof(m, name, strlen(name)); } -/* Internal-only. */ -size_t upb_msgdef_selectorcount(const upb_msgdef *m); -uint32_t upb_msgdef_submsgfieldcount(const upb_msgdef *m); - /* Lookup of either field or oneof by name. Returns whether either was found. * If the return is true, then the found def will be set, and the non-found * one set to NULL. */ @@ -4197,6 +4149,7 @@ bool _upb_symtab_loaddefinit(upb_symtab *s, const upb_def_init *init); #endif /* UPB_DEF_H_ */ +/** upb/reflection.h ************************************************************/ #ifndef UPB_REFLECTION_H_ #define UPB_REFLECTION_H_ @@ -4278,17 +4231,9 @@ bool upb_msg_next(const upb_msg *msg, const upb_msgdef *m, const upb_symtab *ext_pool, const upb_fielddef **f, upb_msgval *val, size_t *iter); -/* Adds unknown data (serialized protobuf data) to the given message. The data - * is copied into the message instance. */ -void upb_msg_addunknown(upb_msg *msg, const char *data, size_t len, - upb_arena *arena); - /* Clears all unknown field data from this message and all submessages. */ bool upb_msg_discardunknown(upb_msg *msg, const upb_msgdef *m, int maxdepth); -/* Returns a reference to the message's unknown data. */ -const char *upb_msg_getunknown(const upb_msg *msg, size_t *len); - /** upb_array *****************************************************************/ /* Creates a new array on the given arena that holds elements of this type. 
*/ @@ -4370,6 +4315,7 @@ void upb_mapiter_setvalue(upb_map *map, size_t iter, upb_msgval value); #endif /* UPB_REFLECTION_H_ */ +/** upb/json_decode.h ************************************************************/ #ifndef UPB_JSONDECODE_H_ #define UPB_JSONDECODE_H_ @@ -4392,6 +4338,7 @@ bool upb_json_decode(const char *buf, size_t size, upb_msg *msg, #endif /* UPB_JSONDECODE_H_ */ +/** upb/json_encode.h ************************************************************/ #ifndef UPB_JSONENCODE_H_ #define UPB_JSONENCODE_H_ @@ -4426,27 +4373,39 @@ size_t upb_json_encode(const upb_msg *msg, const upb_msgdef *m, #endif #endif /* UPB_JSONENCODE_H_ */ + +/** upb/port_undef.inc ************************************************************/ /* See port_def.inc. This should #undef all macros #defined there. */ -#undef UPB_MAPTYPE_STRING #undef UPB_SIZE #undef UPB_PTR_AT #undef UPB_READ_ONEOF #undef UPB_WRITE_ONEOF +#undef UPB_MAPTYPE_STRING #undef UPB_INLINE #undef UPB_ALIGN_UP #undef UPB_ALIGN_DOWN #undef UPB_ALIGN_MALLOC #undef UPB_ALIGN_OF +#undef UPB_LIKELY +#undef UPB_UNLIKELY #undef UPB_FORCEINLINE #undef UPB_NOINLINE #undef UPB_NORETURN +#undef UPB_PRINTF #undef UPB_MAX #undef UPB_MIN #undef UPB_UNUSED #undef UPB_ASSUME #undef UPB_ASSERT #undef UPB_UNREACHABLE +#undef UPB_SETJMP +#undef UPB_LONGJMP +#undef UPB_PTRADD +#undef UPB_MUSTTAIL +#undef UPB_FASTTABLE_SUPPORTED +#undef UPB_FASTTABLE +#undef UPB_FASTTABLE_INIT #undef UPB_POISON_MEMORY_REGION #undef UPB_UNPOISON_MEMORY_REGION #undef UPB_ASAN diff --git a/src/Makefile.am b/src/Makefile.am index 9d45e0212e..f5912cf54d 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -95,6 +95,7 @@ nobase_include_HEADERS = \ google/protobuf/empty.pb.h \ google/protobuf/extension_set.h \ google/protobuf/extension_set_inl.h \ + google/protobuf/field_access_listener.h \ google/protobuf/field_mask.pb.h \ google/protobuf/generated_enum_reflection.h \ google/protobuf/generated_enum_util.h \ @@ -203,6 +204,7 @@ libprotobuf_lite_la_SOURCES = 
\ google/protobuf/arena.cc \ google/protobuf/arenastring.cc \ google/protobuf/extension_set.cc \ + google/protobuf/field_access_listener.cc \ google/protobuf/generated_enum_util.cc \ google/protobuf/generated_message_util.cc \ google/protobuf/generated_message_table_driven_lite.h \ diff --git a/src/README.md b/src/README.md index 78d6bb5ec7..ec4901d894 100644 --- a/src/README.md +++ b/src/README.md @@ -229,4 +229,4 @@ Usage The complete documentation for Protocol Buffers is available via the web at: - https://developers.google.com/protocol-buffers/ +https://developers.google.com/protocol-buffers/ diff --git a/src/google/protobuf/any.pb.cc b/src/google/protobuf/any.pb.cc index 76be84980c..c6dff7b1f3 100644 --- a/src/google/protobuf/any.pb.cc +++ b/src/google/protobuf/any.pb.cc @@ -215,7 +215,7 @@ failure: (void) cached_has_bits; // string type_url = 1; - if (!this->type_url().empty()) { + if (!this->_internal_type_url().empty()) { ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::VerifyUtf8String( this->_internal_type_url().data(), static_cast(this->_internal_type_url().length()), ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::SERIALIZE, @@ -225,7 +225,7 @@ failure: } // bytes value = 2; - if (!this->value().empty()) { + if (!this->_internal_value().empty()) { target = stream->WriteBytesMaybeAliased( 2, this->_internal_value(), target); } @@ -247,14 +247,14 @@ size_t Any::ByteSizeLong() const { (void) cached_has_bits; // string type_url = 1; - if (!this->type_url().empty()) { + if (!this->_internal_type_url().empty()) { total_size += 1 + ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::StringSize( this->_internal_type_url()); } // bytes value = 2; - if (!this->value().empty()) { + if (!this->_internal_value().empty()) { total_size += 1 + ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::BytesSize( this->_internal_value()); @@ -288,10 +288,10 @@ void Any::MergeFrom(const Any& from) { ::PROTOBUF_NAMESPACE_ID::uint32 cached_has_bits = 0; (void) cached_has_bits; 
- if (!from.type_url().empty()) { + if (!from._internal_type_url().empty()) { _internal_set_type_url(from._internal_type_url()); } - if (!from.value().empty()) { + if (!from._internal_value().empty()) { _internal_set_value(from._internal_value()); } _internal_metadata_.MergeFrom<::PROTOBUF_NAMESPACE_ID::UnknownFieldSet>(from._internal_metadata_); diff --git a/src/google/protobuf/api.pb.cc b/src/google/protobuf/api.pb.cc index 35deae3795..310ea2b199 100644 --- a/src/google/protobuf/api.pb.cc +++ b/src/google/protobuf/api.pb.cc @@ -372,7 +372,7 @@ failure: (void) cached_has_bits; // string name = 1; - if (!this->name().empty()) { + if (!this->_internal_name().empty()) { ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::VerifyUtf8String( this->_internal_name().data(), static_cast(this->_internal_name().length()), ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::SERIALIZE, @@ -398,7 +398,7 @@ failure: } // string version = 4; - if (!this->version().empty()) { + if (!this->_internal_version().empty()) { ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::VerifyUtf8String( this->_internal_version().data(), static_cast(this->_internal_version().length()), ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::SERIALIZE, @@ -408,7 +408,7 @@ failure: } // .google.protobuf.SourceContext source_context = 5; - if (this->has_source_context()) { + if (this->_internal_has_source_context()) { target = stream->EnsureSpace(target); target = ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite:: InternalWriteMessage( @@ -424,7 +424,7 @@ failure: } // .google.protobuf.Syntax syntax = 7; - if (this->syntax() != 0) { + if (this->_internal_syntax() != 0) { target = stream->EnsureSpace(target); target = ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::WriteEnumToArray( 7, this->_internal_syntax(), target); @@ -468,28 +468,28 @@ size_t Api::ByteSizeLong() const { } // string name = 1; - if (!this->name().empty()) { + if (!this->_internal_name().empty()) { total_size += 1 + 
::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::StringSize( this->_internal_name()); } // string version = 4; - if (!this->version().empty()) { + if (!this->_internal_version().empty()) { total_size += 1 + ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::StringSize( this->_internal_version()); } // .google.protobuf.SourceContext source_context = 5; - if (this->has_source_context()) { + if (this->_internal_has_source_context()) { total_size += 1 + ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::MessageSize( *source_context_); } // .google.protobuf.Syntax syntax = 7; - if (this->syntax() != 0) { + if (this->_internal_syntax() != 0) { total_size += 1 + ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::EnumSize(this->_internal_syntax()); } @@ -525,16 +525,16 @@ void Api::MergeFrom(const Api& from) { methods_.MergeFrom(from.methods_); options_.MergeFrom(from.options_); mixins_.MergeFrom(from.mixins_); - if (!from.name().empty()) { + if (!from._internal_name().empty()) { _internal_set_name(from._internal_name()); } - if (!from.version().empty()) { + if (!from._internal_version().empty()) { _internal_set_version(from._internal_version()); } - if (from.has_source_context()) { + if (from._internal_has_source_context()) { _internal_mutable_source_context()->PROTOBUF_NAMESPACE_ID::SourceContext::MergeFrom(from._internal_source_context()); } - if (from.syntax() != 0) { + if (from._internal_syntax() != 0) { _internal_set_syntax(from._internal_syntax()); } _internal_metadata_.MergeFrom<::PROTOBUF_NAMESPACE_ID::UnknownFieldSet>(from._internal_metadata_); @@ -772,7 +772,7 @@ failure: (void) cached_has_bits; // string name = 1; - if (!this->name().empty()) { + if (!this->_internal_name().empty()) { ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::VerifyUtf8String( this->_internal_name().data(), static_cast(this->_internal_name().length()), ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::SERIALIZE, @@ -782,7 +782,7 @@ failure: } // string request_type_url = 2; - if 
(!this->request_type_url().empty()) { + if (!this->_internal_request_type_url().empty()) { ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::VerifyUtf8String( this->_internal_request_type_url().data(), static_cast(this->_internal_request_type_url().length()), ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::SERIALIZE, @@ -792,13 +792,13 @@ failure: } // bool request_streaming = 3; - if (this->request_streaming() != 0) { + if (this->_internal_request_streaming() != 0) { target = stream->EnsureSpace(target); target = ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::WriteBoolToArray(3, this->_internal_request_streaming(), target); } // string response_type_url = 4; - if (!this->response_type_url().empty()) { + if (!this->_internal_response_type_url().empty()) { ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::VerifyUtf8String( this->_internal_response_type_url().data(), static_cast(this->_internal_response_type_url().length()), ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::SERIALIZE, @@ -808,7 +808,7 @@ failure: } // bool response_streaming = 5; - if (this->response_streaming() != 0) { + if (this->_internal_response_streaming() != 0) { target = stream->EnsureSpace(target); target = ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::WriteBoolToArray(5, this->_internal_response_streaming(), target); } @@ -822,7 +822,7 @@ failure: } // .google.protobuf.Syntax syntax = 7; - if (this->syntax() != 0) { + if (this->_internal_syntax() != 0) { target = stream->EnsureSpace(target); target = ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::WriteEnumToArray( 7, this->_internal_syntax(), target); @@ -852,38 +852,38 @@ size_t Method::ByteSizeLong() const { } // string name = 1; - if (!this->name().empty()) { + if (!this->_internal_name().empty()) { total_size += 1 + ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::StringSize( this->_internal_name()); } // string request_type_url = 2; - if (!this->request_type_url().empty()) { + if 
(!this->_internal_request_type_url().empty()) { total_size += 1 + ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::StringSize( this->_internal_request_type_url()); } // string response_type_url = 4; - if (!this->response_type_url().empty()) { + if (!this->_internal_response_type_url().empty()) { total_size += 1 + ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::StringSize( this->_internal_response_type_url()); } // bool request_streaming = 3; - if (this->request_streaming() != 0) { + if (this->_internal_request_streaming() != 0) { total_size += 1 + 1; } // bool response_streaming = 5; - if (this->response_streaming() != 0) { + if (this->_internal_response_streaming() != 0) { total_size += 1 + 1; } // .google.protobuf.Syntax syntax = 7; - if (this->syntax() != 0) { + if (this->_internal_syntax() != 0) { total_size += 1 + ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::EnumSize(this->_internal_syntax()); } @@ -917,22 +917,22 @@ void Method::MergeFrom(const Method& from) { (void) cached_has_bits; options_.MergeFrom(from.options_); - if (!from.name().empty()) { + if (!from._internal_name().empty()) { _internal_set_name(from._internal_name()); } - if (!from.request_type_url().empty()) { + if (!from._internal_request_type_url().empty()) { _internal_set_request_type_url(from._internal_request_type_url()); } - if (!from.response_type_url().empty()) { + if (!from._internal_response_type_url().empty()) { _internal_set_response_type_url(from._internal_response_type_url()); } - if (from.request_streaming() != 0) { + if (from._internal_request_streaming() != 0) { _internal_set_request_streaming(from._internal_request_streaming()); } - if (from.response_streaming() != 0) { + if (from._internal_response_streaming() != 0) { _internal_set_response_streaming(from._internal_response_streaming()); } - if (from.syntax() != 0) { + if (from._internal_syntax() != 0) { _internal_set_syntax(from._internal_syntax()); } 
_internal_metadata_.MergeFrom<::PROTOBUF_NAMESPACE_ID::UnknownFieldSet>(from._internal_metadata_); @@ -1106,7 +1106,7 @@ failure: (void) cached_has_bits; // string name = 1; - if (!this->name().empty()) { + if (!this->_internal_name().empty()) { ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::VerifyUtf8String( this->_internal_name().data(), static_cast(this->_internal_name().length()), ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::SERIALIZE, @@ -1116,7 +1116,7 @@ failure: } // string root = 2; - if (!this->root().empty()) { + if (!this->_internal_root().empty()) { ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::VerifyUtf8String( this->_internal_root().data(), static_cast(this->_internal_root().length()), ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::SERIALIZE, @@ -1142,14 +1142,14 @@ size_t Mixin::ByteSizeLong() const { (void) cached_has_bits; // string name = 1; - if (!this->name().empty()) { + if (!this->_internal_name().empty()) { total_size += 1 + ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::StringSize( this->_internal_name()); } // string root = 2; - if (!this->root().empty()) { + if (!this->_internal_root().empty()) { total_size += 1 + ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::StringSize( this->_internal_root()); @@ -1183,10 +1183,10 @@ void Mixin::MergeFrom(const Mixin& from) { ::PROTOBUF_NAMESPACE_ID::uint32 cached_has_bits = 0; (void) cached_has_bits; - if (!from.name().empty()) { + if (!from._internal_name().empty()) { _internal_set_name(from._internal_name()); } - if (!from.root().empty()) { + if (!from._internal_root().empty()) { _internal_set_root(from._internal_root()); } _internal_metadata_.MergeFrom<::PROTOBUF_NAMESPACE_ID::UnknownFieldSet>(from._internal_metadata_); diff --git a/src/google/protobuf/api.pb.h b/src/google/protobuf/api.pb.h index 3806313a49..193a75399f 100644 --- a/src/google/protobuf/api.pb.h +++ b/src/google/protobuf/api.pb.h @@ -926,9 +926,15 @@ inline PROTOBUF_NAMESPACE_ID::SourceContext* 
Api::release_source_context() { PROTOBUF_NAMESPACE_ID::SourceContext* temp = source_context_; source_context_ = nullptr; +#ifdef PROTOBUF_FORCE_COPY_IN_RELEASE + auto* old = reinterpret_cast<::PROTOBUF_NAMESPACE_ID::MessageLite*>(temp); + temp = ::PROTOBUF_NAMESPACE_ID::internal::DuplicateIfNonNull(temp); + if (GetArenaForAllocation() == nullptr) { delete old; } +#else // PROTOBUF_FORCE_COPY_IN_RELEASE if (GetArenaForAllocation() != nullptr) { temp = ::PROTOBUF_NAMESPACE_ID::internal::DuplicateIfNonNull(temp); } +#endif // !PROTOBUF_FORCE_COPY_IN_RELEASE return temp; } inline PROTOBUF_NAMESPACE_ID::SourceContext* Api::unsafe_arena_release_source_context() { diff --git a/src/google/protobuf/arenastring.cc b/src/google/protobuf/arenastring.cc index da49325ec1..7608b13c7a 100644 --- a/src/google/protobuf/arenastring.cc +++ b/src/google/protobuf/arenastring.cc @@ -256,6 +256,24 @@ void ArenaStringPtr::ClearToDefault(const LazyString& default_value, } } +const char* EpsCopyInputStream::ReadArenaString(const char* ptr, + ArenaStringPtr* s, + Arena* arena) { + GOOGLE_DCHECK(arena != nullptr); + + int size = ReadSize(&ptr); + if (!ptr) return nullptr; + + auto str = Arena::Create(arena); + ptr = ReadString(ptr, size, str); + GOOGLE_PROTOBUF_PARSER_ASSERT(ptr); + + TaggedPtr res; + res.Set(str); + s->UnsafeSetTaggedPointer(res); + + return ptr; +} } // namespace internal } // namespace protobuf diff --git a/src/google/protobuf/compiler/cpp/cpp_field.cc b/src/google/protobuf/compiler/cpp/cpp_field.cc index 949405deea..82247ff065 100644 --- a/src/google/protobuf/compiler/cpp/cpp_field.cc +++ b/src/google/protobuf/compiler/cpp/cpp_field.cc @@ -59,6 +59,69 @@ namespace cpp { using internal::WireFormat; +namespace { + +std::string GenerateAnnotation(StringPiece substitute_template_prefix, + StringPiece prepared_template, + StringPiece substitute_template_suffix, + int field_index, StringPiece lambda_args, + StringPiece access_type) { + return strings::Substitute( + 
StrCat(substitute_template_prefix, prepared_template, + substitute_template_suffix), + field_index, access_type, lambda_args); +} + +std::string GenerateTemplateForOneofString(const FieldDescriptor* descriptor, + StringPiece proto_ns, + StringPiece field_member) { + std::string field_pointer = + descriptor->options().ctype() == google::protobuf::FieldOptions::STRING + ? "$0.GetPointer()" + : "$0"; + + if (descriptor->default_value_string().empty()) { + return strings::Substitute( + StrCat("_internal_has_", + google::protobuf::compiler::cpp::FieldName(descriptor), + "()? _listener_->ExtractFieldInfo(", field_pointer, + "): ::", proto_ns, "::FieldAccessListener::AddressInfo()"), + field_member); + } + + if (descriptor->options().ctype() == google::protobuf::FieldOptions::STRING_PIECE) { + return StrCat("_listener_->ExtractFieldInfo(_internal_", + google::protobuf::compiler::cpp::FieldName(descriptor), "())"); + } + + std::string default_value_pointer = + descriptor->options().ctype() == google::protobuf::FieldOptions::STRING + ? "&$1.get()" + : "&$1"; + return strings::Substitute( + StrCat("_listener_->ExtractFieldInfo(_internal_has_", + google::protobuf::compiler::cpp::FieldName(descriptor), "()? ", + field_pointer, " : ", default_value_pointer, ")"), + field_member, MakeDefaultName(descriptor)); +} + +std::string GenerateTemplateForSingleString(const FieldDescriptor* descriptor, + StringPiece field_member) { + if (descriptor->default_value_string().empty()) { + return strings::Substitute("_listener_->ExtractFieldInfo(&$0)", field_member); + } + + if (descriptor->options().ctype() == google::protobuf::FieldOptions::STRING) { + return strings::Substitute( + "_listener_->ExtractFieldInfo($0.IsDefault(" + "nullptr) ? 
&$1.get() : $0.GetPointer())", + field_member, MakeDefaultName(descriptor)); + } + + return strings::Substitute("_listener_->ExtractFieldInfo(&$0)", field_member); +} + +} // namespace void AddAccessorAnnotations(const FieldDescriptor* descriptor, const Options& options, @@ -74,6 +137,129 @@ void AddAccessorAnnotations(const FieldDescriptor* descriptor, for (size_t i = 0; i < GOOGLE_ARRAYSIZE(kAccessorsAnnotations); ++i) { (*variables)[kAccessorsAnnotations[i]] = ""; } + if (options.annotate_accessor) { + for (size_t i = 0; i < GOOGLE_ARRAYSIZE(kAccessorsAnnotations); ++i) { + (*variables)[kAccessorsAnnotations[i]] = StrCat( + " ", FieldName(descriptor), "_AccessedNoStrip = true;\n"); + } + } + if (!options.inject_field_listener_events) { + return; + } + if (descriptor->file()->options().optimize_for() == + google::protobuf::FileOptions::LITE_RUNTIME) { + return; + } + std::string field_member = (*variables)["field_member"]; + const google::protobuf::OneofDescriptor* oneof_member = + descriptor->real_containing_oneof(); + if (oneof_member) { + field_member = StrCat(oneof_member->name(), "_.", field_member); + } + const std::string proto_ns = (*variables)["proto_ns"]; + std::string lambda_args = "_listener_, this"; + std::string lambda_flat_args = "_listener_, this"; + const std::string substitute_template_prefix = StrCat( + " {\n" + " auto _listener_ = ::", + proto_ns, + "::FieldAccessListener::GetListener();\n" + " if (_listener_) _listener_->OnFieldAccess([$2] { return "); + const std::string substitute_template_suffix = StrCat( + "; }, " + "GetDescriptor()->field($0), " + "::", + proto_ns, + "::FieldAccessListener::FieldAccessType::$1);\n" + " }\n"); + std::string prepared_template; + + // Flat template is needed if the prepared one is introspecting the values + // inside the returned values, for example, for repeated fields and maps. 
+ std::string prepared_flat_template; + std::string prepared_add_template; + // TODO(jianzhouzh): Fix all forward declared messages and deal with the + // weak fields. + if (descriptor->is_repeated() && !descriptor->is_map()) { + if (descriptor->type() != FieldDescriptor::TYPE_MESSAGE && + descriptor->type() != FieldDescriptor::TYPE_GROUP) { + lambda_args = "_listener_, this, index"; + prepared_template = strings::Substitute( + "_listener_->ExtractFieldInfo(&$0.Get(index))", field_member); + prepared_add_template = strings::Substitute( + "_listener_->ExtractFieldInfo(&$0.Get($0.size() - 1))", field_member); + } else { + prepared_template = + StrCat("::", proto_ns, "::FieldAccessListener::AddressInfo()"); + prepared_add_template = + StrCat("::", proto_ns, "::FieldAccessListener::AddressInfo()"); + } + } else if (descriptor->is_map()) { + prepared_template = + StrCat("::", proto_ns, "::FieldAccessListener::AddressInfo()"); + } else if (descriptor->type() == FieldDescriptor::TYPE_MESSAGE && + !descriptor->options().lazy()) { + prepared_template = + StrCat("::", proto_ns, "::FieldAccessListener::AddressInfo()"); + } else if (descriptor->cpp_type() == FieldDescriptor::CPPTYPE_STRING) { + if (oneof_member) { + prepared_template = GenerateTemplateForOneofString( + descriptor, (*variables)["proto_ns"], field_member); + } else { + prepared_template = + GenerateTemplateForSingleString(descriptor, field_member); + } + } else { + prepared_template = + strings::Substitute("_listener_->ExtractFieldInfo(&$0)", field_member); + } + if (descriptor->is_repeated() && !descriptor->is_map() && + descriptor->type() != FieldDescriptor::TYPE_MESSAGE && + descriptor->type() != FieldDescriptor::TYPE_GROUP) { + prepared_flat_template = + strings::Substitute("_listener_->ExtractFieldInfo(&$0)", field_member); + } else { + prepared_flat_template = prepared_template; + } + (*variables)["annotate_get"] = GenerateAnnotation( + substitute_template_prefix, prepared_template, 
substitute_template_suffix, + descriptor->index(), lambda_args, "kGet"); + (*variables)["annotate_set"] = GenerateAnnotation( + substitute_template_prefix, prepared_template, substitute_template_suffix, + descriptor->index(), lambda_args, "kSet"); + (*variables)["annotate_has"] = GenerateAnnotation( + substitute_template_prefix, prepared_template, substitute_template_suffix, + descriptor->index(), lambda_args, "kHas"); + (*variables)["annotate_mutable"] = GenerateAnnotation( + substitute_template_prefix, prepared_template, substitute_template_suffix, + descriptor->index(), lambda_args, "kMutable"); + (*variables)["annotate_release"] = GenerateAnnotation( + substitute_template_prefix, prepared_template, substitute_template_suffix, + descriptor->index(), lambda_args, "kRelease"); + (*variables)["annotate_clear"] = + GenerateAnnotation(substitute_template_prefix, prepared_flat_template, + substitute_template_suffix, descriptor->index(), + lambda_flat_args, "kClear"); + (*variables)["annotate_size"] = + GenerateAnnotation(substitute_template_prefix, prepared_flat_template, + substitute_template_suffix, descriptor->index(), + lambda_flat_args, "kSize"); + (*variables)["annotate_list"] = + GenerateAnnotation(substitute_template_prefix, prepared_flat_template, + substitute_template_suffix, descriptor->index(), + lambda_flat_args, "kList"); + (*variables)["annotate_mutable_list"] = + GenerateAnnotation(substitute_template_prefix, prepared_flat_template, + substitute_template_suffix, descriptor->index(), + lambda_flat_args, "kMutableList"); + (*variables)["annotate_add"] = + GenerateAnnotation(substitute_template_prefix, prepared_add_template, + substitute_template_suffix, descriptor->index(), + lambda_flat_args, "kAdd"); + (*variables)["annotate_add_mutable"] = + GenerateAnnotation(substitute_template_prefix, prepared_add_template, + substitute_template_suffix, descriptor->index(), + lambda_flat_args, "kAddMutable"); } void SetCommonFieldVariables(const FieldDescriptor* 
descriptor, diff --git a/src/google/protobuf/compiler/cpp/cpp_file.cc b/src/google/protobuf/compiler/cpp/cpp_file.cc index 6dff1d0a21..b7697d3f29 100644 --- a/src/google/protobuf/compiler/cpp/cpp_file.cc +++ b/src/google/protobuf/compiler/cpp/cpp_file.cc @@ -1139,7 +1139,7 @@ void FileGenerator::GenerateLibraryIncludes(io::Printer* printer) { GOOGLE_CHECK(!options_.opensource_runtime); IncludeFile("net/proto2/public/weak_field_map.h", printer); } - if (HasLazyFields(file_, options_)) { + if (HasLazyFields(file_, options_, &scc_analyzer_)) { GOOGLE_CHECK(!options_.opensource_runtime); IncludeFile("net/proto2/public/lazy_field.h", printer); } diff --git a/src/google/protobuf/compiler/cpp/cpp_generator.cc b/src/google/protobuf/compiler/cpp/cpp_generator.cc index a1cd06d15c..2a6087ee41 100644 --- a/src/google/protobuf/compiler/cpp/cpp_generator.cc +++ b/src/google/protobuf/compiler/cpp/cpp_generator.cc @@ -104,6 +104,14 @@ bool CppGenerator::Generate(const FileDescriptor* file, file_options.num_cc_files = strto32(options[i].second.c_str(), NULL, 10); } + } else if (options[i].first == "annotate_accessor") { + file_options.annotate_accessor = true; + } else if (options[i].first == "inject_field_listener_events") { + file_options.inject_field_listener_events = true; + } else if (options[i].first == "eagerly_verified_lazy") { + file_options.eagerly_verified_lazy = true; + } else if (options[i].first == "force_eagerly_verified_lazy") { + file_options.force_eagerly_verified_lazy = true; } else if (options[i].first == "table_driven_parsing") { file_options.table_driven_parsing = true; } else if (options[i].first == "table_driven_serialization") { diff --git a/src/google/protobuf/compiler/cpp/cpp_helpers.cc b/src/google/protobuf/compiler/cpp/cpp_helpers.cc index 16ee07448d..c39c52a8c4 100644 --- a/src/google/protobuf/compiler/cpp/cpp_helpers.cc +++ b/src/google/protobuf/compiler/cpp/cpp_helpers.cc @@ -205,9 +205,20 @@ void SetIntVar(const Options& options, const std::string& 
type, std::map* variables) { (*variables)[type] = IntTypeName(options, type); } +bool IsEagerlyVerifiedLazyImpl(const FieldDescriptor* field, + const Options& options, + MessageSCCAnalyzer* scc_analyzer) { + return false; +} } // namespace +bool IsLazy(const FieldDescriptor* field, const Options& options, + MessageSCCAnalyzer* scc_analyzer) { + return IsLazilyVerifiedLazy(field, options) || + IsEagerlyVerifiedLazyImpl(field, options, scc_analyzer); +} + void SetCommonVars(const Options& options, std::map* variables) { (*variables)["proto_ns"] = ProtobufNamespace(options); @@ -785,20 +796,20 @@ std::string SafeFunctionName(const Descriptor* descriptor, return function_name; } -static bool HasLazyFields(const Descriptor* descriptor, - const Options& options) { +static bool HasLazyFields(const Descriptor* descriptor, const Options& options, + MessageSCCAnalyzer* scc_analyzer) { for (int field_idx = 0; field_idx < descriptor->field_count(); field_idx++) { - if (IsLazy(descriptor->field(field_idx), options)) { + if (IsLazy(descriptor->field(field_idx), options, scc_analyzer)) { return true; } } for (int idx = 0; idx < descriptor->extension_count(); idx++) { - if (IsLazy(descriptor->extension(idx), options)) { + if (IsLazy(descriptor->extension(idx), options, scc_analyzer)) { return true; } } for (int idx = 0; idx < descriptor->nested_type_count(); idx++) { - if (HasLazyFields(descriptor->nested_type(idx), options)) { + if (HasLazyFields(descriptor->nested_type(idx), options, scc_analyzer)) { return true; } } @@ -806,15 +817,16 @@ static bool HasLazyFields(const Descriptor* descriptor, } // Does the given FileDescriptor use lazy fields? 
-bool HasLazyFields(const FileDescriptor* file, const Options& options) { +bool HasLazyFields(const FileDescriptor* file, const Options& options, + MessageSCCAnalyzer* scc_analyzer) { for (int i = 0; i < file->message_type_count(); i++) { const Descriptor* descriptor(file->message_type(i)); - if (HasLazyFields(descriptor, options)) { + if (HasLazyFields(descriptor, options, scc_analyzer)) { return true; } } for (int field_idx = 0; field_idx < file->extension_count(); field_idx++) { - if (IsLazy(file->extension(field_idx), options)) { + if (IsLazy(file->extension(field_idx), options, scc_analyzer)) { return true; } } @@ -1143,6 +1155,9 @@ bool IsImplicitWeakField(const FieldDescriptor* field, const Options& options, MessageAnalysis MessageSCCAnalyzer::GetSCCAnalysis(const SCC* scc) { if (analysis_cache_.count(scc)) return analysis_cache_[scc]; MessageAnalysis result{}; + if (UsingImplicitWeakFields(scc->GetFile(), options_)) { + result.contains_weak = true; + } for (int i = 0; i < scc->descriptors.size(); i++) { const Descriptor* descriptor = scc->descriptors[i]; if (descriptor->extension_range_count() > 0) { @@ -1153,6 +1168,9 @@ MessageAnalysis MessageSCCAnalyzer::GetSCCAnalysis(const SCC* scc) { if (field->is_required()) { result.contains_required = true; } + if (field->options().weak()) { + result.contains_weak = true; + } switch (field->type()) { case FieldDescriptor::TYPE_STRING: case FieldDescriptor::TYPE_BYTES: { @@ -1171,6 +1189,7 @@ MessageAnalysis MessageSCCAnalyzer::GetSCCAnalysis(const SCC* scc) { if (!ShouldIgnoreRequiredFieldCheck(field, options_)) { result.contains_required |= analysis.contains_required; } + result.contains_weak |= analysis.contains_weak; } else { // This field points back into the same SCC hence the messages // in the SCC are recursive. 
Note if SCC contains more than two diff --git a/src/google/protobuf/compiler/cpp/cpp_helpers.h b/src/google/protobuf/compiler/cpp/cpp_helpers.h index d15ac29def..247c161ce5 100644 --- a/src/google/protobuf/compiler/cpp/cpp_helpers.h +++ b/src/google/protobuf/compiler/cpp/cpp_helpers.h @@ -331,17 +331,30 @@ inline bool IsStringPiece(const FieldDescriptor* field, EffectiveStringCType(field, options) == FieldOptions::STRING_PIECE; } +class MessageSCCAnalyzer; + // Does the given FileDescriptor use lazy fields? -bool HasLazyFields(const FileDescriptor* file, const Options& options); +bool HasLazyFields(const FileDescriptor* file, const Options& options, + MessageSCCAnalyzer* scc_analyzer); // Is the given field a supported lazy field? -inline bool IsLazy(const FieldDescriptor* field, const Options& options) { +bool IsLazy(const FieldDescriptor* field, const Options& options, + MessageSCCAnalyzer* scc_analyzer); + +inline bool IsLazilyVerifiedLazy(const FieldDescriptor* field, + const Options& options) { return field->options().lazy() && !field->is_repeated() && field->type() == FieldDescriptor::TYPE_MESSAGE && GetOptimizeFor(field->file(), options) != FileOptions::LITE_RUNTIME && !options.opensource_runtime; } +inline bool IsEagerlyVerifiedLazy(const FieldDescriptor* field, + const Options& options, + MessageSCCAnalyzer* scc_analyzer) { + return IsLazy(field, options, scc_analyzer) && !field->options().lazy(); +} + inline bool IsFieldUsed(const FieldDescriptor* /* field */, const Options& options) { return true; @@ -527,8 +540,8 @@ bool HasWeakFields(const FileDescriptor* desc, const Options& options); // given field. inline static bool ShouldIgnoreRequiredFieldCheck(const FieldDescriptor* field, const Options& options) { - // Do not check "required" for lazy fields. - return IsLazy(field, options); + // Do not check "required" for lazily verified lazy fields. 
+ return IsLazilyVerifiedLazy(field, options); } struct MessageAnalysis { @@ -536,6 +549,7 @@ struct MessageAnalysis { bool contains_cord; bool contains_extension; bool contains_required; + bool contains_weak; // Implicit weak as well. }; // This class is used in FileGenerator, to ensure linear instead of @@ -552,6 +566,10 @@ class PROTOC_EXPORT MessageSCCAnalyzer { MessageAnalysis result = GetSCCAnalysis(GetSCC(descriptor)); return result.contains_required || result.contains_extension; } + bool HasWeakField(const Descriptor* descriptor) { + MessageAnalysis result = GetSCCAnalysis(GetSCC(descriptor)); + return result.contains_weak; + } const SCC* GetSCC(const Descriptor* descriptor) { return analyzer_.GetSCC(descriptor); } diff --git a/src/google/protobuf/compiler/cpp/cpp_message.cc b/src/google/protobuf/compiler/cpp/cpp_message.cc index 68840d57aa..be6f348108 100644 --- a/src/google/protobuf/compiler/cpp/cpp_message.cc +++ b/src/google/protobuf/compiler/cpp/cpp_message.cc @@ -168,14 +168,16 @@ bool IsPOD(const FieldDescriptor* field) { // Anything that is a POD or a "normal" message (represented by a pointer) can // be manipulated as raw bytes. bool CanBeManipulatedAsRawBytes(const FieldDescriptor* field, - const Options& options) { + const Options& options, + MessageSCCAnalyzer* scc_analyzer) { bool ret = CanInitializeByZeroing(field); // Non-repeated, non-lazy message fields are simply raw pointers, so we can // swap them or use memset to initialize these in SharedCtor. We cannot use // this in Clear, as we need to potentially delete the existing value. - ret = ret || (!field->is_repeated() && !IsLazy(field, options) && - field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE); + ret = + ret || (!field->is_repeated() && !IsLazy(field, options, scc_analyzer) && + field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE); return ret; } @@ -218,16 +220,18 @@ bool EmitFieldNonDefaultCondition(io::Printer* printer, // if non-zero (numeric) or non-empty (string). 
if (!field->is_repeated() && !field->containing_oneof()) { if (field->cpp_type() == FieldDescriptor::CPPTYPE_STRING) { - format("if (!$prefix$$name$().empty()) {\n"); + format("if (!$prefix$_internal_$name$().empty()) {\n"); } else if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) { // Message fields still have has_$name$() methods. - format("if ($prefix$has_$name$()) {\n"); + format("if ($prefix$_internal_has_$name$()) {\n"); } else if (field->cpp_type() == FieldDescriptor::CPPTYPE_DOUBLE || field->cpp_type() == FieldDescriptor::CPPTYPE_FLOAT) { // Handle float comparison to prevent -Wfloat-equal warnings - format("if (!($prefix$$name$() <= 0 && $prefix$$name$() >= 0)) {\n"); + format( + "if (!($prefix$_internal_$name$() <= 0 && $prefix$_internal_$name$() " + ">= 0)) {\n"); } else { - format("if ($prefix$$name$() != 0) {\n"); + format("if ($prefix$_internal_$name$() != 0) {\n"); } format.Indent(); return true; @@ -313,7 +317,8 @@ bool ShouldSerializeInOrder(const Descriptor* descriptor, } bool TableDrivenParsingEnabled(const Descriptor* descriptor, - const Options& options) { + const Options& options, + MessageSCCAnalyzer* scc_analyzer) { if (!options.table_driven_parsing) { return false; } @@ -344,7 +349,7 @@ bool TableDrivenParsingEnabled(const Descriptor* descriptor, } // - There are no lazy fields (they require the non-lite library). 
- if (IsLazy(field, options)) { + if (IsLazy(field, options, scc_analyzer)) { return false; } } @@ -579,6 +584,35 @@ MessageGenerator::MessageGenerator( variables_["annotate_reflection"] = ""; variables_["annotate_bytesize"] = ""; + if (options.inject_field_listener_events && + descriptor->file()->options().optimize_for() != + google::protobuf::FileOptions::LITE_RUNTIME) { + const std::string injector_template = StrCat( + " {\n" + " auto _listener_ = ::", + variables_["proto_ns"], + "::FieldAccessListener::GetListener();\n" + " if (_listener_) "); + + StrAppend(&variables_["annotate_serialize"], injector_template, + "_listener_->OnSerializationAccess(this);\n" + " }\n"); + StrAppend(&variables_["annotate_deserialize"], injector_template, + " _listener_->OnDeserializationAccess(this);\n" + " }\n"); + // TODO(danilak): Ideally annotate_reflection should not exist and we need + // to annotate all reflective calls on our own, however, as this is a cause + // for side effects, i.e. reading values dynamically, we want the users know + // that dynamic access can happen. + StrAppend(&variables_["annotate_reflection"], injector_template, + "_listener_->OnReflectionAccess(default_instance()" + ".GetMetadata().descriptor);\n" + " }\n"); + StrAppend(&variables_["annotate_bytesize"], injector_template, + "_listener_->OnByteSizeAccess(this);\n" + " }\n"); + } + SetUnknownFieldsVariable(descriptor_, options_, &variables_); // Compute optimized field order to be used for layout and initialization @@ -595,7 +629,8 @@ MessageGenerator::MessageGenerator( } } - message_layout_helper_->OptimizeLayout(&optimized_order_, options_); + message_layout_helper_->OptimizeLayout(&optimized_order_, options_, + scc_analyzer_); // This message has hasbits iff one or more fields need one. 
for (auto field : optimized_order_) { @@ -618,7 +653,8 @@ MessageGenerator::MessageGenerator( } } - table_driven_ = TableDrivenParsingEnabled(descriptor_, options_); + table_driven_ = + TableDrivenParsingEnabled(descriptor_, options_, scc_analyzer_); parse_function_generator_.reset(new ParseFunctionGenerator( descriptor_, max_has_bit_index_, has_bit_indices_, options_, scc_analyzer_, variables_)); @@ -794,7 +830,7 @@ void MessageGenerator::GenerateSingularFieldHasBits( "(_has_bits_[$has_array_index$] & 0x$has_mask$u) != 0;\n"); if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE && - !IsLazy(field, options_)) { + !IsLazy(field, options_, scc_analyzer_)) { // We maintain the invariant that for a submessage x, has_x() returning // true implies that x_ is not null. By giving this information to the // compiler, we allow it to eliminate unnecessary null checks later on. @@ -810,7 +846,7 @@ void MessageGenerator::GenerateSingularFieldHasBits( "}\n"); } else if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) { // Message fields have a has_$name$() method. 
- if (IsLazy(field, options_)) { + if (IsLazy(field, options_, scc_analyzer_)) { format( "inline bool $classname$::_internal_has_$name$() const {\n" " return !$name$_.IsCleared();\n" @@ -1853,7 +1889,7 @@ int MessageGenerator::GenerateFieldMetadata(io::Printer* printer) { const FieldGenerator& generator = field_generators_.get(field); int type = CalcFieldNum(generator, field, options_); - if (IsLazy(field, options_)) { + if (IsLazy(field, options_, scc_analyzer_)) { type = internal::FieldMetadata::kSpecial; ptr = "reinterpret_cast(::" + variables_["proto_ns"] + "::internal::LazyFieldSerializer"; @@ -2311,6 +2347,8 @@ std::pair MessageGenerator::GenerateOffsets( if (!IsFieldUsed(field, options_)) { format(" | 0x80000000u, // unused\n"); + } else if (IsEagerlyVerifiedLazy(field, options_, scc_analyzer_)) { + format(" | 0x1u, // eagerly verified lazy\n"); } else { format(",\n"); } @@ -2486,7 +2524,7 @@ void MessageGenerator::GenerateConstructorBody(io::Printer* printer, optimized_order_, [copy_constructor, this](const FieldDescriptor* field) { return (copy_constructor && IsPOD(field)) || (!copy_constructor && - CanBeManipulatedAsRawBytes(field, options_)); + CanBeManipulatedAsRawBytes(field, options_, scc_analyzer_)); }); std::string pod_template; @@ -2554,7 +2592,8 @@ void MessageGenerator::GenerateStructors(io::Printer* printer) { GOOGLE_DCHECK(!IsFieldStripped(field, options_)); bool has_arena_constructor = field->is_repeated(); if (!field->real_containing_oneof() && - (IsLazy(field, options_) || IsStringPiece(field, options_))) { + (IsLazy(field, options_, scc_analyzer_) || + IsStringPiece(field, options_))) { has_arena_constructor = true; } if (has_arena_constructor) { @@ -2961,7 +3000,7 @@ void MessageGenerator::GenerateSwap(io::Printer* printer) { // If possible, we swap several fields at once, including padding. 
const RunMap runs = FindRuns(optimized_order_, [this](const FieldDescriptor* field) { - return CanBeManipulatedAsRawBytes(field, options_); + return CanBeManipulatedAsRawBytes(field, options_, scc_analyzer_); }); for (int i = 0; i < optimized_order_.size(); ++i) { @@ -4018,6 +4057,13 @@ void MessageGenerator::GenerateIsInitialized(io::Printer* printer) { } } else if (field->options().weak()) { continue; + } else if (IsEagerlyVerifiedLazy(field, options_, scc_analyzer_)) { + GOOGLE_CHECK(!field->real_containing_oneof()); + format( + "if (_internal_has_$1$()) {\n" + " if (!$1$().IsInitialized()) return false;\n" + "}\n", + FieldName(field)); } else { GOOGLE_CHECK(!field->real_containing_oneof()); format( diff --git a/src/google/protobuf/compiler/cpp/cpp_message_field.cc b/src/google/protobuf/compiler/cpp/cpp_message_field.cc index c04c83ad67..ee677ea4f0 100644 --- a/src/google/protobuf/compiler/cpp/cpp_message_field.cc +++ b/src/google/protobuf/compiler/cpp/cpp_message_field.cc @@ -200,9 +200,15 @@ void MessageFieldGenerator::GenerateInlineAccessorDefinitions( " $clear_hasbit$\n" " $type$* temp = $casted_member$;\n" " $name$_ = nullptr;\n" + "#ifdef PROTOBUF_FORCE_COPY_IN_RELEASE\n" + " auto* old = reinterpret_cast<::$proto_ns$::MessageLite*>(temp);\n" + " temp = ::$proto_ns$::internal::DuplicateIfNonNull(temp);\n" + " if (GetArenaForAllocation() == nullptr) { delete old; }\n" + "#else // PROTOBUF_FORCE_COPY_IN_RELEASE\n" " if (GetArenaForAllocation() != nullptr) {\n" " temp = ::$proto_ns$::internal::DuplicateIfNonNull(temp);\n" " }\n" + "#endif // !PROTOBUF_FORCE_COPY_IN_RELEASE\n" " return temp;\n" "}\n" "inline $type$* $classname$::unsafe_arena_release_$name$() {\n" diff --git a/src/google/protobuf/compiler/cpp/cpp_message_layout_helper.h b/src/google/protobuf/compiler/cpp/cpp_message_layout_helper.h index 67eeff0ae7..9d8063d9ca 100644 --- a/src/google/protobuf/compiler/cpp/cpp_message_layout_helper.h +++ 
b/src/google/protobuf/compiler/cpp/cpp_message_layout_helper.h @@ -43,6 +43,8 @@ namespace protobuf { namespace compiler { namespace cpp { +class MessageSCCAnalyzer; + // Provides an abstract interface to optimize message layout // by rearranging the fields of a message. class MessageLayoutHelper { @@ -50,7 +52,8 @@ class MessageLayoutHelper { virtual ~MessageLayoutHelper() {} virtual void OptimizeLayout(std::vector* fields, - const Options& options) = 0; + const Options& options, + MessageSCCAnalyzer* scc_analyzer) = 0; }; } // namespace cpp diff --git a/src/google/protobuf/compiler/cpp/cpp_options.h b/src/google/protobuf/compiler/cpp/cpp_options.h index 04142eeb92..2e97c3dde0 100644 --- a/src/google/protobuf/compiler/cpp/cpp_options.h +++ b/src/google/protobuf/compiler/cpp/cpp_options.h @@ -75,6 +75,8 @@ struct Options { kTCTableAlways } tctable_mode = kTCTableNever; bool inject_field_listener_events = false; + bool eagerly_verified_lazy = false; + bool force_eagerly_verified_lazy = false; }; } // namespace cpp diff --git a/src/google/protobuf/compiler/cpp/cpp_padding_optimizer.cc b/src/google/protobuf/compiler/cpp/cpp_padding_optimizer.cc index 2f78bda520..0b660c75b7 100644 --- a/src/google/protobuf/compiler/cpp/cpp_padding_optimizer.cc +++ b/src/google/protobuf/compiler/cpp/cpp_padding_optimizer.cc @@ -118,7 +118,8 @@ class FieldGroup { // // OTHER these fields are initialized one-by-one. void PaddingOptimizer::OptimizeLayout( - std::vector* fields, const Options& options) { + std::vector* fields, const Options& options, + MessageSCCAnalyzer* scc_analyzer) { // The sorted numeric order of Family determines the declaration order in the // memory layout. 
enum Family { @@ -147,7 +148,7 @@ void PaddingOptimizer::OptimizeLayout( f = STRING; } else if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) { f = MESSAGE; - if (IsLazy(field, options)) { + if (IsLazy(field, options, scc_analyzer)) { f = LAZY_MESSAGE; } } else if (CanInitializeByZeroing(field)) { diff --git a/src/google/protobuf/compiler/cpp/cpp_padding_optimizer.h b/src/google/protobuf/compiler/cpp/cpp_padding_optimizer.h index 2382081bab..ebdb17de61 100644 --- a/src/google/protobuf/compiler/cpp/cpp_padding_optimizer.h +++ b/src/google/protobuf/compiler/cpp/cpp_padding_optimizer.h @@ -53,7 +53,8 @@ class PaddingOptimizer : public MessageLayoutHelper { ~PaddingOptimizer() override {} void OptimizeLayout(std::vector* fields, - const Options& options) override; + const Options& options, + MessageSCCAnalyzer* scc_analyzer) override; }; } // namespace cpp diff --git a/src/google/protobuf/compiler/cpp/cpp_parse_function_generator.cc b/src/google/protobuf/compiler/cpp/cpp_parse_function_generator.cc index a5798f7df4..04aab98907 100644 --- a/src/google/protobuf/compiler/cpp/cpp_parse_function_generator.cc +++ b/src/google/protobuf/compiler/cpp/cpp_parse_function_generator.cc @@ -94,8 +94,8 @@ std::string MessageParseFunctionName(const FieldDescriptor* field, } else { name.append("Singular"); } - name.append("ParseMessage<" + ClassName(field->message_type()) + ", " + - TagType(field) + ">"); + name.append("ParseMessage<" + QualifiedClassName(field->message_type()) + + ", " + TagType(field) + ">"); return name; } @@ -198,9 +198,23 @@ TailCallTableInfo::TailCallTableInfo(const Descriptor* descriptor, case FieldDescriptor::TYPE_SFIXED32: case FieldDescriptor::TYPE_DOUBLE: case FieldDescriptor::TYPE_FLOAT: + case FieldDescriptor::TYPE_INT64: + case FieldDescriptor::TYPE_INT32: + case FieldDescriptor::TYPE_UINT64: + case FieldDescriptor::TYPE_UINT32: + case FieldDescriptor::TYPE_SINT64: + case FieldDescriptor::TYPE_SINT32: + case FieldDescriptor::TYPE_BOOL: name = 
FieldParseFunctionName(field, options, table_size_log2); break; + case FieldDescriptor::TYPE_BYTES: + if (field->options().ctype() == FieldOptions::STRING && + field->default_value_string().empty()) { + name = FieldParseFunctionName(field, options, table_size_log2); + } + break; + default: break; } @@ -263,10 +277,10 @@ void ParseFunctionGenerator::GenerateMethodDecls(io::Printer* printer) { if (tc_table_info_->use_generated_fallback) { format( "static const char* Tct_ParseFallback(\n" - " ::google::protobuf::MessageLite *msg, const char *ptr,\n" - " ::google::protobuf::internal::ParseContext *ctx,\n" - " const ::google::protobuf::internal::TailCallParseTableBase *table,\n" - " uint64_t hasbits, ::google::protobuf::internal::TcFieldData data);\n" + " ::$proto_ns$::MessageLite *msg, const char *ptr,\n" + " ::$proto_ns$::internal::ParseContext *ctx,\n" + " const ::$proto_ns$::internal::TailCallParseTableBase *table,\n" + " uint64_t hasbits, ::$proto_ns$::internal::TcFieldData data);\n" "inline const char* Tct_FallbackImpl(\n" " const char* ptr, ::$proto_ns$::internal::ParseContext* ctx,\n" " const void*, $uint64$ hasbits);\n"); @@ -646,7 +660,7 @@ void ParseFunctionGenerator::GenerateLengthDelim(Formatter& format, } else { format("ptr = ctx->ParseMessage(&$1$_, ptr);\n", FieldName(field)); } - } else if (IsLazy(field, options_)) { + } else if (IsLazy(field, options_, scc_analyzer_)) { if (field->real_containing_oneof()) { format( "if (!_internal_has_$1$()) {\n" @@ -988,19 +1002,27 @@ std::string FieldParseFunctionName(const FieldDescriptor* field, break; case FieldDescriptor::TYPE_STRING: - type_format = TypeFormat::kString; + switch (GetUtf8CheckMode(field, options)) { + case Utf8CheckMode::kNone: + type_format = TypeFormat::kBytes; + break; + case Utf8CheckMode::kStrict: + type_format = TypeFormat::kString; + break; + case Utf8CheckMode::kVerify: + type_format = TypeFormat::kStringValidateOnly; + break; + } break; - case FieldDescriptor::TYPE_GROUP: - case 
FieldDescriptor::TYPE_MESSAGE: - case FieldDescriptor::TYPE_ENUM: + default: GOOGLE_LOG(DFATAL) << "Type not handled: " << field->DebugString(); return ""; } return "::" + ProtobufNamespace(options) + "::internal::" + GetTailCallFieldHandlerName(card, type_format, table_size_log2, - TagSize(field->number())); + TagSize(field->number()), options); } } // namespace @@ -1008,7 +1030,8 @@ std::string FieldParseFunctionName(const FieldDescriptor* field, std::string GetTailCallFieldHandlerName(ParseCardinality card, TypeFormat type_format, int table_size_log2, - int tag_length_bytes) { + int tag_length_bytes, + const Options& options) { std::string name; switch (card) { @@ -1058,6 +1081,20 @@ std::string GetTailCallFieldHandlerName(ParseCardinality card, name.append("Fixed"); break; + case TypeFormat::kVar64: + case TypeFormat::kVar32: + case TypeFormat::kSInt64: + case TypeFormat::kSInt32: + case TypeFormat::kBool: + name.append("Varint"); + break; + + case TypeFormat::kBytes: + case TypeFormat::kString: + case TypeFormat::kStringValidateOnly: + name.append("String"); + break; + default: break; } @@ -1069,35 +1106,59 @@ std::string GetTailCallFieldHandlerName(ParseCardinality card, switch (type_format) { case TypeFormat::kVar64: case TypeFormat::kFixed64: - name.append("uint64_t"); + name.append("uint64_t, "); break; case TypeFormat::kSInt64: - name.append("int64_t"); + name.append("int64_t, "); break; case TypeFormat::kVar32: case TypeFormat::kFixed32: - name.append("uint32_t"); + name.append("uint32_t, "); break; case TypeFormat::kSInt32: - name.append("int32_t"); + name.append("int32_t, "); break; case TypeFormat::kBool: - name.append("bool"); + name.append("bool, "); break; default: - GOOGLE_LOG(FATAL) << static_cast(type_format); - return ""; + break; } - name.append(", "); name.append(CodedTagType(tag_length_bytes)); + std::string tcpb = + StrCat(ProtobufNamespace(options), "::internal::TcParserBase"); + switch (type_format) { + case TypeFormat::kVar64: + case 
TypeFormat::kVar32: + case TypeFormat::kBool: + name.append(StrCat(", ::", tcpb, "::kNoConversion")); + break; + + case TypeFormat::kSInt64: + case TypeFormat::kSInt32: + name.append(StrCat(", ::", tcpb, "::kZigZag")); + break; + + case TypeFormat::kBytes: + name.append(StrCat(", ::", tcpb, "::kNoUtf8")); + break; + + case TypeFormat::kString: + name.append(StrCat(", ::", tcpb, "::kUtf8")); + break; + + case TypeFormat::kStringValidateOnly: + name.append(StrCat(", ::", tcpb, "::kUtf8ValidateOnly")); + break; + default: break; } diff --git a/src/google/protobuf/compiler/cpp/cpp_parse_function_generator.h b/src/google/protobuf/compiler/cpp/cpp_parse_function_generator.h index 00719a8f13..116353af34 100644 --- a/src/google/protobuf/compiler/cpp/cpp_parse_function_generator.h +++ b/src/google/protobuf/compiler/cpp/cpp_parse_function_generator.h @@ -169,7 +169,8 @@ enum class TypeFormat { std::string GetTailCallFieldHandlerName(ParseCardinality card, TypeFormat type_format, int table_size_log2, - int tag_length_bytes); + int tag_length_bytes, + const Options& options); } // namespace cpp } // namespace compiler diff --git a/src/google/protobuf/compiler/cpp/cpp_test_bad_identifiers.proto b/src/google/protobuf/compiler/cpp/cpp_test_bad_identifiers.proto index 479710821f..466a84194a 100644 --- a/src/google/protobuf/compiler/cpp/cpp_test_bad_identifiers.proto +++ b/src/google/protobuf/compiler/cpp/cpp_test_bad_identifiers.proto @@ -41,7 +41,7 @@ syntax = "proto2"; // Some generic_services option(s) added automatically. // See: http://go/proto2-generic-services-default -option cc_generic_services = true; // auto-added +option cc_generic_services = true; // auto-added // We don't put this in a package within proto2 because we need to make sure // that the generated code doesn't depend on being in the proto2 namespace. 
@@ -158,11 +158,11 @@ message TestConflictingEnumNames { // NO_PROTO3 optional while conflicting_enum = 1; // NO_PROTO3 } // NO_PROTO3 -enum bool { // NO_PROTO3 - default = 0; // NO_PROTO3 - NOT_EQ = 1; // NO_PROTO3 - volatile = 2; // NO_PROTO3 - return = 3; // NO_PROTO3 +enum bool { // NO_PROTO3 + default = 0; // NO_PROTO3 + NOT_EQ = 1; // NO_PROTO3 + volatile = 2; // NO_PROTO3 + return = 3; // NO_PROTO3 } // NO_PROTO3 message DummyMessage {} @@ -173,7 +173,7 @@ message NULL { extend TestConflictingSymbolNames { // NO_PROTO3 optional int32 void = 314253; // NO_PROTO3 -} // NO_PROTO3 +} // NO_PROTO3 // Message names that could conflict. message Shutdown {} diff --git a/src/google/protobuf/compiler/plugin.pb.h b/src/google/protobuf/compiler/plugin.pb.h index 4b114383b5..7e22a02075 100644 --- a/src/google/protobuf/compiler/plugin.pb.h +++ b/src/google/protobuf/compiler/plugin.pb.h @@ -1337,9 +1337,15 @@ inline PROTOBUF_NAMESPACE_ID::compiler::Version* CodeGeneratorRequest::release_c _has_bits_[0] &= ~0x00000002u; PROTOBUF_NAMESPACE_ID::compiler::Version* temp = compiler_version_; compiler_version_ = nullptr; +#ifdef PROTOBUF_FORCE_COPY_IN_RELEASE + auto* old = reinterpret_cast<::PROTOBUF_NAMESPACE_ID::MessageLite*>(temp); + temp = ::PROTOBUF_NAMESPACE_ID::internal::DuplicateIfNonNull(temp); + if (GetArenaForAllocation() == nullptr) { delete old; } +#else // PROTOBUF_FORCE_COPY_IN_RELEASE if (GetArenaForAllocation() != nullptr) { temp = ::PROTOBUF_NAMESPACE_ID::internal::DuplicateIfNonNull(temp); } +#endif // !PROTOBUF_FORCE_COPY_IN_RELEASE return temp; } inline PROTOBUF_NAMESPACE_ID::compiler::Version* CodeGeneratorRequest::unsafe_arena_release_compiler_version() { @@ -1595,9 +1601,15 @@ inline PROTOBUF_NAMESPACE_ID::GeneratedCodeInfo* CodeGeneratorResponse_File::rel _has_bits_[0] &= ~0x00000008u; PROTOBUF_NAMESPACE_ID::GeneratedCodeInfo* temp = generated_code_info_; generated_code_info_ = nullptr; +#ifdef PROTOBUF_FORCE_COPY_IN_RELEASE + auto* old = 
reinterpret_cast<::PROTOBUF_NAMESPACE_ID::MessageLite*>(temp); + temp = ::PROTOBUF_NAMESPACE_ID::internal::DuplicateIfNonNull(temp); + if (GetArenaForAllocation() == nullptr) { delete old; } +#else // PROTOBUF_FORCE_COPY_IN_RELEASE if (GetArenaForAllocation() != nullptr) { temp = ::PROTOBUF_NAMESPACE_ID::internal::DuplicateIfNonNull(temp); } +#endif // !PROTOBUF_FORCE_COPY_IN_RELEASE return temp; } inline PROTOBUF_NAMESPACE_ID::GeneratedCodeInfo* CodeGeneratorResponse_File::unsafe_arena_release_generated_code_info() { diff --git a/src/google/protobuf/descriptor.cc b/src/google/protobuf/descriptor.cc index 40ded3a1eb..bedb5b38c3 100644 --- a/src/google/protobuf/descriptor.cc +++ b/src/google/protobuf/descriptor.cc @@ -786,13 +786,7 @@ class FileDescriptorTables { mutable internal::WrappedMutex unknown_enum_values_mu_; }; -DescriptorPool::Tables::Tables() - // Start some hash-map and hash-set objects with a small # of buckets - : known_bad_files_(3), - known_bad_symbols_(3), - extensions_loaded_from_db_(3), - symbols_by_name_(3), - files_by_name_(3) { +DescriptorPool::Tables::Tables() { well_known_types_.insert({ {"google.protobuf.DoubleValue", Descriptor::WELLKNOWNTYPE_DOUBLEVALUE}, {"google.protobuf.FloatValue", Descriptor::WELLKNOWNTYPE_FLOATVALUE}, @@ -816,16 +810,8 @@ DescriptorPool::Tables::Tables() DescriptorPool::Tables::~Tables() { GOOGLE_DCHECK(checkpoints_.empty()); } FileDescriptorTables::FileDescriptorTables() - // Initialize all the hash tables to start out with a small # of buckets. 
- : symbols_by_parent_(3), - fields_by_lowercase_name_(3), - fields_by_lowercase_name_tmp_(new FieldsByNameMap()), - fields_by_camelcase_name_(3), - fields_by_camelcase_name_tmp_(new FieldsByNameMap()), - fields_by_number_(3), - enum_values_by_number_(3), - unknown_enum_values_by_number_(3), - locations_by_path_(3) {} + : fields_by_lowercase_name_tmp_(new FieldsByNameMap()), + fields_by_camelcase_name_tmp_(new FieldsByNameMap()) {} FileDescriptorTables::~FileDescriptorTables() {} diff --git a/src/google/protobuf/descriptor.pb.h b/src/google/protobuf/descriptor.pb.h index 1029bd8cec..5322d372a5 100644 --- a/src/google/protobuf/descriptor.pb.h +++ b/src/google/protobuf/descriptor.pb.h @@ -7113,9 +7113,15 @@ inline PROTOBUF_NAMESPACE_ID::FileOptions* FileDescriptorProto::release_options( _has_bits_[0] &= ~0x00000008u; PROTOBUF_NAMESPACE_ID::FileOptions* temp = options_; options_ = nullptr; +#ifdef PROTOBUF_FORCE_COPY_IN_RELEASE + auto* old = reinterpret_cast<::PROTOBUF_NAMESPACE_ID::MessageLite*>(temp); + temp = ::PROTOBUF_NAMESPACE_ID::internal::DuplicateIfNonNull(temp); + if (GetArenaForAllocation() == nullptr) { delete old; } +#else // PROTOBUF_FORCE_COPY_IN_RELEASE if (GetArenaForAllocation() != nullptr) { temp = ::PROTOBUF_NAMESPACE_ID::internal::DuplicateIfNonNull(temp); } +#endif // !PROTOBUF_FORCE_COPY_IN_RELEASE return temp; } inline PROTOBUF_NAMESPACE_ID::FileOptions* FileDescriptorProto::unsafe_arena_release_options() { @@ -7197,9 +7203,15 @@ inline PROTOBUF_NAMESPACE_ID::SourceCodeInfo* FileDescriptorProto::release_sourc _has_bits_[0] &= ~0x00000010u; PROTOBUF_NAMESPACE_ID::SourceCodeInfo* temp = source_code_info_; source_code_info_ = nullptr; +#ifdef PROTOBUF_FORCE_COPY_IN_RELEASE + auto* old = reinterpret_cast<::PROTOBUF_NAMESPACE_ID::MessageLite*>(temp); + temp = ::PROTOBUF_NAMESPACE_ID::internal::DuplicateIfNonNull(temp); + if (GetArenaForAllocation() == nullptr) { delete old; } +#else // PROTOBUF_FORCE_COPY_IN_RELEASE if (GetArenaForAllocation() != 
nullptr) { temp = ::PROTOBUF_NAMESPACE_ID::internal::DuplicateIfNonNull(temp); } +#endif // !PROTOBUF_FORCE_COPY_IN_RELEASE return temp; } inline PROTOBUF_NAMESPACE_ID::SourceCodeInfo* FileDescriptorProto::unsafe_arena_release_source_code_info() { @@ -7399,9 +7411,15 @@ inline PROTOBUF_NAMESPACE_ID::ExtensionRangeOptions* DescriptorProto_ExtensionRa _has_bits_[0] &= ~0x00000001u; PROTOBUF_NAMESPACE_ID::ExtensionRangeOptions* temp = options_; options_ = nullptr; +#ifdef PROTOBUF_FORCE_COPY_IN_RELEASE + auto* old = reinterpret_cast<::PROTOBUF_NAMESPACE_ID::MessageLite*>(temp); + temp = ::PROTOBUF_NAMESPACE_ID::internal::DuplicateIfNonNull(temp); + if (GetArenaForAllocation() == nullptr) { delete old; } +#else // PROTOBUF_FORCE_COPY_IN_RELEASE if (GetArenaForAllocation() != nullptr) { temp = ::PROTOBUF_NAMESPACE_ID::internal::DuplicateIfNonNull(temp); } +#endif // !PROTOBUF_FORCE_COPY_IN_RELEASE return temp; } inline PROTOBUF_NAMESPACE_ID::ExtensionRangeOptions* DescriptorProto_ExtensionRange::unsafe_arena_release_options() { @@ -7845,9 +7863,15 @@ inline PROTOBUF_NAMESPACE_ID::MessageOptions* DescriptorProto::release_options() _has_bits_[0] &= ~0x00000002u; PROTOBUF_NAMESPACE_ID::MessageOptions* temp = options_; options_ = nullptr; +#ifdef PROTOBUF_FORCE_COPY_IN_RELEASE + auto* old = reinterpret_cast<::PROTOBUF_NAMESPACE_ID::MessageLite*>(temp); + temp = ::PROTOBUF_NAMESPACE_ID::internal::DuplicateIfNonNull(temp); + if (GetArenaForAllocation() == nullptr) { delete old; } +#else // PROTOBUF_FORCE_COPY_IN_RELEASE if (GetArenaForAllocation() != nullptr) { temp = ::PROTOBUF_NAMESPACE_ID::internal::DuplicateIfNonNull(temp); } +#endif // !PROTOBUF_FORCE_COPY_IN_RELEASE return temp; } inline PROTOBUF_NAMESPACE_ID::MessageOptions* DescriptorProto::unsafe_arena_release_options() { @@ -8496,9 +8520,15 @@ inline PROTOBUF_NAMESPACE_ID::FieldOptions* FieldDescriptorProto::release_option _has_bits_[0] &= ~0x00000020u; PROTOBUF_NAMESPACE_ID::FieldOptions* temp = options_; options_ 
= nullptr; +#ifdef PROTOBUF_FORCE_COPY_IN_RELEASE + auto* old = reinterpret_cast<::PROTOBUF_NAMESPACE_ID::MessageLite*>(temp); + temp = ::PROTOBUF_NAMESPACE_ID::internal::DuplicateIfNonNull(temp); + if (GetArenaForAllocation() == nullptr) { delete old; } +#else // PROTOBUF_FORCE_COPY_IN_RELEASE if (GetArenaForAllocation() != nullptr) { temp = ::PROTOBUF_NAMESPACE_ID::internal::DuplicateIfNonNull(temp); } +#endif // !PROTOBUF_FORCE_COPY_IN_RELEASE return temp; } inline PROTOBUF_NAMESPACE_ID::FieldOptions* FieldDescriptorProto::unsafe_arena_release_options() { @@ -8670,9 +8700,15 @@ inline PROTOBUF_NAMESPACE_ID::OneofOptions* OneofDescriptorProto::release_option _has_bits_[0] &= ~0x00000002u; PROTOBUF_NAMESPACE_ID::OneofOptions* temp = options_; options_ = nullptr; +#ifdef PROTOBUF_FORCE_COPY_IN_RELEASE + auto* old = reinterpret_cast<::PROTOBUF_NAMESPACE_ID::MessageLite*>(temp); + temp = ::PROTOBUF_NAMESPACE_ID::internal::DuplicateIfNonNull(temp); + if (GetArenaForAllocation() == nullptr) { delete old; } +#else // PROTOBUF_FORCE_COPY_IN_RELEASE if (GetArenaForAllocation() != nullptr) { temp = ::PROTOBUF_NAMESPACE_ID::internal::DuplicateIfNonNull(temp); } +#endif // !PROTOBUF_FORCE_COPY_IN_RELEASE return temp; } inline PROTOBUF_NAMESPACE_ID::OneofOptions* OneofDescriptorProto::unsafe_arena_release_options() { @@ -8916,9 +8952,15 @@ inline PROTOBUF_NAMESPACE_ID::EnumOptions* EnumDescriptorProto::release_options( _has_bits_[0] &= ~0x00000002u; PROTOBUF_NAMESPACE_ID::EnumOptions* temp = options_; options_ = nullptr; +#ifdef PROTOBUF_FORCE_COPY_IN_RELEASE + auto* old = reinterpret_cast<::PROTOBUF_NAMESPACE_ID::MessageLite*>(temp); + temp = ::PROTOBUF_NAMESPACE_ID::internal::DuplicateIfNonNull(temp); + if (GetArenaForAllocation() == nullptr) { delete old; } +#else // PROTOBUF_FORCE_COPY_IN_RELEASE if (GetArenaForAllocation() != nullptr) { temp = ::PROTOBUF_NAMESPACE_ID::internal::DuplicateIfNonNull(temp); } +#endif // !PROTOBUF_FORCE_COPY_IN_RELEASE return temp; } inline 
PROTOBUF_NAMESPACE_ID::EnumOptions* EnumDescriptorProto::unsafe_arena_release_options() { @@ -9205,9 +9247,15 @@ inline PROTOBUF_NAMESPACE_ID::EnumValueOptions* EnumValueDescriptorProto::releas _has_bits_[0] &= ~0x00000002u; PROTOBUF_NAMESPACE_ID::EnumValueOptions* temp = options_; options_ = nullptr; +#ifdef PROTOBUF_FORCE_COPY_IN_RELEASE + auto* old = reinterpret_cast<::PROTOBUF_NAMESPACE_ID::MessageLite*>(temp); + temp = ::PROTOBUF_NAMESPACE_ID::internal::DuplicateIfNonNull(temp); + if (GetArenaForAllocation() == nullptr) { delete old; } +#else // PROTOBUF_FORCE_COPY_IN_RELEASE if (GetArenaForAllocation() != nullptr) { temp = ::PROTOBUF_NAMESPACE_ID::internal::DuplicateIfNonNull(temp); } +#endif // !PROTOBUF_FORCE_COPY_IN_RELEASE return temp; } inline PROTOBUF_NAMESPACE_ID::EnumValueOptions* EnumValueDescriptorProto::unsafe_arena_release_options() { @@ -9391,9 +9439,15 @@ inline PROTOBUF_NAMESPACE_ID::ServiceOptions* ServiceDescriptorProto::release_op _has_bits_[0] &= ~0x00000002u; PROTOBUF_NAMESPACE_ID::ServiceOptions* temp = options_; options_ = nullptr; +#ifdef PROTOBUF_FORCE_COPY_IN_RELEASE + auto* old = reinterpret_cast<::PROTOBUF_NAMESPACE_ID::MessageLite*>(temp); + temp = ::PROTOBUF_NAMESPACE_ID::internal::DuplicateIfNonNull(temp); + if (GetArenaForAllocation() == nullptr) { delete old; } +#else // PROTOBUF_FORCE_COPY_IN_RELEASE if (GetArenaForAllocation() != nullptr) { temp = ::PROTOBUF_NAMESPACE_ID::internal::DuplicateIfNonNull(temp); } +#endif // !PROTOBUF_FORCE_COPY_IN_RELEASE return temp; } inline PROTOBUF_NAMESPACE_ID::ServiceOptions* ServiceDescriptorProto::unsafe_arena_release_options() { @@ -9653,9 +9707,15 @@ inline PROTOBUF_NAMESPACE_ID::MethodOptions* MethodDescriptorProto::release_opti _has_bits_[0] &= ~0x00000008u; PROTOBUF_NAMESPACE_ID::MethodOptions* temp = options_; options_ = nullptr; +#ifdef PROTOBUF_FORCE_COPY_IN_RELEASE + auto* old = reinterpret_cast<::PROTOBUF_NAMESPACE_ID::MessageLite*>(temp); + temp = 
::PROTOBUF_NAMESPACE_ID::internal::DuplicateIfNonNull(temp); + if (GetArenaForAllocation() == nullptr) { delete old; } +#else // PROTOBUF_FORCE_COPY_IN_RELEASE if (GetArenaForAllocation() != nullptr) { temp = ::PROTOBUF_NAMESPACE_ID::internal::DuplicateIfNonNull(temp); } +#endif // !PROTOBUF_FORCE_COPY_IN_RELEASE return temp; } inline PROTOBUF_NAMESPACE_ID::MethodOptions* MethodDescriptorProto::unsafe_arena_release_options() { diff --git a/src/google/protobuf/duration.pb.cc b/src/google/protobuf/duration.pb.cc index d889cc169c..13fc338475 100644 --- a/src/google/protobuf/duration.pb.cc +++ b/src/google/protobuf/duration.pb.cc @@ -189,13 +189,13 @@ failure: (void) cached_has_bits; // int64 seconds = 1; - if (this->seconds() != 0) { + if (this->_internal_seconds() != 0) { target = stream->EnsureSpace(target); target = ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::WriteInt64ToArray(1, this->_internal_seconds(), target); } // int32 nanos = 2; - if (this->nanos() != 0) { + if (this->_internal_nanos() != 0) { target = stream->EnsureSpace(target); target = ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::WriteInt32ToArray(2, this->_internal_nanos(), target); } @@ -217,14 +217,14 @@ size_t Duration::ByteSizeLong() const { (void) cached_has_bits; // int64 seconds = 1; - if (this->seconds() != 0) { + if (this->_internal_seconds() != 0) { total_size += 1 + ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::Int64Size( this->_internal_seconds()); } // int32 nanos = 2; - if (this->nanos() != 0) { + if (this->_internal_nanos() != 0) { total_size += 1 + ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::Int32Size( this->_internal_nanos()); @@ -258,10 +258,10 @@ void Duration::MergeFrom(const Duration& from) { ::PROTOBUF_NAMESPACE_ID::uint32 cached_has_bits = 0; (void) cached_has_bits; - if (from.seconds() != 0) { + if (from._internal_seconds() != 0) { _internal_set_seconds(from._internal_seconds()); } - if (from.nanos() != 0) { + if (from._internal_nanos() != 0) { 
_internal_set_nanos(from._internal_nanos()); } _internal_metadata_.MergeFrom<::PROTOBUF_NAMESPACE_ID::UnknownFieldSet>(from._internal_metadata_); diff --git a/src/google/protobuf/field_access_listener.cc b/src/google/protobuf/field_access_listener.cc new file mode 100644 index 0000000000..56e175a7ac --- /dev/null +++ b/src/google/protobuf/field_access_listener.cc @@ -0,0 +1,52 @@ +// Protocol Buffers - Google's data interchange format +// Copyright 2008 Google Inc. All rights reserved. +// https://developers.google.com/protocol-buffers/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include + +#include + +namespace google { +namespace protobuf { + +internal::once_flag FieldAccessListener::register_once_ = {}; +FieldAccessListener* FieldAccessListener::field_listener_ = nullptr; + +FieldAccessListener* FieldAccessListener::GetListener() { + return field_listener_; +} + +void FieldAccessListener::RegisterListener(FieldAccessListener* listener) { + // TODO(danilak): Add a GOOGLE_DCHECK for field_listener_ to be nullptr and update + // tests. + internal::call_once(register_once_, [&] { field_listener_ = listener; }); +} + +} // namespace protobuf +} // namespace google diff --git a/src/google/protobuf/field_access_listener.h b/src/google/protobuf/field_access_listener.h new file mode 100644 index 0000000000..660ad73a8a --- /dev/null +++ b/src/google/protobuf/field_access_listener.h @@ -0,0 +1,246 @@ +// Protocol Buffers - Google's data interchange format +// Copyright 2008 Google Inc. All rights reserved. +// https://developers.google.com/protocol-buffers/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. 
+// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#ifndef GOOGLE_PROTOBUF_FIELD_ACCESS_LISTENER_H__ +#define GOOGLE_PROTOBUF_FIELD_ACCESS_LISTENER_H__ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + + +namespace google { +namespace protobuf { +namespace internal { +template +struct ResolvedType { + using type = T; +}; +} // namespace internal +// Tracks the events of field accesses for all protos +// that are built with --inject_field_listener_events. This is a global +// interface which you must implement yourself and register with +// RegisterListener() function. All events consist of Descriptors, +// FieldAccessTypes and the underlying storage for tracking the memory which is +// accessed where possible and makes sense. 
Users are responsible for the +// implementations to be thread safe. +class FieldAccessListener { + public: + FieldAccessListener() = default; + virtual ~FieldAccessListener() = default; + + // The memory annotations of the proto fields that are touched by the + // accessors. They are returned as if the operation completes. + struct DataAnnotation { + DataAnnotation() = default; + DataAnnotation(const void* other_address, size_t other_size) + : address(other_address), size(other_size) {} + const void* address = nullptr; + size_t size = 0; + }; + using AddressInfo = std::vector; + using AddressInfoExtractor = std::function; + + enum class FieldAccessType { + kAdd, // add_(f) + kAddMutable, // add_() + kGet, // () and (i) + kClear, // clear_() + kHas, // has_() + kList, // () + kMutable, // mutable_() + kMutableList, // mutable_() + kRelease, // release_() + kSet, // set_() and set_(i) + kSize, // _size() + }; + + static FieldAccessListener* GetListener(); + + // Registers the field listener, can be called only once, |listener| must + // outlive all proto accesses (in most cases, the lifetime of the program). + static void RegisterListener(FieldAccessListener* listener); + + // All field accessors noted in FieldAccessType have this call. + // |extractor| extracts the address info from the field + virtual void OnFieldAccess(const AddressInfoExtractor& extractor, + const FieldDescriptor* descriptor, + FieldAccessType access_type) = 0; + + // Side effect calls. + virtual void OnDeserializationAccess(const Message* message) = 0; + virtual void OnSerializationAccess(const Message* message) = 0; + virtual void OnReflectionAccess(const Descriptor* descriptor) = 0; + virtual void OnByteSizeAccess(const Message* message) = 0; + // We can probably add more if we need to, like {Merge,Copy}{From}Access. + + // Extracts all the addresses from the underlying fields. 
+ template + AddressInfo ExtractFieldInfo(const T* field_value); + + + private: + template + AddressInfo ExtractFieldInfoSpecific(const T* field_value, + internal::ResolvedType); + + AddressInfo ExtractFieldInfoSpecific(const Message* field_value, + internal::ResolvedType); + + AddressInfo ExtractFieldInfoSpecific(const std::string* field_value, + internal::ResolvedType); + + AddressInfo ExtractFieldInfoSpecific( + const internal::ArenaStringPtr* field_value, + internal::ResolvedType); + + template + AddressInfo ExtractFieldInfoSpecific( + const RepeatedField* field_value, + internal::ResolvedType>); + + template + AddressInfo ExtractFieldInfoSpecific( + const RepeatedPtrField* field_value, + internal::ResolvedType>); + + template + AddressInfo ExtractFieldInfoSpecific(const Map* field_value, + internal::ResolvedType>); + + static internal::once_flag register_once_; + static FieldAccessListener* field_listener_; + GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(FieldAccessListener); +}; + +template +inline FieldAccessListener::AddressInfo FieldAccessListener::ExtractFieldInfo( + const T* field_value) { + return ExtractFieldInfoSpecific(field_value, internal::ResolvedType()); +} + + +template +inline FieldAccessListener::AddressInfo +FieldAccessListener::ExtractFieldInfoSpecific(const T* field_value, + internal::ResolvedType) { + static_assert(std::is_trivial::value, + "This overload should be chosen only for trivial types"); + return FieldAccessListener::AddressInfo{FieldAccessListener::DataAnnotation( + static_cast(field_value), sizeof(*field_value))}; +} + +inline FieldAccessListener::AddressInfo +FieldAccessListener::ExtractFieldInfoSpecific( + const std::string* field_value, internal::ResolvedType) { + return FieldAccessListener::AddressInfo{FieldAccessListener::DataAnnotation( + static_cast(field_value->c_str()), field_value->length())}; +} + +inline FieldAccessListener::AddressInfo +FieldAccessListener::ExtractFieldInfoSpecific( + const internal::ArenaStringPtr* 
field_value, + internal::ResolvedType) { + return FieldAccessListener::ExtractFieldInfoSpecific( + field_value->GetPointer(), internal::ResolvedType()); +} + +template +inline FieldAccessListener::AddressInfo +FieldAccessListener::ExtractFieldInfoSpecific( + const RepeatedField* field_value, + internal::ResolvedType>) { + // TODO(jianzhouzh): This can cause data races. Synchronize this if needed. + FieldAccessListener::AddressInfo address_info; + address_info.reserve(field_value->size()); + for (int i = 0, ie = field_value->size(); i < ie; ++i) { + auto sub = ExtractFieldInfoSpecific(&field_value->Get(i), + internal::ResolvedType()); + address_info.insert(address_info.end(), sub.begin(), sub.end()); + } + return address_info; +} + +template +inline FieldAccessListener::AddressInfo +FieldAccessListener::ExtractFieldInfoSpecific( + const RepeatedPtrField* field_value, + internal::ResolvedType>) { + FieldAccessListener::AddressInfo address_info; + // TODO(jianzhouzh): This can cause data races. Synchronize this if needed. + address_info.reserve(field_value->size()); + for (int i = 0, ie = field_value->size(); i < ie; ++i) { + auto sub = ExtractFieldInfoSpecific(&field_value->Get(i), + internal::ResolvedType()); + address_info.insert(address_info.end(), sub.begin(), sub.end()); + } + return address_info; +} + +template +inline FieldAccessListener::AddressInfo +FieldAccessListener::ExtractFieldInfoSpecific( + const Map* field_value, internal::ResolvedType>) { + // TODO(jianzhouzh): This can cause data races. Synchronize this if needed. 
+ FieldAccessListener::AddressInfo address_info; + address_info.reserve(field_value->size()); + for (auto it = field_value->begin(); it != field_value->end(); ++it) { + auto sub_first = + ExtractFieldInfoSpecific(&it->first, internal::ResolvedType()); + auto sub_second = + ExtractFieldInfoSpecific(&it->second, internal::ResolvedType()); + address_info.insert(address_info.end(), sub_first.begin(), sub_first.end()); + address_info.insert(address_info.end(), sub_second.begin(), + sub_second.end()); + } + return address_info; +} + +inline FieldAccessListener::AddressInfo +FieldAccessListener::ExtractFieldInfoSpecific(const Message* field_value, + internal::ResolvedType) { + // TODO(jianzhouzh): implement and adjust all annotations in the compiler. + return {}; +} + +} // namespace protobuf +} // namespace google + +#endif // GOOGLE_PROTOBUF_FIELD_ACCESS_LISTENER_H__ diff --git a/src/google/protobuf/generated_message_reflection.cc b/src/google/protobuf/generated_message_reflection.cc index e743dd11e4..f50352f2ce 100644 --- a/src/google/protobuf/generated_message_reflection.cc +++ b/src/google/protobuf/generated_message_reflection.cc @@ -76,6 +76,17 @@ namespace protobuf { namespace { bool IsMapFieldInApi(const FieldDescriptor* field) { return field->is_map(); } + +#ifdef PROTOBUF_FORCE_COPY_IN_RELEASE +Message* MaybeForceCopy(Arena* arena, Message* msg) { + if (arena != nullptr || msg == nullptr) return msg; + + Message* copy = msg->New(); + copy->MergeFrom(*msg); + delete msg; + return copy; +} +#endif // PROTOBUF_FORCE_COPY_IN_RELEASE } // anonymous namespace namespace internal { @@ -512,13 +523,13 @@ void SwapFieldHelper::SwapMessage(const Reflection* r, Message* lhs, if (*lhs_sub != nullptr && *rhs_sub != nullptr) { (*lhs_sub)->GetReflection()->Swap(*lhs_sub, *rhs_sub); - } else if (*lhs_sub == nullptr) { + } else if (*lhs_sub == nullptr && r->HasBit(*rhs, field)) { *lhs_sub = (*rhs_sub)->New(lhs_arena); (*lhs_sub)->CopyFrom(**rhs_sub); r->ClearField(rhs, field); // 
Ensures has bit is unchanged after ClearField. r->SetBit(rhs, field); - } else { + } else if (*rhs_sub == nullptr && r->HasBit(*lhs, field)) { *rhs_sub = (*lhs_sub)->New(rhs_arena); (*rhs_sub)->CopyFrom(**lhs_sub); r->ClearField(lhs, field); @@ -649,14 +660,14 @@ void Reflection::SwapOneofField(Message* message1, Message* message2, uint32 oneof_case1 = GetOneofCase(*message1, oneof_descriptor); uint32 oneof_case2 = GetOneofCase(*message2, oneof_descriptor); - int32 temp_int32; - int64 temp_int64; - uint32 temp_uint32; - uint64 temp_uint64; - float temp_float; - double temp_double; - bool temp_bool; - int temp_int; + int32 temp_int32 = 0; + int64 temp_int64 = 0; + uint32 temp_uint32 = 0; + uint64 temp_uint64 = 0; + float temp_float = 0; + double temp_double = 0; + bool temp_bool = false; + int temp_int = 0; Message* temp_message = nullptr; std::string temp_string; @@ -1196,19 +1207,25 @@ Message* Reflection::ReleaseLast(Message* message, USAGE_CHECK_ALL(ReleaseLast, REPEATED, MESSAGE); CheckInvalidAccess(schema_, field); + Message* released; if (field->is_extension()) { - return static_cast( + released = static_cast( MutableExtensionSet(message)->ReleaseLast(field->number())); } else { if (IsMapFieldInApi(field)) { - return MutableRaw(message, field) - ->MutableRepeatedField() - ->ReleaseLast >(); + released = MutableRaw(message, field) + ->MutableRepeatedField() + ->ReleaseLast>(); } else { - return MutableRaw(message, field) - ->ReleaseLast >(); + released = MutableRaw(message, field) + ->ReleaseLast>(); } } +#ifdef PROTOBUF_FORCE_COPY_IN_RELEASE + return MaybeForceCopy(message->GetArenaForAllocation(), released); +#else // PROTOBUF_FORCE_COPY_IN_RELEASE + return released; +#endif // !PROTOBUF_FORCE_COPY_IN_RELEASE } void Reflection::SwapElements(Message* message, const FieldDescriptor* field, @@ -1918,6 +1935,9 @@ Message* Reflection::ReleaseMessage(Message* message, CheckInvalidAccess(schema_, field); Message* released = UnsafeArenaReleaseMessage(message, field, 
factory); +#ifdef PROTOBUF_FORCE_COPY_IN_RELEASE + released = MaybeForceCopy(message->GetArenaForAllocation(), released); +#endif // PROTOBUF_FORCE_COPY_IN_RELEASE if (message->GetArenaForAllocation() != nullptr && released != nullptr) { Message* copy_from_arena = released->New(); copy_from_arena->CopyFrom(*released); diff --git a/src/google/protobuf/generated_message_reflection_unittest.cc b/src/google/protobuf/generated_message_reflection_unittest.cc index d5c68db61f..0b6ed8e4ad 100644 --- a/src/google/protobuf/generated_message_reflection_unittest.cc +++ b/src/google/protobuf/generated_message_reflection_unittest.cc @@ -200,6 +200,38 @@ TEST(GeneratedMessageReflectionTest, SwapWithBothSet) { EXPECT_EQ(532819, message2.optional_int32()); } +TEST(GeneratedMessageReflectionTest, SwapWithLhsCleared) { + unittest::TestAllTypes message1; + unittest::TestAllTypes message2; + + TestUtil::SetAllFields(&message1); + + // For proto2 message, for message field, Clear only reset hasbits, but + // doesn't delete the underlying field. + message1.Clear(); + + const Reflection* reflection = message1.GetReflection(); + reflection->Swap(&message1, &message2); + + TestUtil::ExpectClear(message2); +} + +TEST(GeneratedMessageReflectionTest, SwapWithRhsCleared) { + unittest::TestAllTypes message1; + unittest::TestAllTypes message2; + + TestUtil::SetAllFields(&message2); + + // For proto2 message, for message field, Clear only reset hasbits, but + // doesn't delete the underlying field. 
+ message2.Clear(); + + const Reflection* reflection = message1.GetReflection(); + reflection->Swap(&message1, &message2); + + TestUtil::ExpectClear(message1); +} + TEST(GeneratedMessageReflectionTest, SwapExtensions) { unittest::TestAllExtensions message1; unittest::TestAllExtensions message2; diff --git a/src/google/protobuf/generated_message_tctable_impl.h b/src/google/protobuf/generated_message_tctable_impl.h index b4cf9291b9..07879bdcbe 100644 --- a/src/google/protobuf/generated_message_tctable_impl.h +++ b/src/google/protobuf/generated_message_tctable_impl.h @@ -79,7 +79,7 @@ class TcParserBase { return table->fallback(PROTOBUF_TC_PARAM_PASS); } ptr += sizeof(TagType); - hasbits |= (1 << data.hasbit_idx()); + hasbits |= (uint64_t{1} << data.hasbit_idx()); auto& field = RefAt(msg, data.offset()); if (field == nullptr) { auto arena = ctx->data().arena; @@ -111,6 +111,18 @@ class TcParserBase { template static const char* PackedFixed(PROTOBUF_TC_PARAM_DECL); + enum VarintDecode { kNoConversion = 0, kZigZag = 1 }; + template + static const char* RepeatedVarint(PROTOBUF_TC_PARAM_DECL); + template + static const char* PackedVarint(PROTOBUF_TC_PARAM_DECL); + + enum Utf8Type { kNoUtf8 = 0, kUtf8 = 1, kUtf8ValidateOnly = 2 }; + template + static const char* SingularString(PROTOBUF_TC_PARAM_DECL); + template + static const char* RepeatedString(PROTOBUF_TC_PARAM_DECL); + protected: template static T& RefAt(void* x, size_t offset) { @@ -231,6 +243,9 @@ struct TcParser final : TcParserBase { template static const char* SingularFixed(PROTOBUF_TC_PARAM_DECL); + + template + static const char* SingularVarint(PROTOBUF_TC_PARAM_DECL); }; // Declare helper functions: diff --git a/src/google/protobuf/generated_message_tctable_impl.inc b/src/google/protobuf/generated_message_tctable_impl.inc index 2e3dbf2711..e6e5dd515e 100644 --- a/src/google/protobuf/generated_message_tctable_impl.inc +++ b/src/google/protobuf/generated_message_tctable_impl.inc @@ -44,6 +44,47 @@ template const 
char* TcParser<4>::SingularFixed(PROTOBUF_TC_P template const char* TcParser<5>::SingularFixed(PROTOBUF_TC_PARAM_DECL); template const char* TcParserBase::RepeatedFixed(PROTOBUF_TC_PARAM_DECL); template const char* TcParserBase::PackedFixed(PROTOBUF_TC_PARAM_DECL); +template const char* TcParser<1>::SingularVarint(PROTOBUF_TC_PARAM_DECL); +template const char* TcParser<2>::SingularVarint(PROTOBUF_TC_PARAM_DECL); +template const char* TcParser<3>::SingularVarint(PROTOBUF_TC_PARAM_DECL); +template const char* TcParser<4>::SingularVarint(PROTOBUF_TC_PARAM_DECL); +template const char* TcParser<5>::SingularVarint(PROTOBUF_TC_PARAM_DECL); +template const char* TcParserBase::RepeatedVarint(PROTOBUF_TC_PARAM_DECL); +template const char* TcParserBase::PackedVarint(PROTOBUF_TC_PARAM_DECL); +template const char* TcParser<1>::SingularVarint(PROTOBUF_TC_PARAM_DECL); +template const char* TcParser<2>::SingularVarint(PROTOBUF_TC_PARAM_DECL); +template const char* TcParser<3>::SingularVarint(PROTOBUF_TC_PARAM_DECL); +template const char* TcParser<4>::SingularVarint(PROTOBUF_TC_PARAM_DECL); +template const char* TcParser<5>::SingularVarint(PROTOBUF_TC_PARAM_DECL); +template const char* TcParserBase::RepeatedVarint(PROTOBUF_TC_PARAM_DECL); +template const char* TcParserBase::PackedVarint(PROTOBUF_TC_PARAM_DECL); +template const char* TcParser<1>::SingularVarint(PROTOBUF_TC_PARAM_DECL); +template const char* TcParser<2>::SingularVarint(PROTOBUF_TC_PARAM_DECL); +template const char* TcParser<3>::SingularVarint(PROTOBUF_TC_PARAM_DECL); +template const char* TcParser<4>::SingularVarint(PROTOBUF_TC_PARAM_DECL); +template const char* TcParser<5>::SingularVarint(PROTOBUF_TC_PARAM_DECL); +template const char* TcParserBase::RepeatedVarint(PROTOBUF_TC_PARAM_DECL); +template const char* TcParserBase::PackedVarint(PROTOBUF_TC_PARAM_DECL); +template const char* TcParser<1>::SingularVarint(PROTOBUF_TC_PARAM_DECL); +template const char* TcParser<2>::SingularVarint(PROTOBUF_TC_PARAM_DECL); 
+template const char* TcParser<3>::SingularVarint(PROTOBUF_TC_PARAM_DECL); +template const char* TcParser<4>::SingularVarint(PROTOBUF_TC_PARAM_DECL); +template const char* TcParser<5>::SingularVarint(PROTOBUF_TC_PARAM_DECL); +template const char* TcParserBase::RepeatedVarint(PROTOBUF_TC_PARAM_DECL); +template const char* TcParserBase::PackedVarint(PROTOBUF_TC_PARAM_DECL); +template const char* TcParser<1>::SingularVarint(PROTOBUF_TC_PARAM_DECL); +template const char* TcParser<2>::SingularVarint(PROTOBUF_TC_PARAM_DECL); +template const char* TcParser<3>::SingularVarint(PROTOBUF_TC_PARAM_DECL); +template const char* TcParser<4>::SingularVarint(PROTOBUF_TC_PARAM_DECL); +template const char* TcParser<5>::SingularVarint(PROTOBUF_TC_PARAM_DECL); +template const char* TcParserBase::RepeatedVarint(PROTOBUF_TC_PARAM_DECL); +template const char* TcParserBase::PackedVarint(PROTOBUF_TC_PARAM_DECL); +template const char* TcParserBase::SingularString(PROTOBUF_TC_PARAM_DECL); +template const char* TcParserBase::RepeatedString(PROTOBUF_TC_PARAM_DECL); +template const char* TcParserBase::SingularString(PROTOBUF_TC_PARAM_DECL); +template const char* TcParserBase::RepeatedString(PROTOBUF_TC_PARAM_DECL); +template const char* TcParserBase::SingularString(PROTOBUF_TC_PARAM_DECL); +template const char* TcParserBase::RepeatedString(PROTOBUF_TC_PARAM_DECL); template const char* TcParser<1>::SingularFixed(PROTOBUF_TC_PARAM_DECL); template const char* TcParser<2>::SingularFixed(PROTOBUF_TC_PARAM_DECL); template const char* TcParser<3>::SingularFixed(PROTOBUF_TC_PARAM_DECL); @@ -58,6 +99,47 @@ template const char* TcParser<4>::SingularFixed(PROTOBUF_TC_ template const char* TcParser<5>::SingularFixed(PROTOBUF_TC_PARAM_DECL); template const char* TcParserBase::RepeatedFixed(PROTOBUF_TC_PARAM_DECL); template const char* TcParserBase::PackedFixed(PROTOBUF_TC_PARAM_DECL); +template const char* TcParser<1>::SingularVarint(PROTOBUF_TC_PARAM_DECL); +template const char* 
TcParser<2>::SingularVarint(PROTOBUF_TC_PARAM_DECL); +template const char* TcParser<3>::SingularVarint(PROTOBUF_TC_PARAM_DECL); +template const char* TcParser<4>::SingularVarint(PROTOBUF_TC_PARAM_DECL); +template const char* TcParser<5>::SingularVarint(PROTOBUF_TC_PARAM_DECL); +template const char* TcParserBase::RepeatedVarint(PROTOBUF_TC_PARAM_DECL); +template const char* TcParserBase::PackedVarint(PROTOBUF_TC_PARAM_DECL); +template const char* TcParser<1>::SingularVarint(PROTOBUF_TC_PARAM_DECL); +template const char* TcParser<2>::SingularVarint(PROTOBUF_TC_PARAM_DECL); +template const char* TcParser<3>::SingularVarint(PROTOBUF_TC_PARAM_DECL); +template const char* TcParser<4>::SingularVarint(PROTOBUF_TC_PARAM_DECL); +template const char* TcParser<5>::SingularVarint(PROTOBUF_TC_PARAM_DECL); +template const char* TcParserBase::RepeatedVarint(PROTOBUF_TC_PARAM_DECL); +template const char* TcParserBase::PackedVarint(PROTOBUF_TC_PARAM_DECL); +template const char* TcParser<1>::SingularVarint(PROTOBUF_TC_PARAM_DECL); +template const char* TcParser<2>::SingularVarint(PROTOBUF_TC_PARAM_DECL); +template const char* TcParser<3>::SingularVarint(PROTOBUF_TC_PARAM_DECL); +template const char* TcParser<4>::SingularVarint(PROTOBUF_TC_PARAM_DECL); +template const char* TcParser<5>::SingularVarint(PROTOBUF_TC_PARAM_DECL); +template const char* TcParserBase::RepeatedVarint(PROTOBUF_TC_PARAM_DECL); +template const char* TcParserBase::PackedVarint(PROTOBUF_TC_PARAM_DECL); +template const char* TcParser<1>::SingularVarint(PROTOBUF_TC_PARAM_DECL); +template const char* TcParser<2>::SingularVarint(PROTOBUF_TC_PARAM_DECL); +template const char* TcParser<3>::SingularVarint(PROTOBUF_TC_PARAM_DECL); +template const char* TcParser<4>::SingularVarint(PROTOBUF_TC_PARAM_DECL); +template const char* TcParser<5>::SingularVarint(PROTOBUF_TC_PARAM_DECL); +template const char* TcParserBase::RepeatedVarint(PROTOBUF_TC_PARAM_DECL); +template const char* 
TcParserBase::PackedVarint(PROTOBUF_TC_PARAM_DECL); +template const char* TcParser<1>::SingularVarint(PROTOBUF_TC_PARAM_DECL); +template const char* TcParser<2>::SingularVarint(PROTOBUF_TC_PARAM_DECL); +template const char* TcParser<3>::SingularVarint(PROTOBUF_TC_PARAM_DECL); +template const char* TcParser<4>::SingularVarint(PROTOBUF_TC_PARAM_DECL); +template const char* TcParser<5>::SingularVarint(PROTOBUF_TC_PARAM_DECL); +template const char* TcParserBase::RepeatedVarint(PROTOBUF_TC_PARAM_DECL); +template const char* TcParserBase::PackedVarint(PROTOBUF_TC_PARAM_DECL); +template const char* TcParserBase::SingularString(PROTOBUF_TC_PARAM_DECL); +template const char* TcParserBase::RepeatedString(PROTOBUF_TC_PARAM_DECL); +template const char* TcParserBase::SingularString(PROTOBUF_TC_PARAM_DECL); +template const char* TcParserBase::RepeatedString(PROTOBUF_TC_PARAM_DECL); +template const char* TcParserBase::SingularString(PROTOBUF_TC_PARAM_DECL); +template const char* TcParserBase::RepeatedString(PROTOBUF_TC_PARAM_DECL); #else extern template const char* TcParser<1>::SingularFixed(PROTOBUF_TC_PARAM_DECL); extern template const char* TcParser<2>::SingularFixed(PROTOBUF_TC_PARAM_DECL); @@ -73,6 +155,47 @@ extern template const char* TcParser<4>::SingularFixed(PROTOB extern template const char* TcParser<5>::SingularFixed(PROTOBUF_TC_PARAM_DECL); extern template const char* TcParserBase::RepeatedFixed(PROTOBUF_TC_PARAM_DECL); extern template const char* TcParserBase::PackedFixed(PROTOBUF_TC_PARAM_DECL); +extern template const char* TcParser<1>::SingularVarint(PROTOBUF_TC_PARAM_DECL); +extern template const char* TcParser<2>::SingularVarint(PROTOBUF_TC_PARAM_DECL); +extern template const char* TcParser<3>::SingularVarint(PROTOBUF_TC_PARAM_DECL); +extern template const char* TcParser<4>::SingularVarint(PROTOBUF_TC_PARAM_DECL); +extern template const char* TcParser<5>::SingularVarint(PROTOBUF_TC_PARAM_DECL); +extern template const char* 
TcParserBase::RepeatedVarint(PROTOBUF_TC_PARAM_DECL); +extern template const char* TcParserBase::PackedVarint(PROTOBUF_TC_PARAM_DECL); +extern template const char* TcParser<1>::SingularVarint(PROTOBUF_TC_PARAM_DECL); +extern template const char* TcParser<2>::SingularVarint(PROTOBUF_TC_PARAM_DECL); +extern template const char* TcParser<3>::SingularVarint(PROTOBUF_TC_PARAM_DECL); +extern template const char* TcParser<4>::SingularVarint(PROTOBUF_TC_PARAM_DECL); +extern template const char* TcParser<5>::SingularVarint(PROTOBUF_TC_PARAM_DECL); +extern template const char* TcParserBase::RepeatedVarint(PROTOBUF_TC_PARAM_DECL); +extern template const char* TcParserBase::PackedVarint(PROTOBUF_TC_PARAM_DECL); +extern template const char* TcParser<1>::SingularVarint(PROTOBUF_TC_PARAM_DECL); +extern template const char* TcParser<2>::SingularVarint(PROTOBUF_TC_PARAM_DECL); +extern template const char* TcParser<3>::SingularVarint(PROTOBUF_TC_PARAM_DECL); +extern template const char* TcParser<4>::SingularVarint(PROTOBUF_TC_PARAM_DECL); +extern template const char* TcParser<5>::SingularVarint(PROTOBUF_TC_PARAM_DECL); +extern template const char* TcParserBase::RepeatedVarint(PROTOBUF_TC_PARAM_DECL); +extern template const char* TcParserBase::PackedVarint(PROTOBUF_TC_PARAM_DECL); +extern template const char* TcParser<1>::SingularVarint(PROTOBUF_TC_PARAM_DECL); +extern template const char* TcParser<2>::SingularVarint(PROTOBUF_TC_PARAM_DECL); +extern template const char* TcParser<3>::SingularVarint(PROTOBUF_TC_PARAM_DECL); +extern template const char* TcParser<4>::SingularVarint(PROTOBUF_TC_PARAM_DECL); +extern template const char* TcParser<5>::SingularVarint(PROTOBUF_TC_PARAM_DECL); +extern template const char* TcParserBase::RepeatedVarint(PROTOBUF_TC_PARAM_DECL); +extern template const char* TcParserBase::PackedVarint(PROTOBUF_TC_PARAM_DECL); +extern template const char* TcParser<1>::SingularVarint(PROTOBUF_TC_PARAM_DECL); +extern template const char* 
TcParser<2>::SingularVarint(PROTOBUF_TC_PARAM_DECL); +extern template const char* TcParser<3>::SingularVarint(PROTOBUF_TC_PARAM_DECL); +extern template const char* TcParser<4>::SingularVarint(PROTOBUF_TC_PARAM_DECL); +extern template const char* TcParser<5>::SingularVarint(PROTOBUF_TC_PARAM_DECL); +extern template const char* TcParserBase::RepeatedVarint(PROTOBUF_TC_PARAM_DECL); +extern template const char* TcParserBase::PackedVarint(PROTOBUF_TC_PARAM_DECL); +extern template const char* TcParserBase::SingularString(PROTOBUF_TC_PARAM_DECL); +extern template const char* TcParserBase::RepeatedString(PROTOBUF_TC_PARAM_DECL); +extern template const char* TcParserBase::SingularString(PROTOBUF_TC_PARAM_DECL); +extern template const char* TcParserBase::RepeatedString(PROTOBUF_TC_PARAM_DECL); +extern template const char* TcParserBase::SingularString(PROTOBUF_TC_PARAM_DECL); +extern template const char* TcParserBase::RepeatedString(PROTOBUF_TC_PARAM_DECL); extern template const char* TcParser<1>::SingularFixed(PROTOBUF_TC_PARAM_DECL); extern template const char* TcParser<2>::SingularFixed(PROTOBUF_TC_PARAM_DECL); extern template const char* TcParser<3>::SingularFixed(PROTOBUF_TC_PARAM_DECL); @@ -87,5 +210,46 @@ extern template const char* TcParser<4>::SingularFixed(PROTO extern template const char* TcParser<5>::SingularFixed(PROTOBUF_TC_PARAM_DECL); extern template const char* TcParserBase::RepeatedFixed(PROTOBUF_TC_PARAM_DECL); extern template const char* TcParserBase::PackedFixed(PROTOBUF_TC_PARAM_DECL); +extern template const char* TcParser<1>::SingularVarint(PROTOBUF_TC_PARAM_DECL); +extern template const char* TcParser<2>::SingularVarint(PROTOBUF_TC_PARAM_DECL); +extern template const char* TcParser<3>::SingularVarint(PROTOBUF_TC_PARAM_DECL); +extern template const char* TcParser<4>::SingularVarint(PROTOBUF_TC_PARAM_DECL); +extern template const char* TcParser<5>::SingularVarint(PROTOBUF_TC_PARAM_DECL); +extern template const char* 
TcParserBase::RepeatedVarint(PROTOBUF_TC_PARAM_DECL); +extern template const char* TcParserBase::PackedVarint(PROTOBUF_TC_PARAM_DECL); +extern template const char* TcParser<1>::SingularVarint(PROTOBUF_TC_PARAM_DECL); +extern template const char* TcParser<2>::SingularVarint(PROTOBUF_TC_PARAM_DECL); +extern template const char* TcParser<3>::SingularVarint(PROTOBUF_TC_PARAM_DECL); +extern template const char* TcParser<4>::SingularVarint(PROTOBUF_TC_PARAM_DECL); +extern template const char* TcParser<5>::SingularVarint(PROTOBUF_TC_PARAM_DECL); +extern template const char* TcParserBase::RepeatedVarint(PROTOBUF_TC_PARAM_DECL); +extern template const char* TcParserBase::PackedVarint(PROTOBUF_TC_PARAM_DECL); +extern template const char* TcParser<1>::SingularVarint(PROTOBUF_TC_PARAM_DECL); +extern template const char* TcParser<2>::SingularVarint(PROTOBUF_TC_PARAM_DECL); +extern template const char* TcParser<3>::SingularVarint(PROTOBUF_TC_PARAM_DECL); +extern template const char* TcParser<4>::SingularVarint(PROTOBUF_TC_PARAM_DECL); +extern template const char* TcParser<5>::SingularVarint(PROTOBUF_TC_PARAM_DECL); +extern template const char* TcParserBase::RepeatedVarint(PROTOBUF_TC_PARAM_DECL); +extern template const char* TcParserBase::PackedVarint(PROTOBUF_TC_PARAM_DECL); +extern template const char* TcParser<1>::SingularVarint(PROTOBUF_TC_PARAM_DECL); +extern template const char* TcParser<2>::SingularVarint(PROTOBUF_TC_PARAM_DECL); +extern template const char* TcParser<3>::SingularVarint(PROTOBUF_TC_PARAM_DECL); +extern template const char* TcParser<4>::SingularVarint(PROTOBUF_TC_PARAM_DECL); +extern template const char* TcParser<5>::SingularVarint(PROTOBUF_TC_PARAM_DECL); +extern template const char* TcParserBase::RepeatedVarint(PROTOBUF_TC_PARAM_DECL); +extern template const char* TcParserBase::PackedVarint(PROTOBUF_TC_PARAM_DECL); +extern template const char* TcParser<1>::SingularVarint(PROTOBUF_TC_PARAM_DECL); +extern template const char* 
TcParser<2>::SingularVarint(PROTOBUF_TC_PARAM_DECL); +extern template const char* TcParser<3>::SingularVarint(PROTOBUF_TC_PARAM_DECL); +extern template const char* TcParser<4>::SingularVarint(PROTOBUF_TC_PARAM_DECL); +extern template const char* TcParser<5>::SingularVarint(PROTOBUF_TC_PARAM_DECL); +extern template const char* TcParserBase::RepeatedVarint(PROTOBUF_TC_PARAM_DECL); +extern template const char* TcParserBase::PackedVarint(PROTOBUF_TC_PARAM_DECL); +extern template const char* TcParserBase::SingularString(PROTOBUF_TC_PARAM_DECL); +extern template const char* TcParserBase::RepeatedString(PROTOBUF_TC_PARAM_DECL); +extern template const char* TcParserBase::SingularString(PROTOBUF_TC_PARAM_DECL); +extern template const char* TcParserBase::RepeatedString(PROTOBUF_TC_PARAM_DECL); +extern template const char* TcParserBase::SingularString(PROTOBUF_TC_PARAM_DECL); +extern template const char* TcParserBase::RepeatedString(PROTOBUF_TC_PARAM_DECL); #endif // clang-format on diff --git a/src/google/protobuf/generated_message_tctable_lite.cc b/src/google/protobuf/generated_message_tctable_lite.cc index 9eb5d5a31e..f8a09f4e8e 100644 --- a/src/google/protobuf/generated_message_tctable_lite.cc +++ b/src/google/protobuf/generated_message_tctable_lite.cc @@ -77,7 +77,7 @@ const char* TcParser::SingularFixed(PROTOBUF_TC_PARAM_DECL) { return table->fallback(PROTOBUF_TC_PARAM_PASS); } ptr += sizeof(TagType); // Consume tag - hasbits |= (1 << data.hasbit_idx()); + hasbits |= (uint64_t{1} << data.hasbit_idx()); std::memcpy(Offset(msg, data.offset()), ptr, sizeof(LayoutType)); ptr += sizeof(LayoutType); // TailCall syncs any pending hasbits: @@ -140,6 +140,300 @@ const char* TcParserBase::PackedFixed(PROTOBUF_TC_PARAM_DECL) { static_cast*>(&field)); } +////////////////////////////////////////////////////////////////////////////// +// Varint fields +////////////////////////////////////////////////////////////////////////////// + +namespace { + +inline PROTOBUF_ALWAYS_INLINE 
std::pair +Parse64FallbackPair(const char* p, int64_t res1) { + auto ptr = reinterpret_cast(p); + + // The algorithm relies on sign extension for each byte to set all high bits + // when the varint continues. It also relies on asserting all of the lower + // bits for each successive byte read. This allows the result to be aggregated + // using a bitwise AND. For example: + // + // 8 1 64 57 ... 24 17 16 9 8 1 + // ptr[0] = 1aaa aaaa ; res1 = 1111 1111 ... 1111 1111 1111 1111 1aaa aaaa + // ptr[1] = 1bbb bbbb ; res2 = 1111 1111 ... 1111 1111 11bb bbbb b111 1111 + // ptr[2] = 1ccc cccc ; res3 = 0000 0000 ... 000c cccc cc11 1111 1111 1111 + // --------------------------------------------- + // res1 & res2 & res3 = 0000 0000 ... 000c cccc ccbb bbbb baaa aaaa + // + // On x86-64, a shld from a single register filled with enough 1s in the high + // bits can accomplish all this in one instruction. It so happens that res1 + // has 57 high bits of ones, which is enough for the largest shift done. + GOOGLE_DCHECK_EQ(res1 >> 7, -1); + uint64_t ones = res1; // save the high 1 bits from res1 (input to SHLD) + uint64_t byte; // the "next" 7-bit chunk, shifted (result from SHLD) + int64_t res2, res3; // accumulated result chunks +#define SHLD(n) byte = ((byte << (n * 7)) | (ones >> (64 - (n * 7)))) + + int sign_bit; +#if defined(__GCC_ASM_FLAG_OUTPUTS__) && defined(__x86_64__) + // For the first two rounds (ptr[1] and ptr[2]), micro benchmarks show a + // substantial improvement from capturing the sign from the condition code + // register on x86-64. 
+#define SHLD_SIGN(n) \ + asm("shldq %3, %2, %1" \ + : "=@ccs"(sign_bit), "+r"(byte) \ + : "r"(ones), "i"(n * 7)) +#else + // Generic fallback: +#define SHLD_SIGN(n) \ + do { \ + SHLD(n); \ + sign_bit = static_cast(byte) < 0; \ + } while (0) +#endif + + byte = ptr[1]; + SHLD_SIGN(1); + res2 = byte; + if (!sign_bit) goto done2; + byte = ptr[2]; + SHLD_SIGN(2); + res3 = byte; + if (!sign_bit) goto done3; + +#undef SHLD_SIGN + + // For the remainder of the chunks, check the sign of the AND result. + byte = ptr[3]; + SHLD(3); + res1 &= byte; + if (res1 >= 0) goto done4; + byte = ptr[4]; + SHLD(4); + res2 &= byte; + if (res2 >= 0) goto done5; + byte = ptr[5]; + SHLD(5); + res3 &= byte; + if (res3 >= 0) goto done6; + byte = ptr[6]; + SHLD(6); + res1 &= byte; + if (res1 >= 0) goto done7; + byte = ptr[7]; + SHLD(7); + res2 &= byte; + if (res2 >= 0) goto done8; + byte = ptr[8]; + SHLD(8); + res3 &= byte; + if (res3 >= 0) goto done9; + +#undef SHLD + + // For valid 64bit varints, the 10th byte/ptr[9] should be exactly 1. In this + // case, the continuation bit of ptr[8] already set the top bit of res3 + // correctly, so all we have to do is check that the expected case is true. + byte = ptr[9]; + if (PROTOBUF_PREDICT_TRUE(byte == 1)) goto done10; + + // A value of 0, however, represents an over-serialized varint. This case + // should not happen, but if does (say, due to a nonconforming serializer), + // deassert the continuation bit that came from ptr[8]. + if (byte == 0) { + res3 ^= static_cast(1) << 63; + goto done10; + } + + // If the 10th byte/ptr[9] itself has any other value, then it is too big to + // fit in 64 bits. If the continue bit is set, it is an unterminated varint. 
+ return {nullptr, 0}; + +#define DONE(n) done##n : return {p + n, res1 & res2 & res3}; +done2: + return {p + 2, res1 & res2}; + DONE(3) + DONE(4) + DONE(5) + DONE(6) + DONE(7) + DONE(8) + DONE(9) + DONE(10) +#undef DONE +} + +inline PROTOBUF_ALWAYS_INLINE const char* ParseVarint(const char* p, + uint64_t* value) { + int64_t byte = static_cast(*p); + if (PROTOBUF_PREDICT_TRUE(byte >= 0)) { + *value = byte; + return p + 1; + } else { + auto tmp = Parse64FallbackPair(p, byte); + if (PROTOBUF_PREDICT_TRUE(tmp.first)) *value = tmp.second; + return tmp.first; + } +} + +} // namespace + +template +template +const char* TcParser::SingularVarint(PROTOBUF_TC_PARAM_DECL) { + if (PROTOBUF_PREDICT_FALSE(static_cast(data.coded_tag()) != 0)) { + return table->fallback(PROTOBUF_TC_PARAM_PASS); + } + ptr += sizeof(TagType); // Consume tag + hasbits |= (uint64_t{1} << data.hasbit_idx()); + uint64_t tmp; + ptr = ParseVarint(ptr, &tmp); + if (ptr == nullptr) { + return Error(PROTOBUF_TC_PARAM_PASS); + } + RefAt(msg, data.offset()) = static_cast( + zigzag ? google::protobuf::internal::WireFormatLite::ZigZagDecode64(tmp) : tmp); + PROTOBUF_MUSTTAIL return TailCall(PROTOBUF_TC_PARAM_PASS); +} + +template +PROTOBUF_NOINLINE const char* TcParserBase::RepeatedVarint( + PROTOBUF_TC_PARAM_DECL) { + if (PROTOBUF_PREDICT_FALSE(static_cast(data.coded_tag()) != 0)) { + // Try parsing as non-packed repeated: + InvertPacked(data); + if (static_cast(data.coded_tag()) == 0) { + return PackedVarint(PROTOBUF_TC_PARAM_PASS); + } else { + return table->fallback(PROTOBUF_TC_PARAM_PASS); + } + } + auto& field = RefAt>(msg, data.offset()); + auto expected_tag = UnalignedLoad(ptr); + do { + ptr += sizeof(TagType); + uint64_t tmp; + ptr = ParseVarint(ptr, &tmp); + if (ptr == nullptr) { + return Error(PROTOBUF_TC_PARAM_PASS); + } + field.Add(zigzag ? 
google::protobuf::internal::WireFormatLite::ZigZagDecode64(tmp) + : tmp); + if (!ctx->DataAvailable(ptr)) { + break; + } + } while (UnalignedLoad(ptr) == expected_tag); + return Return(PROTOBUF_TC_PARAM_PASS); +} + +template +PROTOBUF_NOINLINE const char* TcParserBase::PackedVarint( + PROTOBUF_TC_PARAM_DECL) { + if (PROTOBUF_PREDICT_FALSE(static_cast(data.coded_tag()) != 0)) { + InvertPacked(data); + if (static_cast(data.coded_tag()) == 0) { + return RepeatedVarint(PROTOBUF_TC_PARAM_PASS); + } else { + return table->fallback(PROTOBUF_TC_PARAM_PASS); + } + } + ptr += sizeof(TagType); + // Since ctx->ReadPackedVarint does not use TailCall or Return, sync any + // pending hasbits now: + SyncHasbits(msg, hasbits, table); + auto* field = &RefAt>(msg, data.offset()); + return ctx->ReadPackedVarint(ptr, [field](uint64_t varint) { + FieldType val; + if (zigzag) { + if (sizeof(FieldType) == 8) { + val = WireFormatLite::ZigZagDecode64(varint); + } else { + val = WireFormatLite::ZigZagDecode32(varint); + } + } else { + val = varint; + } + field->Add(val); + }); +} + +////////////////////////////////////////////////////////////////////////////// +// String/bytes fields +////////////////////////////////////////////////////////////////////////////// + +// Defined in wire_format_lite.cc +void PrintUTF8ErrorLog(const char* field_name, const char* operation_str, + bool emit_stacktrace); + +namespace { + +PROTOBUF_NOINLINE +const char* SingularStringParserFallback(ArenaStringPtr* s, const char* ptr, + EpsCopyInputStream* stream) { + int size = ReadSize(&ptr); + if (!ptr) return nullptr; + return stream->ReadString( + ptr, size, s->MutableNoArenaNoDefault(&GetEmptyStringAlreadyInited())); +} + +} // namespace + +template +const char* TcParserBase::SingularString(PROTOBUF_TC_PARAM_DECL) { + if (PROTOBUF_PREDICT_FALSE(static_cast(data.coded_tag()) != 0)) { + return table->fallback(PROTOBUF_TC_PARAM_PASS); + } + ptr += sizeof(TagType); + hasbits |= (uint64_t{1} << data.hasbit_idx()); + 
auto& field = RefAt(msg, data.offset()); + auto arena = ctx->data().arena; + if (arena) { + ptr = ctx->ReadArenaString(ptr, &field, arena); + } else { + ptr = SingularStringParserFallback(&field, ptr, ctx); + } + if (ptr == nullptr) return Error(PROTOBUF_TC_PARAM_PASS); + switch (utf8) { + case kNoUtf8: +#ifdef NDEBUG + case kUtf8ValidateOnly: +#endif + return Return(PROTOBUF_TC_PARAM_PASS); + default: + if (PROTOBUF_PREDICT_TRUE(IsStructurallyValidUTF8(field.Get()))) { + return Return(PROTOBUF_TC_PARAM_PASS); + } + PrintUTF8ErrorLog("unknown", "parsing", false); + return utf8 == kUtf8 ? Error(PROTOBUF_TC_PARAM_PASS) + : Return(PROTOBUF_TC_PARAM_PASS); + } +} + +template +const char* TcParserBase::RepeatedString(PROTOBUF_TC_PARAM_DECL) { + if (PROTOBUF_PREDICT_FALSE(static_cast(data.coded_tag()) != 0)) { + return table->fallback(PROTOBUF_TC_PARAM_PASS); + } + auto expected_tag = UnalignedLoad(ptr); + auto& field = RefAt>(msg, data.offset()); + do { + ptr += sizeof(TagType); + std::string* str = field.Add(); + ptr = InlineGreedyStringParser(str, ptr, ctx); + if (ptr == nullptr) { + return Error(PROTOBUF_TC_PARAM_PASS); + } + if (utf8 != kNoUtf8) { + if (PROTOBUF_PREDICT_FALSE(!IsStructurallyValidUTF8(*str))) { + PrintUTF8ErrorLog("unknown", "parsing", false); + if (utf8 == kUtf8) return Error(PROTOBUF_TC_PARAM_PASS); + } + } + if (!ctx->DataAvailable(ptr)) break; + } while (UnalignedLoad(ptr) == expected_tag); + return Return(PROTOBUF_TC_PARAM_PASS); +} + #define PROTOBUF_TCT_SOURCE #include diff --git a/src/google/protobuf/parse_context.h b/src/google/protobuf/parse_context.h index 14748f6659..d858191a88 100644 --- a/src/google/protobuf/parse_context.h +++ b/src/google/protobuf/parse_context.h @@ -166,6 +166,10 @@ class PROTOBUF_EXPORT EpsCopyInputStream { } return AppendStringFallback(ptr, size, s); } + // Implemented in arenastring.cc + PROTOBUF_MUST_USE_RESULT const char* ReadArenaString(const char* ptr, + ArenaStringPtr* s, + Arena* arena); template 
PROTOBUF_MUST_USE_RESULT const char* ReadRepeatedFixed(const char* ptr, diff --git a/src/google/protobuf/port_def.inc b/src/google/protobuf/port_def.inc index 75c4adcfde..ca73a4ae15 100644 --- a/src/google/protobuf/port_def.inc +++ b/src/google/protobuf/port_def.inc @@ -183,7 +183,8 @@ #ifdef PROTOBUF_TAILCALL #error PROTOBUF_TAILCALL was previously defined #endif -#if __has_cpp_attribute(clang::musttail) && !defined(_ARCH_PPC) +#if __has_cpp_attribute(clang::musttail) && \ + !defined(_ARCH_PPC) && !defined(__wasm__) # ifndef PROTO2_OPENSOURCE // Compilation fails on powerpc64le: b/187985113 # endif @@ -408,6 +409,14 @@ #endif # define PROTOBUF_MUST_USE_RESULT +#ifdef PROTOBUF_MUST_USE_EXTRACT_RESULT +#error PROTOBUF_MUST_USE_EXTRACT_RESULT was previously defined +#endif + +#ifdef PROTOBUF_FORCE_COPY_IN_RELEASE +#error PROTOBUF_FORCE_COPY_IN_RELEASE was previously defined +#endif + #ifdef PROTOBUF_FORCE_COPY_IN_SWAP #error PROTOBUF_FORCE_COPY_IN_SWAP was previously defined #endif diff --git a/src/google/protobuf/port_undef.inc b/src/google/protobuf/port_undef.inc index 470547e31f..4e956d4ed1 100644 --- a/src/google/protobuf/port_undef.inc +++ b/src/google/protobuf/port_undef.inc @@ -61,6 +61,8 @@ #undef PROTOBUF_EXPORT #undef PROTOC_EXPORT #undef PROTOBUF_MUST_USE_RESULT +#undef PROTOBUF_MUST_USE_EXTRACT_RESULT +#undef PROTOBUF_FORCE_COPY_IN_RELEASE #undef PROTOBUF_FORCE_COPY_IN_SWAP #undef PROTOBUF_NAMESPACE_OPEN #undef PROTOBUF_NAMESPACE_CLOSE diff --git a/src/google/protobuf/repeated_field.h b/src/google/protobuf/repeated_field.h index a32f2d8657..3e3f601d7e 100644 --- a/src/google/protobuf/repeated_field.h +++ b/src/google/protobuf/repeated_field.h @@ -654,6 +654,14 @@ class PROTOBUF_EXPORT RepeatedPtrFieldBase { int Capacity() const; + template + static inline typename TypeHandler::Type* copy( + typename TypeHandler::Type* value) { + auto* new_value = TypeHandler::NewFromPrototype(value, nullptr); + TypeHandler::Merge(*value, new_value); + return new_value; + 
} + // Used for constructing iterators. void* const* raw_data() const; void** raw_mutable_data() const; @@ -2047,14 +2055,15 @@ inline typename TypeHandler::Type* RepeatedPtrFieldBase::ReleaseLastInternal( typename TypeHandler::Type* result = UnsafeArenaReleaseLast(); // Now perform a copy if we're on an arena. Arena* arena = GetArena(); - if (arena == NULL) { - return result; - } else { - typename TypeHandler::Type* new_result = - TypeHandler::NewFromPrototype(result, NULL); - TypeHandler::Merge(*result, new_result); - return new_result; - } + + typename TypeHandler::Type* new_result; +#ifdef PROTOBUF_FORCE_COPY_IN_RELEASE + new_result = copy(result); + if (arena == nullptr) delete result; +#else // PROTOBUF_FORCE_COPY_IN_RELEASE + new_result = (arena == nullptr) ? result : copy(result); +#endif // !PROTOBUF_FORCE_COPY_IN_RELEASE + return new_result; } // ReleaseLast() for types that *do not* implement merge/copy behavior -- this @@ -2064,7 +2073,7 @@ inline typename TypeHandler::Type* RepeatedPtrFieldBase::ReleaseLastInternal( template inline typename TypeHandler::Type* RepeatedPtrFieldBase::ReleaseLastInternal( std::false_type) { - GOOGLE_DCHECK(GetArena() == NULL) + GOOGLE_DCHECK(GetArena() == nullptr) << "ReleaseLast() called on a RepeatedPtrField that is on an arena, " << "with a type that does not implement MergeFrom. This is unsafe; " << "please implement MergeFrom for your type."; @@ -2254,7 +2263,7 @@ inline void RepeatedPtrField::DeleteSubrange(int start, int num) { for (int i = 0; i < num; ++i) { RepeatedPtrFieldBase::Delete(start + i); } - ExtractSubrange(start, num, NULL); + UnsafeArenaExtractSubrange(start, num, nullptr); } template @@ -2274,28 +2283,45 @@ inline void RepeatedPtrField::ExtractSubrangeInternal( GOOGLE_DCHECK_GE(num, 0); GOOGLE_DCHECK_LE(start + num, size()); - if (num > 0) { - // Save the values of the removed elements if requested. 
- if (elements != NULL) { - if (GetArena() != NULL) { - // If we're on an arena, we perform a copy for each element so that the - // returned elements are heap-allocated. - for (int i = 0; i < num; ++i) { - Element* element = - RepeatedPtrFieldBase::Mutable(i + start); - typename TypeHandler::Type* new_value = - TypeHandler::NewFromPrototype(element, NULL); - TypeHandler::Merge(*element, new_value); - elements[i] = new_value; - } - } else { - for (int i = 0; i < num; ++i) { - elements[i] = RepeatedPtrFieldBase::Mutable(i + start); - } - } - } + if (num == 0) return; + +#ifdef PROTOBUF_MUST_USE_EXTRACT_RESULT + GOOGLE_DCHECK_NE(elements, nullptr) + << "Releasing elements without transferring ownership is an unsafe " + "operation. Use UnsafeArenaExtractSubrange."; +#endif + if (elements == nullptr) { CloseGap(start, num); + return; + } + + Arena* arena = GetArena(); +#ifdef PROTOBUF_FORCE_COPY_IN_RELEASE + // Always copy. + for (int i = 0; i < num; ++i) { + elements[i] = copy( + RepeatedPtrFieldBase::Mutable(i + start)); + } + if (arena == nullptr) { + for (int i = 0; i < num; ++i) { + delete RepeatedPtrFieldBase::Mutable(i + start); + } + } +#else // PROTOBUF_FORCE_COPY_IN_RELEASE + // If we're on an arena, we perform a copy for each element so that the + // returned elements are heap-allocated. Otherwise, just forward it. 
+ if (arena != nullptr) { + for (int i = 0; i < num; ++i) { + elements[i] = copy( + RepeatedPtrFieldBase::Mutable(i + start)); + } + } else { + for (int i = 0; i < num; ++i) { + elements[i] = RepeatedPtrFieldBase::Mutable(i + start); + } } +#endif // !PROTOBUF_FORCE_COPY_IN_RELEASE + CloseGap(start, num); } // ExtractSubrange() implementation for types that do not implement merge/copy diff --git a/src/google/protobuf/source_context.pb.cc b/src/google/protobuf/source_context.pb.cc index 7363c3d6f9..3425dd48bd 100644 --- a/src/google/protobuf/source_context.pb.cc +++ b/src/google/protobuf/source_context.pb.cc @@ -180,7 +180,7 @@ failure: (void) cached_has_bits; // string file_name = 1; - if (!this->file_name().empty()) { + if (!this->_internal_file_name().empty()) { ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::VerifyUtf8String( this->_internal_file_name().data(), static_cast(this->_internal_file_name().length()), ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::SERIALIZE, @@ -206,7 +206,7 @@ size_t SourceContext::ByteSizeLong() const { (void) cached_has_bits; // string file_name = 1; - if (!this->file_name().empty()) { + if (!this->_internal_file_name().empty()) { total_size += 1 + ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::StringSize( this->_internal_file_name()); @@ -240,7 +240,7 @@ void SourceContext::MergeFrom(const SourceContext& from) { ::PROTOBUF_NAMESPACE_ID::uint32 cached_has_bits = 0; (void) cached_has_bits; - if (!from.file_name().empty()) { + if (!from._internal_file_name().empty()) { _internal_set_file_name(from._internal_file_name()); } _internal_metadata_.MergeFrom<::PROTOBUF_NAMESPACE_ID::UnknownFieldSet>(from._internal_metadata_); diff --git a/src/google/protobuf/stubs/substitute.cc b/src/google/protobuf/stubs/substitute.cc index 92107416eb..d301682ee3 100644 --- a/src/google/protobuf/stubs/substitute.cc +++ b/src/google/protobuf/stubs/substitute.cc @@ -52,15 +52,15 @@ static int CountSubstituteArgs(const SubstituteArg* const* 
args_array) { return count; } -std::string Substitute(const char* format, const SubstituteArg& arg0, +std::string Substitute(const std::string& format, const SubstituteArg& arg0, const SubstituteArg& arg1, const SubstituteArg& arg2, const SubstituteArg& arg3, const SubstituteArg& arg4, const SubstituteArg& arg5, const SubstituteArg& arg6, const SubstituteArg& arg7, const SubstituteArg& arg8, const SubstituteArg& arg9) { std::string result; - SubstituteAndAppend(&result, format, arg0, arg1, arg2, arg3, arg4, - arg5, arg6, arg7, arg8, arg9); + SubstituteAndAppend(&result, format.c_str(), arg0, arg1, arg2, arg3, arg4, + arg5, arg6, arg7, arg8, arg9); return result; } diff --git a/src/google/protobuf/stubs/substitute.h b/src/google/protobuf/stubs/substitute.h index d4e72e1c51..0f851de096 100644 --- a/src/google/protobuf/stubs/substitute.h +++ b/src/google/protobuf/stubs/substitute.h @@ -31,10 +31,12 @@ // Author: kenton@google.com (Kenton Varda) // from google3/strings/substitute.h -#include #include +#include #include +#include + #ifndef GOOGLE_PROTOBUF_STUBS_SUBSTITUTE_H_ #define GOOGLE_PROTOBUF_STUBS_SUBSTITUTE_H_ @@ -92,6 +94,8 @@ class SubstituteArg { : text_(value), size_(strlen(text_)) {} inline SubstituteArg(const std::string& value) : text_(value.data()), size_(value.size()) {} + inline SubstituteArg(const StringPiece value) + : text_(value.data()), size_(value.size()) {} // Indicates that no argument was given. 
inline explicit SubstituteArg() @@ -140,7 +144,7 @@ class SubstituteArg { } // namespace internal PROTOBUF_EXPORT std::string Substitute( - const char* format, + const std::string& format, const internal::SubstituteArg& arg0 = internal::SubstituteArg(), const internal::SubstituteArg& arg1 = internal::SubstituteArg(), const internal::SubstituteArg& arg2 = internal::SubstituteArg(), diff --git a/src/google/protobuf/timestamp.pb.cc b/src/google/protobuf/timestamp.pb.cc index 8441f47144..f87380c292 100644 --- a/src/google/protobuf/timestamp.pb.cc +++ b/src/google/protobuf/timestamp.pb.cc @@ -189,13 +189,13 @@ failure: (void) cached_has_bits; // int64 seconds = 1; - if (this->seconds() != 0) { + if (this->_internal_seconds() != 0) { target = stream->EnsureSpace(target); target = ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::WriteInt64ToArray(1, this->_internal_seconds(), target); } // int32 nanos = 2; - if (this->nanos() != 0) { + if (this->_internal_nanos() != 0) { target = stream->EnsureSpace(target); target = ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::WriteInt32ToArray(2, this->_internal_nanos(), target); } @@ -217,14 +217,14 @@ size_t Timestamp::ByteSizeLong() const { (void) cached_has_bits; // int64 seconds = 1; - if (this->seconds() != 0) { + if (this->_internal_seconds() != 0) { total_size += 1 + ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::Int64Size( this->_internal_seconds()); } // int32 nanos = 2; - if (this->nanos() != 0) { + if (this->_internal_nanos() != 0) { total_size += 1 + ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::Int32Size( this->_internal_nanos()); @@ -258,10 +258,10 @@ void Timestamp::MergeFrom(const Timestamp& from) { ::PROTOBUF_NAMESPACE_ID::uint32 cached_has_bits = 0; (void) cached_has_bits; - if (from.seconds() != 0) { + if (from._internal_seconds() != 0) { _internal_set_seconds(from._internal_seconds()); } - if (from.nanos() != 0) { + if (from._internal_nanos() != 0) { _internal_set_nanos(from._internal_nanos()); } 
_internal_metadata_.MergeFrom<::PROTOBUF_NAMESPACE_ID::UnknownFieldSet>(from._internal_metadata_); diff --git a/src/google/protobuf/type.pb.cc b/src/google/protobuf/type.pb.cc index 04b2b5f6d9..06d8d7db48 100644 --- a/src/google/protobuf/type.pb.cc +++ b/src/google/protobuf/type.pb.cc @@ -527,7 +527,7 @@ failure: (void) cached_has_bits; // string name = 1; - if (!this->name().empty()) { + if (!this->_internal_name().empty()) { ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::VerifyUtf8String( this->_internal_name().data(), static_cast(this->_internal_name().length()), ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::SERIALIZE, @@ -563,7 +563,7 @@ failure: } // .google.protobuf.SourceContext source_context = 5; - if (this->has_source_context()) { + if (this->_internal_has_source_context()) { target = stream->EnsureSpace(target); target = ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite:: InternalWriteMessage( @@ -571,7 +571,7 @@ failure: } // .google.protobuf.Syntax syntax = 6; - if (this->syntax() != 0) { + if (this->_internal_syntax() != 0) { target = stream->EnsureSpace(target); target = ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::WriteEnumToArray( 6, this->_internal_syntax(), target); @@ -616,21 +616,21 @@ size_t Type::ByteSizeLong() const { } // string name = 1; - if (!this->name().empty()) { + if (!this->_internal_name().empty()) { total_size += 1 + ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::StringSize( this->_internal_name()); } // .google.protobuf.SourceContext source_context = 5; - if (this->has_source_context()) { + if (this->_internal_has_source_context()) { total_size += 1 + ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::MessageSize( *source_context_); } // .google.protobuf.Syntax syntax = 6; - if (this->syntax() != 0) { + if (this->_internal_syntax() != 0) { total_size += 1 + ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::EnumSize(this->_internal_syntax()); } @@ -666,13 +666,13 @@ void Type::MergeFrom(const Type& from) { 
fields_.MergeFrom(from.fields_); oneofs_.MergeFrom(from.oneofs_); options_.MergeFrom(from.options_); - if (!from.name().empty()) { + if (!from._internal_name().empty()) { _internal_set_name(from._internal_name()); } - if (from.has_source_context()) { + if (from._internal_has_source_context()) { _internal_mutable_source_context()->PROTOBUF_NAMESPACE_ID::SourceContext::MergeFrom(from._internal_source_context()); } - if (from.syntax() != 0) { + if (from._internal_syntax() != 0) { _internal_set_syntax(from._internal_syntax()); } _internal_metadata_.MergeFrom<::PROTOBUF_NAMESPACE_ID::UnknownFieldSet>(from._internal_metadata_); @@ -934,27 +934,27 @@ failure: (void) cached_has_bits; // .google.protobuf.Field.Kind kind = 1; - if (this->kind() != 0) { + if (this->_internal_kind() != 0) { target = stream->EnsureSpace(target); target = ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::WriteEnumToArray( 1, this->_internal_kind(), target); } // .google.protobuf.Field.Cardinality cardinality = 2; - if (this->cardinality() != 0) { + if (this->_internal_cardinality() != 0) { target = stream->EnsureSpace(target); target = ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::WriteEnumToArray( 2, this->_internal_cardinality(), target); } // int32 number = 3; - if (this->number() != 0) { + if (this->_internal_number() != 0) { target = stream->EnsureSpace(target); target = ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::WriteInt32ToArray(3, this->_internal_number(), target); } // string name = 4; - if (!this->name().empty()) { + if (!this->_internal_name().empty()) { ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::VerifyUtf8String( this->_internal_name().data(), static_cast(this->_internal_name().length()), ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::SERIALIZE, @@ -964,7 +964,7 @@ failure: } // string type_url = 6; - if (!this->type_url().empty()) { + if (!this->_internal_type_url().empty()) { ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::VerifyUtf8String( 
this->_internal_type_url().data(), static_cast(this->_internal_type_url().length()), ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::SERIALIZE, @@ -974,13 +974,13 @@ failure: } // int32 oneof_index = 7; - if (this->oneof_index() != 0) { + if (this->_internal_oneof_index() != 0) { target = stream->EnsureSpace(target); target = ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::WriteInt32ToArray(7, this->_internal_oneof_index(), target); } // bool packed = 8; - if (this->packed() != 0) { + if (this->_internal_packed() != 0) { target = stream->EnsureSpace(target); target = ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::WriteBoolToArray(8, this->_internal_packed(), target); } @@ -994,7 +994,7 @@ failure: } // string json_name = 10; - if (!this->json_name().empty()) { + if (!this->_internal_json_name().empty()) { ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::VerifyUtf8String( this->_internal_json_name().data(), static_cast(this->_internal_json_name().length()), ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::SERIALIZE, @@ -1004,7 +1004,7 @@ failure: } // string default_value = 11; - if (!this->default_value().empty()) { + if (!this->_internal_default_value().empty()) { ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::VerifyUtf8String( this->_internal_default_value().data(), static_cast(this->_internal_default_value().length()), ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::SERIALIZE, @@ -1037,61 +1037,61 @@ size_t Field::ByteSizeLong() const { } // string name = 4; - if (!this->name().empty()) { + if (!this->_internal_name().empty()) { total_size += 1 + ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::StringSize( this->_internal_name()); } // string type_url = 6; - if (!this->type_url().empty()) { + if (!this->_internal_type_url().empty()) { total_size += 1 + ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::StringSize( this->_internal_type_url()); } // string json_name = 10; - if (!this->json_name().empty()) { + if 
(!this->_internal_json_name().empty()) { total_size += 1 + ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::StringSize( this->_internal_json_name()); } // string default_value = 11; - if (!this->default_value().empty()) { + if (!this->_internal_default_value().empty()) { total_size += 1 + ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::StringSize( this->_internal_default_value()); } // .google.protobuf.Field.Kind kind = 1; - if (this->kind() != 0) { + if (this->_internal_kind() != 0) { total_size += 1 + ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::EnumSize(this->_internal_kind()); } // .google.protobuf.Field.Cardinality cardinality = 2; - if (this->cardinality() != 0) { + if (this->_internal_cardinality() != 0) { total_size += 1 + ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::EnumSize(this->_internal_cardinality()); } // int32 number = 3; - if (this->number() != 0) { + if (this->_internal_number() != 0) { total_size += 1 + ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::Int32Size( this->_internal_number()); } // int32 oneof_index = 7; - if (this->oneof_index() != 0) { + if (this->_internal_oneof_index() != 0) { total_size += 1 + ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::Int32Size( this->_internal_oneof_index()); } // bool packed = 8; - if (this->packed() != 0) { + if (this->_internal_packed() != 0) { total_size += 1 + 1; } @@ -1124,31 +1124,31 @@ void Field::MergeFrom(const Field& from) { (void) cached_has_bits; options_.MergeFrom(from.options_); - if (!from.name().empty()) { + if (!from._internal_name().empty()) { _internal_set_name(from._internal_name()); } - if (!from.type_url().empty()) { + if (!from._internal_type_url().empty()) { _internal_set_type_url(from._internal_type_url()); } - if (!from.json_name().empty()) { + if (!from._internal_json_name().empty()) { _internal_set_json_name(from._internal_json_name()); } - if (!from.default_value().empty()) { + if (!from._internal_default_value().empty()) { 
_internal_set_default_value(from._internal_default_value()); } - if (from.kind() != 0) { + if (from._internal_kind() != 0) { _internal_set_kind(from._internal_kind()); } - if (from.cardinality() != 0) { + if (from._internal_cardinality() != 0) { _internal_set_cardinality(from._internal_cardinality()); } - if (from.number() != 0) { + if (from._internal_number() != 0) { _internal_set_number(from._internal_number()); } - if (from.oneof_index() != 0) { + if (from._internal_oneof_index() != 0) { _internal_set_oneof_index(from._internal_oneof_index()); } - if (from.packed() != 0) { + if (from._internal_packed() != 0) { _internal_set_packed(from._internal_packed()); } _internal_metadata_.MergeFrom<::PROTOBUF_NAMESPACE_ID::UnknownFieldSet>(from._internal_metadata_); @@ -1382,7 +1382,7 @@ failure: (void) cached_has_bits; // string name = 1; - if (!this->name().empty()) { + if (!this->_internal_name().empty()) { ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::VerifyUtf8String( this->_internal_name().data(), static_cast(this->_internal_name().length()), ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::SERIALIZE, @@ -1408,7 +1408,7 @@ failure: } // .google.protobuf.SourceContext source_context = 4; - if (this->has_source_context()) { + if (this->_internal_has_source_context()) { target = stream->EnsureSpace(target); target = ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite:: InternalWriteMessage( @@ -1416,7 +1416,7 @@ failure: } // .google.protobuf.Syntax syntax = 5; - if (this->syntax() != 0) { + if (this->_internal_syntax() != 0) { target = stream->EnsureSpace(target); target = ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::WriteEnumToArray( 5, this->_internal_syntax(), target); @@ -1453,21 +1453,21 @@ size_t Enum::ByteSizeLong() const { } // string name = 1; - if (!this->name().empty()) { + if (!this->_internal_name().empty()) { total_size += 1 + ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::StringSize( this->_internal_name()); } // 
.google.protobuf.SourceContext source_context = 4; - if (this->has_source_context()) { + if (this->_internal_has_source_context()) { total_size += 1 + ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::MessageSize( *source_context_); } // .google.protobuf.Syntax syntax = 5; - if (this->syntax() != 0) { + if (this->_internal_syntax() != 0) { total_size += 1 + ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::EnumSize(this->_internal_syntax()); } @@ -1502,13 +1502,13 @@ void Enum::MergeFrom(const Enum& from) { enumvalue_.MergeFrom(from.enumvalue_); options_.MergeFrom(from.options_); - if (!from.name().empty()) { + if (!from._internal_name().empty()) { _internal_set_name(from._internal_name()); } - if (from.has_source_context()) { + if (from._internal_has_source_context()) { _internal_mutable_source_context()->PROTOBUF_NAMESPACE_ID::SourceContext::MergeFrom(from._internal_source_context()); } - if (from.syntax() != 0) { + if (from._internal_syntax() != 0) { _internal_set_syntax(from._internal_syntax()); } _internal_metadata_.MergeFrom<::PROTOBUF_NAMESPACE_ID::UnknownFieldSet>(from._internal_metadata_); @@ -1681,7 +1681,7 @@ failure: (void) cached_has_bits; // string name = 1; - if (!this->name().empty()) { + if (!this->_internal_name().empty()) { ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::VerifyUtf8String( this->_internal_name().data(), static_cast(this->_internal_name().length()), ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::SERIALIZE, @@ -1691,7 +1691,7 @@ failure: } // int32 number = 2; - if (this->number() != 0) { + if (this->_internal_number() != 0) { target = stream->EnsureSpace(target); target = ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::WriteInt32ToArray(2, this->_internal_number(), target); } @@ -1728,14 +1728,14 @@ size_t EnumValue::ByteSizeLong() const { } // string name = 1; - if (!this->name().empty()) { + if (!this->_internal_name().empty()) { total_size += 1 + ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::StringSize( 
this->_internal_name()); } // int32 number = 2; - if (this->number() != 0) { + if (this->_internal_number() != 0) { total_size += 1 + ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::Int32Size( this->_internal_number()); @@ -1770,10 +1770,10 @@ void EnumValue::MergeFrom(const EnumValue& from) { (void) cached_has_bits; options_.MergeFrom(from.options_); - if (!from.name().empty()) { + if (!from._internal_name().empty()) { _internal_set_name(from._internal_name()); } - if (from.number() != 0) { + if (from._internal_number() != 0) { _internal_set_number(from._internal_number()); } _internal_metadata_.MergeFrom<::PROTOBUF_NAMESPACE_ID::UnknownFieldSet>(from._internal_metadata_); @@ -1944,7 +1944,7 @@ failure: (void) cached_has_bits; // string name = 1; - if (!this->name().empty()) { + if (!this->_internal_name().empty()) { ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::VerifyUtf8String( this->_internal_name().data(), static_cast(this->_internal_name().length()), ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::SERIALIZE, @@ -1954,7 +1954,7 @@ failure: } // .google.protobuf.Any value = 2; - if (this->has_value()) { + if (this->_internal_has_value()) { target = stream->EnsureSpace(target); target = ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite:: InternalWriteMessage( @@ -1978,14 +1978,14 @@ size_t Option::ByteSizeLong() const { (void) cached_has_bits; // string name = 1; - if (!this->name().empty()) { + if (!this->_internal_name().empty()) { total_size += 1 + ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::StringSize( this->_internal_name()); } // .google.protobuf.Any value = 2; - if (this->has_value()) { + if (this->_internal_has_value()) { total_size += 1 + ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::MessageSize( *value_); @@ -2019,10 +2019,10 @@ void Option::MergeFrom(const Option& from) { ::PROTOBUF_NAMESPACE_ID::uint32 cached_has_bits = 0; (void) cached_has_bits; - if (!from.name().empty()) { + if (!from._internal_name().empty()) { 
_internal_set_name(from._internal_name()); } - if (from.has_value()) { + if (from._internal_has_value()) { _internal_mutable_value()->PROTOBUF_NAMESPACE_ID::Any::MergeFrom(from._internal_value()); } _internal_metadata_.MergeFrom<::PROTOBUF_NAMESPACE_ID::UnknownFieldSet>(from._internal_metadata_); diff --git a/src/google/protobuf/type.pb.h b/src/google/protobuf/type.pb.h index 55edd948c3..f6c83a1cfd 100644 --- a/src/google/protobuf/type.pb.h +++ b/src/google/protobuf/type.pb.h @@ -1581,9 +1581,15 @@ inline PROTOBUF_NAMESPACE_ID::SourceContext* Type::release_source_context() { PROTOBUF_NAMESPACE_ID::SourceContext* temp = source_context_; source_context_ = nullptr; +#ifdef PROTOBUF_FORCE_COPY_IN_RELEASE + auto* old = reinterpret_cast<::PROTOBUF_NAMESPACE_ID::MessageLite*>(temp); + temp = ::PROTOBUF_NAMESPACE_ID::internal::DuplicateIfNonNull(temp); + if (GetArenaForAllocation() == nullptr) { delete old; } +#else // PROTOBUF_FORCE_COPY_IN_RELEASE if (GetArenaForAllocation() != nullptr) { temp = ::PROTOBUF_NAMESPACE_ID::internal::DuplicateIfNonNull(temp); } +#endif // !PROTOBUF_FORCE_COPY_IN_RELEASE return temp; } inline PROTOBUF_NAMESPACE_ID::SourceContext* Type::unsafe_arena_release_source_context() { @@ -2139,9 +2145,15 @@ inline PROTOBUF_NAMESPACE_ID::SourceContext* Enum::release_source_context() { PROTOBUF_NAMESPACE_ID::SourceContext* temp = source_context_; source_context_ = nullptr; +#ifdef PROTOBUF_FORCE_COPY_IN_RELEASE + auto* old = reinterpret_cast<::PROTOBUF_NAMESPACE_ID::MessageLite*>(temp); + temp = ::PROTOBUF_NAMESPACE_ID::internal::DuplicateIfNonNull(temp); + if (GetArenaForAllocation() == nullptr) { delete old; } +#else // PROTOBUF_FORCE_COPY_IN_RELEASE if (GetArenaForAllocation() != nullptr) { temp = ::PROTOBUF_NAMESPACE_ID::internal::DuplicateIfNonNull(temp); } +#endif // !PROTOBUF_FORCE_COPY_IN_RELEASE return temp; } inline PROTOBUF_NAMESPACE_ID::SourceContext* Enum::unsafe_arena_release_source_context() { @@ -2399,9 +2411,15 @@ inline 
PROTOBUF_NAMESPACE_ID::Any* Option::release_value() { PROTOBUF_NAMESPACE_ID::Any* temp = value_; value_ = nullptr; +#ifdef PROTOBUF_FORCE_COPY_IN_RELEASE + auto* old = reinterpret_cast<::PROTOBUF_NAMESPACE_ID::MessageLite*>(temp); + temp = ::PROTOBUF_NAMESPACE_ID::internal::DuplicateIfNonNull(temp); + if (GetArenaForAllocation() == nullptr) { delete old; } +#else // PROTOBUF_FORCE_COPY_IN_RELEASE if (GetArenaForAllocation() != nullptr) { temp = ::PROTOBUF_NAMESPACE_ID::internal::DuplicateIfNonNull(temp); } +#endif // !PROTOBUF_FORCE_COPY_IN_RELEASE return temp; } inline PROTOBUF_NAMESPACE_ID::Any* Option::unsafe_arena_release_value() { diff --git a/src/google/protobuf/wrappers.pb.cc b/src/google/protobuf/wrappers.pb.cc index 2732140153..998758121d 100644 --- a/src/google/protobuf/wrappers.pb.cc +++ b/src/google/protobuf/wrappers.pb.cc @@ -339,7 +339,7 @@ failure: (void) cached_has_bits; // double value = 1; - if (!(this->value() <= 0 && this->value() >= 0)) { + if (!(this->_internal_value() <= 0 && this->_internal_value() >= 0)) { target = stream->EnsureSpace(target); target = ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::WriteDoubleToArray(1, this->_internal_value(), target); } @@ -361,7 +361,7 @@ size_t DoubleValue::ByteSizeLong() const { (void) cached_has_bits; // double value = 1; - if (!(this->value() <= 0 && this->value() >= 0)) { + if (!(this->_internal_value() <= 0 && this->_internal_value() >= 0)) { total_size += 1 + 8; } @@ -393,7 +393,7 @@ void DoubleValue::MergeFrom(const DoubleValue& from) { ::PROTOBUF_NAMESPACE_ID::uint32 cached_has_bits = 0; (void) cached_has_bits; - if (!(from.value() <= 0 && from.value() >= 0)) { + if (!(from._internal_value() <= 0 && from._internal_value() >= 0)) { _internal_set_value(from._internal_value()); } _internal_metadata_.MergeFrom<::PROTOBUF_NAMESPACE_ID::UnknownFieldSet>(from._internal_metadata_); @@ -522,7 +522,7 @@ failure: (void) cached_has_bits; // float value = 1; - if (!(this->value() <= 0 && this->value() >= 
0)) { + if (!(this->_internal_value() <= 0 && this->_internal_value() >= 0)) { target = stream->EnsureSpace(target); target = ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::WriteFloatToArray(1, this->_internal_value(), target); } @@ -544,7 +544,7 @@ size_t FloatValue::ByteSizeLong() const { (void) cached_has_bits; // float value = 1; - if (!(this->value() <= 0 && this->value() >= 0)) { + if (!(this->_internal_value() <= 0 && this->_internal_value() >= 0)) { total_size += 1 + 4; } @@ -576,7 +576,7 @@ void FloatValue::MergeFrom(const FloatValue& from) { ::PROTOBUF_NAMESPACE_ID::uint32 cached_has_bits = 0; (void) cached_has_bits; - if (!(from.value() <= 0 && from.value() >= 0)) { + if (!(from._internal_value() <= 0 && from._internal_value() >= 0)) { _internal_set_value(from._internal_value()); } _internal_metadata_.MergeFrom<::PROTOBUF_NAMESPACE_ID::UnknownFieldSet>(from._internal_metadata_); @@ -705,7 +705,7 @@ failure: (void) cached_has_bits; // int64 value = 1; - if (this->value() != 0) { + if (this->_internal_value() != 0) { target = stream->EnsureSpace(target); target = ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::WriteInt64ToArray(1, this->_internal_value(), target); } @@ -727,7 +727,7 @@ size_t Int64Value::ByteSizeLong() const { (void) cached_has_bits; // int64 value = 1; - if (this->value() != 0) { + if (this->_internal_value() != 0) { total_size += 1 + ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::Int64Size( this->_internal_value()); @@ -761,7 +761,7 @@ void Int64Value::MergeFrom(const Int64Value& from) { ::PROTOBUF_NAMESPACE_ID::uint32 cached_has_bits = 0; (void) cached_has_bits; - if (from.value() != 0) { + if (from._internal_value() != 0) { _internal_set_value(from._internal_value()); } _internal_metadata_.MergeFrom<::PROTOBUF_NAMESPACE_ID::UnknownFieldSet>(from._internal_metadata_); @@ -890,7 +890,7 @@ failure: (void) cached_has_bits; // uint64 value = 1; - if (this->value() != 0) { + if (this->_internal_value() != 0) { target = 
stream->EnsureSpace(target); target = ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::WriteUInt64ToArray(1, this->_internal_value(), target); } @@ -912,7 +912,7 @@ size_t UInt64Value::ByteSizeLong() const { (void) cached_has_bits; // uint64 value = 1; - if (this->value() != 0) { + if (this->_internal_value() != 0) { total_size += 1 + ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::UInt64Size( this->_internal_value()); @@ -946,7 +946,7 @@ void UInt64Value::MergeFrom(const UInt64Value& from) { ::PROTOBUF_NAMESPACE_ID::uint32 cached_has_bits = 0; (void) cached_has_bits; - if (from.value() != 0) { + if (from._internal_value() != 0) { _internal_set_value(from._internal_value()); } _internal_metadata_.MergeFrom<::PROTOBUF_NAMESPACE_ID::UnknownFieldSet>(from._internal_metadata_); @@ -1075,7 +1075,7 @@ failure: (void) cached_has_bits; // int32 value = 1; - if (this->value() != 0) { + if (this->_internal_value() != 0) { target = stream->EnsureSpace(target); target = ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::WriteInt32ToArray(1, this->_internal_value(), target); } @@ -1097,7 +1097,7 @@ size_t Int32Value::ByteSizeLong() const { (void) cached_has_bits; // int32 value = 1; - if (this->value() != 0) { + if (this->_internal_value() != 0) { total_size += 1 + ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::Int32Size( this->_internal_value()); @@ -1131,7 +1131,7 @@ void Int32Value::MergeFrom(const Int32Value& from) { ::PROTOBUF_NAMESPACE_ID::uint32 cached_has_bits = 0; (void) cached_has_bits; - if (from.value() != 0) { + if (from._internal_value() != 0) { _internal_set_value(from._internal_value()); } _internal_metadata_.MergeFrom<::PROTOBUF_NAMESPACE_ID::UnknownFieldSet>(from._internal_metadata_); @@ -1260,7 +1260,7 @@ failure: (void) cached_has_bits; // uint32 value = 1; - if (this->value() != 0) { + if (this->_internal_value() != 0) { target = stream->EnsureSpace(target); target = ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::WriteUInt32ToArray(1, 
this->_internal_value(), target); } @@ -1282,7 +1282,7 @@ size_t UInt32Value::ByteSizeLong() const { (void) cached_has_bits; // uint32 value = 1; - if (this->value() != 0) { + if (this->_internal_value() != 0) { total_size += 1 + ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::UInt32Size( this->_internal_value()); @@ -1316,7 +1316,7 @@ void UInt32Value::MergeFrom(const UInt32Value& from) { ::PROTOBUF_NAMESPACE_ID::uint32 cached_has_bits = 0; (void) cached_has_bits; - if (from.value() != 0) { + if (from._internal_value() != 0) { _internal_set_value(from._internal_value()); } _internal_metadata_.MergeFrom<::PROTOBUF_NAMESPACE_ID::UnknownFieldSet>(from._internal_metadata_); @@ -1445,7 +1445,7 @@ failure: (void) cached_has_bits; // bool value = 1; - if (this->value() != 0) { + if (this->_internal_value() != 0) { target = stream->EnsureSpace(target); target = ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::WriteBoolToArray(1, this->_internal_value(), target); } @@ -1467,7 +1467,7 @@ size_t BoolValue::ByteSizeLong() const { (void) cached_has_bits; // bool value = 1; - if (this->value() != 0) { + if (this->_internal_value() != 0) { total_size += 1 + 1; } @@ -1499,7 +1499,7 @@ void BoolValue::MergeFrom(const BoolValue& from) { ::PROTOBUF_NAMESPACE_ID::uint32 cached_has_bits = 0; (void) cached_has_bits; - if (from.value() != 0) { + if (from._internal_value() != 0) { _internal_set_value(from._internal_value()); } _internal_metadata_.MergeFrom<::PROTOBUF_NAMESPACE_ID::UnknownFieldSet>(from._internal_metadata_); @@ -1635,7 +1635,7 @@ failure: (void) cached_has_bits; // string value = 1; - if (!this->value().empty()) { + if (!this->_internal_value().empty()) { ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::VerifyUtf8String( this->_internal_value().data(), static_cast(this->_internal_value().length()), ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::SERIALIZE, @@ -1661,7 +1661,7 @@ size_t StringValue::ByteSizeLong() const { (void) cached_has_bits; // string value = 
1; - if (!this->value().empty()) { + if (!this->_internal_value().empty()) { total_size += 1 + ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::StringSize( this->_internal_value()); @@ -1695,7 +1695,7 @@ void StringValue::MergeFrom(const StringValue& from) { ::PROTOBUF_NAMESPACE_ID::uint32 cached_has_bits = 0; (void) cached_has_bits; - if (!from.value().empty()) { + if (!from._internal_value().empty()) { _internal_set_value(from._internal_value()); } _internal_metadata_.MergeFrom<::PROTOBUF_NAMESPACE_ID::UnknownFieldSet>(from._internal_metadata_); @@ -1834,7 +1834,7 @@ failure: (void) cached_has_bits; // bytes value = 1; - if (!this->value().empty()) { + if (!this->_internal_value().empty()) { target = stream->WriteBytesMaybeAliased( 1, this->_internal_value(), target); } @@ -1856,7 +1856,7 @@ size_t BytesValue::ByteSizeLong() const { (void) cached_has_bits; // bytes value = 1; - if (!this->value().empty()) { + if (!this->_internal_value().empty()) { total_size += 1 + ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::BytesSize( this->_internal_value()); @@ -1890,7 +1890,7 @@ void BytesValue::MergeFrom(const BytesValue& from) { ::PROTOBUF_NAMESPACE_ID::uint32 cached_has_bits = 0; (void) cached_has_bits; - if (!from.value().empty()) { + if (!from._internal_value().empty()) { _internal_set_value(from._internal_value()); } _internal_metadata_.MergeFrom<::PROTOBUF_NAMESPACE_ID::UnknownFieldSet>(from._internal_metadata_);