diff --git a/CHANGES.txt b/CHANGES.txt index a7d74a7eee..aca85e807b 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,6 +1,7 @@ 2022-07-01 Unreleased version C++ + * cpp_generated_lib_linked support is removed in protoc * Reduced .pb.o object file size slightly by explicitly instantiating InternalMetadata templates in the runtime. * Add C++20 keywords guarded by PROTOBUF_FUTURE_CPP20_KEYWORDS @@ -11,6 +12,7 @@ * Hide C++ RepeatedField::UnsafeArenaSwap Kotlin + * Suppress deprecation warnings in Kotlin generated code. * Kotlin generated code comments now use kdoc format instead of javadoc. * Escape keywords in package names in proto generated code * Add Kotlin enum int value getters and setters @@ -26,6 +28,14 @@ Compiler * Print full path name of source .proto file on error +2022-08-09 version 21.5 (C++/Java/Python/PHP/Objective-C/C#/Ruby) + + PHP + * Added getContainingOneof and getRealContainingOneof to descriptor. + * fix PHP readonly legacy files for nested messages + + Python + * Fixed comparison of maps in Python. 2022-07-25 version 21.4 (C++/Java/Python/PHP/Objective-C/C#/Ruby) diff --git a/Protobuf.podspec b/Protobuf.podspec index e41f39a704..eb87a8f81a 100644 --- a/Protobuf.podspec +++ b/Protobuf.podspec @@ -5,7 +5,7 @@ # dependent projects use the :git notation to refer to the library. Pod::Spec.new do |s| s.name = 'Protobuf' - s.version = '3.21.4' + s.version = '3.21.5' s.summary = 'Protocol Buffers v.3 runtime library for Objective-C.' s.homepage = 'https://github.com/protocolbuffers/protobuf' s.license = 'BSD-3-Clause' diff --git a/csharp/Google.Protobuf.Tools.nuspec b/csharp/Google.Protobuf.Tools.nuspec index 90033cfae8..5b71f9f040 100644 --- a/csharp/Google.Protobuf.Tools.nuspec +++ b/csharp/Google.Protobuf.Tools.nuspec @@ -5,7 +5,7 @@ Google Protocol Buffers tools Tools for Protocol Buffers - Google's data interchange format. See project site for more info. - 3.21.4 + 3.21.5 Google Inc. 
protobuf-packages https://github.com/protocolbuffers/protobuf/blob/main/LICENSE diff --git a/csharp/build_release.sh b/csharp/build_release.sh index 93f318f388..f9fff223cb 100755 --- a/csharp/build_release.sh +++ b/csharp/build_release.sh @@ -12,6 +12,3 @@ dotnet nuget locals all --clear # Builds Google.Protobuf NuGet packages dotnet restore src/Google.Protobuf.sln dotnet pack -c Release src/Google.Protobuf.sln -p:ContinuousIntegrationBuild=true - -# This requires built protoc executables as specified in the nusepc -nuget pack Google.Protobuf.Tools.nuspec diff --git a/csharp/src/Google.Protobuf/Google.Protobuf.csproj b/csharp/src/Google.Protobuf/Google.Protobuf.csproj index c95ecad94e..eb5df6fa4d 100644 --- a/csharp/src/Google.Protobuf/Google.Protobuf.csproj +++ b/csharp/src/Google.Protobuf/Google.Protobuf.csproj @@ -4,7 +4,7 @@ C# runtime library for Protocol Buffers - Google's data interchange format. Copyright 2015, Google Inc. Google Protocol Buffers - 3.21.4 + 3.21.5 10.0 Google Inc. netstandard1.1;netstandard2.0;net45;net50 diff --git a/docs/options.md b/docs/options.md index 2175c2619e..d7c5cb96da 100644 --- a/docs/options.md +++ b/docs/options.md @@ -336,3 +336,7 @@ with info about your project (name and website) so we can add an entry for you. 1. Protoc-gen-referential-integrity * Website: https://github.com/ComponentCorp/protoc-gen-referential-integrity * Extension: 1149 + +1. 
Oclea Service Layer RPC + * Website: https://oclea.com/ + * Extension: 1150 diff --git a/examples/Makefile b/examples/Makefile index 1ed7f67fd1..2a64b64fe3 100644 --- a/examples/Makefile +++ b/examples/Makefile @@ -13,13 +13,14 @@ python: add_person_python list_people_python clean: rm -f add_person_cpp list_people_cpp add_person_java list_people_java add_person_python list_people_python - rm -f javac_middleman AddPerson*.class ListPeople*.class com/example/tutorial/*.class - rm -f protoc_middleman addressbook.pb.cc addressbook.pb.h addressbook_pb2.py com/example/tutorial/AddressBookProtos.java + rm -f javac_middleman AddPerson*.class ListPeople*.class com/example/tutorial/protos/*.class + rm -f protoc_middleman addressbook.pb.cc addressbook.pb.h addressbook_pb2.py com/example/tutorial/protos/*.java rm -f *.pyc rm -f go/tutorialpb/*.pb.go add_person_go list_people_go rm -f protoc_middleman_dart dart_tutorial/*.pb*.dart rmdir dart_tutorial 2>/dev/null || true rmdir tutorial 2>/dev/null || true + rmdir com/example/tutorial/protos 2>/dev/null || true rmdir com/example/tutorial 2>/dev/null || true rmdir com/example 2>/dev/null || true rmdir com 2>/dev/null || true @@ -63,7 +64,7 @@ list_people_gotest: go/tutorialpb/addressbook.pb.go cd go && go test ./cmd/list_people javac_middleman: AddPerson.java ListPeople.java protoc_middleman - javac -cp $$CLASSPATH AddPerson.java ListPeople.java com/example/tutorial/AddressBookProtos.java + javac -cp $$CLASSPATH AddPerson.java ListPeople.java com/example/tutorial/protos/*.java @touch javac_middleman add_person_java: javac_middleman diff --git a/java/README.md b/java/README.md index 5e3ded418f..3a022f0929 100644 --- a/java/README.md +++ b/java/README.md @@ -23,7 +23,7 @@ If you are using Maven, use the following: com.google.protobuf protobuf-java - 3.21.4 + 3.21.5 ``` @@ -37,7 +37,7 @@ protobuf-java-util package: com.google.protobuf protobuf-java-util - 3.21.4 + 3.21.5 ``` @@ -45,7 +45,7 @@ protobuf-java-util package: If you are 
using Gradle, add the following to your `build.gradle` file's dependencies: ``` - implementation 'com.google.protobuf:protobuf-java:3.21.4' + implementation 'com.google.protobuf:protobuf-java:3.21.5' ``` Again, be sure to check that the version number matches (or is newer than) the version number of protoc that you are using. diff --git a/java/bom/pom.xml b/java/bom/pom.xml index 4cccfb3545..4504729f9c 100644 --- a/java/bom/pom.xml +++ b/java/bom/pom.xml @@ -4,7 +4,7 @@ com.google.protobuf protobuf-bom - 3.21.4 + 3.21.5 pom Protocol Buffers [BOM] diff --git a/java/core/pom.xml b/java/core/pom.xml index d5eacd6a10..9315004516 100644 --- a/java/core/pom.xml +++ b/java/core/pom.xml @@ -4,7 +4,7 @@ com.google.protobuf protobuf-parent - 3.21.4 + 3.21.5 protobuf-java diff --git a/java/core/src/main/java/com/google/protobuf/CheckReturnValue.java b/java/core/src/main/java/com/google/protobuf/CheckReturnValue.java index 38c83d898c..99abc32888 100644 --- a/java/core/src/main/java/com/google/protobuf/CheckReturnValue.java +++ b/java/core/src/main/java/com/google/protobuf/CheckReturnValue.java @@ -41,13 +41,13 @@ import java.lang.annotation.Retention; import java.lang.annotation.Target; /** - * Indicates that the return value of the annotated method must be checked. An error is triggered - * when one of these methods is called but the result is not used. + * Indicates that the return value of the annotated method must be used. An error is triggered when + * one of these methods is called but the result is not used. * *

{@code @CheckReturnValue} may be applied to a class or package to indicate that all methods in - * that class or package must have their return values checked. For convenience, we provide an - * annotation, {@link CanIgnoreReturnValue}, to exempt specific methods or classes from this - * behavior. + * that class (including indirectly; that is, methods of inner classes within the annotated class) + * or package must have their return values used. For convenience, we provide an annotation, {@link + * CanIgnoreReturnValue}, to exempt specific methods or classes from this behavior. */ @Documented @Target({METHOD, CONSTRUCTOR, TYPE, PACKAGE}) diff --git a/java/core/src/main/java/com/google/protobuf/ExtensionRegistryLite.java b/java/core/src/main/java/com/google/protobuf/ExtensionRegistryLite.java index caa58e1ada..4b2aa07628 100644 --- a/java/core/src/main/java/com/google/protobuf/ExtensionRegistryLite.java +++ b/java/core/src/main/java/com/google/protobuf/ExtensionRegistryLite.java @@ -123,16 +123,15 @@ public class ExtensionRegistryLite { * ExtensionRegistry} (if the full (non-Lite) proto libraries are available). */ public static ExtensionRegistryLite getEmptyRegistry() { + if (!doFullRuntimeInheritanceCheck) { + return EMPTY_REGISTRY_LITE; + } ExtensionRegistryLite result = emptyRegistry; if (result == null) { synchronized (ExtensionRegistryLite.class) { result = emptyRegistry; if (result == null) { - result = - emptyRegistry = - doFullRuntimeInheritanceCheck - ? 
ExtensionRegistryFactory.createEmpty() - : EMPTY_REGISTRY_LITE; + result = emptyRegistry = ExtensionRegistryFactory.createEmpty(); } } } diff --git a/java/kotlin-lite/pom.xml b/java/kotlin-lite/pom.xml index 2dc430907e..03f28f5901 100644 --- a/java/kotlin-lite/pom.xml +++ b/java/kotlin-lite/pom.xml @@ -4,7 +4,7 @@ com.google.protobuf protobuf-parent - 3.21.4 + 3.21.5 protobuf-kotlin-lite diff --git a/java/kotlin/pom.xml b/java/kotlin/pom.xml index 7a350f7853..f2d034e9c8 100644 --- a/java/kotlin/pom.xml +++ b/java/kotlin/pom.xml @@ -4,7 +4,7 @@ com.google.protobuf protobuf-parent - 3.21.4 + 3.21.5 protobuf-kotlin diff --git a/java/lite.md b/java/lite.md index d4b4fc87b2..2445ec3a88 100644 --- a/java/lite.md +++ b/java/lite.md @@ -29,7 +29,7 @@ protobuf Java Lite runtime. If you are using Maven, include the following: com.google.protobuf protobuf-javalite - 3.21.4 + 3.21.5 ``` diff --git a/java/lite/pom.xml b/java/lite/pom.xml index 20381c65ba..c5ef4abc0d 100644 --- a/java/lite/pom.xml +++ b/java/lite/pom.xml @@ -4,7 +4,7 @@ com.google.protobuf protobuf-parent - 3.21.4 + 3.21.5 protobuf-javalite diff --git a/java/pom.xml b/java/pom.xml index 99abcf86a9..c086992432 100644 --- a/java/pom.xml +++ b/java/pom.xml @@ -4,7 +4,7 @@ com.google.protobuf protobuf-parent - 3.21.4 + 3.21.5 pom Protocol Buffers [Parent] diff --git a/java/util/pom.xml b/java/util/pom.xml index f634047c6d..987163247d 100644 --- a/java/util/pom.xml +++ b/java/util/pom.xml @@ -4,7 +4,7 @@ com.google.protobuf protobuf-parent - 3.21.4 + 3.21.5 protobuf-java-util diff --git a/java/util/src/main/java/com/google/protobuf/util/Timestamps.java b/java/util/src/main/java/com/google/protobuf/util/Timestamps.java index ea8a9334bc..0b09fccd2a 100644 --- a/java/util/src/main/java/com/google/protobuf/util/Timestamps.java +++ b/java/util/src/main/java/com/google/protobuf/util/Timestamps.java @@ -344,7 +344,7 @@ public final class Timestamps { if (date instanceof java.sql.Timestamp) { java.sql.Timestamp sqlTimestamp 
= (java.sql.Timestamp) date; long time = sqlTimestamp.getTime(); - long integralSeconds = (time < 0) ? time / 1000L - 1 : time / 1000L ; // truncate the fractional seconds + long integralSeconds = (time < 0 && time % 1000 != 0) ? time / 1000L - 1 : time / 1000L ; // truncate the fractional seconds return Timestamp.newBuilder() .setSeconds(integralSeconds) .setNanos(sqlTimestamp.getNanos()) diff --git a/java/util/src/test/java/com/google/protobuf/util/TimestampsTest.java b/java/util/src/test/java/com/google/protobuf/util/TimestampsTest.java index 7d0d821c3f..51f7a85b49 100644 --- a/java/util/src/test/java/com/google/protobuf/util/TimestampsTest.java +++ b/java/util/src/test/java/com/google/protobuf/util/TimestampsTest.java @@ -483,6 +483,13 @@ public class TimestampsTest { assertThat(Timestamps.toString(timestamp)).isEqualTo("1969-12-31T23:59:58.889Z"); } + @Test + public void testFromSqlTimestamp_beforeEpochWholeSecond() { + Date date = new java.sql.Timestamp(-2000); + Timestamp timestamp = Timestamps.fromDate(date); + assertThat(Timestamps.toString(timestamp)).isEqualTo("1969-12-31T23:59:58Z"); + } + @Test public void testTimeOperations() throws Exception { Timestamp start = Timestamps.parse("0001-01-01T00:00:00Z"); diff --git a/kokoro/windows/bazel/build.bat b/kokoro/windows/bazel/build.bat index 52b83f4666..73eb0fdea0 100644 --- a/kokoro/windows/bazel/build.bat +++ b/kokoro/windows/bazel/build.bat @@ -1,4 +1,30 @@ @rem enter repo root cd /d %~dp0\..\..\.. -@rem TODO(mkruskal) Implement tests +call kokoro\windows\prepare_build_win64.bat || goto :error + +@rem Allow Bazel to create short paths. +fsutil 8dot3name set 0 + +@rem TODO(b/241475022) Use docker to guarantee better stability. + +@rem Reinstall Bazel due to corupt installation in kokoro. +bazel version +choco install bazel -y -i +bazel version + +@rem Make paths as short as possible to avoid long path issues. 
+set BAZEL_STARTUP=--output_user_root=C:/tmp --windows_enable_symlinks +set BAZEL_FLAGS=--enable_runfiles --keep_going --test_output=streamed --verbose_failures + +@rem Build libraries first. +bazel %BAZEL_STARTUP% build //:protoc //:protobuf //:protobuf_lite %BAZEL_FLAGS% || goto :error + +@rem Run C++ tests. +bazel %BAZEL_STARTUP% test //src/... %BAZEL_FLAGS% || goto :error + +goto :EOF + +:error +echo Failed! +exit /b 1 diff --git a/kokoro/windows/bazel/continuous.cfg b/kokoro/windows/bazel/continuous.cfg index 37e89e068b..5978a7aa0d 100644 --- a/kokoro/windows/bazel/continuous.cfg +++ b/kokoro/windows/bazel/continuous.cfg @@ -1,5 +1,5 @@ # Config file for running tests in Kokoro # Location of the build script in repository -build_file: "protobuf/kokoro/windows/cmake/build.bat" +build_file: "protobuf/kokoro/windows/bazel/build.bat" timeout_mins: 1440 diff --git a/kokoro/windows/bazel/presubmit.cfg b/kokoro/windows/bazel/presubmit.cfg index 37e89e068b..5978a7aa0d 100644 --- a/kokoro/windows/bazel/presubmit.cfg +++ b/kokoro/windows/bazel/presubmit.cfg @@ -1,5 +1,5 @@ # Config file for running tests in Kokoro # Location of the build script in repository -build_file: "protobuf/kokoro/windows/cmake/build.bat" +build_file: "protobuf/kokoro/windows/bazel/build.bat" timeout_mins: 1440 diff --git a/kokoro/windows/cmake/build.bat b/kokoro/windows/cmake/build.bat index 52b83f4666..29d012ba1c 100644 --- a/kokoro/windows/cmake/build.bat +++ b/kokoro/windows/cmake/build.bat @@ -1,4 +1,32 @@ @rem enter repo root cd /d %~dp0\..\..\.. -@rem TODO(mkruskal) Implement tests +call kokoro\windows\prepare_build_win64.bat || goto :error + +@rem TODO(b/241475022) Use docker to guarantee better stability. +@rem TODO(b/241484899) Run conformance tests in windows. + +md build -ea 0 +md %KOKORO_ARTIFACTS_DIR%\logs -ea 0 + +cd build + +cmake .. 
^ + -G "Visual Studio 15 2017" -A x64 ^ + -Dprotobuf_BUILD_CONFORMANCE=OFF ^ + -Dprotobuf_WITH_ZLIB=OFF ^ + -Dprotobuf_TEST_XML_OUTDIR=%KOKORO_ARTIFACTS_DIR%\logs\ || goto :error + +cmake --build . || goto :error + +ctest --verbose -C Debug || goto :error + +goto :success + +:error +cd /d %~dp0\..\..\.. +echo Failed! +exit /b 1 + +:success +cd .. diff --git a/kokoro/windows/cmake_install/build.bat b/kokoro/windows/cmake_install/build.bat index 52b83f4666..5392f9e664 100644 --- a/kokoro/windows/cmake_install/build.bat +++ b/kokoro/windows/cmake_install/build.bat @@ -1,4 +1,45 @@ @rem enter repo root cd /d %~dp0\..\..\.. -@rem TODO(mkruskal) Implement tests +call kokoro\windows\prepare_build_win64.bat || goto :error + +@rem TODO(b/241475022) Use docker to guarantee better stability. +@rem TODO(b/241484899) Run conformance tests in windows. + +md build -ea 0 +md %KOKORO_ARTIFACTS_DIR%\logs -ea 0 + +cd build + +@rem First install protobuf from source. +cmake .. ^ + -G "Visual Studio 15 2017" -A x64 ^ + -Dprotobuf_BUILD_CONFORMANCE=OFF ^ + -Dprotobuf_WITH_ZLIB=OFF || goto :error + +cmake --build . --target install || goto :error + +@rem Next run tests forcing the use of our installation. + +rm -rf * + +cmake .. ^ + -G "Visual Studio 15 2017" -A x64 ^ + -Dprotobuf_REMOVE_INSTALLED_HEADERS=ON ^ + -Dprotobuf_BUILD_PROTOBUF_BINARIES=OFF ^ + -Dprotobuf_BUILD_CONFORMANCE=OFF ^ + -Dprotobuf_TEST_XML_OUTDIR=%KOKORO_ARTIFACTS_DIR%\logs\ || goto :error + +cmake --build . --target ALL_BUILD || goto :error + +ctest --verbose -C Debug || goto :error + +goto :success + +:error +cd /d %~dp0\..\..\.. +echo Failed! +exit /b 1 + +:success +cd .. diff --git a/kokoro/windows/cmake_nmake/build.bat b/kokoro/windows/cmake_nmake/build.bat index 52b83f4666..a0807714fc 100644 --- a/kokoro/windows/cmake_nmake/build.bat +++ b/kokoro/windows/cmake_nmake/build.bat @@ -1,4 +1,34 @@ @rem enter repo root cd /d %~dp0\..\..\.. 
-@rem TODO(mkruskal) Implement tests +call kokoro\windows\prepare_build_win64.bat || goto :error + +@rem TODO(b/241475022) Use docker to guarantee better stability. +@rem TODO(b/241484899) Run conformance tests in windows. + +md build -ea 0 +md %KOKORO_ARTIFACTS_DIR%\logs -ea 0 + +cd build + +cmake .. ^ + -G "NMake Makefiles" ^ + -DCMAKE_C_COMPILER=cl.exe ^ + -DCMAKE_CXX_COMPILER=cl.exe ^ + -Dprotobuf_BUILD_CONFORMANCE=OFF ^ + -Dprotobuf_WITH_ZLIB=OFF ^ + -Dprotobuf_TEST_XML_OUTDIR=%KOKORO_ARTIFACTS_DIR%\logs\ || goto :error + +cmake --build . || goto :error + +ctest --verbose -C Debug || goto :error + +goto :success + +:error +cd /d %~dp0\..\..\.. +echo Failed! +exit /b 1 + +:success +cd .. diff --git a/kokoro/windows/cmake_shared/build.bat b/kokoro/windows/cmake_shared/build.bat index 52b83f4666..bcd5244608 100644 --- a/kokoro/windows/cmake_shared/build.bat +++ b/kokoro/windows/cmake_shared/build.bat @@ -1,4 +1,33 @@ @rem enter repo root cd /d %~dp0\..\..\.. -@rem TODO(mkruskal) Implement tests +call kokoro\windows\prepare_build_win64.bat || goto :error + +@rem TODO(b/241475022) Use docker to guarantee better stability. +@rem TODO(b/241484899) Run conformance tests in windows. + +md build -ea 0 +md %KOKORO_ARTIFACTS_DIR%\logs -ea 0 + +cd build + +cmake .. ^ + -G "Visual Studio 15 2017" -A x64 ^ + -Dprotobuf_BUILD_SHARED_LIBS=ON ^ + -Dprotobuf_BUILD_CONFORMANCE=OFF ^ + -Dprotobuf_WITH_ZLIB=OFF ^ + -Dprotobuf_TEST_XML_OUTDIR=%KOKORO_ARTIFACTS_DIR%\logs\ || goto :error + +cmake --build . || goto :error + +ctest --verbose -C Debug || goto :error + +goto :success + +:error +cd /d %~dp0\..\..\.. +echo Failed! +exit /b 1 + +:success +cd .. 
diff --git a/kokoro/windows/cmake_shared/continuous.cfg b/kokoro/windows/cmake_shared/continuous.cfg index 37e89e068b..0ba52e2a98 100644 --- a/kokoro/windows/cmake_shared/continuous.cfg +++ b/kokoro/windows/cmake_shared/continuous.cfg @@ -1,5 +1,5 @@ # Config file for running tests in Kokoro # Location of the build script in repository -build_file: "protobuf/kokoro/windows/cmake/build.bat" +build_file: "protobuf/kokoro/windows/cmake_shared/build.bat" timeout_mins: 1440 diff --git a/kokoro/windows/cmake_shared/presubmit.cfg b/kokoro/windows/cmake_shared/presubmit.cfg index 37e89e068b..0ba52e2a98 100644 --- a/kokoro/windows/cmake_shared/presubmit.cfg +++ b/kokoro/windows/cmake_shared/presubmit.cfg @@ -1,5 +1,5 @@ # Config file for running tests in Kokoro # Location of the build script in repository -build_file: "protobuf/kokoro/windows/cmake/build.bat" +build_file: "protobuf/kokoro/windows/cmake_shared/build.bat" timeout_mins: 1440 diff --git a/kokoro/windows/prepare_build_win64.bat b/kokoro/windows/prepare_build_win64.bat new file mode 100644 index 0000000000..d8eb2a2b30 --- /dev/null +++ b/kokoro/windows/prepare_build_win64.bat @@ -0,0 +1,16 @@ +@rem Update Chocolatey +choco upgrade -y --no-progress chocolatey +choco install -y --no-progress --pre cmake + +@rem Enable long paths. +Powershell.exe -Command "New-ItemProperty -Path HKLM:\SYSTEM\CurrentControlSet\Control\FileSystem -Name LongPathsEnabled -Value 1 -PropertyType DWORD -Force" + +@rem Update git submodules. +git submodule update --init --recursive + +@rem Select Visual Studio 2017. +call "C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\VC\Auxiliary\Build\vcvarsall.bat" x64 + +@rem Convert Windows line breaks to Unix line breaks +@rem This allows text-matching tests to pass +find . 
-type f -print0 | xargs -0 d2u diff --git a/php/ext/google/protobuf/package.xml b/php/ext/google/protobuf/package.xml index c291714265..ccf1d414fd 100644 --- a/php/ext/google/protobuf/package.xml +++ b/php/ext/google/protobuf/package.xml @@ -10,11 +10,11 @@ protobuf-packages@google.com yes - 2022-07-25 - + 2022-08-09 + - 3.21.4 - 3.21.4 + 3.21.5 + 3.21.5 stable @@ -1388,5 +1388,20 @@ G A release. + + + 3.21.5 + 3.21.5 + + + stable + stable + + 2022-08-09 + + BSD-3-Clause + + + diff --git a/php/ext/google/protobuf/protobuf.h b/php/ext/google/protobuf/protobuf.h index e63e9e8303..836bb8b74e 100644 --- a/php/ext/google/protobuf/protobuf.h +++ b/php/ext/google/protobuf/protobuf.h @@ -127,7 +127,7 @@ ZEND_BEGIN_ARG_INFO_EX(arginfo_setter, 0, 0, 1) ZEND_ARG_INFO(0, value) ZEND_END_ARG_INFO() -#define PHP_PROTOBUF_VERSION "3.21.4" +#define PHP_PROTOBUF_VERSION "3.21.5" // ptr -> PHP object cache. This is a weak map that caches lazily-created // wrapper objects around upb types: diff --git a/protobuf_version.bzl b/protobuf_version.bzl index edac50366c..253aaac46d 100644 --- a/protobuf_version.bzl +++ b/protobuf_version.bzl @@ -1,3 +1,3 @@ -PROTOC_VERSION = '21.4' -PROTOBUF_JAVA_VERSION = '3.21.4' -PROTOBUF_PYTHON_VERSION = '4.21.4' +PROTOC_VERSION = '21.5' +PROTOBUF_JAVA_VERSION = '3.21.5' +PROTOBUF_PYTHON_VERSION = '4.21.5' diff --git a/protoc-artifacts/pom.xml b/protoc-artifacts/pom.xml index d816174f08..c863376dfc 100644 --- a/protoc-artifacts/pom.xml +++ b/protoc-artifacts/pom.xml @@ -8,7 +8,7 @@ com.google.protobuf protoc - 3.21.4 + 3.21.5 pom Protobuf Compiler diff --git a/python/google/protobuf/internal/text_format_test.py b/python/google/protobuf/internal/text_format_test.py index 49361ee5ee..8377076e99 100644 --- a/python/google/protobuf/internal/text_format_test.py +++ b/python/google/protobuf/internal/text_format_test.py @@ -38,6 +38,7 @@ import string import textwrap import unittest +import unittest.mock from google.protobuf import any_pb2 from google.protobuf 
import struct_pb2 @@ -2484,5 +2485,3 @@ class OptionalColonMessageToStringTest(unittest.TestCase): self.assertEqual('repeated_int32: [1]\n', output) -if __name__ == '__main__': - unittest.main() diff --git a/python/google/protobuf/proto_api.h b/python/google/protobuf/proto_api.h index 9969a91f44..022768e3cd 100644 --- a/python/google/protobuf/proto_api.h +++ b/python/google/protobuf/proto_api.h @@ -134,7 +134,7 @@ struct PyProto_API { inline const char* PyProtoAPICapsuleName() { static const char kCapsuleName[] = - "google.protobuf.pyext._message.proto_API"; + "google3.net.google.protobuf.python.internal.cpp._message.proto_API"; return kCapsuleName; } diff --git a/python/google/protobuf/pyext/message.cc b/python/google/protobuf/pyext/message.cc index abfe0454e9..5baa7b50da 100644 --- a/python/google/protobuf/pyext/message.cc +++ b/python/google/protobuf/pyext/message.cc @@ -247,7 +247,7 @@ static PyObject* New(PyTypeObject* type, PyObject* args, PyObject* kwargs) { if (WKT_classes == nullptr) { ScopedPyObjectPtr well_known_types(PyImport_ImportModule( - "google.protobuf.internal.well_known_types")); + "google3.net.google.protobuf.python.internal.well_known_types")); GOOGLE_DCHECK(well_known_types != nullptr); WKT_classes = PyObject_GetAttrString(well_known_types.get(), "WKTBASES"); @@ -2372,7 +2372,7 @@ PyObject* DeepCopy(CMessage* self, PyObject* arg) { PyObject* ToUnicode(CMessage* self) { // Lazy import to prevent circular dependencies ScopedPyObjectPtr text_format( - PyImport_ImportModule("google.protobuf.text_format")); + PyImport_ImportModule("google3.net.google.protobuf.python.public.text_format")); if (text_format == nullptr) { return nullptr; } @@ -3035,7 +3035,7 @@ bool InitProto2MessageModule(PyObject *m) { reinterpret_cast(&PyMethodDescriptor_Type)); PyObject* enum_type_wrapper = PyImport_ImportModule( - "google.protobuf.internal.enum_type_wrapper"); + "google3.net.google.protobuf.python.internal.enum_type_wrapper"); if (enum_type_wrapper == nullptr) { 
return false; } @@ -3044,7 +3044,7 @@ bool InitProto2MessageModule(PyObject *m) { Py_DECREF(enum_type_wrapper); PyObject* message_module = PyImport_ImportModule( - "google.protobuf.message"); + "google3.net.google.protobuf.python.public.message"); if (message_module == nullptr) { return false; } diff --git a/python/google/protobuf/pyext/message.h b/python/google/protobuf/pyext/message.h index b17daa5806..2d2e1f51b2 100644 --- a/python/google/protobuf/pyext/message.h +++ b/python/google/protobuf/pyext/message.h @@ -329,7 +329,7 @@ PyObject* SetAllowOversizeProtos(PyObject* m, PyObject* arg); return err; \ } -#define FULL_MODULE_NAME "google.protobuf.pyext._message" +#define FULL_MODULE_NAME "google3.net.google.protobuf.python.internal.cpp._message" void FormatTypeError(PyObject* arg, const char* expected_types); template diff --git a/python/google/protobuf/text_format.py b/python/google/protobuf/text_format.py index edb4fba652..a812dc69cb 100644 --- a/python/google/protobuf/text_format.py +++ b/python/google/protobuf/text_format.py @@ -67,6 +67,7 @@ _FLOAT_INFINITY = re.compile('-?inf(?:inity)?f?$', re.IGNORECASE) _FLOAT_NAN = re.compile('nanf?$', re.IGNORECASE) _QUOTES = frozenset(("'", '"')) _ANY_FULL_TYPE_NAME = 'google.protobuf.Any' +_DEBUG_STRING_SILENT_MARKER = '\t ' class Error(Exception): @@ -880,6 +881,7 @@ class _Parser(object): type_url_prefix, packed_type_name = self._ConsumeAnyTypeUrl(tokenizer) tokenizer.Consume(']') tokenizer.TryConsume(':') + self._DetectSilentMarker(tokenizer) if tokenizer.TryConsume('<'): expanded_any_end_token = '>' else: @@ -979,9 +981,11 @@ class _Parser(object): if field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_MESSAGE: tokenizer.TryConsume(':') + self._DetectSilentMarker(tokenizer) merger = self._MergeMessageField else: tokenizer.Consume(':') + self._DetectSilentMarker(tokenizer) merger = self._MergeScalarField if (field.label == descriptor.FieldDescriptor.LABEL_REPEATED and @@ -999,7 +1003,7 @@ class _Parser(object): 
else: # Proto field is unknown. assert (self.allow_unknown_extension or self.allow_unknown_field) - _SkipFieldContents(tokenizer) + self._SkipFieldContents(tokenizer) # For historical reasons, fields may optionally be separated by commas or # semicolons. @@ -1007,6 +1011,13 @@ class _Parser(object): tokenizer.TryConsume(';') + def _LogSilentMarker(self): + pass + + def _DetectSilentMarker(self, tokenizer): + if tokenizer.contains_silent_marker_before_current_token: + self._LogSilentMarker() + def _ConsumeAnyTypeUrl(self, tokenizer): """Consumes a google.protobuf.Any type URL and returns the type name.""" # Consume "type.googleapis.com/". @@ -1161,112 +1172,108 @@ class _Parser(object): else: setattr(message, field.name, value) + def _SkipFieldContents(self, tokenizer): + """Skips over contents (value or message) of a field. -def _SkipFieldContents(tokenizer): - """Skips over contents (value or message) of a field. - - Args: - tokenizer: A tokenizer to parse the field name and values. - """ - # Try to guess the type of this field. - # If this field is not a message, there should be a ":" between the - # field name and the field value and also the field value should not - # start with "{" or "<" which indicates the beginning of a message body. - # If there is no ":" or there is a "{" or "<" after ":", this field has - # to be a message or the input is ill-formed. - if tokenizer.TryConsume( - ':') and not tokenizer.LookingAt('{') and not tokenizer.LookingAt('<'): - if tokenizer.LookingAt('['): - _SkipRepeatedFieldValue(tokenizer) + Args: + tokenizer: A tokenizer to parse the field name and values. + """ + # Try to guess the type of this field. + # If this field is not a message, there should be a ":" between the + # field name and the field value and also the field value should not + # start with "{" or "<" which indicates the beginning of a message body. 
+ # If there is no ":" or there is a "{" or "<" after ":", this field has + # to be a message or the input is ill-formed. + if tokenizer.TryConsume( + ':') and not tokenizer.LookingAt('{') and not tokenizer.LookingAt('<'): + self._DetectSilentMarker(tokenizer) + if tokenizer.LookingAt('['): + self._SkipRepeatedFieldValue(tokenizer) + else: + self._SkipFieldValue(tokenizer) else: - _SkipFieldValue(tokenizer) - else: - _SkipFieldMessage(tokenizer) - + self._DetectSilentMarker(tokenizer) + self._SkipFieldMessage(tokenizer) -def _SkipField(tokenizer): - """Skips over a complete field (name and value/message). + def _SkipField(self, tokenizer): + """Skips over a complete field (name and value/message). - Args: - tokenizer: A tokenizer to parse the field name and values. - """ - if tokenizer.TryConsume('['): - # Consume extension or google.protobuf.Any type URL - tokenizer.ConsumeIdentifier() - num_identifiers = 1 - while tokenizer.TryConsume('.'): - tokenizer.ConsumeIdentifier() - num_identifiers += 1 - # This is possibly a type URL for an Any message. - if num_identifiers == 3 and tokenizer.TryConsume('/'): + Args: + tokenizer: A tokenizer to parse the field name and values. + """ + if tokenizer.TryConsume('['): + # Consume extension or google.protobuf.Any type URL tokenizer.ConsumeIdentifier() + num_identifiers = 1 while tokenizer.TryConsume('.'): tokenizer.ConsumeIdentifier() - tokenizer.Consume(']') - else: - tokenizer.ConsumeIdentifierOrNumber() - - _SkipFieldContents(tokenizer) - - # For historical reasons, fields may optionally be separated by commas or - # semicolons. - if not tokenizer.TryConsume(','): - tokenizer.TryConsume(';') - - -def _SkipFieldMessage(tokenizer): - """Skips over a field message. + num_identifiers += 1 + # This is possibly a type URL for an Any message. 
+ if num_identifiers == 3 and tokenizer.TryConsume('/'): + tokenizer.ConsumeIdentifier() + while tokenizer.TryConsume('.'): + tokenizer.ConsumeIdentifier() + tokenizer.Consume(']') + else: + tokenizer.ConsumeIdentifierOrNumber() - Args: - tokenizer: A tokenizer to parse the field name and values. - """ + self._SkipFieldContents(tokenizer) - if tokenizer.TryConsume('<'): - delimiter = '>' - else: - tokenizer.Consume('{') - delimiter = '}' + # For historical reasons, fields may optionally be separated by commas or + # semicolons. + if not tokenizer.TryConsume(','): + tokenizer.TryConsume(';') - while not tokenizer.LookingAt('>') and not tokenizer.LookingAt('}'): - _SkipField(tokenizer) + def _SkipFieldMessage(self, tokenizer): + """Skips over a field message. - tokenizer.Consume(delimiter) + Args: + tokenizer: A tokenizer to parse the field name and values. + """ + if tokenizer.TryConsume('<'): + delimiter = '>' + else: + tokenizer.Consume('{') + delimiter = '}' + while not tokenizer.LookingAt('>') and not tokenizer.LookingAt('}'): + self._SkipField(tokenizer) -def _SkipFieldValue(tokenizer): - """Skips over a field value. + tokenizer.Consume(delimiter) - Args: - tokenizer: A tokenizer to parse the field name and values. + def _SkipFieldValue(self, tokenizer): + """Skips over a field value. - Raises: - ParseError: In case an invalid field value is found. - """ - # String/bytes tokens can come in multiple adjacent string literals. - # If we can consume one, consume as many as we can. - if tokenizer.TryConsumeByteString(): - while tokenizer.TryConsumeByteString(): - pass - return + Args: + tokenizer: A tokenizer to parse the field name and values. - if (not tokenizer.TryConsumeIdentifier() and - not _TryConsumeInt64(tokenizer) and not _TryConsumeUint64(tokenizer) and - not tokenizer.TryConsumeFloat()): - raise ParseError('Invalid field value: ' + tokenizer.token) + Raises: + ParseError: In case an invalid field value is found. 
+ """ + # String/bytes tokens can come in multiple adjacent string literals. + # If we can consume one, consume as many as we can. + if tokenizer.TryConsumeByteString(): + while tokenizer.TryConsumeByteString(): + pass + return + if (not tokenizer.TryConsumeIdentifier() and + not _TryConsumeInt64(tokenizer) and not _TryConsumeUint64(tokenizer) and + not tokenizer.TryConsumeFloat()): + raise ParseError('Invalid field value: ' + tokenizer.token) -def _SkipRepeatedFieldValue(tokenizer): - """Skips over a repeated field value. + def _SkipRepeatedFieldValue(self, tokenizer): + """Skips over a repeated field value. - Args: - tokenizer: A tokenizer to parse the field value. - """ - tokenizer.Consume('[') - if not tokenizer.LookingAt(']'): - _SkipFieldValue(tokenizer) - while tokenizer.TryConsume(','): - _SkipFieldValue(tokenizer) - tokenizer.Consume(']') + Args: + tokenizer: A tokenizer to parse the field value. + """ + tokenizer.Consume('[') + if not tokenizer.LookingAt(']'): + self._SkipFieldValue(tokenizer) + while tokenizer.TryConsume(','): + self._SkipFieldValue(tokenizer) + tokenizer.Consume(']') class Tokenizer(object): @@ -1307,6 +1314,8 @@ class Tokenizer(object): self._skip_comments = skip_comments self._whitespace_pattern = (skip_comments and self._WHITESPACE_OR_COMMENT or self._WHITESPACE) + self.contains_silent_marker_before_current_token = False + self._SkipWhitespace() self.NextToken() @@ -1339,6 +1348,8 @@ class Tokenizer(object): match = self._whitespace_pattern.match(self._current_line, self._column) if not match: break + self.contains_silent_marker_before_current_token = match.group(0) == ( + ' ' + _DEBUG_STRING_SILENT_MARKER) length = len(match.group(0)) self._column += length @@ -1591,6 +1602,7 @@ class Tokenizer(object): """Reads the next meaningful token.""" self._previous_line = self._line self._previous_column = self._column + self.contains_silent_marker_before_current_token = False self._column += len(self.token) self._SkipWhitespace() diff --git 
a/ruby/README.md b/ruby/README.md index 42a1ffaad4..be8d6bc8b1 100644 --- a/ruby/README.md +++ b/ruby/README.md @@ -9,7 +9,7 @@ install protoc as well to have Ruby code generation functionality. Installation from Gem --------------------- -In Gemfile (Please check a version of Protocol Buffers you needed [RubyGems](https://rubygems.org/gems/google-protobuf)): +In Gemfile (Please check which version of Protocol Buffers you need: [RubyGems](https://rubygems.org/gems/google-protobuf)): gem 'google-protobuf' diff --git a/ruby/ext/google/protobuf_c/message.c b/ruby/ext/google/protobuf_c/message.c index 6b8bbaa3c5..e430b79490 100644 --- a/ruby/ext/google/protobuf_c/message.c +++ b/ruby/ext/google/protobuf_c/message.c @@ -1398,5 +1398,5 @@ void Message_register(VALUE protobuf) { // Ruby-interned string: "descriptor". We use this identifier to store an // instance variable on message classes we create in order to link them back // to their descriptors. - descriptor_instancevar_interned = rb_intern("descriptor"); + descriptor_instancevar_interned = rb_intern("@descriptor"); } diff --git a/ruby/google-protobuf.gemspec b/ruby/google-protobuf.gemspec index e43665d447..3d776fecea 100644 --- a/ruby/google-protobuf.gemspec +++ b/ruby/google-protobuf.gemspec @@ -1,6 +1,6 @@ Gem::Specification.new do |s| s.name = "google-protobuf" - s.version = "3.21.4" + s.version = "3.21.5" git_tag = "v#{s.version.to_s.sub('.rc.', '-rc')}" # Converts X.Y.Z.rc.N to vX.Y.Z-rcN, used for the git tag s.licenses = ["BSD-3-Clause"] s.summary = "Protocol Buffers" diff --git a/ruby/pom.xml b/ruby/pom.xml index 4264172772..d56c99d785 100644 --- a/ruby/pom.xml +++ b/ruby/pom.xml @@ -9,7 +9,7 @@ com.google.protobuf.jruby protobuf-jruby - 3.21.4 + 3.21.5 Protocol Buffer JRuby native extension Protocol Buffers are a way of encoding structured data in an efficient yet @@ -76,7 +76,7 @@ com.google.protobuf protobuf-java-util - 3.21.4 + 3.21.5 org.jruby diff --git a/src/Makefile.am b/src/Makefile.am index 
15f7aa42a3..5b85e63a61 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -258,6 +258,7 @@ libprotobuf_la_SOURCES = \ google/protobuf/field_mask.pb.cc \ google/protobuf/generated_message_bases.cc \ google/protobuf/generated_message_reflection.cc \ + google/protobuf/generated_message_tctable_gen.cc \ google/protobuf/generated_message_tctable_full.cc \ google/protobuf/io/gzip_stream.cc \ google/protobuf/io/printer.cc \ diff --git a/src/file_lists.cmake b/src/file_lists.cmake index 67092cc39e..b1279cb532 100644 --- a/src/file_lists.cmake +++ b/src/file_lists.cmake @@ -32,6 +32,7 @@ set(libprotobuf_srcs ${protobuf_SOURCE_DIR}/src/google/protobuf/generated_enum_util.cc ${protobuf_SOURCE_DIR}/src/google/protobuf/generated_message_bases.cc ${protobuf_SOURCE_DIR}/src/google/protobuf/generated_message_reflection.cc + ${protobuf_SOURCE_DIR}/src/google/protobuf/generated_message_tctable_gen.cc ${protobuf_SOURCE_DIR}/src/google/protobuf/generated_message_tctable_full.cc ${protobuf_SOURCE_DIR}/src/google/protobuf/generated_message_tctable_lite.cc ${protobuf_SOURCE_DIR}/src/google/protobuf/generated_message_util.cc diff --git a/src/google/protobuf/arena.cc b/src/google/protobuf/arena.cc index 0c1bcf9227..30fdac5afc 100644 --- a/src/google/protobuf/arena.cc +++ b/src/google/protobuf/arena.cc @@ -117,7 +117,7 @@ SerialArena* SerialArena::New(Memory mem, void* owner, ThreadSafeArenaStats* stats) { GOOGLE_DCHECK_LE(kBlockHeaderSize + ThreadSafeArena::kSerialArenaSize, mem.size); ThreadSafeArenaStats::RecordAllocateStats( - stats, /*requested=*/mem.size, /*allocated=*/mem.size, /*wasted=*/0); + stats, /*used=*/0, /*allocated=*/mem.size, /*wasted=*/0); auto b = new (mem.ptr) Block{nullptr, mem.size}; return new (b->Pointer(kBlockHeaderSize)) SerialArena(b, owner, stats); } diff --git a/src/google/protobuf/arenaz_sampler_test.cc b/src/google/protobuf/arenaz_sampler_test.cc index 67570e87a4..b49c8659ff 100644 --- a/src/google/protobuf/arenaz_sampler_test.cc +++ 
b/src/google/protobuf/arenaz_sampler_test.cc @@ -411,6 +411,33 @@ TEST(ThreadSafeArenazSamplerTest, Callback) { sampler.Unregister(info2); } +TEST(ThreadSafeArenazSamplerTest, InitialBlockReportsZeroUsedAndWasted) { + SetThreadSafeArenazEnabled(true); + // Setting 1 as the parameter value means one in every two arenas would be + // sampled, on average. + int32_t oldparam = ThreadSafeArenazSampleParameter(); + SetThreadSafeArenazSampleParameter(1); + SetThreadSafeArenazGlobalNextSample(0); + constexpr int kSize = 571; + int count_found_allocation = 0; + auto& sampler = GlobalThreadSafeArenazSampler(); + for (int i = 0; i < 10; ++i) { + char block[kSize]; + google::protobuf::Arena arena(/*initial_block=*/block, /*initial_block_size=*/kSize); + sampler.Iterate([&](const ThreadSafeArenaStats& h) { + const auto& histbin = + h.block_histogram[ThreadSafeArenaStats::FindBin(kSize)]; + if (histbin.bytes_allocated.load(std::memory_order_relaxed) == kSize) { + count_found_allocation++; + EXPECT_EQ(histbin.bytes_used, 0); + EXPECT_EQ(histbin.bytes_wasted, 0); + } + }); + } + EXPECT_GT(count_found_allocation, 0); + SetThreadSafeArenazSampleParameter(oldparam); +} + class ThreadSafeArenazSamplerTestThread : public Thread { protected: void Run() override { diff --git a/src/google/protobuf/compiler/cpp/enum_field.cc b/src/google/protobuf/compiler/cpp/enum_field.cc index 3539a0df41..1e4389bfe9 100644 --- a/src/google/protobuf/compiler/cpp/enum_field.cc +++ b/src/google/protobuf/compiler/cpp/enum_field.cc @@ -104,7 +104,7 @@ void EnumFieldGenerator::GenerateInlineAccessorDefinitions( " return _internal_$name$();\n" "}\n" "inline void $classname$::_internal_set_$name$($type$ value) {\n"); - if (!HasPreservingUnknownEnumSemantics(descriptor_)) { + if (!internal::cpp::HasPreservingUnknownEnumSemantics(descriptor_)) { format(" assert($type$_IsValid(value));\n"); } format( @@ -204,7 +204,7 @@ void EnumOneofFieldGenerator::GenerateInlineAccessorDefinitions( " return _internal_$name$();\n" 
"}\n" "inline void $classname$::_internal_set_$name$($type$ value) {\n"); - if (!HasPreservingUnknownEnumSemantics(descriptor_)) { + if (!internal::cpp::HasPreservingUnknownEnumSemantics(descriptor_)) { format(" assert($type$_IsValid(value));\n"); } format( @@ -291,7 +291,7 @@ void RepeatedEnumFieldGenerator::GenerateInlineAccessorDefinitions( " return _internal_$name$(index);\n" "}\n" "inline void $classname$::set_$name$(int index, $type$ value) {\n"); - if (!HasPreservingUnknownEnumSemantics(descriptor_)) { + if (!internal::cpp::HasPreservingUnknownEnumSemantics(descriptor_)) { format(" assert($type$_IsValid(value));\n"); } format( @@ -300,7 +300,7 @@ void RepeatedEnumFieldGenerator::GenerateInlineAccessorDefinitions( " // @@protoc_insertion_point(field_set:$full_name$)\n" "}\n" "inline void $classname$::_internal_add_$name$($type$ value) {\n"); - if (!HasPreservingUnknownEnumSemantics(descriptor_)) { + if (!internal::cpp::HasPreservingUnknownEnumSemantics(descriptor_)) { format(" assert($type$_IsValid(value));\n"); } format( diff --git a/src/google/protobuf/compiler/cpp/field.cc b/src/google/protobuf/compiler/cpp/field.cc index 90d20848b3..fdd31abff1 100644 --- a/src/google/protobuf/compiler/cpp/field.cc +++ b/src/google/protobuf/compiler/cpp/field.cc @@ -267,7 +267,7 @@ void SetCommonFieldVariables(const FieldDescriptor* descriptor, } void FieldGenerator::SetHasBitIndex(int32_t has_bit_index) { - if (!HasHasbit(descriptor_)) { + if (!internal::cpp::HasHasbit(descriptor_)) { GOOGLE_CHECK_EQ(has_bit_index, -1); return; } diff --git a/src/google/protobuf/compiler/cpp/file.cc b/src/google/protobuf/compiler/cpp/file.cc index 502d8c007e..0a73e3e418 100644 --- a/src/google/protobuf/compiler/cpp/file.cc +++ b/src/google/protobuf/compiler/cpp/file.cc @@ -51,6 +51,7 @@ #include #include #include +#include #include // Must be last. 
diff --git a/src/google/protobuf/compiler/cpp/helpers.cc b/src/google/protobuf/compiler/cpp/helpers.cc index d182c8607f..b3ceafb5fa 100644 --- a/src/google/protobuf/compiler/cpp/helpers.cc +++ b/src/google/protobuf/compiler/cpp/helpers.cc @@ -1133,39 +1133,16 @@ bool IsWellKnownMessage(const FileDescriptor* file) { return well_known_files.find(file->name()) != well_known_files.end(); } -static bool FieldEnforceUtf8(const FieldDescriptor* field, - const Options& options) { - return true; -} - -static bool FileUtf8Verification(const FileDescriptor* file, - const Options& options) { - return true; -} - -// Which level of UTF-8 enforcemant is placed on this file. -Utf8CheckMode GetUtf8CheckMode(const FieldDescriptor* field, - const Options& options) { - if (field->file()->syntax() == FileDescriptor::SYNTAX_PROTO3 && - FieldEnforceUtf8(field, options)) { - return Utf8CheckMode::kStrict; - } else if (GetOptimizeFor(field->file(), options) != - FileOptions::LITE_RUNTIME && - FileUtf8Verification(field->file(), options)) { - return Utf8CheckMode::kVerify; - } else { - return Utf8CheckMode::kNone; - } -} - static void GenerateUtf8CheckCode(const FieldDescriptor* field, const Options& options, bool for_parse, const char* parameters, const char* strict_function, const char* verify_function, const Formatter& format) { - switch (GetUtf8CheckMode(field, options)) { - case Utf8CheckMode::kStrict: { + switch (internal::cpp::GetUtf8CheckMode( + field, + GetOptimizeFor(field->file(), options) == FileOptions::LITE_RUNTIME)) { + case internal::cpp::Utf8CheckMode::kStrict: { if (for_parse) { format("DO_("); } @@ -1185,7 +1162,7 @@ static void GenerateUtf8CheckCode(const FieldDescriptor* field, format.Outdent(); break; } - case Utf8CheckMode::kVerify: { + case internal::cpp::Utf8CheckMode::kVerify: { format("::$proto_ns$::internal::WireFormat::$1$(\n", verify_function); format.Indent(); format(parameters); @@ -1198,7 +1175,7 @@ static void GenerateUtf8CheckCode(const FieldDescriptor* 
field, format.Outdent(); break; } - case Utf8CheckMode::kNone: + case internal::cpp::Utf8CheckMode::kNone: break; } } diff --git a/src/google/protobuf/compiler/cpp/helpers.h b/src/google/protobuf/compiler/cpp/helpers.h index 21a488e2a5..12bc9bbd1c 100644 --- a/src/google/protobuf/compiler/cpp/helpers.h +++ b/src/google/protobuf/compiler/cpp/helpers.h @@ -462,28 +462,6 @@ inline bool IsProto3(const FileDescriptor* file) { return file->syntax() == FileDescriptor::SYNTAX_PROTO3; } -inline bool HasHasbit(const FieldDescriptor* field) { - // This predicate includes proto3 message fields only if they have "optional". - // Foo submsg1 = 1; // HasHasbit() == false - // optional Foo submsg2 = 2; // HasHasbit() == true - // This is slightly odd, as adding "optional" to a singular proto3 field does - // not change the semantics or API. However whenever any field in a message - // has a hasbit, it forces reflection to include hasbit offsets for *all* - // fields, even if almost all of them are set to -1 (no hasbit). So to avoid - // causing a sudden size regression for ~all proto3 messages, we give proto3 - // message fields a hasbit only if "optional" is present. If the user is - // explicitly writing "optional", it is likely they are writing it on - // primitive fields also. - return (field->has_optional_keyword() || field->is_required()) && - !field->options().weak(); -} - -// Returns true if 'enum' semantics are such that unknown values are preserved -// in the enum field itself, rather than going to the UnknownFieldSet. 
-inline bool HasPreservingUnknownEnumSemantics(const FieldDescriptor* field) { - return field->file()->syntax() == FileDescriptor::SYNTAX_PROTO3; -} - inline bool IsCrossFileMessage(const FieldDescriptor* field) { return field->type() == FieldDescriptor::TYPE_MESSAGE && field->message_type()->file() != field->file(); @@ -935,15 +913,6 @@ class PROTOC_EXPORT NamespaceOpener { std::vector name_stack_; }; -enum class Utf8CheckMode { - kStrict = 0, // Parsing will fail if non UTF-8 data is in string fields. - kVerify = 1, // Only log an error but parsing will succeed. - kNone = 2, // No UTF-8 check. -}; - -Utf8CheckMode GetUtf8CheckMode(const FieldDescriptor* field, - const Options& options); - void GenerateUtf8CheckCodeForString(const FieldDescriptor* field, const Options& options, bool for_parse, const char* parameters, @@ -954,43 +923,6 @@ void GenerateUtf8CheckCodeForCord(const FieldDescriptor* field, const char* parameters, const Formatter& format); -template -struct FieldRangeImpl { - struct Iterator { - using iterator_category = std::forward_iterator_tag; - using value_type = const FieldDescriptor*; - using difference_type = int; - - value_type operator*() { return descriptor->field(idx); } - - friend bool operator==(const Iterator& a, const Iterator& b) { - GOOGLE_DCHECK(a.descriptor == b.descriptor); - return a.idx == b.idx; - } - friend bool operator!=(const Iterator& a, const Iterator& b) { - return !(a == b); - } - - Iterator& operator++() { - idx++; - return *this; - } - - int idx; - const T* descriptor; - }; - - Iterator begin() const { return {0, descriptor}; } - Iterator end() const { return {descriptor->field_count(), descriptor}; } - - const T* descriptor; -}; - -template -FieldRangeImpl FieldRange(const T* desc) { - return {desc}; -} - struct OneOfRangeImpl { struct Iterator { using iterator_category = std::forward_iterator_tag; diff --git a/src/google/protobuf/compiler/cpp/message.cc b/src/google/protobuf/compiler/cpp/message.cc index 
50b86f7b2b..d37e70c329 100644 --- a/src/google/protobuf/compiler/cpp/message.cc +++ b/src/google/protobuf/compiler/cpp/message.cc @@ -37,8 +37,10 @@ #include #include #include +#include #include #include +#include #include #include #include @@ -73,6 +75,8 @@ namespace cpp { using internal::WireFormat; using internal::WireFormatLite; +using internal::cpp::HasHasbit; +using internal::cpp::Utf8CheckMode; namespace { @@ -1315,7 +1319,9 @@ void MessageGenerator::GenerateClassDefinition(io::Printer* printer) { " static const $classname$* internal_default_instance() { return " "reinterpret_cast(&_$classname$_default_instance_); }\n"); - auto utf8_check = GetUtf8CheckMode(descriptor_->field(0), options_); + auto utf8_check = internal::cpp::GetUtf8CheckMode( + descriptor_->field(0), GetOptimizeFor(descriptor_->file(), options_) == + FileOptions::LITE_RUNTIME); if (descriptor_->field(0)->type() == FieldDescriptor::TYPE_STRING && utf8_check != Utf8CheckMode::kNone) { if (utf8_check == Utf8CheckMode::kStrict) { diff --git a/src/google/protobuf/compiler/cpp/message.h b/src/google/protobuf/compiler/cpp/message.h index 998ebdb242..b252919302 100644 --- a/src/google/protobuf/compiler/cpp/message.h +++ b/src/google/protobuf/compiler/cpp/message.h @@ -36,6 +36,7 @@ #define GOOGLE_PROTOBUF_COMPILER_CPP_MESSAGE_H__ #include +#include #include #include #include diff --git a/src/google/protobuf/compiler/cpp/message_field.cc b/src/google/protobuf/compiler/cpp/message_field.cc index 7e87a07993..a51ad9fdb7 100644 --- a/src/google/protobuf/compiler/cpp/message_field.cc +++ b/src/google/protobuf/compiler/cpp/message_field.cc @@ -343,7 +343,7 @@ void MessageFieldGenerator::GenerateInternalAccessorDefinitions( format( "::$proto_ns$::MessageLite*\n" "$classname$::_Internal::mutable_$name$($classname$* msg) {\n"); - if (HasHasbit(descriptor_)) { + if (internal::cpp::HasHasbit(descriptor_)) { format(" msg->$set_hasbit$\n"); } if (descriptor_->real_containing_oneof() == nullptr) { @@ -376,7 +376,7 
@@ void MessageFieldGenerator::GenerateClearingCode(io::Printer* printer) const { GOOGLE_CHECK(!IsFieldStripped(descriptor_, options_)); Formatter format(printer, variables_); - if (!HasHasbit(descriptor_)) { + if (!internal::cpp::HasHasbit(descriptor_)) { // If we don't have has-bits, message presence is indicated only by ptr != // nullptr. Thus on clear, we need to delete the object. format( @@ -394,7 +394,7 @@ void MessageFieldGenerator::GenerateMessageClearingCode( GOOGLE_CHECK(!IsFieldStripped(descriptor_, options_)); Formatter format(printer, variables_); - if (!HasHasbit(descriptor_)) { + if (!internal::cpp::HasHasbit(descriptor_)) { // If we don't have has-bits, message presence is indicated only by ptr != // nullptr. Thus on clear, we need to delete the object. format( diff --git a/src/google/protobuf/compiler/cpp/parse_function_generator.cc b/src/google/protobuf/compiler/cpp/parse_function_generator.cc index 30b0fd44fe..412537af86 100644 --- a/src/google/protobuf/compiler/cpp/parse_function_generator.cc +++ b/src/google/protobuf/compiler/cpp/parse_function_generator.cc @@ -37,6 +37,7 @@ #include #include +#include #include namespace google { @@ -45,11 +46,14 @@ namespace compiler { namespace cpp { namespace { +using internal::TailCallTableInfo; +using internal::cpp::Utf8CheckMode; using google::protobuf::internal::WireFormat; using google::protobuf::internal::WireFormatLite; bool UseDirectTcParserTable(const FieldDescriptor* field, const Options& options) { + if (field->cpp_type() != field->CPPTYPE_MESSAGE) return false; auto* m = field->message_type(); return !m->options().message_set_wire_format() && m->file()->options().optimize_for() != FileOptions::CODE_SIZE && @@ -76,379 +80,26 @@ bool HasInternalAccessors(const FieldOptions::CType ctype) { return ctype == FieldOptions::STRING || ctype == FieldOptions::CORD; } -int TagSize(uint32_t field_number) { - if (field_number < 16) return 1; - GOOGLE_CHECK_LT(field_number, (1 << 14)) - << "coded tag for " << 
field_number << " too big for uint16_t"; - return 2; -} - -void PopulateFastFieldEntry(const Descriptor* descriptor, - const TailCallTableInfo::FieldEntryInfo& entry, - const Options& options, - TailCallTableInfo::FastFieldInfo& info); - -bool IsFieldEligibleForFastParsing( - const TailCallTableInfo::FieldEntryInfo& entry, const Options& options, - MessageSCCAnalyzer* scc_analyzer) { - const auto* field = entry.field; - // Map, oneof, weak, and lazy fields are not handled on the fast path. - if (field->is_map() || field->real_containing_oneof() || - field->options().weak() || - IsImplicitWeakField(field, options, scc_analyzer) || - IsLazy(field, options, scc_analyzer) || ShouldSplit(field, options)) { - return false; - } - - // We will check for a valid auxiliary index range later. However, we might - // want to change the value we check for inlined string fields. - int aux_idx = entry.aux_idx; - - switch (field->type()) { - case FieldDescriptor::TYPE_ENUM: - // If enum values are not validated at parse time, then this field can be - // handled on the fast path like an int32. - if (HasPreservingUnknownEnumSemantics(field)) { - break; - } - if (field->is_repeated() && field->is_packed()) { - return false; - } - break; - - // Some bytes fields can be handled on fast path. - case FieldDescriptor::TYPE_STRING: - case FieldDescriptor::TYPE_BYTES: - if (field->options().ctype() != FieldOptions::STRING) { - return false; - } - if (IsStringInlined(field, options)) { - GOOGLE_CHECK(!field->is_repeated()); - // For inlined strings, the donation state index is stored in the - // `aux_idx` field of the fast parsing info. We need to check the range - // of that value instead of the auxiliary index. - aux_idx = entry.inlined_string_idx; - } - break; - - default: - break; - } - - if (HasHasbit(field)) { - // The tailcall parser can only update the first 32 hasbits. Fields with - // has-bits beyond the first 32 are handled by mini parsing/fallback. 
- GOOGLE_CHECK_GE(entry.hasbit_idx, 0) << field->DebugString(); - if (entry.hasbit_idx >= 32) return false; - } - - // If the field needs auxiliary data, then the aux index is needed. This - // must fit in a uint8_t. - if (aux_idx > std::numeric_limits::max()) { - return false; - } - - // The largest tag that can be read by the tailcall parser is two bytes - // when varint-coded. This allows 14 bits for the numeric tag value: - // byte 0 byte 1 - // 1nnnnttt 0nnnnnnn - // ^^^^^^^ ^^^^^^^ - if (field->number() >= 1 << 11) return false; - - return true; -} - -std::vector SplitFastFieldsForSize( - const Descriptor* descriptor, - const std::vector& field_entries, - int table_size_log2, const Options& options, - MessageSCCAnalyzer* scc_analyzer) { - std::vector result(1 << table_size_log2); - const uint32_t idx_mask = result.size() - 1; - - for (const auto& entry : field_entries) { - if (!IsFieldEligibleForFastParsing(entry, options, scc_analyzer)) { - continue; - } - - const auto* field = entry.field; - uint32_t tag = WireFormat::MakeTag(field); - - // Construct the varint-coded tag. If it is more than 7 bits, we need to - // shift the high bits and add a continue bit. - if (uint32_t hibits = tag & 0xFFFFFF80) { - tag = tag + hibits + 128; // tag = lobits + 2*hibits + 128 - } - - // The field index is determined by the low bits of the field number, where - // the table size determines the width of the mask. The largest table - // supported is 32 entries. The parse loop uses these bits directly, so that - // the dispatch does not require arithmetic: - // byte 0 byte 1 - // tag: 1nnnnttt 0nnnnnnn - // ^^^^^ - // idx (table_size_log2=5) - // This means that any field number that does not fit in the lower 4 bits - // will always have the top bit of its table index asserted. - const uint32_t fast_idx = (tag >> 3) & idx_mask; - - TailCallTableInfo::FastFieldInfo& info = result[fast_idx]; - if (info.field != nullptr) { - // This field entry is already filled. 
- continue; - } - - // Fill in this field's entry: - GOOGLE_CHECK(info.func_name.empty()) << info.func_name; - info.field = field; - info.coded_tag = tag; - PopulateFastFieldEntry(descriptor, entry, options, info); - // If this field does not have presence, then it can set an out-of-bounds - // bit (tailcall parsing uses a uint64_t for hasbits, but only stores 32). - info.hasbit_idx = HasHasbit(field) ? entry.hasbit_idx : 63; - } - return result; -} - -// Filter out fields that will be handled by mini parsing. -std::vector FilterMiniParsedFields( - const std::vector& fields, const Options& options, - MessageSCCAnalyzer* scc_analyzer) { - std::vector generated_fallback_fields; - - for (const auto* field : fields) { - bool handled = false; - switch (field->type()) { - case FieldDescriptor::TYPE_DOUBLE: - case FieldDescriptor::TYPE_FLOAT: - case FieldDescriptor::TYPE_FIXED32: - case FieldDescriptor::TYPE_SFIXED32: - case FieldDescriptor::TYPE_FIXED64: - case FieldDescriptor::TYPE_SFIXED64: - case FieldDescriptor::TYPE_BOOL: - case FieldDescriptor::TYPE_UINT32: - case FieldDescriptor::TYPE_SINT32: - case FieldDescriptor::TYPE_INT32: - case FieldDescriptor::TYPE_UINT64: - case FieldDescriptor::TYPE_SINT64: - case FieldDescriptor::TYPE_INT64: - // These are handled by MiniParse, so we don't need any generated - // fallback code. - handled = true; - break; - - case FieldDescriptor::TYPE_ENUM: - if (field->is_repeated() && !HasPreservingUnknownEnumSemantics(field)) { - // TODO(b/206890171): handle packed repeated closed enums - // Non-packed repeated can be handled using tables, but we still - // need to generate fallback code for all repeated enums in order to - // handle packed encoding. This is because of the lite/full split - // when handling invalid enum values in a packed field. 
- handled = false; - } else { - handled = true; - } - break; - - case FieldDescriptor::TYPE_BYTES: - case FieldDescriptor::TYPE_STRING: - if (IsStringInlined(field, options)) { - // TODO(b/198211897): support InilnedStringField. - handled = false; - } else { - handled = true; - } - break; - - case FieldDescriptor::TYPE_MESSAGE: - case FieldDescriptor::TYPE_GROUP: - // TODO(b/210762816): support remaining field types. - if (field->is_map() || IsWeak(field, options) || - IsImplicitWeakField(field, options, scc_analyzer) || - IsLazy(field, options, scc_analyzer)) { - handled = false; - } else { - handled = true; - } - break; - - default: - handled = false; - break; - } - if (!handled) generated_fallback_fields.push_back(field); - } - - return generated_fallback_fields; -} - } // namespace -TailCallTableInfo::TailCallTableInfo( - const Descriptor* descriptor, const Options& options, - const std::vector& ordered_fields, - const std::vector& has_bit_indices, - const std::vector& inlined_string_indices, - MessageSCCAnalyzer* scc_analyzer) { - // If this message has any inlined string fields, store the donation state - // offset in the second auxiliary entry. - - const auto set_fixed_aux_entry = [&](int index, const std::string& value) { - if (index >= aux_entries.size()) { - aux_entries.resize(index + 1); // pad if necessary - } - aux_entries[index] = value; - }; - - if (!inlined_string_indices.empty()) { - set_fixed_aux_entry( - internal::kInlinedStringAuxIdx, - StrCat("_fl::Offset{offsetof(", ClassName(descriptor), - ", _impl_._inlined_string_donated_)}")); - } - - // If this message is split, store the split pointer offset in the third - // auxiliary entry. 
- if (ShouldSplit(descriptor, options)) { - set_fixed_aux_entry( - internal::kSplitOffsetAuxIdx, - StrCat("_fl::Offset{offsetof(", ClassName(descriptor), - ", _impl_._split_)}")); - set_fixed_aux_entry( - internal::kSplitSizeAuxIdx, - StrCat("_fl::Offset{sizeof(", ClassName(descriptor), - "::Impl_::Split)}")); +class ParseFunctionGenerator::GeneratedOptionProvider final + : public internal::TailCallTableInfo::OptionProvider { + public: + explicit GeneratedOptionProvider(ParseFunctionGenerator* gen) : gen_(gen) {} + TailCallTableInfo::PerFieldOptions GetForField( + const FieldDescriptor* field) const final { + return {IsLazy(field, gen_->options_, gen_->scc_analyzer_), + IsStringInlined(field, gen_->options_), + IsImplicitWeakField(field, gen_->options_, gen_->scc_analyzer_), + UseDirectTcParserTable(field, gen_->options_), + GetOptimizeFor(field->file(), gen_->options_) == + FileOptions::LITE_RUNTIME, + ShouldSplit(field, gen_->options_)}; } - // Fill in mini table entries. - for (const FieldDescriptor* field : ordered_fields) { - field_entries.push_back( - {field, (HasHasbit(field) ? has_bit_indices[field->index()] : -1)}); - auto& entry = field_entries.back(); - - if (field->type() == FieldDescriptor::TYPE_MESSAGE || - field->type() == FieldDescriptor::TYPE_GROUP) { - // Message-typed fields have a FieldAux with the default instance pointer. - if (field->is_map()) { - // TODO(b/205904770): generate aux entries for maps - } else if (IsWeak(field, options)) { - // Don't generate anything for weak fields. They are handled by the - // generated fallback. - } else if (IsImplicitWeakField(field, options, scc_analyzer)) { - // Implicit weak fields don't need to store a default instance pointer. - } else if (IsLazy(field, options, scc_analyzer)) { - // Lazy fields are handled by the generated fallback function. 
- } else { - field_entries.back().aux_idx = aux_entries.size(); - if (UseDirectTcParserTable(field, options)) { - const Descriptor* field_type = field->message_type(); - aux_entries.push_back( - StrCat("::_pbi::TcParser::GetTable<", - QualifiedClassName(field_type, options), ">()")); - } else { - const Descriptor* field_type = field->message_type(); - aux_entries.push_back( - StrCat("::_pbi::FieldAuxDefaultMessage{}, &", - QualifiedDefaultInstanceName(field_type, options))); - } - } - } else if (field->type() == FieldDescriptor::TYPE_ENUM && - !HasPreservingUnknownEnumSemantics(field)) { - // Enum fields which preserve unknown values (proto3 behavior) are - // effectively int32 fields with respect to parsing -- i.e., the value - // does not need to be validated at parse time. - // - // Enum fields which do not preserve unknown values (proto2 behavior) use - // a FieldAux to store validation information. If the enum values are - // sequential (and within a range we can represent), then the FieldAux - // entry represents the range using the minimum value (which must fit in - // an int16_t) and count (a uint16_t). Otherwise, the entry holds a - // pointer to the generated Name_IsValid function. - - entry.aux_idx = aux_entries.size(); - const EnumDescriptor* enum_type = field->enum_type(); - GOOGLE_CHECK_GT(enum_type->value_count(), 0) << enum_type->DebugString(); - - // Check if the enum values are a single, contiguous range. 
- std::vector enum_values; - for (int i = 0, N = enum_type->value_count(); i < N; ++i) { - enum_values.push_back(enum_type->value(i)->number()); - } - auto values_begin = enum_values.begin(); - auto values_end = enum_values.end(); - std::sort(values_begin, values_end); - enum_values.erase(std::unique(values_begin, values_end), values_end); - - if (enum_values.back() - enum_values[0] == enum_values.size() - 1 && - enum_values[0] >= std::numeric_limits::min() && - enum_values[0] <= std::numeric_limits::max() && - enum_values.size() <= std::numeric_limits::max()) { - entry.is_enum_range = true; - entry.enum_range_min = enum_values.front(); - entry.enum_range_max = enum_values.back(); - aux_entries.push_back( - StrCat(enum_values[0], ", ", enum_values.size())); - } else { - entry.is_enum_range = false; - aux_entries.push_back( - StrCat(QualifiedClassName(enum_type, options), "_IsValid")); - } - } else if ((field->type() == FieldDescriptor::TYPE_STRING || - field->type() == FieldDescriptor::TYPE_BYTES) && - IsStringInlined(field, options)) { - GOOGLE_CHECK(!field->is_repeated()); - // Inlined strings have an extra marker to represent their donation state. - int idx = inlined_string_indices[field->index()]; - // For mini parsing, the donation state index is stored as an `offset` - // auxiliary entry. - entry.aux_idx = aux_entries.size(); - aux_entries.push_back(StrCat("_fl::Offset{", idx, "}")); - // For fast table parsing, the donation state index is stored instead of - // the aux_idx (this will limit the range to 8 bits). - entry.inlined_string_idx = idx; - } - } - - // Choose the smallest fast table that covers the maximum number of fields. 
- table_size_log2 = 0; // fallback value - int num_fast_fields = -1; - for (int try_size_log2 : {0, 1, 2, 3, 4, 5}) { - size_t try_size = 1 << try_size_log2; - auto split_fields = SplitFastFieldsForSize( - descriptor, field_entries, try_size_log2, options, scc_analyzer); - GOOGLE_CHECK_EQ(split_fields.size(), try_size); - int try_num_fast_fields = 0; - for (const auto& info : split_fields) { - if (info.field != nullptr) ++try_num_fast_fields; - } - // Use this size if (and only if) it covers more fields. - if (try_num_fast_fields > num_fast_fields) { - fast_path_fields = std::move(split_fields); - table_size_log2 = try_size_log2; - num_fast_fields = try_num_fast_fields; - } - // The largest table we allow has the same number of entries as the message - // has fields, rounded up to the next power of 2 (e.g., a message with 5 - // fields can have a fast table of size 8). A larger table *might* cover - // more fields in certain cases, but a larger table in that case would have - // mostly empty entries; so, we cap the size to avoid pathologically sparse - // tables. - if (try_size > ordered_fields.size()) { - break; - } - } - - // Filter out fields that are handled by MiniParse. We don't need to generate - // a fallback for these, which saves code size. - fallback_fields = FilterMiniParsedFields(ordered_fields, options, - scc_analyzer); - - // If there are no fallback fields, and at most one extension range, the - // parser can use a generic fallback function. Otherwise, a message-specific - // fallback routine is needed. 
- use_generated_fallback = - !fallback_fields.empty() || descriptor->extension_range_count() > 1; -} + private: + ParseFunctionGenerator* gen_; +}; ParseFunctionGenerator::ParseFunctionGenerator( const Descriptor* descriptor, int max_has_bit_index, @@ -465,8 +116,8 @@ ParseFunctionGenerator::ParseFunctionGenerator( num_hasbits_(max_has_bit_index) { if (should_generate_tctable()) { tc_table_info_.reset(new TailCallTableInfo( - descriptor_, options_, ordered_fields_, has_bit_indices, - inlined_string_indices, scc_analyzer)); + descriptor_, ordered_fields_, GeneratedOptionProvider(this), + has_bit_indices, inlined_string_indices)); } SetCommonVars(options_, &variables_); SetCommonMessageDataVariables(descriptor_, &variables_); @@ -636,7 +287,10 @@ void ParseFunctionGenerator::GenerateDataDecls(io::Printer* printer) { "static const ::$proto_ns$::internal::" "TcParseTable<$1$, $2$, $3$, $4$, $5$> _table_;\n", tc_table_info_->table_size_log2, ordered_fields_.size(), - tc_table_info_->aux_entries.size(), CalculateFieldNamesSize(), + tc_table_info_->aux_entries.size(), + // We add a +1 here to allow for a NUL termination character. It makes the + // codegen nicer. 
+ tc_table_info_->field_name_data.size() + 1, field_num_to_entry_table.size16()); if (should_generate_guarded_tctable()) { format.Outdent(); @@ -795,7 +449,8 @@ void ParseFunctionGenerator::GenerateTailCallTable(Formatter& format) { "$classname$::_table_ = " "{\n", tc_table_info_->table_size_log2, ordered_fields_.size(), - tc_table_info_->aux_entries.size(), CalculateFieldNamesSize(), + tc_table_info_->aux_entries.size(), + tc_table_info_->field_name_data.size() + 1, // See above for why +1 field_num_to_entry_table.size16()); { auto table_scope = format.ScopedIndent(); @@ -903,8 +558,46 @@ void ParseFunctionGenerator::GenerateTailCallTable(Formatter& format) { { // aux_entries[] auto aux_scope = format.ScopedIndent(); - for (const std::string& aux_entry : tc_table_info_->aux_entries) { - format("{$1$},\n", aux_entry); + for (const auto& aux_entry : tc_table_info_->aux_entries) { + switch (aux_entry.type) { + case TailCallTableInfo::kNothing: + format("{},\n"); + break; + case TailCallTableInfo::kInlinedStringDonatedOffset: + format( + "{_fl::Offset{offsetof($classname$, " + "_impl_._inlined_string_donated_)}},\n"); + break; + case TailCallTableInfo::kSplitOffset: + format( + "{_fl::Offset{offsetof($classname$, _impl_._split_)}},\n"); + break; + case TailCallTableInfo::kSplitSizeof: + format("{_fl::Offset{sizeof($classname$::Impl_::Split)}},\n"); + break; + case TailCallTableInfo::kSubMessage: + format("{::_pbi::FieldAuxDefaultMessage{}, &$1$},\n", + QualifiedDefaultInstanceName( + aux_entry.field->message_type(), options_)); + break; + case TailCallTableInfo::kSubTable: + format("{::_pbi::TcParser::GetTable<$1$>()},\n", + QualifiedClassName(aux_entry.field->message_type(), + options_)); + break; + case TailCallTableInfo::kEnumRange: + format("{$1$, $2$},\n", aux_entry.enum_range.start, + aux_entry.enum_range.size); + break; + case TailCallTableInfo::kEnumValidator: + format( + "{$1$_IsValid},\n", + QualifiedClassName(aux_entry.field->enum_type(), options_)); + 
break; + case TailCallTableInfo::kNumericOffset: + format("{_fl::Offset{$1$}},\n", aux_entry.offset); + break; + } } } format("}}, {{\n"); @@ -929,155 +622,165 @@ void ParseFunctionGenerator::GenerateFastFieldEntries(Formatter& format) { format("{::_pbi::TcParser::MiniParse, {}},\n"); } else { GOOGLE_CHECK(!ShouldSplit(info.field, options_)); + + std::string func_name = info.func_name; + // For 1-byte tags we have a more optimized version of the varint parser + // that can hardcode the offset and has bit. + if (HasSuffixString(func_name, "V8S1") || + HasSuffixString(func_name, "V32S1") || + HasSuffixString(func_name, "V64S1")) { + std::string field_type = HasSuffixString(func_name, "V8S1") ? "bool" + : HasSuffixString(func_name, "V32S1") + ? "uint32_t" + : "uint64_t"; + func_name = + StrCat("::_pbi::TcParser::SingularVarintNoZag1<", field_type, + ", offsetof(", // + ClassName(info.field->containing_type()), // + ", ", // + FieldMemberName(info.field, /*split=*/false), // + "), ", // + info.hasbit_idx, // + ">()"); + } + format( "{$1$,\n" " {$2$, $3$, $4$, PROTOBUF_FIELD_OFFSET($classname$, $5$)}},\n", - info.func_name, info.coded_tag, info.hasbit_idx, info.aux_idx, + func_name, info.coded_tag, info.hasbit_idx, info.aux_idx, FieldMemberName(info.field, /*split=*/false)); } } } static void FormatFieldKind(Formatter& format, - const TailCallTableInfo::FieldEntryInfo& entry, - const Options& options, - MessageSCCAnalyzer* scc_analyzer) { - const FieldDescriptor* field = entry.field; - // Spell the field kind in proto language declaration order, starting with - // cardinality: - format("(::_fl::kFc"); - if (HasHasbit(field)) { - format("Optional"); - } else if (field->is_repeated()) { - format("Repeated"); - } else if (field->real_containing_oneof()) { - format("Oneof"); - } else { - format("Singular"); - } + const TailCallTableInfo::FieldEntryInfo& entry) { + // In here we convert the runtime value of entry.type_card back into a + // sequence of literal enum labels. 
We use the mnemonic labels for nicer + // codegen. + namespace fl = internal::field_layout; + const uint16_t type_card = entry.type_card; + const int rep_index = (type_card & fl::kRepMask) >> fl::kRepShift; + const int tv_index = (type_card & fl::kTvMask) >> fl::kTvShift; + + format("("); + static constexpr const char* kFieldCardNames[] = {"Singular", "Optional", + "Repeated", "Oneof"}; + static_assert((fl::kFcSingular >> fl::kFcShift) == 0, ""); + static_assert((fl::kFcOptional >> fl::kFcShift) == 1, ""); + static_assert((fl::kFcRepeated >> fl::kFcShift) == 2, ""); + static_assert((fl::kFcOneof >> fl::kFcShift) == 3, ""); + + format("::_fl::kFc$1$", + kFieldCardNames[(type_card & fl::kFcMask) >> fl::kFcShift]); + +#define PROTOBUF_INTERNAL_TYPE_CARD_CASE(x) \ + case fl::k##x: \ + format(" | ::_fl::k" #x); \ + break + + switch (type_card & fl::kFkMask) { + case fl::kFkString: { + switch (type_card & ~fl::kFcMask & ~fl::kRepMask & ~fl::kSplitMask) { + PROTOBUF_INTERNAL_TYPE_CARD_CASE(Bytes); + PROTOBUF_INTERNAL_TYPE_CARD_CASE(RawString); + PROTOBUF_INTERNAL_TYPE_CARD_CASE(Utf8String); + default: + GOOGLE_LOG(FATAL) << "Unknown type_card: 0x" << type_card; + } - // The rest of the type uses convenience aliases: - format(" | ::_fl::k"); - if (field->is_repeated() && field->is_packed()) { - format("Packed"); - } - switch (field->type()) { - case FieldDescriptor::TYPE_DOUBLE: - format("Double"); - break; - case FieldDescriptor::TYPE_FLOAT: - format("Float"); - break; - case FieldDescriptor::TYPE_FIXED32: - format("Fixed32"); - break; - case FieldDescriptor::TYPE_SFIXED32: - format("SFixed32"); - break; - case FieldDescriptor::TYPE_FIXED64: - format("Fixed64"); - break; - case FieldDescriptor::TYPE_SFIXED64: - format("SFixed64"); - break; - case FieldDescriptor::TYPE_BOOL: - format("Bool"); + static constexpr const char* kRepNames[] = {"AString", "IString", "Cord", + "SPiece", "SString"}; + static_assert((fl::kRepAString >> fl::kRepShift) == 0, ""); + 
static_assert((fl::kRepIString >> fl::kRepShift) == 1, ""); + static_assert((fl::kRepCord >> fl::kRepShift) == 2, ""); + static_assert((fl::kRepSPiece >> fl::kRepShift) == 3, ""); + static_assert((fl::kRepSString >> fl::kRepShift) == 4, ""); + + format(" | ::_fl::kRep$1$", kRepNames[rep_index]); break; - case FieldDescriptor::TYPE_ENUM: - if (HasPreservingUnknownEnumSemantics(field)) { - // No validation is required. - format("OpenEnum"); - } else if (entry.is_enum_range) { - // Validation is done by range check (start/length in FieldAux). - format("EnumRange"); - } else { - // Validation uses the generated _IsValid function. - format("Enum"); + } + + case fl::kFkMessage: { + format(" | ::_fl::kMessage"); + + static constexpr const char* kRepNames[] = {nullptr, "Group", "Lazy", + "IWeak"}; + static_assert((fl::kRepGroup >> fl::kRepShift) == 1, ""); + static_assert((fl::kRepLazy >> fl::kRepShift) == 2, ""); + static_assert((fl::kRepIWeak >> fl::kRepShift) == 3, ""); + + if (auto* rep = kRepNames[rep_index]) { + format(" | ::_fl::kRep$1$", rep); } - break; - case FieldDescriptor::TYPE_UINT32: - format("UInt32"); - break; - case FieldDescriptor::TYPE_SINT32: - format("SInt32"); - break; - case FieldDescriptor::TYPE_INT32: - format("Int32"); - break; - case FieldDescriptor::TYPE_UINT64: - format("UInt64"); - break; - case FieldDescriptor::TYPE_SINT64: - format("SInt64"); - break; - case FieldDescriptor::TYPE_INT64: - format("Int64"); - break; - case FieldDescriptor::TYPE_BYTES: - format("Bytes"); - break; - case FieldDescriptor::TYPE_STRING: { - auto mode = GetUtf8CheckMode(field, options); - switch (mode) { - case Utf8CheckMode::kStrict: - format("Utf8String"); - break; - case Utf8CheckMode::kVerify: - format("RawString"); - break; - case Utf8CheckMode::kNone: - // Treat LITE_RUNTIME strings as bytes. 
- format("Bytes"); - break; - default: - GOOGLE_LOG(FATAL) << "Invalid Utf8CheckMode (" << static_cast(mode) - << ") for " << field->DebugString(); + static constexpr const char* kXFormNames[] = {nullptr, "Default", + "Table"}; + static_assert((fl::kTvDefault >> fl::kTvShift) == 1, ""); + static_assert((fl::kTvTable >> fl::kTvShift) == 2, ""); + + if (auto* xform = kXFormNames[tv_index]) { + format(" | ::_fl::kTv$1$", xform); } break; } - case FieldDescriptor::TYPE_GROUP: - format("Message | ::_fl::kRepGroup"); - if (UseDirectTcParserTable(field, options)) { - format(" | ::_fl::kTvTable"); - } else { - format(" | ::_fl::kTvDefault"); - } + case fl::kFkMap: + format(" | ::_fl::kMap"); break; - case FieldDescriptor::TYPE_MESSAGE: - if (field->is_map()) { - format("Map"); - } else { - format("Message"); - if (IsLazy(field, options, scc_analyzer)) { - format(" | ::_fl::kRepLazy"); - } else if (IsImplicitWeakField(field, options, scc_analyzer)) { - format(" | ::_fl::kRepIWeak"); - } - if (UseDirectTcParserTable(field, options)) { - format(" | ::_fl::kTvTable"); - } else { - format(" | ::_fl::kTvDefault"); - } - } + + case fl::kFkNone: break; - } - // Fill in extra information about string and bytes field representations. 
- if (field->type() == FieldDescriptor::TYPE_BYTES || - field->type() == FieldDescriptor::TYPE_STRING) { - if (field->is_repeated()) { - format(" | ::_fl::kRepSString"); - } else { - format(" | ::_fl::kRepAString"); + case fl::kFkVarint: + case fl::kFkPackedVarint: + case fl::kFkFixed: + case fl::kFkPackedFixed: { + switch (type_card & ~fl::kFcMask & ~fl::kSplitMask) { + PROTOBUF_INTERNAL_TYPE_CARD_CASE(Bool); + PROTOBUF_INTERNAL_TYPE_CARD_CASE(Fixed32); + PROTOBUF_INTERNAL_TYPE_CARD_CASE(UInt32); + PROTOBUF_INTERNAL_TYPE_CARD_CASE(SFixed32); + PROTOBUF_INTERNAL_TYPE_CARD_CASE(Int32); + PROTOBUF_INTERNAL_TYPE_CARD_CASE(SInt32); + PROTOBUF_INTERNAL_TYPE_CARD_CASE(Float); + PROTOBUF_INTERNAL_TYPE_CARD_CASE(Enum); + PROTOBUF_INTERNAL_TYPE_CARD_CASE(EnumRange); + PROTOBUF_INTERNAL_TYPE_CARD_CASE(OpenEnum); + PROTOBUF_INTERNAL_TYPE_CARD_CASE(Fixed64); + PROTOBUF_INTERNAL_TYPE_CARD_CASE(UInt64); + PROTOBUF_INTERNAL_TYPE_CARD_CASE(SFixed64); + PROTOBUF_INTERNAL_TYPE_CARD_CASE(Int64); + PROTOBUF_INTERNAL_TYPE_CARD_CASE(SInt64); + PROTOBUF_INTERNAL_TYPE_CARD_CASE(Double); + PROTOBUF_INTERNAL_TYPE_CARD_CASE(PackedBool); + PROTOBUF_INTERNAL_TYPE_CARD_CASE(PackedFixed32); + PROTOBUF_INTERNAL_TYPE_CARD_CASE(PackedUInt32); + PROTOBUF_INTERNAL_TYPE_CARD_CASE(PackedSFixed32); + PROTOBUF_INTERNAL_TYPE_CARD_CASE(PackedInt32); + PROTOBUF_INTERNAL_TYPE_CARD_CASE(PackedSInt32); + PROTOBUF_INTERNAL_TYPE_CARD_CASE(PackedFloat); + PROTOBUF_INTERNAL_TYPE_CARD_CASE(PackedEnum); + PROTOBUF_INTERNAL_TYPE_CARD_CASE(PackedEnumRange); + PROTOBUF_INTERNAL_TYPE_CARD_CASE(PackedOpenEnum); + PROTOBUF_INTERNAL_TYPE_CARD_CASE(PackedFixed64); + PROTOBUF_INTERNAL_TYPE_CARD_CASE(PackedUInt64); + PROTOBUF_INTERNAL_TYPE_CARD_CASE(PackedSFixed64); + PROTOBUF_INTERNAL_TYPE_CARD_CASE(PackedInt64); + PROTOBUF_INTERNAL_TYPE_CARD_CASE(PackedSInt64); + PROTOBUF_INTERNAL_TYPE_CARD_CASE(PackedDouble); + default: + GOOGLE_LOG(FATAL) << "Unknown type_card: 0x" << type_card; + } } } - if (ShouldSplit(field, options)) 
{ + if (type_card & fl::kSplitMask) { format(" | ::_fl::kSplitTrue"); } +#undef PROTOBUF_INTERNAL_TYPE_CARD_CASE + format(")"); } @@ -1112,72 +815,43 @@ void ParseFunctionGenerator::GenerateFieldEntries(Formatter& format) { format("0, "); } format("$1$,\n ", entry.aux_idx); - FormatFieldKind(format, entry, options_, scc_analyzer_); + FormatFieldKind(format, entry); } format("},\n"); } } -static constexpr int kMaxNameLength = 255; - -int ParseFunctionGenerator::CalculateFieldNamesSize() const { - // The full name of the message appears first. - int size = std::min(static_cast(descriptor_->full_name().size()), - kMaxNameLength); - int lengths_size = 1; - for (const auto& entry : tc_table_info_->field_entries) { - const FieldDescriptor* field = entry.field; - GOOGLE_CHECK_LE(field->name().size(), kMaxNameLength); - size += field->name().size(); - lengths_size += 1; - } - // align to an 8-byte boundary - lengths_size = (lengths_size + 7) & -8; - return size + lengths_size + 1; -} +void ParseFunctionGenerator::GenerateFieldNames(Formatter& format) { + // We could just output the bytes directly, but we want it to look better than + // that in the source code. Also, it is more efficient for compilation time to + // have a literal string than an initializer list of chars. -static void FormatOctal(Formatter& format, int size) { - int octal_size = ((size >> 6) & 3) * 100 + // - ((size >> 3) & 7) * 10 + // - ((size >> 0) & 7); - format("\\$1$", octal_size); -} + const int total_sizes = + static_cast(((tc_table_info_->field_entries.size() + 1) + 7) & ~7); + const uint8_t* p = tc_table_info_->field_name_data.data(); + const uint8_t* sizes = p; + const uint8_t* sizes_end = sizes + total_sizes; -void ParseFunctionGenerator::GenerateFieldNames(Formatter& format) { - // First, we output the size of each string, as an unsigned byte. The first - // string is the message name. 
- int count = 1; + // First print all the sizes as octal format("\""); - FormatOctal(format, - std::min(static_cast(descriptor_->full_name().size()), 255)); - for (const auto& entry : tc_table_info_->field_entries) { - FormatOctal(format, entry.field->name().size()); - ++count; - } - while (count & 7) { // align to an 8-byte boundary - format("\\0"); - ++count; + for (int i = 0; i < total_sizes; ++i) { + int size = *p++; + int octal_size = ((size >> 6) & 3) * 100 + // + ((size >> 3) & 7) * 10 + // + ((size >> 0) & 7); + format("\\$1$", octal_size); } format("\"\n"); - // The message name is stored at the beginning of the string - std::string message_name = descriptor_->full_name(); - if (message_name.size() > kMaxNameLength) { - static constexpr int kNameHalfLength = (kMaxNameLength - 3) / 2; - message_name = StrCat( - message_name.substr(0, kNameHalfLength), "...", - message_name.substr(message_name.size() - kNameHalfLength)); - } - format("\"$1$\"\n", message_name); - // Then we output the actual field names - for (const auto& entry : tc_table_info_->field_entries) { - const FieldDescriptor* field = entry.field; - format("\"$1$\"\n", field->name()); + + // Then print each name in a line of its own + for (; sizes < sizes_end && sizes[0] != 0; p += *sizes++) { + format("\"$1$\"\n", std::string(p, p + *sizes)); } } void ParseFunctionGenerator::GenerateArenaString(Formatter& format, const FieldDescriptor* field) { - if (HasHasbit(field)) { + if (internal::cpp::HasHasbit(field)) { format("_Internal::set_has_$1$(&$has_bits$);\n", FieldName(field)); } format( @@ -1239,7 +913,9 @@ void ParseFunctionGenerator::GenerateStrings(Formatter& format, // to verify UTF8 when we already know parsing failed. 
format("CHK_(ptr);\n"); if (!check_utf8) return; // return if this is a bytes field - auto level = GetUtf8CheckMode(field, options_); + auto level = internal::cpp::GetUtf8CheckMode( + field, + GetOptimizeFor(field->file(), options_) == FileOptions::LITE_RUNTIME); switch (level) { case Utf8CheckMode::kNone: return; @@ -1274,7 +950,7 @@ void ParseFunctionGenerator::GenerateLengthDelim(Formatter& format, const FieldDescriptor* field) { if (field->is_packable()) { if (field->type() == FieldDescriptor::TYPE_ENUM && - !HasPreservingUnknownEnumSemantics(field)) { + !internal::cpp::HasPreservingUnknownEnumSemantics(field)) { std::string enum_type = QualifiedClassName(field->enum_type(), options_); format( "ptr = " @@ -1303,7 +979,7 @@ void ParseFunctionGenerator::GenerateLengthDelim(Formatter& format, const FieldDescriptor* val = field->message_type()->map_value(); GOOGLE_CHECK(val); if (val->type() == FieldDescriptor::TYPE_ENUM && - !HasPreservingUnknownEnumSemantics(field)) { + !internal::cpp::HasPreservingUnknownEnumSemantics(field)) { format( "auto object = " "::$proto_ns$::internal::InitEnumParseWrapper<" @@ -1337,7 +1013,7 @@ void ParseFunctionGenerator::GenerateLengthDelim(Formatter& format, "}\n" "auto* lazy_field = $msg$$field$;\n", field->containing_oneof()->name()); - } else if (HasHasbit(field)) { + } else if (internal::cpp::HasHasbit(field)) { format( "_Internal::set_has_$name$(&$has_bits$);\n" "auto* lazy_field = &$msg$$field$;\n"); @@ -1428,12 +1104,12 @@ void ParseFunctionGenerator::GenerateFieldBody( format( "$uint64$ val = ::$proto_ns$::internal::ReadVarint64(&ptr);\n" "CHK_(ptr);\n"); - if (!HasPreservingUnknownEnumSemantics(field)) { + if (!internal::cpp::HasPreservingUnknownEnumSemantics(field)) { format("if (PROTOBUF_PREDICT_TRUE($enum_type$_IsValid(val))) {\n"); format.Indent(); } format("$msg$_internal_$put_field$(static_cast<$enum_type$>(val));\n"); - if (!HasPreservingUnknownEnumSemantics(field)) { + if 
(!internal::cpp::HasPreservingUnknownEnumSemantics(field)) { format.Outdent(); format( "} else {\n" @@ -1460,7 +1136,7 @@ void ParseFunctionGenerator::GenerateFieldBody( "CHK_(ptr);\n", zigzag, size); } else { - if (HasHasbit(field)) { + if (internal::cpp::HasHasbit(field)) { format("_Internal::set_has_$name$(&$has_bits$);\n"); } format( @@ -1479,7 +1155,7 @@ void ParseFunctionGenerator::GenerateFieldBody( "::$proto_ns$::internal::UnalignedLoad<$primitive_type$>(ptr));\n" "ptr += sizeof($primitive_type$);\n"); } else { - if (HasHasbit(field)) { + if (internal::cpp::HasHasbit(field)) { format("_Internal::set_has_$name$(&$has_bits$);\n"); } format( @@ -1667,126 +1343,12 @@ void ParseFunctionGenerator::GenerateFieldSwitch( format("} // switch\n"); } -namespace { - +#if 0 void PopulateFastFieldEntry(const Descriptor* descriptor, const TailCallTableInfo::FieldEntryInfo& entry, const Options& options, TailCallTableInfo::FastFieldInfo& info) { - const FieldDescriptor* field = entry.field; - std::string name; - uint8_t aux_idx = static_cast(entry.aux_idx); - - switch (field->type()) { - case FieldDescriptor::TYPE_FIXED32: - case FieldDescriptor::TYPE_SFIXED32: - case FieldDescriptor::TYPE_FLOAT: - name.append("F32"); - break; - - case FieldDescriptor::TYPE_FIXED64: - case FieldDescriptor::TYPE_SFIXED64: - case FieldDescriptor::TYPE_DOUBLE: - name.append("F64"); - break; - - case FieldDescriptor::TYPE_BOOL: - name.append("V8"); - break; - case FieldDescriptor::TYPE_INT32: - case FieldDescriptor::TYPE_UINT32: - name.append("V32"); - break; - case FieldDescriptor::TYPE_INT64: - case FieldDescriptor::TYPE_UINT64: - name.append("V64"); - break; - - case FieldDescriptor::TYPE_ENUM: - if (HasPreservingUnknownEnumSemantics(field)) { - name.append("V32"); - break; - } - if (field->is_repeated() && field->is_packed()) { - GOOGLE_LOG(DFATAL) << "Enum validation not handled: " << field->DebugString(); - return; - } - if (entry.is_enum_range) { - name.append("Er"); - if 
(entry.enum_range_max <= 127) { - if (entry.enum_range_min == 0) { - name.append("0"); - aux_idx = entry.enum_range_max; - } else if (entry.enum_range_min == 1) { - name.append("1"); - aux_idx = entry.enum_range_max; - } - } - } else { - name.append("Ev"); - } - break; - - case FieldDescriptor::TYPE_SINT32: - name.append("Z32"); - break; - case FieldDescriptor::TYPE_SINT64: - name.append("Z64"); - break; - - case FieldDescriptor::TYPE_BYTES: - name.append("B"); - if (IsStringInlined(field, options)) { - name.append("i"); - } - break; - case FieldDescriptor::TYPE_STRING: - switch (GetUtf8CheckMode(field, options)) { - case Utf8CheckMode::kNone: - name.append("B"); - break; - case Utf8CheckMode::kVerify: - name.append("S"); - break; - case Utf8CheckMode::kStrict: - name.append("U"); - break; - default: - GOOGLE_LOG(DFATAL) << "Mode not handled: " - << static_cast(GetUtf8CheckMode(field, options)); - return; - } - if (IsStringInlined(field, options)) { - name.append("i"); - GOOGLE_CHECK(!field->is_repeated()); - aux_idx = static_cast(entry.inlined_string_idx); - } - break; - - case FieldDescriptor::TYPE_MESSAGE: - name.append(UseDirectTcParserTable(field, options) ? "Mt" : "Md"); - break; - case FieldDescriptor::TYPE_GROUP: - name.append(UseDirectTcParserTable(field, options) ? "Gt" : "Gd"); - break; - - default: - GOOGLE_LOG(DFATAL) << "Type not handled: " << field->DebugString(); - return; - } - - // The field implementation functions are prefixed by cardinality: - // `S` for optional or implicit fields. - // `R` for non-packed repeated. - // `P` for packed repeated. - name.append(field->is_packed() ? "P" - : field->is_repeated() ? "R" - : field->real_containing_oneof() ? "O" - : "S"); - - // Append the tag length. Fast parsing only handles 1- or 2-byte tags. - name.append(TagSize(field->number()) == 1 ? "1" : "2"); - + ..... 
if (name == "V8S1") { info.func_name = StrCat( "::_pbi::TcParser::SingularVarintNoZag1 #include #include +#include namespace google { namespace protobuf { namespace compiler { namespace cpp { -// Helper class for generating tailcall parsing functions. -struct TailCallTableInfo { - TailCallTableInfo(const Descriptor* descriptor, const Options& options, - const std::vector& ordered_fields, - const std::vector& has_bit_indices, - const std::vector& inlined_string_indices, - MessageSCCAnalyzer* scc_analyzer); - - // Fields parsed by the table fast-path. - struct FastFieldInfo { - std::string func_name; - const FieldDescriptor* field; - uint16_t coded_tag; - uint8_t hasbit_idx; - uint8_t aux_idx; - }; - std::vector fast_path_fields; - - // Fields parsed by mini parsing routines. - struct FieldEntryInfo { - const FieldDescriptor* field; - int hasbit_idx; - int inlined_string_idx; - uint16_t aux_idx; - // True for enums entirely covered by the start/length fields of FieldAux: - bool is_enum_range; - int32_t enum_range_min; - int32_t enum_range_max; - }; - std::vector field_entries; - std::vector aux_entries; - - // Fields parsed by generated fallback function. - std::vector fallback_fields; - - // Table size. - int table_size_log2; - // Mask for has-bits of required fields. - uint32_t has_hasbits_required_mask; - // True if a generated fallback function is required instead of generic. - bool use_generated_fallback; -}; - // ParseFunctionGenerator generates the _InternalParse function for a message // (and any associated supporting members). class ParseFunctionGenerator { @@ -113,6 +71,8 @@ class ParseFunctionGenerator { void GenerateDataDefinitions(io::Printer* printer); private: + class GeneratedOptionProvider; + // Returns true if tailcall table code should be generated. 
bool should_generate_tctable() const; @@ -136,7 +96,6 @@ class ParseFunctionGenerator { void GenerateTailCallTable(Formatter& format); void GenerateFastFieldEntries(Formatter& format); void GenerateFieldEntries(Formatter& format); - int CalculateFieldNamesSize() const; void GenerateFieldNames(Formatter& format); // Generates parsing code for an `ArenaString` field. @@ -168,7 +127,7 @@ class ParseFunctionGenerator { MessageSCCAnalyzer* scc_analyzer_; const Options& options_; std::map variables_; - std::unique_ptr tc_table_info_; + std::unique_ptr tc_table_info_; std::vector inlined_string_indices_; const std::vector ordered_fields_; int num_hasbits_; diff --git a/src/google/protobuf/compiler/cpp/string_field.cc b/src/google/protobuf/compiler/cpp/string_field.cc index 3c90dc001d..9629e73df2 100644 --- a/src/google/protobuf/compiler/cpp/string_field.cc +++ b/src/google/protobuf/compiler/cpp/string_field.cc @@ -286,7 +286,7 @@ void StringFieldGenerator::GenerateInlineAccessorDefinitions( "$maybe_prepare_split_message$" " // @@protoc_insertion_point(field_release:$full_name$)\n"); - if (HasHasbit(descriptor_)) { + if (internal::cpp::HasHasbit(descriptor_)) { format( " if (!_internal_has_$name$()) {\n" " return nullptr;\n" @@ -375,7 +375,7 @@ void StringFieldGenerator::GenerateMessageClearingCode( // If we have a hasbit, then the Clear() method of the protocol buffer // will have checked that this field is set. If so, we can avoid redundant // checks against the default variable. 
- const bool must_be_present = HasHasbit(descriptor_); + const bool must_be_present = internal::cpp::HasHasbit(descriptor_); if (inlined_ && must_be_present) { // Calling mutable_$name$() gives us a string reference and sets the has bit @@ -451,7 +451,7 @@ void StringFieldGenerator::GenerateCopyConstructorCode( format("new (&_this->$field$) ::_pbi::InlinedStringField();\n"); } - if (HasHasbit(descriptor_)) { + if (internal::cpp::HasHasbit(descriptor_)) { format("if (from._internal_has_$name$()) {\n"); } else { format("if (!from._internal_$name$().empty()) {\n"); diff --git a/src/google/protobuf/compiler/csharp/csharp_helpers.h b/src/google/protobuf/compiler/csharp/csharp_helpers.h index 836bd5de5e..1d07359de5 100644 --- a/src/google/protobuf/compiler/csharp/csharp_helpers.h +++ b/src/google/protobuf/compiler/csharp/csharp_helpers.h @@ -91,7 +91,9 @@ std::string GetOneofCaseName(const FieldDescriptor* descriptor); int GetFixedSize(FieldDescriptor::Type type); -std::string UnderscoresToCamelCase(const std::string& input, +// Note that we wouldn't normally want to export this (we're not expecting +// it to be used outside libprotoc itself) but this exposes it for testing. +std::string PROTOC_EXPORT UnderscoresToCamelCase(const std::string& input, bool cap_next_letter, bool preserve_period); diff --git a/src/google/protobuf/compiler/python/generator.cc b/src/google/protobuf/compiler/python/generator.cc index 70b08042bb..068b4fd12d 100644 --- a/src/google/protobuf/compiler/python/generator.cc +++ b/src/google/protobuf/compiler/python/generator.cc @@ -90,29 +90,7 @@ std::string ModuleAlias(const std::string& filename) { // in proto2/public/reflection.py. const char kDescriptorKey[] = "DESCRIPTOR"; - -// file output by this generator. -void PrintTopBoilerplate(io::Printer* printer, const FileDescriptor* file, - bool descriptor_proto) { - // TODO(robinson): Allow parameterization of Python version? 
- printer->Print( - "# -*- coding: utf-8 -*-\n" - "# Generated by the protocol buffer compiler. DO NOT EDIT!\n" - "# source: $filename$\n" - "\"\"\"Generated protocol buffer code.\"\"\"\n", - "filename", file->name()); - printer->Print( - "from google.protobuf.internal import builder as _builder\n" - "from google.protobuf import descriptor as _descriptor\n" - "from google.protobuf import descriptor_pool as " - "_descriptor_pool\n" - "from google.protobuf import symbol_database as " - "_symbol_database\n"); - - printer->Print("# @@protoc_insertion_point(imports)\n\n"); - printer->Print("_sym_db = _symbol_database.Default()\n"); - printer->Print("\n\n"); -} +const char kThirdPartyPrefix[] = "google3.third_party.py."; // Returns a Python literal giving the default value for a field. // If the field specifies no explicit default value, we'll return @@ -217,12 +195,18 @@ bool Generator::Generate(const FileDescriptor* file, GeneratorContext* context, std::string* error) const { // ----------------------------------------------------------------- // parse generator options + bool bootstrap = false; std::vector > options; ParseGeneratorParameter(parameter, &options); for (int i = 0; i < options.size(); i++) { - if (options[i].first == "pyi_out") { + if (!opensource_runtime_ && + options[i].first == "no_enforce_api_compatibility") { + // TODO(b/241584880): remove this legacy option, it has no effect. 
+ } else if (!opensource_runtime_ && options[i].first == "bootstrap") { + bootstrap = true; + } else if (options[i].first == "pyi_out") { python::PyiGenerator pyi_generator; if (!pyi_generator.Generate(file, "", context, error)) { return false; @@ -249,13 +233,50 @@ bool Generator::Generate(const FileDescriptor* file, file_->CopyTo(&fdp); fdp.SerializeToString(&file_descriptor_serialized_); + if (!opensource_runtime_ && GeneratingDescriptorProto()) { + std::string bootstrap_filename = + "net/proto2/python/internal/descriptor_pb2.py"; + if (bootstrap) { + filename = bootstrap_filename; + } else { + std::unique_ptr output(context->Open(filename)); + io::Printer printer(output.get(), '$'); + printer.Print( + "from $internal_package$ import descriptor_pb2\n" + "\n", + "internal_package", InternalPackage()); + + // For static checkers, we need to explicitly assign to the symbols we + // publicly export. + for (int i = 0; i < file_->message_type_count(); i++) { + const Descriptor* message = file_->message_type(i); + printer.Print("$name$ = descriptor_pb2.$name$\n", "name", + message->name()); + } + + // Sadly some clients access our internal variables (starting with "_"). + // To support them, we iterate over *all* symbols to expose even the + // private ones. Statically type-checked code should (especially) never + // use these, so we don't worry about making them available to pytype + // checks. 
+ printer.Print( + "\n" + "globals().update(descriptor_pb2.__dict__)\n" + "\n"); + + printer.Print( + "# @@protoc_insertion_point(module_scope)\n" + "\n"); + return true; + } + } std::unique_ptr output(context->Open(filename)); GOOGLE_CHECK(output.get()); io::Printer printer(output.get(), '$'); printer_ = &printer; - PrintTopBoilerplate(printer_, file_, GeneratingDescriptorProto()); + PrintTopBoilerplate(); PrintImports(); PrintFileDescriptor(); if (GeneratingDescriptorProto()) { @@ -277,6 +298,9 @@ bool Generator::Generate(const FileDescriptor* file, printer_->Outdent(); } std::string module_name = ModuleName(file->name()); + if (!opensource_runtime_) { + module_name = StripPrefixString(module_name, kThirdPartyPrefix); + } printer_->Print( "_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, '$module_name$', " "globals())\n", @@ -310,6 +334,34 @@ bool Generator::Generate(const FileDescriptor* file, return !printer.failed(); } +// file output by this generator. +void Generator::PrintTopBoilerplate() const { + // TODO(robinson): Allow parameterization of Python version? + printer_->Print( + "# -*- coding: utf-8 -*-\n" + "# Generated by the protocol buffer compiler. DO NOT EDIT!\n" + "# source: $filename$\n" + "\"\"\"Generated protocol buffer code.\"\"\"\n", + "filename", file_->name()); + if (!opensource_runtime_) { + // This import is needed so that compatibility proto1 compiler output + // inserted at protoc_insertion_point can refer to other protos like + // google3.a.b.c. Code generated by proto2 compiler doesn't do it, and + // instead uses aliases assigned when importing modules. 
+ printer_->Print("import google3\n"); + } + printer_->Print( + "from $internal_package$ import builder as _builder\n" + "from $public_package$ import descriptor as _descriptor\n" + "from $public_package$ import descriptor_pool as _descriptor_pool\n" + "from $public_package$ import symbol_database as _symbol_database\n", + "internal_package", InternalPackage(), "public_package", PublicPackage()); + + printer_->Print("# @@protoc_insertion_point(imports)\n\n"); + printer_->Print("_sym_db = _symbol_database.Default()\n"); + printer_->Print("\n\n"); +} + // Prints Python imports for all modules imported by |file|. void Generator::PrintImports() const { for (int i = 0; i < file_->dependency_count(); ++i) { @@ -317,6 +369,9 @@ void Generator::PrintImports() const { std::string module_name = ModuleName(filename); std::string module_alias = ModuleAlias(filename); + if (!opensource_runtime_) { + module_name = StripPrefixString(module_name, kThirdPartyPrefix); + } if (ContainsPythonKeyword(module_name)) { // If the module path contains a Python keyword, we have to quote the // module name and import it using importlib. Otherwise the usual kind of @@ -347,6 +402,9 @@ void Generator::PrintImports() const { // Print public imports. 
for (int i = 0; i < file_->public_dependency_count(); ++i) { std::string module_name = ModuleName(file_->public_dependency(i)->name()); + if (!opensource_runtime_) { + module_name = StripPrefixString(module_name, kThirdPartyPrefix); + } printer_->Print("from $module$ import *\n", "module", module_name); } printer_->Print("\n"); @@ -517,6 +575,9 @@ void Generator::PrintDescriptorKeyAndModuleName( printer_->Print("$descriptor_key$ = $descriptor_name$,\n", "descriptor_key", kDescriptorKey, "descriptor_name", name); std::string module_name = ModuleName(file_->name()); + if (!opensource_runtime_) { + module_name = StripPrefixString(module_name, kThirdPartyPrefix); + } printer_->Print("__module__ = '$module_name$'\n", "module_name", module_name); } @@ -706,6 +767,9 @@ void Generator::PrintMessage(const Descriptor& message_descriptor, m["descriptor_name"] = ModuleLevelDescriptorName(message_descriptor); printer_->Print(m, "'$descriptor_key$' : $descriptor_name$,\n"); std::string module_name = ModuleName(file_->name()); + if (!opensource_runtime_) { + module_name = StripPrefixString(module_name, kThirdPartyPrefix); + } printer_->Print("'__module__' : '$module_name$'\n", "module_name", module_name); printer_->Print("# @@protoc_insertion_point(class_scope:$full_name$)\n", @@ -1114,6 +1178,16 @@ std::string Generator::ModuleLevelServiceDescriptorName( return name; } +std::string Generator::PublicPackage() const { + return opensource_runtime_ ? "google.protobuf" + : "google3.net.google.protobuf.python.public"; +} + +std::string Generator::InternalPackage() const { + return opensource_runtime_ ? "google.protobuf.internal" + : "google3.net.google.protobuf.python.internal"; +} + // Prints standard constructor arguments serialized_start and serialized_end. // Args: // descriptor: The cpp descriptor to have a serialized reference. 
diff --git a/src/google/protobuf/compiler/python/generator.h b/src/google/protobuf/compiler/python/generator.h index 46b0ad1836..ed077eb651 100644 --- a/src/google/protobuf/compiler/python/generator.h +++ b/src/google/protobuf/compiler/python/generator.h @@ -132,6 +132,7 @@ class PROTOC_EXPORT Generator : public CodeGenerator { const FieldDescriptor& extension_field) const; void FixForeignFieldsInNestedExtensions(const Descriptor& descriptor) const; + void PrintTopBoilerplate() const; void PrintServices() const; void PrintServiceDescriptors() const; void PrintServiceDescriptor(const ServiceDescriptor& descriptor) const; @@ -149,6 +150,8 @@ class PROTOC_EXPORT Generator : public CodeGenerator { std::string ModuleLevelMessageName(const Descriptor& descriptor) const; std::string ModuleLevelServiceDescriptorName( const ServiceDescriptor& descriptor) const; + std::string PublicPackage() const; + std::string InternalPackage() const; template void PrintSerializedPbInterval(const DescriptorT& descriptor, diff --git a/src/google/protobuf/compiler/python/pyi_generator.cc b/src/google/protobuf/compiler/python/pyi_generator.cc index e954fd317f..c542b10def 100644 --- a/src/google/protobuf/compiler/python/pyi_generator.cc +++ b/src/google/protobuf/compiler/python/pyi_generator.cc @@ -66,6 +66,16 @@ std::string PyiGenerator::ModuleLevelName(const DescriptorT& descriptor) const { return name; } +std::string PyiGenerator::PublicPackage() const { + return opensource_runtime_ ? "google.protobuf" + : "google3.net.google.protobuf.python.public"; +} + +std::string PyiGenerator::InternalPackage() const { + return opensource_runtime_ ? 
"google.protobuf.internal" + : "google3.net.google.protobuf.python.internal"; +} + struct ImportModules { bool has_repeated = false; // _containers bool has_iterable = false; // typing.Iterable @@ -182,6 +192,10 @@ void PyiGenerator::PrintImports() const { if (file_->enum_type_count() > 0) { import_modules.has_enums = true; } + if (!opensource_runtime_ && file_->service_count() > 0) { + import_modules.has_optional = true; + import_modules.has_union = true; + } for (int i = 0; i < file_->message_type_count(); i++) { CheckImportModules(file_->message_type(i), &import_modules); } @@ -190,37 +204,50 @@ void PyiGenerator::PrintImports() const { // required in the proto file. if (import_modules.has_repeated) { printer_->Print( - "from google.protobuf.internal import containers as " - "_containers\n"); + "from $internal_package$ import containers as _containers\n", + "internal_package", InternalPackage()); } if (import_modules.has_enums) { printer_->Print( - "from google.protobuf.internal import enum_type_wrapper" - " as _enum_type_wrapper\n"); + "from $internal_package$ import enum_type_wrapper as " + "_enum_type_wrapper\n", + "internal_package", InternalPackage()); } if (import_modules.has_extendable) { printer_->Print( - "from google.protobuf.internal import python_message" - " as _python_message\n"); + "from $internal_package$ import python_message as _python_message\n", + "internal_package", InternalPackage()); } if (import_modules.has_well_known_type) { printer_->Print( - "from google.protobuf.internal import well_known_types" - " as _well_known_types\n"); + "from $internal_package$ import well_known_types as " + "_well_known_types\n", + "internal_package", InternalPackage()); } - printer_->Print( - "from google.protobuf import" - " descriptor as _descriptor\n"); + printer_->Print("from $public_package$ import descriptor as _descriptor\n", + "public_package", PublicPackage()); if (import_modules.has_messages) { - printer_->Print( - "from google.protobuf import 
message as _message\n"); + printer_->Print("from $public_package$ import message as _message\n", + "public_package", PublicPackage()); } - if (HasGenericServices(file_)) { - printer_->Print( - "from google.protobuf import service as" - " _service\n"); + if (opensource_runtime_) { + if (HasGenericServices(file_)) { + printer_->Print("from $public_package$ import service as _service\n", + "public_package", PublicPackage()); + } + } else { + if (file_->service_count() > 0) { + printer_->Print( + "from google3.net.rpc.python import proto_python_api_2_stub as " + "_proto_python_api_2_stub\n" + "from google3.net.rpc.python import pywraprpc as _pywraprpc\n" + "from google3.net.rpc.python import rpcserver as _rpcserver\n"); + } } printer_->Print("from typing import "); + if (!opensource_runtime_ && file_->service_count() > 0) { + printer_->Print("Any as _Any, "); + } printer_->Print("ClassVar as _ClassVar"); if (import_modules.has_iterable) { printer_->Print(", Iterable as _Iterable"); @@ -514,6 +541,7 @@ void PyiGenerator::PrintServices() const { } } + bool PyiGenerator::Generate(const FileDescriptor* file, const std::string& parameter, GeneratorContext* context, @@ -522,6 +550,8 @@ bool PyiGenerator::Generate(const FileDescriptor* file, import_map_.clear(); // Calculate file name. file_ = file; + // In google3, devtools/python/blaze/pytype/pytype_impl.bzl uses --pyi_out to + // directly set the output file name. std::string filename = parameter.empty() ? 
GetFileName(file, ".pyi") : parameter; @@ -552,7 +582,7 @@ bool PyiGenerator::Generate(const FileDescriptor* file, PrintExtensions(*file_); PrintMessages(); - if (HasGenericServices(file)) { + if (opensource_runtime_ && HasGenericServices(file)) { PrintServices(); } return true; diff --git a/src/google/protobuf/compiler/python/pyi_generator.h b/src/google/protobuf/compiler/python/pyi_generator.h index 40741e1e78..be9d8a69cd 100644 --- a/src/google/protobuf/compiler/python/pyi_generator.h +++ b/src/google/protobuf/compiler/python/pyi_generator.h @@ -90,6 +90,10 @@ class PROTOC_EXPORT PyiGenerator : public google::protobuf::compiler::CodeGenera const FieldDescriptor& field_des, const Descriptor& containing_des) const; template std::string ModuleLevelName(const DescriptorT& descriptor) const; + std::string PublicPackage() const; + std::string InternalPackage() const; + + bool opensource_runtime_ = true; // Very coarse-grained lock to ensure that Generate() is reentrant. // Guards file_, printer_, and import_map_. diff --git a/src/google/protobuf/descriptor.cc b/src/google/protobuf/descriptor.cc index 139e6f892f..cbc9660cef 100644 --- a/src/google/protobuf/descriptor.cc +++ b/src/google/protobuf/descriptor.cc @@ -8335,6 +8335,48 @@ void LazyDescriptor::Once(const ServiceDescriptor* service) { } } +namespace cpp { +bool HasPreservingUnknownEnumSemantics(const FieldDescriptor* field) { + return field->file()->syntax() == FileDescriptor::SYNTAX_PROTO3; +} + +bool HasHasbit(const FieldDescriptor* field) { + // This predicate includes proto3 message fields only if they have "optional". + // Foo submsg1 = 1; // HasHasbit() == false + // optional Foo submsg2 = 2; // HasHasbit() == true + // This is slightly odd, as adding "optional" to a singular proto3 field does + // not change the semantics or API. 
However whenever any field in a message + // has a hasbit, it forces reflection to include hasbit offsets for *all* + // fields, even if almost all of them are set to -1 (no hasbit). So to avoid + // causing a sudden size regression for ~all proto3 messages, we give proto3 + // message fields a hasbit only if "optional" is present. If the user is + // explicitly writing "optional", it is likely they are writing it on + // primitive fields also. + return (field->has_optional_keyword() || field->is_required()) && + !field->options().weak(); +} + +static bool FieldEnforceUtf8(const FieldDescriptor* field) { + return true; +} + +static bool FileUtf8Verification(const FileDescriptor* file) { + return true; +} + +// Which level of UTF-8 enforcement is placed on this file. +Utf8CheckMode GetUtf8CheckMode(const FieldDescriptor* field, bool is_lite) { + if (field->file()->syntax() == FileDescriptor::SYNTAX_PROTO3 && + FieldEnforceUtf8(field)) { + return Utf8CheckMode::kStrict; + } else if (!is_lite && FileUtf8Verification(field->file())) { + return Utf8CheckMode::kVerify; + } else { + return Utf8CheckMode::kNone; + } +} + +} // namespace cpp } // namespace internal } // namespace protobuf diff --git a/src/google/protobuf/descriptor.h b/src/google/protobuf/descriptor.h index beb3e641c6..367027e744 100644 --- a/src/google/protobuf/descriptor.h +++ b/src/google/protobuf/descriptor.h @@ -59,6 +59,7 @@ #include #include +#include #include #include #include @@ -2438,6 +2439,75 @@ inline FileDescriptor::Syntax FileDescriptor::syntax() const { return static_cast(syntax_); } +namespace internal { + +// FieldRange(desc) provides an iterable range for the fields of a +// descriptor type, appropriate for range-for loops. 
+ +template +struct FieldRangeImpl; + +template +FieldRangeImpl FieldRange(const T* desc) { + return {desc}; +} + +template +struct FieldRangeImpl { + struct Iterator { + using iterator_category = std::forward_iterator_tag; + using value_type = const FieldDescriptor*; + using difference_type = int; + + value_type operator*() { return descriptor->field(idx); } + + friend bool operator==(const Iterator& a, const Iterator& b) { + GOOGLE_DCHECK(a.descriptor == b.descriptor); + return a.idx == b.idx; + } + friend bool operator!=(const Iterator& a, const Iterator& b) { + return !(a == b); + } + + Iterator& operator++() { + idx++; + return *this; + } + + int idx; + const T* descriptor; + }; + + Iterator begin() const { return {0, descriptor}; } + Iterator end() const { return {descriptor->field_count(), descriptor}; } + + const T* descriptor; +}; + +// The context for these functions under `cpp` is "for the C++ implementation". +// In particular, questions like "does this field have a has bit?" have a +// different answer depending on the language. +namespace cpp { +// Returns true if 'enum' semantics are such that unknown values are preserved +// in the enum field itself, rather than going to the UnknownFieldSet. +PROTOBUF_EXPORT bool HasPreservingUnknownEnumSemantics( + const FieldDescriptor* field); + +PROTOBUF_EXPORT bool HasHasbit(const FieldDescriptor* field); + +#ifndef SWIG +enum class Utf8CheckMode { + kStrict = 0, // Parsing will fail if non UTF-8 data is in string fields. + kVerify = 1, // Only log an error but parsing will succeed. + kNone = 2, // No UTF-8 check. 
+}; +PROTOBUF_EXPORT Utf8CheckMode GetUtf8CheckMode(const FieldDescriptor* field, + bool is_lite); +#endif // !SWIG + +} // namespace cpp +} // namespace internal + } // namespace protobuf } // namespace google diff --git a/src/google/protobuf/dynamic_message.cc b/src/google/protobuf/dynamic_message.cc index 3940564583..9214de6f64 100644 --- a/src/google/protobuf/dynamic_message.cc +++ b/src/google/protobuf/dynamic_message.cc @@ -105,22 +105,6 @@ namespace { bool IsMapFieldInApi(const FieldDescriptor* field) { return field->is_map(); } -// Sync with helpers.h. -inline bool HasHasbit(const FieldDescriptor* field) { - // This predicate includes proto3 message fields only if they have "optional". - // Foo submsg1 = 1; // HasHasbit() == false - // optional Foo submsg2 = 2; // HasHasbit() == true - // This is slightly odd, as adding "optional" to a singular proto3 field does - // not change the semantics or API. However whenever any field in a message - // has a hasbit, it forces reflection to include hasbit offsets for *all* - // fields, even if almost all of them are set to -1 (no hasbit). So to avoid - // causing a sudden size regression for ~all proto3 messages, we give proto3 - // message fields a hasbit only if "optional" is present. If the user is - // explicitly writing "optional", it is likely they are writing it on - // primitive fields also. - return (field->has_optional_keyword() || field->is_required()) && - !field->options().weak(); -} inline bool InRealOneof(const FieldDescriptor* field) { return field->containing_oneof() && @@ -705,7 +689,7 @@ const Message* DynamicMessageFactory::GetPrototypeNoLock( type_info->has_bits_offset = -1; int max_hasbit = 0; for (int i = 0; i < type->field_count(); i++) { - if (HasHasbit(type->field(i))) { + if (internal::cpp::HasHasbit(type->field(i))) { if (type_info->has_bits_offset == -1) { // At least one field in the message requires a hasbit, so allocate // hasbits. 
diff --git a/src/google/protobuf/extension_set.h b/src/google/protobuf/extension_set.h index 0e6d052110..efebf15401 100644 --- a/src/google/protobuf/extension_set.h +++ b/src/google/protobuf/extension_set.h @@ -1262,6 +1262,8 @@ class EnumTypeTraits { template static void Register(int number, FieldType type, bool is_packed, LazyEagerVerifyFnType fn) { + // Avoid -Wunused-parameter + (void)fn; ExtensionSet::RegisterEnumExtension(&ExtendeeT::default_instance(), number, type, false, is_packed, IsValid); } @@ -1328,6 +1330,8 @@ class RepeatedEnumTypeTraits { template static void Register(int number, FieldType type, bool is_packed, LazyEagerVerifyFnType fn) { + // Avoid -Wunused-parameter + (void)fn; ExtensionSet::RegisterEnumExtension(&ExtendeeT::default_instance(), number, type, true, is_packed, IsValid); } diff --git a/src/google/protobuf/generated_message_reflection.cc b/src/google/protobuf/generated_message_reflection.cc index 70014b2abb..fab8f2eb59 100644 --- a/src/google/protobuf/generated_message_reflection.cc +++ b/src/google/protobuf/generated_message_reflection.cc @@ -39,6 +39,8 @@ #include #include #include +#include +#include #include #include @@ -48,6 +50,8 @@ #include #include #include +#include +#include #include #include #include @@ -289,6 +293,12 @@ Reflection::Reflection(const Descriptor* descriptor, last_non_weak_field_index_ = descriptor_->field_count() - 1; } +Reflection::~Reflection() { + // No need to use sized delete. This code path is uncommon and it would not be + // worth saving or recalculating the size. 
+ ::operator delete(const_cast(tcparse_table_)); +} + const UnknownFieldSet& Reflection::GetUnknownFields( const Message& message) const { return GetInternalMetadata(message).unknown_fields( @@ -2962,6 +2972,355 @@ const MapFieldBase* Reflection::GetMapData(const Message& message, return &(GetRaw(message, field)); } +template +static uint32_t AlignTo(uint32_t v) { + return (v + alignof(T) - 1) & ~(alignof(T) - 1); +} + +static internal::TailCallParseFunc GetFastParseFunction( + const std::string& name) { + // This list must be synchronized with TcParser. + // Missing entries are replaced with MiniParse in opt mode to avoid runtime + // failures. It check-fails in debug mode. + static const auto* const map = + new std::unordered_map{ + {"::_pbi::TcParser::FastF32S1", internal::TcParser::FastF32S1}, + {"::_pbi::TcParser::FastF32S2", internal::TcParser::FastF32S2}, + {"::_pbi::TcParser::FastF32R1", internal::TcParser::FastF32R1}, + {"::_pbi::TcParser::FastF32R2", internal::TcParser::FastF32R2}, + {"::_pbi::TcParser::FastF32P1", internal::TcParser::FastF32P1}, + {"::_pbi::TcParser::FastF32P2", internal::TcParser::FastF32P2}, + {"::_pbi::TcParser::FastF64S1", internal::TcParser::FastF64S1}, + {"::_pbi::TcParser::FastF64S2", internal::TcParser::FastF64S2}, + {"::_pbi::TcParser::FastF64R1", internal::TcParser::FastF64R1}, + {"::_pbi::TcParser::FastF64R2", internal::TcParser::FastF64R2}, + {"::_pbi::TcParser::FastF64P1", internal::TcParser::FastF64P1}, + {"::_pbi::TcParser::FastF64P2", internal::TcParser::FastF64P2}, + {"::_pbi::TcParser::FastV8S1", internal::TcParser::FastV8S1}, + {"::_pbi::TcParser::FastV8S2", internal::TcParser::FastV8S2}, + {"::_pbi::TcParser::FastV8R1", internal::TcParser::FastV8R1}, + {"::_pbi::TcParser::FastV8R2", internal::TcParser::FastV8R2}, + {"::_pbi::TcParser::FastV8P1", internal::TcParser::FastV8P1}, + {"::_pbi::TcParser::FastV8P2", internal::TcParser::FastV8P2}, + {"::_pbi::TcParser::FastV32S1", internal::TcParser::FastV32S1}, + 
{"::_pbi::TcParser::FastV32S2", internal::TcParser::FastV32S2}, + {"::_pbi::TcParser::FastV32R1", internal::TcParser::FastV32R1}, + {"::_pbi::TcParser::FastV32R2", internal::TcParser::FastV32R2}, + {"::_pbi::TcParser::FastV32P1", internal::TcParser::FastV32P1}, + {"::_pbi::TcParser::FastV32P2", internal::TcParser::FastV32P2}, + {"::_pbi::TcParser::FastV64S1", internal::TcParser::FastV64S1}, + {"::_pbi::TcParser::FastV64S2", internal::TcParser::FastV64S2}, + {"::_pbi::TcParser::FastV64R1", internal::TcParser::FastV64R1}, + {"::_pbi::TcParser::FastV64R2", internal::TcParser::FastV64R2}, + {"::_pbi::TcParser::FastV64P1", internal::TcParser::FastV64P1}, + {"::_pbi::TcParser::FastV64P2", internal::TcParser::FastV64P2}, + {"::_pbi::TcParser::FastZ32S1", internal::TcParser::FastZ32S1}, + {"::_pbi::TcParser::FastZ32S2", internal::TcParser::FastZ32S2}, + {"::_pbi::TcParser::FastZ32R1", internal::TcParser::FastZ32R1}, + {"::_pbi::TcParser::FastZ32R2", internal::TcParser::FastZ32R2}, + {"::_pbi::TcParser::FastZ32P1", internal::TcParser::FastZ32P1}, + {"::_pbi::TcParser::FastZ32P2", internal::TcParser::FastZ32P2}, + {"::_pbi::TcParser::FastZ64S1", internal::TcParser::FastZ64S1}, + {"::_pbi::TcParser::FastZ64S2", internal::TcParser::FastZ64S2}, + {"::_pbi::TcParser::FastZ64R1", internal::TcParser::FastZ64R1}, + {"::_pbi::TcParser::FastZ64R2", internal::TcParser::FastZ64R2}, + {"::_pbi::TcParser::FastZ64P1", internal::TcParser::FastZ64P1}, + {"::_pbi::TcParser::FastZ64P2", internal::TcParser::FastZ64P2}, + {"::_pbi::TcParser::FastErS1", internal::TcParser::FastErS1}, + {"::_pbi::TcParser::FastErS2", internal::TcParser::FastErS2}, + {"::_pbi::TcParser::FastErR1", internal::TcParser::FastErR1}, + {"::_pbi::TcParser::FastErR2", internal::TcParser::FastErR2}, + {"::_pbi::TcParser::FastEr0S1", internal::TcParser::FastEr0S1}, + {"::_pbi::TcParser::FastEr0S2", internal::TcParser::FastEr0S2}, + {"::_pbi::TcParser::FastEr0R1", internal::TcParser::FastEr0R1}, + 
{"::_pbi::TcParser::FastEr0R2", internal::TcParser::FastEr0R2}, + {"::_pbi::TcParser::FastEr1S1", internal::TcParser::FastEr1S1}, + {"::_pbi::TcParser::FastEr1S2", internal::TcParser::FastEr1S2}, + {"::_pbi::TcParser::FastEr1R1", internal::TcParser::FastEr1R1}, + {"::_pbi::TcParser::FastEr1R2", internal::TcParser::FastEr1R2}, + {"::_pbi::TcParser::FastEvS1", internal::TcParser::FastEvS1}, + {"::_pbi::TcParser::FastEvS2", internal::TcParser::FastEvS2}, + {"::_pbi::TcParser::FastEvR1", internal::TcParser::FastEvR1}, + {"::_pbi::TcParser::FastEvR2", internal::TcParser::FastEvR2}, + {"::_pbi::TcParser::FastBS1", internal::TcParser::FastBS1}, + {"::_pbi::TcParser::FastBS2", internal::TcParser::FastBS2}, + {"::_pbi::TcParser::FastBR1", internal::TcParser::FastBR1}, + {"::_pbi::TcParser::FastBR2", internal::TcParser::FastBR2}, + {"::_pbi::TcParser::FastSS1", internal::TcParser::FastSS1}, + {"::_pbi::TcParser::FastSS2", internal::TcParser::FastSS2}, + {"::_pbi::TcParser::FastSR1", internal::TcParser::FastSR1}, + {"::_pbi::TcParser::FastSR2", internal::TcParser::FastSR2}, + {"::_pbi::TcParser::FastUS1", internal::TcParser::FastUS1}, + {"::_pbi::TcParser::FastUS2", internal::TcParser::FastUS2}, + {"::_pbi::TcParser::FastUR1", internal::TcParser::FastUR1}, + {"::_pbi::TcParser::FastUR2", internal::TcParser::FastUR2}, + {"::_pbi::TcParser::FastBiS1", internal::TcParser::FastBiS1}, + {"::_pbi::TcParser::FastBiS2", internal::TcParser::FastBiS2}, + {"::_pbi::TcParser::FastSiS1", internal::TcParser::FastSiS1}, + {"::_pbi::TcParser::FastSiS2", internal::TcParser::FastSiS2}, + {"::_pbi::TcParser::FastUiS1", internal::TcParser::FastUiS1}, + {"::_pbi::TcParser::FastUiS2", internal::TcParser::FastUiS2}, + {"::_pbi::TcParser::FastMdS1", internal::TcParser::FastMdS1}, + {"::_pbi::TcParser::FastMdS2", internal::TcParser::FastMdS2}, + {"::_pbi::TcParser::FastGdS1", internal::TcParser::FastGdS1}, + {"::_pbi::TcParser::FastGdS2", internal::TcParser::FastGdS2}, + 
{"::_pbi::TcParser::FastMtS1", internal::TcParser::FastMtS1}, + {"::_pbi::TcParser::FastMtS2", internal::TcParser::FastMtS2}, + {"::_pbi::TcParser::FastGtS1", internal::TcParser::FastGtS1}, + {"::_pbi::TcParser::FastGtS2", internal::TcParser::FastGtS2}, + {"::_pbi::TcParser::FastMdR1", internal::TcParser::FastMdR1}, + {"::_pbi::TcParser::FastMdR2", internal::TcParser::FastMdR2}, + {"::_pbi::TcParser::FastGdR1", internal::TcParser::FastGdR1}, + {"::_pbi::TcParser::FastGdR2", internal::TcParser::FastGdR2}, + {"::_pbi::TcParser::FastMtR1", internal::TcParser::FastMtR1}, + {"::_pbi::TcParser::FastMtR2", internal::TcParser::FastMtR2}, + {"::_pbi::TcParser::FastGtR1", internal::TcParser::FastGtR1}, + {"::_pbi::TcParser::FastGtR2", internal::TcParser::FastGtR2}, + }; + auto it = map->find(name); + if (it == map->end()) { + GOOGLE_LOG(DFATAL) << "Failed to find function: " << name; + // Let's not crash in opt, just in case. + // MiniParse is always a valid parser. + return &internal::TcParser::MiniParse; + } + return it->second; +} + +const internal::TcParseTableBase* Reflection::CreateTcParseTableForMessageSet() + const { + // ParseLoop can't parse message set wire format. + // Create a dummy table that only exists to make TcParser::ParseLoop jump + // into the reflective parse loop. + + using Table = internal::TcParseTable<0, 0, 0, 1, 1>; + // We use `operator new` here because the destruction will be done with + // `operator delete` unconditionally. 
+ void* p = ::operator new(sizeof(Table)); + auto* full_table = ::new (p) Table{ + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, schema_.default_instance_, nullptr}, + {{{&internal::TcParser::ReflectionParseLoop, {}}}}}; + GOOGLE_DCHECK_EQ(static_cast(&full_table->header), + static_cast(full_table)); + return &full_table->header; +} + +void Reflection::PopulateTcParseFastEntries( + const internal::TailCallTableInfo& table_info, + TcParseTableBase::FastFieldEntry* fast_entries) const { + for (const auto& fast_field : table_info.fast_path_fields) { + if (fast_field.field == nullptr) { + // No fast entry here. Use mini parser. + *fast_entries++ = {internal::TcParser::MiniParse, {}}; + } else if (fast_field.func_name.find("TcParser::FastEv") != + fast_field.func_name.npos) { + // We can't use fast parsing for these entries because we can't specify + // the validator. Use the reflection based parser called from MiniParse. + // TODO(b/239592582): Implement a fast parser for these enums. + *fast_entries++ = {internal::TcParser::MiniParse, {}}; + } else { + *fast_entries++ = { + GetFastParseFunction(fast_field.func_name), + {fast_field.coded_tag, fast_field.hasbit_idx, fast_field.aux_idx, + static_cast(schema_.GetFieldOffset(fast_field.field))}}; + } + } +} + +static void PopulateTcParseLookupTable( + const internal::TailCallTableInfo& table_info, uint16_t* lookup_table) { + for (const auto& entry_block : table_info.num_to_entry_table.blocks) { + *lookup_table++ = entry_block.first_fnum & 0xFFFF; + *lookup_table++ = entry_block.first_fnum >> 16; + *lookup_table++ = entry_block.entries.size(); + for (auto se16 : entry_block.entries) { + *lookup_table++ = se16.skipmap; + *lookup_table++ = se16.field_entry_offset; + } + } + *lookup_table++ = 0xFFFF; + *lookup_table++ = 0xFFFF; +} + +void Reflection::PopulateTcParseEntries( + internal::TailCallTableInfo& table_info, + TcParseTableBase::FieldEntry* entries) const { + for (const auto& entry : table_info.field_entries) { + const 
FieldDescriptor* field = entry.field; + if (field->options().weak()) { + // Weak fields are handled by the generated fallback function. + // (These are handled by legacy Google-internal logic.) + *entries = {}; + } else if (field->type() == field->TYPE_ENUM && + table_info.aux_entries[entry.aux_idx].type == + internal::TailCallTableInfo::kEnumValidator) { + // Mini parse can't handle it. Fallback to reflection. + *entries = {}; + table_info.aux_entries[entry.aux_idx] = {}; + } else { + const OneofDescriptor* oneof = field->real_containing_oneof(); + entries->offset = schema_.GetFieldOffset(field); + if (oneof != nullptr) { + entries->has_idx = schema_.oneof_case_offset_ + 4 * oneof->index(); + } else if (schema_.HasHasbits()) { + entries->has_idx = + static_cast(8 * schema_.HasBitsOffset() + entry.hasbit_idx); + } else { + entries->has_idx = 0; + } + entries->aux_idx = entry.aux_idx; + entries->type_card = entry.type_card; + } + + ++entries; + } +} + +void Reflection::PopulateTcParseFieldAux( + const internal::TailCallTableInfo& table_info, + TcParseTableBase::FieldAux* field_aux) const { + for (const auto& aux_entry : table_info.aux_entries) { + switch (aux_entry.type) { + case internal::TailCallTableInfo::kNothing: + *field_aux++ = {}; + break; + case internal::TailCallTableInfo::kInlinedStringDonatedOffset: + field_aux++->offset = + static_cast(schema_.inlined_string_donated_offset_); + break; + case internal::TailCallTableInfo::kSplitOffset: + field_aux++->offset = schema_.SplitOffset(); + break; + case internal::TailCallTableInfo::kSplitSizeof: + field_aux++->offset = schema_.SizeofSplit(); + break; + case internal::TailCallTableInfo::kSubTable: + GOOGLE_LOG(FATAL) << "Not supported"; + break; + case internal::TailCallTableInfo::kSubMessage: + field_aux++->message_default_p = + GetDefaultMessageInstance(aux_entry.field); + break; + case internal::TailCallTableInfo::kEnumRange: + field_aux++->enum_range = {aux_entry.enum_range.start, + 
aux_entry.enum_range.size}; + break; + case internal::TailCallTableInfo::kEnumValidator: + GOOGLE_LOG(FATAL) << "Not supported."; + break; + case internal::TailCallTableInfo::kNumericOffset: + field_aux++->offset = aux_entry.offset; + break; + } + } +} + +const internal::TcParseTableBase* Reflection::CreateTcParseTable() const { + using TcParseTableBase = internal::TcParseTableBase; + + if (descriptor_->options().message_set_wire_format()) { + return CreateTcParseTableForMessageSet(); + } + + std::vector fields; + constexpr int kNoHasbit = -1; + std::vector has_bit_indices( + static_cast(descriptor_->field_count()), kNoHasbit); + std::vector inlined_string_indices = has_bit_indices; + for (int i = 0; i < descriptor_->field_count(); ++i) { + auto* field = descriptor_->field(i); + if (schema_.IsFieldStripped(field)) continue; + + fields.push_back(field); + has_bit_indices[static_cast(field->index())] = + static_cast(schema_.HasBitIndex(field)); + + if (IsInlined(field)) { + inlined_string_indices[static_cast(field->index())] = + schema_.InlinedStringIndex(field); + } + } + std::sort(fields.begin(), fields.end(), + [](const FieldDescriptor* a, const FieldDescriptor* b) { + return a->number() < b->number(); + }); + + class ReflectionOptionProvider final + : public internal::TailCallTableInfo::OptionProvider { + public: + explicit ReflectionOptionProvider(const Reflection& ref) : ref_(ref) {} + internal::TailCallTableInfo::PerFieldOptions GetForField( + const FieldDescriptor* field) const final { + return {ref_.IsLazyField(field), // + ref_.IsInlined(field), // + + // Only LITE can be implicitly weak. + /* is_implicitly_weak */ false, + + // We could change this to use direct table. + // Might be easier to do when all messages support TDP. 
+ /* use_direct_tcparser_table */ false, + + /* is_lite */ false, // + ref_.schema_.IsSplit(field)}; + } + + private: + const Reflection& ref_; + }; + internal::TailCallTableInfo table_info( + descriptor_, fields, ReflectionOptionProvider(*this), has_bit_indices, + inlined_string_indices); + + const size_t fast_entries_count = table_info.fast_path_fields.size(); + GOOGLE_CHECK_EQ(fast_entries_count, 1 << table_info.table_size_log2); + const uint16_t lookup_table_offset = AlignTo( + sizeof(TcParseTableBase) + + fast_entries_count * sizeof(TcParseTableBase::FastFieldEntry)); + const uint32_t field_entry_offset = AlignTo( + lookup_table_offset + + sizeof(uint16_t) * table_info.num_to_entry_table.size16()); + const uint32_t aux_offset = AlignTo( + field_entry_offset + + sizeof(TcParseTableBase::FieldEntry) * fields.size()); + + int byte_size = + aux_offset + + sizeof(TcParseTableBase::FieldAux) * table_info.aux_entries.size() + + sizeof(char) * table_info.field_name_data.size(); + + void* p = ::operator new(byte_size); + auto* res = ::new (p) TcParseTableBase{ + static_cast(schema_.HasHasbits() ? schema_.HasBitsOffset() : 0), + // extensions handled through reflection. + 0, 0, 0, + static_cast(fields.empty() ? 0 : fields.back()->number()), + static_cast((fast_entries_count - 1) << 3), lookup_table_offset, + table_info.num_to_entry_table.skipmap32, field_entry_offset, + static_cast(fields.size()), + static_cast(table_info.aux_entries.size()), aux_offset, + schema_.default_instance_, &internal::TcParser::ReflectionFallback}; + + // Now copy the rest of the payloads + PopulateTcParseFastEntries(table_info, res->fast_entry(0)); + + PopulateTcParseLookupTable(table_info, res->field_lookup_begin()); + + PopulateTcParseEntries(table_info, res->field_entries_begin()); + + PopulateTcParseFieldAux(table_info, res->field_aux(0u)); + + // Copy the name data. 
+ memcpy(res->name_data(), table_info.field_name_data.data(), + table_info.field_name_data.size()); + // Validation to make sure we used all the bytes correctly. + GOOGLE_CHECK_EQ(res->name_data() + table_info.field_name_data.size() - + reinterpret_cast(res), + byte_size); + + return res; +} + namespace { // Helper function to transform migration schema into reflection schema. diff --git a/src/google/protobuf/generated_message_tctable_decl.h b/src/google/protobuf/generated_message_tctable_decl.h index 3974dc34a7..b64767f09e 100644 --- a/src/google/protobuf/generated_message_tctable_decl.h +++ b/src/google/protobuf/generated_message_tctable_decl.h @@ -183,12 +183,19 @@ struct alignas(uint64_t) TcParseTableBase { const FastFieldEntry* fast_entry(size_t idx) const { return reinterpret_cast(this + 1) + idx; } + FastFieldEntry* fast_entry(size_t idx) { + return reinterpret_cast(this + 1) + idx; + } // Returns a begin iterator (pointer) to the start of the field lookup table. const uint16_t* field_lookup_begin() const { return reinterpret_cast(reinterpret_cast(this) + lookup_table_offset); } + uint16_t* field_lookup_begin() { + return reinterpret_cast(reinterpret_cast(this) + + lookup_table_offset); + } // Field entry for all fields. struct FieldEntry { @@ -203,6 +210,10 @@ struct alignas(uint64_t) TcParseTableBase { return reinterpret_cast( reinterpret_cast(this) + field_entries_offset); } + FieldEntry* field_entries_begin() { + return reinterpret_cast(reinterpret_cast(this) + + field_entries_offset); + } // Auxiliary entries for field types that need extra information. 
union FieldAux { @@ -234,6 +245,11 @@ struct alignas(uint64_t) TcParseTableBase { aux_offset) + idx; } + FieldAux* field_aux(uint32_t idx) { + return reinterpret_cast(reinterpret_cast(this) + + aux_offset) + + idx; + } const FieldAux* field_aux(const FieldEntry* entry) const { return field_aux(entry->aux_idx); } @@ -244,6 +260,11 @@ struct alignas(uint64_t) TcParseTableBase { aux_offset + num_aux_entries * sizeof(FieldAux)); } + char* name_data() { + return reinterpret_cast(reinterpret_cast(this) + + aux_offset + + num_aux_entries * sizeof(FieldAux)); + } }; #if defined(_MSC_VER) && !defined(_WIN64) diff --git a/src/google/protobuf/generated_message_tctable_full.cc b/src/google/protobuf/generated_message_tctable_full.cc index b77bb8d8e3..1f887e9da6 100644 --- a/src/google/protobuf/generated_message_tctable_full.cc +++ b/src/google/protobuf/generated_message_tctable_full.cc @@ -35,6 +35,7 @@ #include #include #include +#include // clang-format off #include @@ -48,6 +49,41 @@ const char* TcParser::GenericFallback(PROTOBUF_TC_PARAM_DECL) { return GenericFallbackImpl(PROTOBUF_TC_PARAM_PASS); } +const char* TcParser::ReflectionFallback(PROTOBUF_TC_PARAM_DECL) { + SyncHasbits(msg, hasbits, table); + uint32_t tag = data.tag(); + if (tag == 0 || (tag & 7) == WireFormatLite::WIRETYPE_END_GROUP) { + ctx->SetLastTag(tag); + return ptr; + } + + auto* full_msg = down_cast(msg); + auto* descriptor = full_msg->GetDescriptor(); + auto* reflection = full_msg->GetReflection(); + int field_number = WireFormatLite::GetTagFieldNumber(tag); + const FieldDescriptor* field = descriptor->FindFieldByNumber(field_number); + + // If that failed, check if the field is an extension. 
+ if (field == nullptr && descriptor->IsExtensionNumber(field_number)) { + if (ctx->data().pool == nullptr) { + field = reflection->FindKnownExtensionByNumber(field_number); + } else { + field = ctx->data().pool->FindExtensionByNumber(descriptor, field_number); + } + } + + return WireFormat::_InternalParseAndMergeField(full_msg, ptr, ctx, tag, + reflection, field); +} + +const char* TcParser::ReflectionParseLoop(PROTOBUF_TC_PARAM_DECL) { + (void)data; + (void)table; + (void)hasbits; + // Call into the wire format reflective parse loop. + return WireFormat::_InternalParse(down_cast(msg), ptr, ctx); +} + } // namespace internal } // namespace protobuf } // namespace google diff --git a/src/google/protobuf/generated_message_tctable_gen.cc b/src/google/protobuf/generated_message_tctable_gen.cc new file mode 100644 index 0000000000..31c161e5ff --- /dev/null +++ b/src/google/protobuf/generated_message_tctable_gen.cc @@ -0,0 +1,779 @@ +// Protocol Buffers - Google's data interchange format +// Copyright 2008 Google Inc. All rights reserved. +// https://developers.google.com/protocol-buffers/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +// Must come last: +#include + +namespace google { +namespace protobuf { +namespace internal { + +namespace { + +bool GetEnumValidationRange(const EnumDescriptor* enum_type, int16_t& start, + uint16_t& size) { + GOOGLE_CHECK_GT(enum_type->value_count(), 0) << enum_type->DebugString(); + + // Check if the enum values are a single, contiguous range. 
+ std::vector enum_values; + for (int i = 0, N = static_cast(enum_type->value_count()); i < N; ++i) { + enum_values.push_back(enum_type->value(i)->number()); + } + auto values_begin = enum_values.begin(); + auto values_end = enum_values.end(); + std::sort(values_begin, values_end); + enum_values.erase(std::unique(values_begin, values_end), values_end); + + if (std::numeric_limits::min() <= enum_values[0] && + enum_values[0] <= std::numeric_limits::max() && + enum_values.size() <= std::numeric_limits::max() && + static_cast(enum_values[0] + enum_values.size() - 1) == + enum_values.back()) { + start = static_cast(enum_values[0]); + size = static_cast(enum_values.size()); + return true; + } else { + return false; + } +} + +void PopulateFastFieldEntry(const TailCallTableInfo::FieldEntryInfo& entry, + const TailCallTableInfo::PerFieldOptions& options, + TailCallTableInfo::FastFieldInfo& info) { + const FieldDescriptor* field = entry.field; + std::string name = "::_pbi::TcParser::Fast"; + uint8_t aux_idx = static_cast(entry.aux_idx); + + static const char* kPrefix[] = { + nullptr, // 0 + "F64", // TYPE_DOUBLE = 1, + "F32", // TYPE_FLOAT = 2, + "V64", // TYPE_INT64 = 3, + "V64", // TYPE_UINT64 = 4, + "V32", // TYPE_INT32 = 5, + "F64", // TYPE_FIXED64 = 6, + "F32", // TYPE_FIXED32 = 7, + "V8", // TYPE_BOOL = 8, + "", // TYPE_STRING = 9, + "G", // TYPE_GROUP = 10, + "M", // TYPE_MESSAGE = 11, + "B", // TYPE_BYTES = 12, + "V32", // TYPE_UINT32 = 13, + "", // TYPE_ENUM = 14, + "F32", // TYPE_SFIXED32 = 15, + "F64", // TYPE_SFIXED64 = 16, + "Z32", // TYPE_SINT32 = 17, + "Z64", // TYPE_SINT64 = 18, + }; + name.append(kPrefix[field->type()]); + + if (field->type() == field->TYPE_ENUM) { + // Enums are handled as: + // - V32 for open enums + // - Er (and Er0/Er1) for sequential enums + // - Ev for the rest + if (cpp::HasPreservingUnknownEnumSemantics(field)) { + name.append("V32"); + } else if (field->is_repeated() && field->is_packed()) { + GOOGLE_LOG(DFATAL) << "Enum validation 
not handled: " << field->DebugString(); + return; + } else { + int16_t start; + uint16_t size; + if (GetEnumValidationRange(field->enum_type(), start, size)) { + name.append("Er"); + int max_value = start + size - 1; + if (max_value <= 127 && (start == 0 || start == 1)) { + name.append(1, '0' + start); + aux_idx = max_value; + } + } else { + name.append("Ev"); + } + } + } + if (field->type() == field->TYPE_STRING) { + switch (internal::cpp::GetUtf8CheckMode(field, options.is_lite)) { + case internal::cpp::Utf8CheckMode::kStrict: + name.append("U"); + break; + case internal::cpp::Utf8CheckMode::kVerify: + name.append("S"); + break; + case internal::cpp::Utf8CheckMode::kNone: + name.append("B"); + break; + } + } + if (field->type() == field->TYPE_STRING || + field->type() == field->TYPE_BYTES) { + if (options.is_string_inlined) { + name.append("i"); + GOOGLE_CHECK(!field->is_repeated()); + aux_idx = static_cast(entry.inlined_string_idx); + } + } + if (field->type() == field->TYPE_MESSAGE || + field->type() == field->TYPE_GROUP) { + name.append(options.use_direct_tcparser_table ? "t" : "d"); + } + + // The field implementation functions are prefixed by cardinality: + // `S` for optional or implicit fields. + // `R` for non-packed repeated. + // `P` for packed repeated. + name.append(field->is_packed() ? "P" + : field->is_repeated() ? "R" + : field->real_containing_oneof() ? "O" + : "S"); + + // Append the tag length. Fast parsing only handles 1- or 2-byte tags. + name.append(field->number() < 16 ? "1" : "2"); + + info.func_name = std::move(name); + info.aux_idx = aux_idx; +} + +bool IsFieldEligibleForFastParsing( + const TailCallTableInfo::FieldEntryInfo& entry, + const TailCallTableInfo::OptionProvider& option_provider) { + const auto* field = entry.field; + const auto options = option_provider.GetForField(field); + // Map, oneof, weak, and lazy fields are not handled on the fast path. 
+ if (field->is_map() || field->real_containing_oneof() || + field->options().weak() || options.is_implicitly_weak || + options.is_lazy || options.should_split) { + return false; + } + + // We will check for a valid auxiliary index range later. However, we might + // want to change the value we check for inlined string fields. + int aux_idx = entry.aux_idx; + + switch (field->type()) { + case FieldDescriptor::TYPE_ENUM: + // If enum values are not validated at parse time, then this field can be + // handled on the fast path like an int32. + if (cpp::HasPreservingUnknownEnumSemantics(field)) { + break; + } + if (field->is_repeated() && field->is_packed()) { + return false; + } + break; + + // Some bytes fields can be handled on fast path. + case FieldDescriptor::TYPE_STRING: + case FieldDescriptor::TYPE_BYTES: + if (field->options().ctype() != FieldOptions::STRING) { + return false; + } + if (options.is_string_inlined) { + GOOGLE_CHECK(!field->is_repeated()); + // For inlined strings, the donation state index is stored in the + // `aux_idx` field of the fast parsing info. We need to check the range + // of that value instead of the auxiliary index. + aux_idx = entry.inlined_string_idx; + } + break; + + default: + break; + } + + if (cpp::HasHasbit(field)) { + // The tailcall parser can only update the first 32 hasbits. Fields with + // has-bits beyond the first 32 are handled by mini parsing/fallback. + GOOGLE_CHECK_GE(entry.hasbit_idx, 0) << field->DebugString(); + if (entry.hasbit_idx >= 32) return false; + } + + // If the field needs auxiliary data, then the aux index is needed. This + // must fit in a uint8_t. + if (aux_idx > std::numeric_limits::max()) { + return false; + } + + // The largest tag that can be read by the tailcall parser is two bytes + // when varint-coded. 
This allows 14 bits for the numeric tag value: + // byte 0 byte 1 + // 1nnnnttt 0nnnnnnn + // ^^^^^^^ ^^^^^^^ + if (field->number() >= 1 << 11) return false; + + return true; +} + +std::vector SplitFastFieldsForSize( + const std::vector& field_entries, + int table_size_log2, + const TailCallTableInfo::OptionProvider& option_provider) { + std::vector result(1 << table_size_log2); + const uint32_t idx_mask = static_cast(result.size() - 1); + + for (const auto& entry : field_entries) { + if (!IsFieldEligibleForFastParsing(entry, option_provider)) { + continue; + } + + const auto* field = entry.field; + const auto options = option_provider.GetForField(field); + uint32_t tag = WireFormat::MakeTag(field); + + // Construct the varint-coded tag. If it is more than 7 bits, we need to + // shift the high bits and add a continue bit. + if (uint32_t hibits = tag & 0xFFFFFF80) { + tag = tag + hibits + 128; // tag = lobits + 2*hibits + 128 + } + + // The field index is determined by the low bits of the field number, where + // the table size determines the width of the mask. The largest table + // supported is 32 entries. The parse loop uses these bits directly, so that + // the dispatch does not require arithmetic: + // byte 0 byte 1 + // tag: 1nnnnttt 0nnnnnnn + // ^^^^^ + // idx (table_size_log2=5) + // This means that any field number that does not fit in the lower 4 bits + // will always have the top bit of its table index asserted. + const uint32_t fast_idx = (tag >> 3) & idx_mask; + + TailCallTableInfo::FastFieldInfo& info = result[fast_idx]; + if (info.field != nullptr) { + // This field entry is already filled. + continue; + } + + // Fill in this field's entry: + GOOGLE_CHECK(info.func_name.empty()) << info.func_name; + PopulateFastFieldEntry(entry, options, info); + info.field = field; + info.coded_tag = tag; + // If this field does not have presence, then it can set an out-of-bounds + // bit (tailcall parsing uses a uint64_t for hasbits, but only stores 32). 
+ info.hasbit_idx = cpp::HasHasbit(field) ? entry.hasbit_idx : 63; + } + return result; +} + +// Filter out fields that will be handled by mini parsing. +std::vector FilterMiniParsedFields( + const std::vector& fields, + const TailCallTableInfo::OptionProvider& option_provider +) { + std::vector generated_fallback_fields; + + for (const auto* field : fields) { + auto options = option_provider.GetForField(field); + + bool handled = false; + switch (field->type()) { + case FieldDescriptor::TYPE_DOUBLE: + case FieldDescriptor::TYPE_FLOAT: + case FieldDescriptor::TYPE_FIXED32: + case FieldDescriptor::TYPE_SFIXED32: + case FieldDescriptor::TYPE_FIXED64: + case FieldDescriptor::TYPE_SFIXED64: + case FieldDescriptor::TYPE_BOOL: + case FieldDescriptor::TYPE_UINT32: + case FieldDescriptor::TYPE_SINT32: + case FieldDescriptor::TYPE_INT32: + case FieldDescriptor::TYPE_UINT64: + case FieldDescriptor::TYPE_SINT64: + case FieldDescriptor::TYPE_INT64: + // These are handled by MiniParse, so we don't need any generated + // fallback code. + handled = true; + break; + + case FieldDescriptor::TYPE_ENUM: + if (field->is_repeated() && + !cpp::HasPreservingUnknownEnumSemantics(field)) { + // TODO(b/206890171): handle packed repeated closed enums + // Non-packed repeated can be handled using tables, but we still + // need to generate fallback code for all repeated enums in order to + // handle packed encoding. This is because of the lite/full split + // when handling invalid enum values in a packed field. + handled = false; + } else { + handled = true; + } + break; + + case FieldDescriptor::TYPE_BYTES: + case FieldDescriptor::TYPE_STRING: + if (options.is_string_inlined) { + // TODO(b/198211897): support InilnedStringField. + handled = false; + } else { + handled = true; + } + break; + + case FieldDescriptor::TYPE_MESSAGE: + case FieldDescriptor::TYPE_GROUP: + // TODO(b/210762816): support remaining field types. 
+ if (field->is_map() || field->options().weak() || + options.is_implicitly_weak || options.is_lazy) { + handled = false; + } else { + handled = true; + } + break; + + default: + handled = false; + break; + } + if (!handled) generated_fallback_fields.push_back(field); + } + + return generated_fallback_fields; +} + +std::vector GenerateFieldNames( + const Descriptor* descriptor, + const std::vector& fields) { + static constexpr int kMaxNameLength = 255; + std::vector out; + // First, we output the size of each string, as an unsigned byte. The first + // string is the message name. + int count = 1; + out.push_back(std::min(static_cast(descriptor->full_name().size()), + kMaxNameLength)); + for (const auto* field : fields) { + out.push_back(field->name().size()); + ++count; + } + while (count & 7) { // align to an 8-byte boundary + out.push_back(0); + ++count; + } + // The message name is stored at the beginning of the string + std::string message_name = descriptor->full_name(); + if (message_name.size() > kMaxNameLength) { + static constexpr int kNameHalfLength = (kMaxNameLength - 3) / 2; + message_name = StrCat( + message_name.substr(0, kNameHalfLength), "...", + message_name.substr(message_name.size() - kNameHalfLength)); + } + out.insert(out.end(), message_name.begin(), message_name.end()); + // Then we output the actual field names + for (const auto* field : fields) { + out.insert(out.end(), field->name().begin(), field->name().end()); + } + + return out; +} + +TailCallTableInfo::NumToEntryTable MakeNumToEntryTable( + const std::vector& field_descriptors) { + TailCallTableInfo::NumToEntryTable num_to_entry_table; + num_to_entry_table.skipmap32 = static_cast(-1); + + // skip_entry_block is the current block of SkipEntries that we're + // appending to. cur_block_first_fnum is the number of the first + // field represented by the block. 
+ uint16_t field_entry_index = 0; + uint16_t N = field_descriptors.size(); + // First, handle field numbers 1-32, which affect only the initial + // skipmap32 and don't generate additional skip-entry blocks. + for (; field_entry_index != N; ++field_entry_index) { + auto* field_descriptor = field_descriptors[field_entry_index]; + if (field_descriptor->number() > 32) break; + auto skipmap32_index = field_descriptor->number() - 1; + num_to_entry_table.skipmap32 -= 1 << skipmap32_index; + } + // If all the field numbers were less than or equal to 32, we will have + // no further entries to process, and we are already done. + if (field_entry_index == N) return num_to_entry_table; + + TailCallTableInfo::SkipEntryBlock* block = nullptr; + bool start_new_block = true; + // To determine sparseness, track the field number corresponding to + // the start of the most recent skip entry. + uint32_t last_skip_entry_start = 0; + for (; field_entry_index != N; ++field_entry_index) { + auto* field_descriptor = field_descriptors[field_entry_index]; + uint32_t fnum = static_cast(field_descriptor->number()); + GOOGLE_CHECK_GT(fnum, last_skip_entry_start); + if (start_new_block == false) { + // If the next field number is within 15 of the last_skip_entry_start, we + // continue writing just to that entry. If it's between 16 and 31 more, + // then we just extend the current block by one. If it's more than 31 + // more, we have to add empty skip entries in order to continue using the + // existing block. Obviously it's just 32 more, it doesn't make sense to + // start a whole new block, since new blocks mean having to write out + // their starting field number, which is 32 bits, as well as the size of + // the additional block, which is 16... while an empty SkipEntry16 only + // costs 32 bits. So if it was 48 more, it's a slight space win; we save + // 16 bits, but probably at the cost of slower run time. We're choosing + // 96 for now. 
+ if (fnum - last_skip_entry_start > 96) start_new_block = true; + } + if (start_new_block) { + num_to_entry_table.blocks.push_back({fnum}); + block = &num_to_entry_table.blocks.back(); + start_new_block = false; + } + + auto skip_entry_num = (fnum - block->first_fnum) / 16; + auto skip_entry_index = (fnum - block->first_fnum) % 16; + while (skip_entry_num >= block->entries.size()) + block->entries.push_back({0xFFFF, field_entry_index}); + block->entries[skip_entry_num].skipmap -= 1 << (skip_entry_index); + + last_skip_entry_start = fnum - skip_entry_index; + } + return num_to_entry_table; +} + +uint16_t MakeTypeCardForField( + const FieldDescriptor* field, + const TailCallTableInfo::PerFieldOptions& options) { + uint16_t type_card; + namespace fl = internal::field_layout; + if (internal::cpp::HasHasbit(field)) { + type_card = fl::kFcOptional; + } else if (field->is_repeated()) { + type_card = fl::kFcRepeated; + } else if (field->real_containing_oneof()) { + type_card = fl::kFcOneof; + } else { + type_card = fl::kFcSingular; + } + + // The rest of the type uses convenience aliases: + switch (field->type()) { + case FieldDescriptor::TYPE_DOUBLE: + type_card |= field->is_repeated() && field->is_packed() + ? fl::kPackedDouble + : fl::kDouble; + break; + case FieldDescriptor::TYPE_FLOAT: + type_card |= field->is_repeated() && field->is_packed() ? fl::kPackedFloat + : fl::kFloat; + break; + case FieldDescriptor::TYPE_FIXED32: + type_card |= field->is_repeated() && field->is_packed() + ? fl::kPackedFixed32 + : fl::kFixed32; + break; + case FieldDescriptor::TYPE_SFIXED32: + type_card |= field->is_repeated() && field->is_packed() + ? fl::kPackedSFixed32 + : fl::kSFixed32; + break; + case FieldDescriptor::TYPE_FIXED64: + type_card |= field->is_repeated() && field->is_packed() + ? fl::kPackedFixed64 + : fl::kFixed64; + break; + case FieldDescriptor::TYPE_SFIXED64: + type_card |= field->is_repeated() && field->is_packed() + ? 
fl::kPackedSFixed64 + : fl::kSFixed64; + break; + case FieldDescriptor::TYPE_BOOL: + type_card |= field->is_repeated() && field->is_packed() ? fl::kPackedBool + : fl::kBool; + break; + case FieldDescriptor::TYPE_ENUM: + if (internal::cpp::HasPreservingUnknownEnumSemantics(field)) { + // No validation is required. + type_card |= field->is_repeated() && field->is_packed() + ? fl::kPackedOpenEnum + : fl::kOpenEnum; + } else { + int16_t start; + uint16_t size; + if (GetEnumValidationRange(field->enum_type(), start, size)) { + // Validation is done by range check (start/length in FieldAux). + type_card |= field->is_repeated() && field->is_packed() + ? fl::kPackedEnumRange + : fl::kEnumRange; + } else { + // Validation uses the generated _IsValid function. + type_card |= field->is_repeated() && field->is_packed() + ? fl::kPackedEnum + : fl::kEnum; + } + } + break; + case FieldDescriptor::TYPE_UINT32: + type_card |= field->is_repeated() && field->is_packed() + ? fl::kPackedUInt32 + : fl::kUInt32; + break; + case FieldDescriptor::TYPE_SINT32: + type_card |= field->is_repeated() && field->is_packed() + ? fl::kPackedSInt32 + : fl::kSInt32; + break; + case FieldDescriptor::TYPE_INT32: + type_card |= field->is_repeated() && field->is_packed() ? fl::kPackedInt32 + : fl::kInt32; + break; + case FieldDescriptor::TYPE_UINT64: + type_card |= field->is_repeated() && field->is_packed() + ? fl::kPackedUInt64 + : fl::kUInt64; + break; + case FieldDescriptor::TYPE_SINT64: + type_card |= field->is_repeated() && field->is_packed() + ? fl::kPackedSInt64 + : fl::kSInt64; + break; + case FieldDescriptor::TYPE_INT64: + type_card |= field->is_repeated() && field->is_packed() ? 
fl::kPackedInt64 + : fl::kInt64; + break; + + case FieldDescriptor::TYPE_BYTES: + type_card |= fl::kBytes; + break; + case FieldDescriptor::TYPE_STRING: { + switch (internal::cpp::GetUtf8CheckMode(field, options.is_lite)) { + case internal::cpp::Utf8CheckMode::kStrict: + type_card |= fl::kUtf8String; + break; + case internal::cpp::Utf8CheckMode::kVerify: + type_card |= fl::kRawString; + break; + case internal::cpp::Utf8CheckMode::kNone: + type_card |= fl::kBytes; + break; + } + break; + } + + case FieldDescriptor::TYPE_GROUP: + type_card |= 0 | fl::kMessage | fl::kRepGroup; + if (options.use_direct_tcparser_table) { + type_card |= fl::kTvTable; + } else { + type_card |= fl::kTvDefault; + } + break; + case FieldDescriptor::TYPE_MESSAGE: + if (field->is_map()) { + type_card |= fl::kMap; + } else { + type_card |= fl::kMessage; + if (options.is_lazy) { + type_card |= fl::kRepLazy; + } else if (options.is_implicitly_weak) { + type_card |= fl::kRepIWeak; + } + + if (options.use_direct_tcparser_table) { + type_card |= fl::kTvTable; + } else { + type_card |= fl::kTvDefault; + } + } + break; + } + + // Fill in extra information about string and bytes field representations. + if (field->type() == FieldDescriptor::TYPE_BYTES || + field->type() == FieldDescriptor::TYPE_STRING) { + if (field->is_repeated()) { + type_card |= fl::kRepSString; + } else { + type_card |= fl::kRepAString; + } + } + + if (options.should_split) { + type_card |= fl::kSplitTrue; + } + + return type_card; +} + +} // namespace + +TailCallTableInfo::TailCallTableInfo( + const Descriptor* descriptor, + const std::vector& ordered_fields, + const OptionProvider& option_provider, + const std::vector& has_bit_indices, + const std::vector& inlined_string_indices) { + // If this message has any inlined string fields, store the donation state + // offset in the second auxiliary entry. 
+ if (!inlined_string_indices.empty()) { + aux_entries.resize(1); // pad if necessary + aux_entries[0] = {kInlinedStringDonatedOffset}; + } + + // If this message is split, store the split pointer offset in the third + // auxiliary entry. + for (auto* field : ordered_fields) { + if (option_provider.GetForField(field).should_split) { + aux_entries.resize(3); // pad if necessary + aux_entries[1] = {kSplitOffset}; + aux_entries[2] = {kSplitSizeof}; + break; + } + } + + // Fill in mini table entries. + for (const FieldDescriptor* field : ordered_fields) { + auto options = option_provider.GetForField(field); + field_entries.push_back( + {field, internal::cpp ::HasHasbit(field) + ? has_bit_indices[static_cast(field->index())] + : -1}); + auto& entry = field_entries.back(); + entry.type_card = MakeTypeCardForField(field, options); + + if (field->type() == FieldDescriptor::TYPE_MESSAGE || + field->type() == FieldDescriptor::TYPE_GROUP) { + // Message-typed fields have a FieldAux with the default instance pointer. + if (field->is_map()) { + // TODO(b/205904770): generate aux entries for maps + } else if (field->options().weak()) { + // Don't generate anything for weak fields. They are handled by the + // generated fallback. + } else if (options.is_implicitly_weak) { + // Implicit weak fields don't need to store a default instance pointer. + } else if (options.is_lazy) { + // Lazy fields are handled by the generated fallback function. + } else { + field_entries.back().aux_idx = aux_entries.size(); + aux_entries.push_back( + {options.use_direct_tcparser_table ? kSubTable : kSubMessage, + {field}}); + } + } else if (field->type() == FieldDescriptor::TYPE_ENUM && + !cpp::HasPreservingUnknownEnumSemantics(field)) { + // Enum fields which preserve unknown values (proto3 behavior) are + // effectively int32 fields with respect to parsing -- i.e., the value + // does not need to be validated at parse time. 
+ // + // Enum fields which do not preserve unknown values (proto2 behavior) use + // a FieldAux to store validation information. If the enum values are + // sequential (and within a range we can represent), then the FieldAux + // entry represents the range using the minimum value (which must fit in + // an int16_t) and count (a uint16_t). Otherwise, the entry holds a + // pointer to the generated Name_IsValid function. + + entry.aux_idx = aux_entries.size(); + aux_entries.push_back({}); + auto& aux_entry = aux_entries.back(); + + if (GetEnumValidationRange(field->enum_type(), aux_entry.enum_range.start, + aux_entry.enum_range.size)) { + aux_entry.type = kEnumRange; + } else { + aux_entry.type = kEnumValidator; + aux_entry.field = field; + } + + } else if ((field->type() == FieldDescriptor::TYPE_STRING || + field->type() == FieldDescriptor::TYPE_BYTES) && + options.is_string_inlined) { + GOOGLE_CHECK(!field->is_repeated()); + // Inlined strings have an extra marker to represent their donation state. + int idx = inlined_string_indices[static_cast(field->index())]; + // For mini parsing, the donation state index is stored as an `offset` + // auxiliary entry. + entry.aux_idx = aux_entries.size(); + aux_entries.push_back({kNumericOffset}); + aux_entries.back().offset = idx; + // For fast table parsing, the donation state index is stored instead of + // the aux_idx (this will limit the range to 8 bits). + entry.inlined_string_idx = idx; + } + } + + table_size_log2 = 0; // fallback value + int num_fast_fields = -1; + for (int try_size_log2 : {0, 1, 2, 3, 4, 5}) { + size_t try_size = 1 << try_size_log2; + auto split_fields = + SplitFastFieldsForSize(field_entries, try_size_log2, option_provider); + GOOGLE_CHECK_EQ(split_fields.size(), try_size); + int try_num_fast_fields = 0; + for (const auto& info : split_fields) { + if (info.field != nullptr) ++try_num_fast_fields; + } + // Use this size if (and only if) it covers more fields. 
+ if (try_num_fast_fields > num_fast_fields) { + fast_path_fields = std::move(split_fields); + table_size_log2 = try_size_log2; + num_fast_fields = try_num_fast_fields; + } + // The largest table we allow has the same number of entries as the + // message has fields, rounded up to the next power of 2 (e.g., a message + // with 5 fields can have a fast table of size 8). A larger table *might* + // cover more fields in certain cases, but a larger table in that case + // would have mostly empty entries; so, we cap the size to avoid + // pathologically sparse tables. + if (try_size > ordered_fields.size()) { + break; + } + } + + // Filter out fields that are handled by MiniParse. We don't need to generate + // a fallback for these, which saves code size. + fallback_fields = FilterMiniParsedFields(ordered_fields, option_provider + ); + + num_to_entry_table = MakeNumToEntryTable(ordered_fields); + field_name_data = GenerateFieldNames(descriptor, ordered_fields); + + // If there are no fallback fields, and at most one extension range, the + // parser can use a generic fallback function. Otherwise, a message-specific + // fallback routine is needed. + use_generated_fallback = + !fallback_fields.empty() || descriptor->extension_range_count() > 1; +} + +} // namespace internal +} // namespace protobuf +} // namespace google + +#include diff --git a/src/google/protobuf/generated_message_tctable_gen.h b/src/google/protobuf/generated_message_tctable_gen.h new file mode 100644 index 0000000000..cdb516808b --- /dev/null +++ b/src/google/protobuf/generated_message_tctable_gen.h @@ -0,0 +1,162 @@ +// Protocol Buffers - Google's data interchange format +// Copyright 2008 Google Inc. All rights reserved. 
+// https://developers.google.com/protocol-buffers/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// This file contains routines to generate tail-call table parsing tables. +// Everything in this file is for internal use only. 
+ +#ifndef GOOGLE_PROTOBUF_GENERATED_MESSAGE_TCTABLE_GEN_H__ +#define GOOGLE_PROTOBUF_GENERATED_MESSAGE_TCTABLE_GEN_H__ + +#include +#include +#include +#include + +#include +#include +#include + +// Must come last: +#include + +namespace google { +namespace protobuf { +namespace internal { + +// Helper class for generating tailcall parsing functions. +struct PROTOBUF_EXPORT TailCallTableInfo { + struct PerFieldOptions { + bool is_lazy; + bool is_string_inlined; + bool is_implicitly_weak; + bool use_direct_tcparser_table; + bool is_lite; + bool should_split; + }; + class OptionProvider { + public: + virtual PerFieldOptions GetForField(const FieldDescriptor*) const = 0; + + protected: + ~OptionProvider() = default; + }; + + TailCallTableInfo(const Descriptor* descriptor, + const std::vector& ordered_fields, + const OptionProvider& option_provider, + const std::vector& has_bit_indices, + const std::vector& inlined_string_indices); + + // Fields parsed by the table fast-path. + struct FastFieldInfo { + std::string func_name; + const FieldDescriptor* field; + uint16_t coded_tag; + uint8_t hasbit_idx; + uint8_t aux_idx; + }; + std::vector fast_path_fields; + + // Fields parsed by mini parsing routines. + struct FieldEntryInfo { + const FieldDescriptor* field; + int hasbit_idx; + int inlined_string_idx; + uint16_t aux_idx; + uint16_t type_card; + }; + std::vector field_entries; + + enum AuxType { + kNothing = 0, + kInlinedStringDonatedOffset, + kSplitOffset, + kSplitSizeof, + kSubMessage, + kSubTable, + kEnumRange, + kEnumValidator, + kNumericOffset, + }; + struct AuxEntry { + AuxType type; + struct EnumRange { + int16_t start; + uint16_t size; + }; + union { + const FieldDescriptor* field; + uint32_t offset; + EnumRange enum_range; + }; + }; + std::vector aux_entries; + + // Fields parsed by generated fallback function. 
+ std::vector fallback_fields; + + struct SkipEntry16 { + uint16_t skipmap; + uint16_t field_entry_offset; + }; + struct SkipEntryBlock { + uint32_t first_fnum; + std::vector entries; + }; + struct NumToEntryTable { + uint32_t skipmap32; // for fields #1 - #32 + std::vector blocks; + // Compute the number of uint16_t required to represent this table. + int size16() const { + int size = 2; // for the termination field# + for (const auto& block : blocks) { + // 2 for the field#, 1 for a count of skip entries, 2 for each entry. + size += static_cast(3 + block.entries.size() * 2); + } + return size; + } + }; + NumToEntryTable num_to_entry_table; + + std::vector field_name_data; + + // Table size. + int table_size_log2; + // True if a generated fallback function is required instead of generic. + bool use_generated_fallback; +}; + +} // namespace internal +} // namespace protobuf +} // namespace google + +#include + +#endif // GOOGLE_PROTOBUF_GENERATED_MESSAGE_TCTABLE_GEN_H__ diff --git a/src/google/protobuf/generated_message_tctable_impl.h b/src/google/protobuf/generated_message_tctable_impl.h index 3fad18c7f7..d1fbe108d5 100644 --- a/src/google/protobuf/generated_message_tctable_impl.h +++ b/src/google/protobuf/generated_message_tctable_impl.h @@ -279,6 +279,8 @@ class PROTOBUF_EXPORT TcParser final { static const char* GenericFallback(PROTOBUF_TC_PARAM_DECL); static const char* GenericFallbackLite(PROTOBUF_TC_PARAM_DECL); + static const char* ReflectionFallback(PROTOBUF_TC_PARAM_DECL); + static const char* ReflectionParseLoop(PROTOBUF_TC_PARAM_DECL); static const char* ParseLoop(MessageLite* msg, const char* ptr, ParseContext* ctx, diff --git a/src/google/protobuf/inlined_string_field_unittest.cc b/src/google/protobuf/inlined_string_field_unittest.cc index 9efe6e9324..db9577e45b 100644 --- a/src/google/protobuf/inlined_string_field_unittest.cc +++ b/src/google/protobuf/inlined_string_field_unittest.cc @@ -45,6 +45,7 @@ #include #include #include +#include namespace 
google { diff --git a/src/google/protobuf/message.cc b/src/google/protobuf/message.cc index 5052b1c827..c1cde4c5db 100644 --- a/src/google/protobuf/message.cc +++ b/src/google/protobuf/message.cc @@ -47,6 +47,7 @@ #include #include #include +#include #include #include #include @@ -167,7 +168,15 @@ void Message::DiscardUnknownFields() { const char* Message::_InternalParse(const char* ptr, internal::ParseContext* ctx) { +#if defined(PROTOBUF_USE_TABLE_PARSER_ON_REFLECTION) + auto meta = GetMetadata(); + ptr = internal::TcParser::ParseLoop(this, ptr, ctx, + meta.reflection->GetTcParseTable()); + + return ptr; +#else return WireFormat::_InternalParse(this, ptr, ctx); +#endif } uint8_t* Message::_InternalSerialize(uint8_t* target, diff --git a/src/google/protobuf/message.h b/src/google/protobuf/message.h index 0d69a6bc38..18d7fc5dc8 100644 --- a/src/google/protobuf/message.h +++ b/src/google/protobuf/message.h @@ -122,6 +122,7 @@ #include #include #include +#include #include #include // TODO(b/211442718): cleanup #include @@ -157,6 +158,7 @@ struct DescriptorTable; class MapFieldBase; class SwapFieldHelper; class CachedSize; +struct TailCallTableInfo; } // namespace internal class UnknownFieldSet; // unknown_field_set.h namespace io { @@ -468,6 +470,8 @@ class MutableRepeatedFieldRef; // memory leaks. So, instead we ended up with this flat interface. class PROTOBUF_EXPORT Reflection final { public: + ~Reflection(); + // Get the UnknownFieldSet for the message. This contains fields which // were seen when the Message was parsed but were not recognized according // to the Message's definition. @@ -1061,10 +1065,34 @@ class PROTOBUF_EXPORT Reflection final { // contain weak fields, then this field equals descriptor_->field_count(). int last_non_weak_field_index_; + // The table-driven parser table. + // This table is generated on demand for Message types that did not override + // _InternalParse. It uses the reflection information to do so. 
+ mutable internal::once_flag tcparse_table_once_; + using TcParseTableBase = internal::TcParseTableBase; + mutable const TcParseTableBase* tcparse_table_ = nullptr; + + const TcParseTableBase* GetTcParseTable() const { + internal::call_once(tcparse_table_once_, + [&] { tcparse_table_ = CreateTcParseTable(); }); + return tcparse_table_; + } + + const TcParseTableBase* CreateTcParseTable() const; + const TcParseTableBase* CreateTcParseTableForMessageSet() const; + void PopulateTcParseFastEntries( + const internal::TailCallTableInfo& table_info, + TcParseTableBase::FastFieldEntry* fast_entries) const; + void PopulateTcParseEntries(internal::TailCallTableInfo& table_info, + TcParseTableBase::FieldEntry* entries) const; + void PopulateTcParseFieldAux(const internal::TailCallTableInfo& table_info, + TcParseTableBase::FieldAux* field_aux) const; + template friend class RepeatedFieldRef; template friend class MutableRepeatedFieldRef; + friend class Message; friend class ::PROTOBUF_NAMESPACE_ID::MessageLayoutInspector; friend class ::PROTOBUF_NAMESPACE_ID::AssignDescriptorsHelper; friend class DynamicMessageFactory; diff --git a/src/google/protobuf/port.h b/src/google/protobuf/port.h index a5c060b6f6..09e82c46ec 100644 --- a/src/google/protobuf/port.h +++ b/src/google/protobuf/port.h @@ -47,6 +47,8 @@ inline void SizedDelete(void* p, size_t size) { #if defined(__cpp_sized_deallocation) ::operator delete(p, size); #else + // Avoid -Wunused-parameter + (void)size; ::operator delete(p); #endif } @@ -54,6 +56,8 @@ inline void SizedArrayDelete(void* p, size_t size) { #if defined(__cpp_sized_deallocation) ::operator delete[](p, size); #else + // Avoid -Wunused-parameter + (void)size; ::operator delete[](p); #endif } diff --git a/src/google/protobuf/port_def.inc b/src/google/protobuf/port_def.inc index 7d067fd2e8..dccbf85277 100644 --- a/src/google/protobuf/port_def.inc +++ b/src/google/protobuf/port_def.inc @@ -832,6 +832,15 @@ #define PROTOBUF_TAIL_CALL_TABLE_PARSER_ENABLED 1 
#endif +#ifdef PROTOBUF_USE_TABLE_PARSER_ON_REFLECTION +#error PROTOBUF_USE_TABLE_PARSER_ON_REFLECTION was previously defined +#endif +#if !defined(PROTOBUF_TEMPORARY_DISABLE_TABLE_PARSER_ON_REFLECTION) && \ + (defined(PROTOBUF_EXPERIMENTAL_USE_TABLE_PARSER_ON_REFLECTION) || \ + defined(PROTOBUF_TAIL_CALL_TABLE_PARSER_ENABLED)) +#define PROTOBUF_USE_TABLE_PARSER_ON_REFLECTION 1 +#endif // PROTOBUF_ENABLE_FORCE_ALLOCATION_ON_CONSTRUCTION + // Note that this is performance sensitive: changing the parameters will change // the registers used by the ABI calling convention, which subsequently affects // register selection logic inside the function. diff --git a/src/google/protobuf/port_undef.inc b/src/google/protobuf/port_undef.inc index 23eb789d1e..d0ab7a8af7 100644 --- a/src/google/protobuf/port_undef.inc +++ b/src/google/protobuf/port_undef.inc @@ -103,6 +103,7 @@ #undef PROTOBUF_MSAN #undef PROTOBUF_TSAN #undef PROTOBUF_TAIL_CALL_TABLE_PARSER_ENABLED +#undef PROTOBUF_USE_TABLE_PARSER_ON_REFLECTION #undef PROTOBUF_TC_PARAM_DECL #undef PROTOBUF_EXCLUSIVE_LOCKS_REQUIRED #undef PROTOBUF_LOCKS_EXCLUDED diff --git a/src/google/protobuf/proto3_arena_lite_unittest.cc b/src/google/protobuf/proto3_arena_lite_unittest.cc index 06b8d08c8f..a3fdb96415 100644 --- a/src/google/protobuf/proto3_arena_lite_unittest.cc +++ b/src/google/protobuf/proto3_arena_lite_unittest.cc @@ -34,7 +34,6 @@ #include #include -#include #include using proto3_arena_unittest::TestAllTypes; diff --git a/src/google/protobuf/proto3_arena_unittest.cc b/src/google/protobuf/proto3_arena_unittest.cc index cfbe198abe..9538ab9c6b 100644 --- a/src/google/protobuf/proto3_arena_unittest.cc +++ b/src/google/protobuf/proto3_arena_unittest.cc @@ -37,7 +37,6 @@ #include #include #include -#include #include #include #include diff --git a/src/google/protobuf/proto3_lite_unittest.inc b/src/google/protobuf/proto3_lite_unittest.inc index 5878163b53..eb3fd7adde 100644 --- a/src/google/protobuf/proto3_lite_unittest.inc +++ 
b/src/google/protobuf/proto3_lite_unittest.inc @@ -33,7 +33,6 @@ #include #include -#include #include using UNITTEST::TestAllTypes; diff --git a/src/google/protobuf/reflection_ops_unittest.cc b/src/google/protobuf/reflection_ops_unittest.cc index 513ce47336..30f225c2c0 100644 --- a/src/google/protobuf/reflection_ops_unittest.cc +++ b/src/google/protobuf/reflection_ops_unittest.cc @@ -38,7 +38,6 @@ #include #include #include -#include #include #include diff --git a/src/google/protobuf/repeated_field.h b/src/google/protobuf/repeated_field.h index a7c1275b60..5f4ea913e9 100644 --- a/src/google/protobuf/repeated_field.h +++ b/src/google/protobuf/repeated_field.h @@ -113,10 +113,21 @@ void memswap(char* a, char* b) { b += kBlockSize; } +#if defined(__GNUC__) && !defined(__clang__) + // Workaround GCC bug: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=99578 +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wpragmas" +#pragma GCC diagnostic ignored "-Wstringop-overflow" +#endif // __GNUC__ + // Swap the leftover bytes, could be zero. 
memcpy(&buf, a, kSize % kBlockSize); memcpy(a, b, kSize % kBlockSize); memcpy(b, &buf, kSize % kBlockSize); + +#if defined(__GNUC__) && !defined(__clang__) +#pragma GCC diagnostic pop +#endif // GCC } template diff --git a/src/google/protobuf/repeated_field_unittest.cc b/src/google/protobuf/repeated_field_unittest.cc index 2cb6bab8c7..0aa90f73bc 100644 --- a/src/google/protobuf/repeated_field_unittest.cc +++ b/src/google/protobuf/repeated_field_unittest.cc @@ -52,7 +52,6 @@ #include #include #include -#include #include #include diff --git a/src/google/protobuf/text_format.cc b/src/google/protobuf/text_format.cc index 2a2cb84f4f..1f80ece691 100644 --- a/src/google/protobuf/text_format.cc +++ b/src/google/protobuf/text_format.cc @@ -42,6 +42,7 @@ #include #include #include +#include #include #include diff --git a/src/google/protobuf/wire_format.h b/src/google/protobuf/wire_format.h index 1acbf9e1f0..68586721ef 100644 --- a/src/google/protobuf/wire_format.h +++ b/src/google/protobuf/wire_format.h @@ -285,6 +285,7 @@ class PROTOBUF_EXPORT WireFormat { private: struct MessageSetParser; + friend class TcParser; // Skip a MessageSet field. static bool SkipMessageSetField(io::CodedInputStream* input, uint32_t field_number,