Editions test enhancements to cover utf8 handling.

This replaces the cc_utf8_verification and enforce_utf8 options with the corresponding feature values, consistent with C++ behavior. Support for other runtimes will follow in a later change that refactors the string_field_validation feature.

PiperOrigin-RevId: 545824813
pull/13221/head
Mike Kruskal 1 year ago committed by Copybara-Service
parent 9609f42dd7
commit 7599210683
  1. 20
      src/google/protobuf/descriptor.cc
  2. 11
      src/google/protobuf/editions/codegen_tests/BUILD
  3. 3
      src/google/protobuf/editions/codegen_tests/proto2_utf8_disabled.proto
  4. 40
      src/google/protobuf/editions/codegen_tests/proto2_utf8_lite.proto
  5. 1
      src/google/protobuf/editions/codegen_tests/proto3_utf8_disabled.proto
  6. 1
      src/google/protobuf/editions/codegen_tests/proto3_utf8_strict.proto
  7. 15
      src/google/protobuf/editions/golden/editions_transform_proto2.proto
  8. 51
      src/google/protobuf/editions/golden/editions_transform_proto2_lite.proto
  9. 50
      src/google/protobuf/editions/golden/editions_transform_proto2_utf8_disabled.proto
  10. 48
      src/google/protobuf/editions/golden/editions_transform_proto3.proto
  11. 45
      src/google/protobuf/editions/golden/editions_transform_proto3_utf8_disabled.proto
  12. 2
      src/google/protobuf/editions/golden/test_messages_proto2.proto
  13. 25
      src/google/protobuf/editions/proto/editions_transform_proto2.proto
  14. 45
      src/google/protobuf/editions/proto/editions_transform_proto2_lite.proto
  15. 44
      src/google/protobuf/editions/proto/editions_transform_proto2_utf8_disabled.proto
  16. 47
      src/google/protobuf/editions/proto/editions_transform_proto3.proto
  17. 44
      src/google/protobuf/editions/proto/editions_transform_proto3_utf8_disabled.proto
  18. 41
      src/google/protobuf/editions/transform.awk

@ -3875,19 +3875,18 @@ bool FieldDescriptor::is_packed() const {
} }
} }
static bool FieldEnforceUtf8(const FieldDescriptor* field) { static bool IsStrictUtf8(const FieldDescriptor* field) {
return
#ifdef PROTOBUF_FUTURE_EDITIONS #ifdef PROTOBUF_FUTURE_EDITIONS
internal::InternalFeatureHelper::GetFeatures(*field) return internal::InternalFeatureHelper::GetFeatures(*field)
.string_field_validation() == FeatureSet::MANDATORY; .string_field_validation() == FeatureSet::MANDATORY;
#else // PROTOBUF_FUTURE_EDITIONS #else // PROTOBUF_FUTURE_EDITIONS
FileDescriptorLegacy(field->file()).syntax() == return FileDescriptorLegacy(field->file()).syntax() ==
FileDescriptorLegacy::Syntax::SYNTAX_PROTO3; FileDescriptorLegacy::Syntax::SYNTAX_PROTO3;
#endif // PROTOBUF_FUTURE_EDITIONS #endif // PROTOBUF_FUTURE_EDITIONS
} }
bool FieldDescriptor::requires_utf8_validation() const { bool FieldDescriptor::requires_utf8_validation() const {
return type() == TYPE_STRING && FieldEnforceUtf8(this); return type() == TYPE_STRING && IsStrictUtf8(this);
} }
bool FieldDescriptor::has_presence() const { bool FieldDescriptor::has_presence() const {
@ -9567,15 +9566,16 @@ bool HasHasbit(const FieldDescriptor* field) {
!field->options().weak(); !field->options().weak();
} }
static bool FileUtf8Verification(const FileDescriptor* file) { static bool IsVerifyUtf8(const FieldDescriptor* field, bool is_lite) {
if (is_lite) return false;
return true; return true;
} }
// Which level of UTF-8 enforcement is placed on this file. // Which level of UTF-8 enforcement is placed on this file.
Utf8CheckMode GetUtf8CheckMode(const FieldDescriptor* field, bool is_lite) { Utf8CheckMode GetUtf8CheckMode(const FieldDescriptor* field, bool is_lite) {
if (FieldEnforceUtf8(field)) { if (IsStrictUtf8(field)) {
return Utf8CheckMode::kStrict; return Utf8CheckMode::kStrict;
} else if (!is_lite && FileUtf8Verification(field->file())) { } else if (IsVerifyUtf8(field, is_lite)) {
return Utf8CheckMode::kVerify; return Utf8CheckMode::kVerify;
} else { } else {
return Utf8CheckMode::kNone; return Utf8CheckMode::kNone;

@ -103,6 +103,17 @@ cc_proto_library(
deps = [":proto2_utf8_disabled_proto"], deps = [":proto2_utf8_disabled_proto"],
) )
proto_library(
name = "proto2_utf8_lite_proto",
srcs = ["proto2_utf8_lite.proto"],
strip_import_prefix = "/src",
)
cc_proto_library(
name = "proto2_utf8_lite_cc_proto",
deps = [":proto2_utf8_lite_proto"],
)
proto_library( proto_library(
name = "proto2_proto3_enum_proto", name = "proto2_proto3_enum_proto",
srcs = ["proto2_proto3_enum.proto"], srcs = ["proto2_proto3_enum.proto"],

@ -33,6 +33,7 @@ syntax = "proto2";
package protobuf_editions_test.proto2; package protobuf_editions_test.proto2;
message Proto2Utf8Strict { message Proto2Utf8Disabled {
optional string string_field = 1; optional string string_field = 1;
map<string, string> map_field = 2;
} }

@ -0,0 +1,40 @@
// Protocol Buffers - Google's data interchange format
// Copyright 2023 Google Inc. All rights reserved.
// https://developers.google.com/protocol-buffers/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
syntax = "proto2";
package protobuf_editions_test.proto2;
option optimize_for = LITE_RUNTIME;
message Proto2Utf8Lite {
optional string string_field = 1;
map<string, string> map_field = 2;
}

@ -34,4 +34,5 @@ package protobuf_editions_test.proto3;
message Proto3Utf8Disabled { message Proto3Utf8Disabled {
string string_field = 1; string string_field = 1;
map<string, string> map_field = 10;
} }

@ -34,4 +34,5 @@ package protobuf_editions_test.proto3;
message Proto3Utf8Strict { message Proto3Utf8Strict {
string string_field = 1; string string_field = 1;
map<string, string> map_field = 10;
} }

@ -32,7 +32,7 @@ edition = "2023";
import "google/protobuf/cpp_features.proto"; import "google/protobuf/cpp_features.proto";
option features.enum_type = CLOSED; option features.enum_type = CLOSED;
option features.repeated_field_encoding = EXPANDED; option features.repeated_field_encoding = EXPANDED;
option features.string_field_validation = NONE; option features.string_field_validation = HINT;
option features.json_format = LEGACY_BEST_EFFORT; option features.json_format = LEGACY_BEST_EFFORT;
option features.(pb.cpp).legacy_closed_enum = true; option features.(pb.cpp).legacy_closed_enum = true;
@ -81,6 +81,19 @@ extend ParentMessage.ExtendedMessage {
} }
message TestMessage { message TestMessage {
string string_field = 1;
string string_field_utf = 2;
string string_field_noutf = 3;
string options_strip_beginning = 4 [ ctype = STRING_PIECE, default = "hello world abcd" ];
string options_strip_middle = 5 [ ctype = STRING_PIECE, default = "hello world abcd" ];
string options_strip_end = 6 [ ctype = STRING_PIECE, default = "hello world abcd" ];
map<string, string> string_map_field = 7;
repeated int32 int_field = 8;
repeated int32 int_field_packed = 9 [features.repeated_field_encoding = PACKED];
repeated int32 int_field_unpacked = 10;
message OptionalGroup { message OptionalGroup {
int32 a = 17; int32 a = 17;
} }

@ -0,0 +1,51 @@
// Protocol Buffers - Google's data interchange format
// Copyright 2023 Google Inc. All rights reserved.
// https://developers.google.com/protocol-buffers/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
edition = "2023";
import "google/protobuf/cpp_features.proto";
option features.enum_type = CLOSED;
option features.repeated_field_encoding = EXPANDED;
option features.string_field_validation = HINT;
option features.json_format = LEGACY_BEST_EFFORT;
option features.(pb.cpp).legacy_closed_enum = true;
package protobuf_editions_test;
option optimize_for = LITE_RUNTIME;
message TestMessageLite {
string string_field = 1;
string string_field_utf = 2;
string string_field_noutf = 3;
map<string, string> string_map_field = 4;
int32 int_field = 5;
}

@ -0,0 +1,50 @@
// Protocol Buffers - Google's data interchange format
// Copyright 2023 Google Inc. All rights reserved.
// https://developers.google.com/protocol-buffers/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
edition = "2023";
import "google/protobuf/cpp_features.proto";
option features.enum_type = CLOSED;
option features.repeated_field_encoding = EXPANDED;
option features.string_field_validation = HINT;
option features.json_format = LEGACY_BEST_EFFORT;
option features.(pb.cpp).legacy_closed_enum = true;
package protobuf_editions_test;
message TestMessageUtf8Disabled {
string string_field = 1 [features.string_field_validation = NONE];
string string_field_utf = 2 [features.string_field_validation = NONE];
string string_field_noutf = 3 [features.string_field_validation = NONE];
map<string, string> string_map_field = 4 [features.string_field_validation = NONE];
int32 int_field = 5;
}

@ -0,0 +1,48 @@
// Protocol Buffers - Google's data interchange format
// Copyright 2023 Google Inc. All rights reserved.
// https://developers.google.com/protocol-buffers/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
edition = "2023";
option features.field_presence = IMPLICIT;
package protobuf_editions_test;
message TestMessageProto3 {
string string_field = 1;
string string_field_utf = 2;
string string_field_noutf = 3 [features.string_field_validation = HINT];
map<string, string> string_map_field = 4;
map<string, string> string_map_field_utf = 5;
map<string, string> string_map_field_noutf = 6 [features.string_field_validation = HINT];
repeated int32 int_field = 7;
repeated int32 int_field_packed = 8;
repeated int32 int_field_unpacked = 9 [features.repeated_field_encoding = EXPANDED];
}

@ -0,0 +1,45 @@
// Protocol Buffers - Google's data interchange format
// Copyright 2023 Google Inc. All rights reserved.
// https://developers.google.com/protocol-buffers/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
edition = "2023";
option features.field_presence = IMPLICIT;
package protobuf_editions_test;
message TestMessageProto3 {
string string_field = 1 [features.string_field_validation = NONE];
string string_field_utf = 2 [features.string_field_validation = NONE];
string string_field_noutf = 3 [features.string_field_validation = NONE];
map<string, string> string_map_field = 4 [features.string_field_validation = NONE];
repeated int32 int_field = 7;
}

@ -39,7 +39,7 @@ edition = "2023";
import "google/protobuf/cpp_features.proto"; import "google/protobuf/cpp_features.proto";
option features.enum_type = CLOSED; option features.enum_type = CLOSED;
option features.repeated_field_encoding = EXPANDED; option features.repeated_field_encoding = EXPANDED;
option features.string_field_validation = NONE; option features.string_field_validation = HINT;
option features.json_format = LEGACY_BEST_EFFORT; option features.json_format = LEGACY_BEST_EFFORT;
option features.(pb.cpp).legacy_closed_enum = true; option features.(pb.cpp).legacy_closed_enum = true;

@ -76,6 +76,31 @@ extend ParentMessage.ExtendedMessage {
} }
message TestMessage { message TestMessage {
optional string string_field = 1;
optional string string_field_utf = 2;
optional string string_field_noutf = 3;
optional string options_strip_beginning = 4 [
enforce_utf8 = false,
ctype = STRING_PIECE,
default = "hello world abcd"
];
optional string options_strip_middle = 5 [
ctype = STRING_PIECE,
enforce_utf8 = false,
default = "hello world abcd"
];
optional string options_strip_end = 6 [
ctype = STRING_PIECE,
default = "hello world abcd",
enforce_utf8 = false
];
map<string, string> string_map_field = 7;
repeated int32 int_field = 8;
repeated int32 int_field_packed = 9 [packed = true];
repeated int32 int_field_unpacked = 10 [packed = false];
optional group OptionalGroup = 16 { optional group OptionalGroup = 16 {
optional int32 a = 17; optional int32 a = 17;
} }

@ -0,0 +1,45 @@
// Protocol Buffers - Google's data interchange format
// Copyright 2023 Google Inc. All rights reserved.
// https://developers.google.com/protocol-buffers/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
syntax = "proto2";
package protobuf_editions_test;
option optimize_for = LITE_RUNTIME;
message TestMessageLite {
optional string string_field = 1;
optional string string_field_utf = 2;
optional string string_field_noutf = 3;
map<string, string> string_map_field = 4;
optional int32 int_field = 5;
}

@ -0,0 +1,44 @@
// Protocol Buffers - Google's data interchange format
// Copyright 2023 Google Inc. All rights reserved.
// https://developers.google.com/protocol-buffers/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
syntax = "proto2";
package protobuf_editions_test;
message TestMessageUtf8Disabled {
optional string string_field = 1;
optional string string_field_utf = 2;
optional string string_field_noutf = 3;
map<string, string> string_map_field = 4;
optional int32 int_field = 5;
}

@ -0,0 +1,47 @@
// Protocol Buffers - Google's data interchange format
// Copyright 2023 Google Inc. All rights reserved.
// https://developers.google.com/protocol-buffers/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
syntax = "proto3";
package protobuf_editions_test;
message TestMessageProto3 {
string string_field = 1;
string string_field_utf = 2;
string string_field_noutf = 3;
map<string, string> string_map_field = 4;
map<string, string> string_map_field_utf = 5;
map<string, string> string_map_field_noutf = 6;
repeated int32 int_field = 7;
repeated int32 int_field_packed = 8 [packed = true];
repeated int32 int_field_unpacked = 9 [packed = false];
}

@ -0,0 +1,44 @@
// Protocol Buffers - Google's data interchange format
// Copyright 2023 Google Inc. All rights reserved.
// https://developers.google.com/protocol-buffers/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
syntax = "proto3";
package protobuf_editions_test;
message TestMessageProto3 {
string string_field = 1;
string string_field_utf = 2;
string string_field_noutf = 3;
map<string, string> string_map_field = 4;
repeated int32 int_field = 7;
}

@ -41,6 +41,16 @@ function join(array, len, sep)
return result return result
} }
# Strips `option` out of `options_list` and returns the resulting string.
#
#   option:       used as a regular expression (it is concatenated into the
#                 dynamic regexes handed to sub() below), so a caller may pass
#                 a pattern such as "enforce_utf8 = (true|false)".
#   options_list: the string to remove the option from.
#
# All three sub() calls run unconditionally, and each one replaces at most the
# first match it finds.
# NOTE(review): \<, \> and \s are GNU awk regexp operators -- confirm the
# script is always run with gawk.
function strip_option(option, options_list)
{
# Remove the option together with the comma that follows it ("opt, ...")...
sub("\\<" option "\\s*,", "", options_list)
# ...or together with the comma that precedes it ("..., opt").
sub(",\\s*" option "\\>", "", options_list)
# Finally remove a bare occurrence that has no adjacent comma.
sub(option, "", options_list)
return options_list
}
function transform_field(field) function transform_field(field)
{ {
if (!match(field, /\w+\s*=\s*[0-9-]+/)) { if (!match(field, /\w+\s*=\s*[0-9-]+/)) {
@ -59,20 +69,28 @@ function transform_field(field)
num_options = 0 num_options = 0
if(syntax == 2) { if(syntax == 2) {
sub(/\<optional /, "", field_def) sub(/\<optional\s*/, "", field_def)
sub(/\<packed = true\>/, "features.repeated_field_encoding = PACKED", existing_options) sub(/\<packed = true\>/, "features.repeated_field_encoding = PACKED", existing_options)
sub(/\<packed = false\>,/, "", existing_options) existing_options = strip_option("packed = false", existing_options)
sub(/,?packed = false\>/, "", existing_options) existing_options = strip_option("enforce_utf8 = (true|false)", existing_options)
if (match($0, /\<required\>/)) { if (match(field_def, /^\s*required\>/)) {
sub(/\<required /, "", field_def) sub(/\<required\s*/, "", field_def)
options[++num_options] = "features.field_presence = LEGACY_REQUIRED" options[++num_options] = "features.field_presence = LEGACY_REQUIRED"
} }
if (disable_utf8 && match(field_def, /^\s*(string|repeated\s*string|map<string,\s*string>)/)) {
options[++num_options] = "features.string_field_validation = NONE"
}
} }
if(syntax == 3) { if(syntax == 3) {
if (disable_utf8 && match(field_def, /^\s*(string|repeated\s*string|map<string,\s*string>)/)) {
options[++num_options] = "features.string_field_validation = NONE"
} else {
sub(/\<enforce_utf8 = false\>/, "features.string_field_validation = HINT", existing_options)
}
sub(/\<packed = false\>/, "features.repeated_field_encoding = EXPANDED", existing_options) sub(/\<packed = false\>/, "features.repeated_field_encoding = EXPANDED", existing_options)
sub(/\<packed = true\>,/, "", existing_options) existing_options = strip_option("packed = true", existing_options)
sub(/,?packed = true\>/, "", existing_options) existing_options = strip_option("enforce_utf8 = (true|false)", existing_options)
if (match($0, /\<optional\>/)) { if (match($0, /\<optional\>/)) {
sub(/\<optional /, "", field_def) sub(/\<optional /, "", field_def)
options[++num_options] = "features.field_presence = EXPLICIT" options[++num_options] = "features.field_presence = EXPLICIT"
@ -103,7 +121,7 @@ function transform_field(field)
print "import \"third_party/protobuf/cpp_features.proto\";" print "import \"third_party/protobuf/cpp_features.proto\";"
print "option features.enum_type = CLOSED;" print "option features.enum_type = CLOSED;"
print "option features.repeated_field_encoding = EXPANDED;" print "option features.repeated_field_encoding = EXPANDED;"
print "option features.string_field_validation = NONE;" print "option features.string_field_validation = HINT;"
print "option features.json_format = LEGACY_BEST_EFFORT;" print "option features.json_format = LEGACY_BEST_EFFORT;"
print "option features.(pb.cpp).legacy_closed_enum = true;" print "option features.(pb.cpp).legacy_closed_enum = true;"
syntax = 2 syntax = 2
@ -117,6 +135,13 @@ function transform_field(field)
next next
} }
# utf8 validation handling
/option (cc_utf8_verification\s*=\s*false)/ {
disable_utf8 = 1
# Strip this option and replace with feature setting.
next;
}
# Group handling. # Group handling.
/\<group \w* = [0-9]* {/, /}/ { /\<group \w* = [0-9]* {/, /}/ {
if (match($0, /\<group\>/)) { if (match($0, /\<group\>/)) {

Loading…
Cancel
Save