From 3be46483ba11605ec9d29ae439a16c4fc6bf02c3 Mon Sep 17 00:00:00 2001 From: Phil Clay Date: Thu, 3 Feb 2022 09:49:57 -0800 Subject: [PATCH] Resolve more java field accessor name conflicts (#8198) * Remove javanano from .gitignore. Ignore java/lite/target * Resolve more java field accessor name conflicts. Previously, some proto field names would cause the java code generator to generate accessor names that conflict with method names from the message super classes/interfaces. A list of field names that cause such conflicts previously existed, but the list did not contain every field name that would cause a conflict. Additionally, only snake_case field names would be detected. If the field name was in camelCase, the conflict would not be detected. This change adds the complete set of field names that will cause assessor name conflicts, and detects conflicts in both snake_case and camelCase field names. Fixes #8142 * Prevent java accessor name conflicts for fields with leading underscores. Previously, some protobuf field names beginning with leading underscores (e.g. _class) would cause uncompilable java code to be generated due to assessor name conflicts. Now, non-conflicting java accessor method names are created for those fields * Improve comments/documentation for conversion from snake case to camel case Rename snakeCaseToCamelCase to snakeCaseToLowerCamelCase Add snakeCaseToUpperCamelCase Add clarifying in-line comments for field name generation Remove explicit version numbers from references. * Fix indents and typo * Unnest
 tag

* improve grammar in comments
are colliding -> collide

* Remove ternary operator and improve comments

* Fix typo in comment
---
 .../DescriptorMessageInfoFactory.java         | 123 ++++++++++++++++--
 .../protobuf/test_bad_identifiers.proto       |  71 ++++++++--
 .../protobuf/compiler/java/java_helpers.cc    |  27 ++--
 3 files changed, 193 insertions(+), 28 deletions(-)

diff --git a/java/core/src/main/java/com/google/protobuf/DescriptorMessageInfoFactory.java b/java/core/src/main/java/com/google/protobuf/DescriptorMessageInfoFactory.java
index 7975136596..adee556903 100644
--- a/java/core/src/main/java/com/google/protobuf/DescriptorMessageInfoFactory.java
+++ b/java/core/src/main/java/com/google/protobuf/DescriptorMessageInfoFactory.java
@@ -61,8 +61,31 @@ import java.util.concurrent.ConcurrentHashMap;
 final class DescriptorMessageInfoFactory implements MessageInfoFactory {
   private static final String GET_DEFAULT_INSTANCE_METHOD_NAME = "getDefaultInstance";
   private static final DescriptorMessageInfoFactory instance = new DescriptorMessageInfoFactory();
+
+  /**
+   * Names that should be avoided (in UpperCamelCase format).
+   * Using them causes the compiler to generate accessors whose names
+   * collide with methods defined in base classes.
+   *
+   * Keep this list in sync with kForbiddenWordList in
+   * src/google/protobuf/compiler/java/java_helpers.cc
+   */
   private static final Set specialFieldNames =
-      new HashSet<>(Arrays.asList("cached_size", "serialized_size", "class"));
+      new HashSet<>(Arrays.asList(
+        // java.lang.Object:
+        "Class",
+        // com.google.protobuf.MessageLiteOrBuilder:
+        "DefaultInstanceForType",
+        // com.google.protobuf.MessageLite:
+        "ParserForType",
+        "SerializedSize",
+        // com.google.protobuf.MessageOrBuilder:
+        "AllFields",
+        "DescriptorForType",
+        "InitializationErrorString",
+        "UnknownFields",
+        // obsolete. kept for backwards compatibility of generated code
+        "CachedSize"));
 
   // Disallow construction - it's a singleton.
   private DescriptorMessageInfoFactory() {}
@@ -593,21 +616,101 @@ final class DescriptorMessageInfoFactory implements MessageInfoFactory {
     String name = (fd.getType() == FieldDescriptor.Type.GROUP)
                   ? fd.getMessageType().getName()
                   : fd.getName();
-    String suffix = specialFieldNames.contains(name) ? "__" : "_";
-    return snakeCaseToCamelCase(name) + suffix;
+
+    // convert to UpperCamelCase for comparison to the specialFieldNames
+    // (which are in UpperCamelCase)
+    String upperCamelCaseName = snakeCaseToUpperCamelCase(name);
+
+    // Append underscores to match the behavior of the protoc java compiler
+    final String suffix;
+    if (specialFieldNames.contains(upperCamelCaseName)) {
+      // For proto field names that match the specialFieldNames,
+      // the protoc java compiler appends "__" to the java field name
+      // to prevent the field's accessor method names from clashing with other methods.
+      // For example:
+      //     proto field name = "class"
+      //     java field name = "class__"
+      //     accessor method name = "getClass_()"  (so that it does not clash with Object.getClass())
+      suffix = "__";
+    } else {
+      // For other proto field names,
+      // the protoc java compiler appends "_" to the java field name
+      // to prevent field names from clashing with java keywords.
+      // For example:
+      //     proto field name = "int"
+      //     java field name = "int_" (so that it does not clash with int keyword)
+      //     accessor method name = "getInt()"
+      suffix = "_";
+    }
+    return snakeCaseToLowerCamelCase(name) + suffix;
   }
 
   private static String getCachedSizeFieldName(FieldDescriptor fd) {
-    return snakeCaseToCamelCase(fd.getName()) + "MemoizedSerializedSize";
+    return snakeCaseToLowerCamelCase(fd.getName()) + "MemoizedSerializedSize";
+  }
+
+  /**
+   * Converts a snake case string into lower camel case.
+   *
+   * 

Some examples:

+ *
+   *     snakeCaseToLowerCamelCase("foo_bar") => "fooBar"
+   *     snakeCaseToLowerCamelCase("foo") => "foo"
+   * 
+ * + * @param snakeCase the string in snake case to convert + * @return the string converted to camel case, with a lowercase first character + */ + private static String snakeCaseToLowerCamelCase(String snakeCase) { + return snakeCaseToCamelCase(snakeCase, false); } /** - * This method must match exactly with the corresponding function in protocol compiler. See: - * https://github.com/google/protobuf/blob/v3.0.0/src/google/protobuf/compiler/java/java_helpers.cc#L153 + * Converts a snake case string into upper camel case. + * + *

Some examples:

+ *
+   *     snakeCaseToUpperCamelCase("foo_bar") => "FooBar"
+   *     snakeCaseToUpperCamelCase("foo") => "Foo"
+   * 
+ * + * @param snakeCase the string in snake case to convert + * @return the string converted to camel case, with an uppercase first character + */ + private static String snakeCaseToUpperCamelCase(String snakeCase) { + return snakeCaseToCamelCase(snakeCase, true); + } + + /** + * Converts a snake case string into camel case. + * + *

For better readability, prefer calling either + * {@link #snakeCaseToLowerCamelCase(String)} or {@link #snakeCaseToUpperCamelCase(String)}.

+ * + *

Some examples:

+ *
+   *     snakeCaseToCamelCase("foo_bar", false) => "fooBar"
+   *     snakeCaseToCamelCase("foo_bar", true) => "FooBar"
+   *     snakeCaseToCamelCase("foo", false) => "foo"
+   *     snakeCaseToCamelCase("foo", true) => "Foo"
+   *     snakeCaseToCamelCase("Foo", false) => "foo"
+   *     snakeCaseToCamelCase("fooBar", false) => "fooBar"
+   * 
+ * + *

This implementation of this method must exactly match the corresponding + * function in the protocol compiler. Specifically, the + * {@code UnderscoresToCamelCase} function in + * {@code src/google/protobuf/compiler/java/java_helpers.cc}.

+ * + * @param snakeCase the string in snake case to convert + * @param capFirst true if the first letter of the returned string should be uppercase. + * false if the first letter of the returned string should be lowercase. + * @return the string converted to camel case, with an uppercase or lowercase first + * character depending on if {@code capFirst} is true or false, respectively */ - private static String snakeCaseToCamelCase(String snakeCase) { + private static String snakeCaseToCamelCase(String snakeCase, boolean capFirst) { StringBuilder sb = new StringBuilder(snakeCase.length() + 1); - boolean capNext = false; + boolean capNext = capFirst; for (int ctr = 0; ctr < snakeCase.length(); ctr++) { char next = snakeCase.charAt(ctr); if (next == '_') { @@ -653,7 +756,7 @@ final class DescriptorMessageInfoFactory implements MessageInfoFactory { /** Constructs the name of the get method for the given field in the proto. */ private static String getterForField(String snakeCase) { - String camelCase = snakeCaseToCamelCase(snakeCase); + String camelCase = snakeCaseToLowerCamelCase(snakeCase); StringBuilder builder = new StringBuilder("get"); // Capitalize the first character in the field name. builder.append(Character.toUpperCase(camelCase.charAt(0))); @@ -679,7 +782,7 @@ final class DescriptorMessageInfoFactory implements MessageInfoFactory { } private static OneofInfo newInfo(Class messageType, OneofDescriptor desc) { - String camelCase = snakeCaseToCamelCase(desc.getName()); + String camelCase = snakeCaseToLowerCamelCase(desc.getName()); String valueFieldName = camelCase + "_"; String caseFieldName = camelCase + "Case_"; diff --git a/java/core/src/test/proto/com/google/protobuf/test_bad_identifiers.proto b/java/core/src/test/proto/com/google/protobuf/test_bad_identifiers.proto index 8bf1691431..0f481f5639 100644 --- a/java/core/src/test/proto/com/google/protobuf/test_bad_identifiers.proto +++ b/java/core/src/test/proto/com/google/protobuf/test_bad_identifiers.proto @@ -43,10 +43,61 @@ option java_generic_services = true; // auto-added option java_package = "com.google.protobuf"; option java_outer_classname = "TestBadIdentifiersProto"; -message TestMessage { - optional string cached_size = 1; - optional string serialized_size = 2; - optional string class = 3; +// Message with field names using underscores that conflict with accessors in the base message class in java. +// See kForbiddenWordList in src/google/protobuf/compiler/java/java_helpers.cc +message ForbiddenWordsUnderscoreMessage { + // java.lang.Object + optional bool class = 1; + // com.google.protobuf.MessageLiteOrBuilder + optional bool default_instance_for_type = 2; + // com.google.protobuf.MessageLite + optional bool parser_for_type = 3; + optional bool serialized_size = 4; + // com.google.protobuf.MessageOrBuilder + optional bool all_fields = 5; + optional bool descriptor_for_type = 6; + optional bool initialization_error_string = 7; + optional bool unknown_fields = 8; + // obsolete. kept for backwards compatibility of generated code + optional bool cached_size = 9; +} + +// Message with field names using leading underscores that conflict with accessors in the base message class in java. +// See kForbiddenWordList in src/google/protobuf/compiler/java/java_helpers.cc +message ForbiddenWordsLeadingUnderscoreMessage { + // java.lang.Object + optional bool _class = 1; + // com.google.protobuf.MessageLiteOrBuilder + optional bool _default_instance_for_type = 2; + // com.google.protobuf.MessageLite + optional bool _parser_for_type = 3; + optional bool _serialized_size = 4; + // com.google.protobuf.MessageOrBuilder + optional bool _all_fields = 5; + optional bool _descriptor_for_type = 6; + optional bool _initialization_error_string = 7; + optional bool _unknown_fields = 8; + // obsolete. kept for backwards compatibility of generated code + optional bool _cached_size = 9; +} + +// Message with field names in camel case that conflict with accessors in the base message class in java. +// See kForbiddenWordList in src/google/protobuf/compiler/java/java_helpers.cc +message ForbiddenWordsCamelMessage { + // java.lang.Object + optional bool class = 1; + // com.google.protobuf.MessageLiteOrBuilder + optional bool defaultInstanceForType = 2; + // com.google.protobuf.MessageLite + optional bool serializedSize = 3; + optional bool parserForType = 4; + // com.google.protobuf.MessageOrBuilder: + optional bool initializationErrorString = 5; + optional bool descriptorForType = 6; + optional bool allFields = 7; + optional bool unknownFields = 8; + // obsolete. kept for backwards compatibility of generated code + optional bool cachedSize = 9; } message Descriptor { @@ -84,7 +135,7 @@ message Deprecated { optional int32 field1 = 1 [deprecated = true]; optional TestEnum field2 = 2 [deprecated = true]; - optional TestMessage field3 = 3 [deprecated = true]; + optional ForbiddenWordsUnderscoreMessage field3 = 3 [deprecated = true]; } message Override { @@ -117,7 +168,7 @@ message Double { } service TestConflictingMethodNames { - rpc Override(TestMessage) returns (TestMessage); + rpc Override(ForbiddenWordsUnderscoreMessage) returns (ForbiddenWordsUnderscoreMessage); } message TestConflictingFieldNames { @@ -125,24 +176,24 @@ message TestConflictingFieldNames { UNKNOWN = 0; FOO = 1; } - message TestMessage {} + message ForbiddenWordsUnderscoreMessage {} repeated int32 int32_field = 1; repeated TestEnum enum_field = 2; repeated string string_field = 3; repeated bytes bytes_field = 4; - repeated TestMessage message_field = 5; + repeated ForbiddenWordsUnderscoreMessage message_field = 5; optional int32 int32_field_count = 11; optional TestEnum enum_field_count = 12; optional string string_field_count = 13; optional bytes bytes_field_count = 14; - optional TestMessage message_field_count = 15; + optional ForbiddenWordsUnderscoreMessage message_field_count = 15; repeated int32 Int32Field = 21; // NO_PROTO3 repeated TestEnum EnumField = 22; // NO_PROTO3 repeated string StringField = 23; // NO_PROTO3 repeated bytes BytesField = 24; // NO_PROTO3 - repeated TestMessage MessageField = 25; // NO_PROTO3 + repeated ForbiddenWordsUnderscoreMessage MessageField = 25; // NO_PROTO3 // This field conflicts with "int32_field" as they both generate // the method getInt32FieldList(). diff --git a/src/google/protobuf/compiler/java/java_helpers.cc b/src/google/protobuf/compiler/java/java_helpers.cc index 1b1aebff34..681d597833 100644 --- a/src/google/protobuf/compiler/java/java_helpers.cc +++ b/src/google/protobuf/compiler/java/java_helpers.cc @@ -66,15 +66,26 @@ namespace { const char* kDefaultPackage = ""; -// Names that should be avoided as field names. -// Using them will cause the compiler to generate accessors whose names are -// colliding with methods defined in base classes. +// Names that should be avoided (in UpperCamelCase format). +// Using them will cause the compiler to generate accessors whose names +// collide with methods defined in base classes. +// Keep this list in sync with specialFieldNames in +// java/core/src/main/java/com/google/protobuf/DescriptorMessageInfoFactory.java const char* kForbiddenWordList[] = { - // message base class: - "cached_size", - "serialized_size", // java.lang.Object: - "class", + "Class", + // com.google.protobuf.MessageLiteOrBuilder: + "DefaultInstanceForType", + // com.google.protobuf.MessageLite: + "ParserForType", + "SerializedSize", + // com.google.protobuf.MessageOrBuilder: + "AllFields", + "DescriptorForType", + "InitializationErrorString", + "UnknownFields", + // obsolete. kept for backwards compatibility of generated code + "CachedSize", }; const std::unordered_set* kReservedNames = @@ -93,7 +104,7 @@ const std::unordered_set* kReservedNames = bool IsForbidden(const std::string& field_name) { for (int i = 0; i < GOOGLE_ARRAYSIZE(kForbiddenWordList); ++i) { - if (field_name == kForbiddenWordList[i]) { + if (UnderscoresToCamelCase(field_name, true) == kForbiddenWordList[i]) { return true; } }