Introduce recursion limit in Java text format parsing

This makes the text format parser reject inputs whose message nesting depth is
greater than 150 (the new default limit). We will soon lower this default to
100 for consistency with other implementations.

For use cases that rely on parsing deeply nested text protos, it is possible to
create a Parser with a custom recursion limit like this:
`TextFormat.Parser.newBuilder().setRecursionLimit(i).build()`.

PiperOrigin-RevId: 552543574
pull/13416/head
Adam Cozzette 1 year ago committed by Copybara-Service
parent 7fb9287c45
commit bca3bed137
  1. 119
      java/core/src/main/java/com/google/protobuf/TextFormat.java
  2. 97
      java/core/src/test/java/com/google/protobuf/TextFormatTest.java

@ -1577,6 +1577,7 @@ public final class TextFormat {
private final boolean allowUnknownExtensions;
private final SingularOverwritePolicy singularOverwritePolicy;
private TextFormatParseInfoTree.Builder parseInfoTreeBuilder;
private final int recursionLimit;
private Parser(
TypeRegistry typeRegistry,
@ -1584,13 +1585,15 @@ public final class TextFormat {
boolean allowUnknownEnumValues,
boolean allowUnknownExtensions,
SingularOverwritePolicy singularOverwritePolicy,
TextFormatParseInfoTree.Builder parseInfoTreeBuilder) {
TextFormatParseInfoTree.Builder parseInfoTreeBuilder,
int recursionLimit) {
this.typeRegistry = typeRegistry;
this.allowUnknownFields = allowUnknownFields;
this.allowUnknownEnumValues = allowUnknownEnumValues;
this.allowUnknownExtensions = allowUnknownExtensions;
this.singularOverwritePolicy = singularOverwritePolicy;
this.parseInfoTreeBuilder = parseInfoTreeBuilder;
this.recursionLimit = recursionLimit;
}
/** Returns a new instance of {@link Builder}. */
@ -1607,6 +1610,7 @@ public final class TextFormat {
SingularOverwritePolicy.ALLOW_SINGULAR_OVERWRITES;
private TextFormatParseInfoTree.Builder parseInfoTreeBuilder = null;
private TypeRegistry typeRegistry = TypeRegistry.getEmptyTypeRegistry();
private int recursionLimit = 150;
/**
* Sets the TypeRegistry for resolving Any. If this is not set, TextFormat will not be able to
@ -1653,6 +1657,15 @@ public final class TextFormat {
return this;
}
/**
* Sets the maximum message nesting depth that the parser will accept. A builder on which this
* method is never called uses a limit of 150.
*
* <p>If the input nests messages more deeply than this limit, parsing stops and a {@link
* ParseException} whose message is "Message is nested too deep" is thrown. A limit below 1
* rejects any nested message.
*
* @param recursionLimit the maximum allowed nesting depth of messages in the parsed input
* @return this builder, so that calls can be chained
*/
public Builder setRecursionLimit(int recursionLimit) {
this.recursionLimit = recursionLimit;
return this;
}
public Parser build() {
return new Parser(
typeRegistry,
@ -1660,7 +1673,8 @@ public final class TextFormat {
allowUnknownEnumValues,
allowUnknownExtensions,
singularOverwritePolicy,
parseInfoTreeBuilder);
parseInfoTreeBuilder,
recursionLimit);
}
}
@ -1784,7 +1798,7 @@ public final class TextFormat {
List<UnknownField> unknownFields = new ArrayList<UnknownField>();
while (!tokenizer.atEnd()) {
mergeField(tokenizer, extensionRegistry, target, unknownFields);
mergeField(tokenizer, extensionRegistry, target, unknownFields, recursionLimit);
}
checkUnknownFields(unknownFields);
}
@ -1794,9 +1808,16 @@ public final class TextFormat {
final Tokenizer tokenizer,
final ExtensionRegistry extensionRegistry,
final MessageReflection.MergeTarget target,
List<UnknownField> unknownFields)
List<UnknownField> unknownFields,
int recursionLimit)
throws ParseException {
mergeField(tokenizer, extensionRegistry, target, parseInfoTreeBuilder, unknownFields);
mergeField(
tokenizer,
extensionRegistry,
target,
parseInfoTreeBuilder,
unknownFields,
recursionLimit);
}
/** Parse a single field from {@code tokenizer} and merge it into {@code target}. */
@ -1805,7 +1826,8 @@ public final class TextFormat {
final ExtensionRegistry extensionRegistry,
final MessageReflection.MergeTarget target,
TextFormatParseInfoTree.Builder parseTreeBuilder,
List<UnknownField> unknownFields)
List<UnknownField> unknownFields,
int recursionLimit)
throws ParseException {
FieldDescriptor field = null;
String name;
@ -1815,8 +1837,17 @@ public final class TextFormat {
ExtensionRegistry.ExtensionInfo extension = null;
if ("google.protobuf.Any".equals(type.getFullName()) && tokenizer.tryConsume("[")) {
if (recursionLimit < 1) {
throw tokenizer.parseException("Message is nested too deep");
}
mergeAnyFieldValue(
tokenizer, extensionRegistry, target, parseTreeBuilder, unknownFields, type);
tokenizer,
extensionRegistry,
target,
parseTreeBuilder,
unknownFields,
type,
recursionLimit - 1);
return;
}
@ -1895,7 +1926,7 @@ public final class TextFormat {
// Skips unknown fields.
if (field == null) {
detectSilentMarker(tokenizer, type, name);
guessFieldTypeAndSkip(tokenizer, type);
guessFieldTypeAndSkip(tokenizer, type, recursionLimit);
return;
}
@ -1913,7 +1944,8 @@ public final class TextFormat {
field,
extension,
childParseTreeBuilder,
unknownFields);
unknownFields,
recursionLimit);
} else {
consumeFieldValues(
tokenizer,
@ -1922,7 +1954,8 @@ public final class TextFormat {
field,
extension,
parseTreeBuilder,
unknownFields);
unknownFields,
recursionLimit);
}
} else {
detectSilentMarker(tokenizer, type, field.getFullName());
@ -1934,7 +1967,8 @@ public final class TextFormat {
field,
extension,
parseTreeBuilder,
unknownFields);
unknownFields,
recursionLimit);
}
if (parseTreeBuilder != null) {
@ -1981,7 +2015,8 @@ public final class TextFormat {
final FieldDescriptor field,
final ExtensionRegistry.ExtensionInfo extension,
final TextFormatParseInfoTree.Builder parseTreeBuilder,
List<UnknownField> unknownFields)
List<UnknownField> unknownFields,
int recursionLimit)
throws ParseException {
// Support specifying repeated field values as a comma-separated list.
// Ex."foo: [1, 2, 3]"
@ -1995,7 +2030,8 @@ public final class TextFormat {
field,
extension,
parseTreeBuilder,
unknownFields);
unknownFields,
recursionLimit);
if (tokenizer.tryConsume("]")) {
// End of list.
break;
@ -2011,7 +2047,8 @@ public final class TextFormat {
field,
extension,
parseTreeBuilder,
unknownFields);
unknownFields,
recursionLimit);
}
}
@ -2023,7 +2060,8 @@ public final class TextFormat {
final FieldDescriptor field,
final ExtensionRegistry.ExtensionInfo extension,
final TextFormatParseInfoTree.Builder parseTreeBuilder,
List<UnknownField> unknownFields)
List<UnknownField> unknownFields,
int recursionLimit)
throws ParseException {
if (singularOverwritePolicy == SingularOverwritePolicy.FORBID_SINGULAR_OVERWRITES
&& !field.isRepeated()) {
@ -2047,6 +2085,10 @@ public final class TextFormat {
Object value = null;
if (field.getJavaType() == FieldDescriptor.JavaType.MESSAGE) {
if (recursionLimit < 1) {
throw tokenizer.parseException("Message is nested too deep");
}
final String endToken;
if (tokenizer.tryConsume("<")) {
endToken = ">";
@ -2063,7 +2105,13 @@ public final class TextFormat {
if (tokenizer.atEnd()) {
throw tokenizer.parseException("Expected \"" + endToken + "\".");
}
mergeField(tokenizer, extensionRegistry, subField, parseTreeBuilder, unknownFields);
mergeField(
tokenizer,
extensionRegistry,
subField,
parseTreeBuilder,
unknownFields,
recursionLimit - 1);
}
value = subField.finish();
@ -2178,7 +2226,8 @@ public final class TextFormat {
MergeTarget target,
final TextFormatParseInfoTree.Builder parseTreeBuilder,
List<UnknownField> unknownFields,
Descriptor anyDescriptor)
Descriptor anyDescriptor,
int recursionLimit)
throws ParseException {
// Try to parse human readable format of Any in the form: [type_url]: { ... }
StringBuilder typeUrlBuilder = new StringBuilder();
@ -2224,7 +2273,13 @@ public final class TextFormat {
MessageReflection.BuilderAdapter contentTarget =
new MessageReflection.BuilderAdapter(contentBuilder);
while (!tokenizer.tryConsume(anyEndToken)) {
mergeField(tokenizer, extensionRegistry, contentTarget, parseTreeBuilder, unknownFields);
mergeField(
tokenizer,
extensionRegistry,
contentTarget,
parseTreeBuilder,
unknownFields,
recursionLimit);
}
target.setField(anyDescriptor.findFieldByName("type_url"), typeUrlBuilder.toString());
@ -2233,10 +2288,11 @@ public final class TextFormat {
}
/** Skips the next field including the field's name and value. */
private void skipField(Tokenizer tokenizer, Descriptor type) throws ParseException {
private void skipField(Tokenizer tokenizer, Descriptor type, int recursionLimit)
throws ParseException {
String name = consumeFullTypeName(tokenizer);
detectSilentMarker(tokenizer, type, name);
guessFieldTypeAndSkip(tokenizer, type);
guessFieldTypeAndSkip(tokenizer, type, recursionLimit);
// For historical reasons, fields may optionally be separated by commas or
// semicolons.
@ -2248,7 +2304,8 @@ public final class TextFormat {
/**
* Skips the whole body of a message including the beginning delimiter and the ending delimiter.
*/
private void skipFieldMessage(Tokenizer tokenizer, Descriptor type) throws ParseException {
private void skipFieldMessage(Tokenizer tokenizer, Descriptor type, int recursionLimit)
throws ParseException {
final String delimiter;
if (tokenizer.tryConsume("<")) {
delimiter = ">";
@ -2257,7 +2314,7 @@ public final class TextFormat {
delimiter = "}";
}
while (!tokenizer.lookingAt(">") && !tokenizer.lookingAt("}")) {
skipField(tokenizer, type);
skipField(tokenizer, type, recursionLimit);
}
tokenizer.consume(delimiter);
}
@ -2283,16 +2340,20 @@ public final class TextFormat {
* be a message or the input is ill-formed. For short-formed repeated fields (i.e. with "[]"),
* if it is repeated scalar, there must be a ":" between the field name and the starting "[" .
*/
private void guessFieldTypeAndSkip(Tokenizer tokenizer, Descriptor type) throws ParseException {
private void guessFieldTypeAndSkip(Tokenizer tokenizer, Descriptor type, int recursionLimit)
throws ParseException {
boolean semicolonConsumed = tokenizer.tryConsume(":");
if (tokenizer.lookingAt("[")) {
// Short repeated field form. If a semicolon was consumed, it could be repeated scalar or
// repeated message. If not, it must be repeated message.
skipFieldShortFormedRepeated(tokenizer, semicolonConsumed, type);
skipFieldShortFormedRepeated(tokenizer, semicolonConsumed, type, recursionLimit);
} else if (semicolonConsumed && !tokenizer.lookingAt("{") && !tokenizer.lookingAt("<")) {
skipFieldValue(tokenizer);
} else {
skipFieldMessage(tokenizer, type);
if (recursionLimit < 1) {
throw tokenizer.parseException("Message is nested too deep");
}
skipFieldMessage(tokenizer, type, recursionLimit - 1);
}
}
@ -2302,7 +2363,8 @@ public final class TextFormat {
* <p>Reports an error if scalar type is not allowed but showing up inside "[]".
*/
private void skipFieldShortFormedRepeated(
Tokenizer tokenizer, boolean scalarAllowed, Descriptor type) throws ParseException {
Tokenizer tokenizer, boolean scalarAllowed, Descriptor type, int recursionLimit)
throws ParseException {
if (!tokenizer.tryConsume("[") || tokenizer.tryConsume("]")) {
// Try skipping "[]".
return;
@ -2311,7 +2373,10 @@ public final class TextFormat {
while (true) {
if (tokenizer.lookingAt("{") || tokenizer.lookingAt("<")) {
// Try skipping message field inside "[]"
skipFieldMessage(tokenizer, type);
if (recursionLimit < 1) {
throw tokenizer.parseException("Message is nested too deep");
}
skipFieldMessage(tokenizer, type, recursionLimit - 1);
} else if (scalarAllowed) {
// Try skipping scalar field inside "[]".
skipFieldValue(tokenizer);

@ -56,6 +56,7 @@ import protobuf_unittest.UnittestProto.TestAllTypes;
import protobuf_unittest.UnittestProto.TestAllTypes.NestedMessage;
import protobuf_unittest.UnittestProto.TestEmptyMessage;
import protobuf_unittest.UnittestProto.TestOneof2;
import protobuf_unittest.UnittestProto.TestRecursiveMessage;
import protobuf_unittest.UnittestProto.TestRequired;
import protobuf_unittest.UnittestProto.TestReservedFields;
import proto2_wireformat_unittest.UnittestMsetWireFormat.TestMessageSet;
@ -1842,4 +1843,100 @@ public class TextFormatTest {
assertThat(TextFormat.printer().printToString(message))
.isEqualTo("optional_float: -0.0\noptional_double: -0.0\n");
}
/**
 * Builds a {@code TestRecursiveMessage} chain: {@code depth} levels of the {@code a} field wrapped
 * around an innermost message whose {@code i} field is 5.
 */
private TestRecursiveMessage makeRecursiveMessage(int depth) {
  TestRecursiveMessage.Builder builder = TestRecursiveMessage.newBuilder();
  if (depth != 0) {
    // Wrap a chain that is one level shallower.
    return builder.setA(makeRecursiveMessage(depth - 1)).build();
  }
  // Innermost level: just the scalar payload.
  return builder.setI(5).build();
}
/** A default parser accepts 150 levels of nesting and rejects 151. */
@Test
public void testDefaultRecursionLimit() throws Exception {
  String atLimit = TextFormat.printer().printToString(makeRecursiveMessage(150));
  String overLimit = TextFormat.printer().printToString(makeRecursiveMessage(151));

  // Exactly at the limit: must parse without throwing.
  TextFormat.parse(atLimit, TestRecursiveMessage.class);

  // One level past the limit: must be rejected.
  TextFormat.ParseException thrown = null;
  try {
    TextFormat.parse(overLimit, TestRecursiveMessage.class);
  } catch (TextFormat.ParseException e) {
    thrown = e;
  }
  if (thrown == null) {
    assertWithMessage("Parsing deep message should have failed").fail();
  }
  assertThat(thrown).hasMessageThat().contains("too deep");
}
/** The recursion limit is also enforced while skipping unknown message fields. */
@Test
public void testRecursionLimitWithUnknownFields() throws Exception {
  TextFormat.Parser limitedParser =
      TextFormat.Parser.newBuilder().setAllowUnknownFields(true).setRecursionLimit(2).build();

  // Two levels of unknown nesting fit within the limit.
  TestRecursiveMessage.Builder withinLimit = TestRecursiveMessage.newBuilder();
  limitedParser.merge("u { u { i: 0 } }", withinLimit);

  // Three levels of unknown nesting exceed it.
  TextFormat.ParseException thrown = null;
  try {
    TestRecursiveMessage.Builder beyondLimit = TestRecursiveMessage.newBuilder();
    limitedParser.merge("u { u { u { } } }", beyondLimit);
  } catch (TextFormat.ParseException e) {
    thrown = e;
  }
  if (thrown == null) {
    assertWithMessage("Parsing deep message should have failed").fail();
  }
  assertThat(thrown).hasMessageThat().contains("too deep");
}
/** Depth is counted across a mix of known ({@code a}) and unknown ({@code u}) message fields. */
@Test
public void testRecursionLimitWithKnownAndUnknownFields() throws Exception {
  TextFormat.Parser limitedParser =
      TextFormat.Parser.newBuilder().setAllowUnknownFields(true).setRecursionLimit(2).build();

  // Known field wrapping one unknown message: depth 2, accepted.
  TestRecursiveMessage.Builder withinLimit = TestRecursiveMessage.newBuilder();
  limitedParser.merge("a { u { i: 0 } }", withinLimit);

  // Known field wrapping two unknown messages: depth 3, rejected.
  TextFormat.ParseException thrown = null;
  try {
    TestRecursiveMessage.Builder beyondLimit = TestRecursiveMessage.newBuilder();
    limitedParser.merge("a { u { u { } } }", beyondLimit);
  } catch (TextFormat.ParseException e) {
    thrown = e;
  }
  if (thrown == null) {
    assertWithMessage("Parsing deep message should have failed").fail();
  }
  assertThat(thrown).hasMessageThat().contains("too deep");
}
/** Expanding an Any nested inside another message counts towards the recursion limit. */
@Test
public void testRecursionLimitWithAny() throws Exception {
  TextFormat.Parser limitedParser =
      TextFormat.Parser.newBuilder()
          .setRecursionLimit(2)
          .setTypeRegistry(TypeRegistry.newBuilder().add(TestAllTypes.getDescriptor()).build())
          .build();

  // The "value" field plus the expanded Any payload: depth 2, accepted.
  String shallowInput =
      "value { [type.googleapis.com/protobuf_unittest.TestAllTypes] { optional_int32: 1 } }";
  TestAny.Builder withinLimit = TestAny.newBuilder();
  limitedParser.merge(shallowInput, withinLimit);

  // A nested message inside the Any payload pushes the depth to 3, rejected.
  String deepInput =
      "value { [type.googleapis.com/protobuf_unittest.TestAllTypes] { optional_nested_message {"
          + "} } }";
  TextFormat.ParseException thrown = null;
  try {
    TestAny.Builder beyondLimit = TestAny.newBuilder();
    limitedParser.merge(deepInput, beyondLimit);
  } catch (TextFormat.ParseException e) {
    thrown = e;
  }
  if (thrown == null) {
    assertWithMessage("Parsing deep message should have failed").fail();
  }
  assertThat(thrown).hasMessageThat().contains("too deep");
}
/** The recursion limit applies when the message being merged into is itself an Any. */
@Test
public void testRecursionLimitWithTopLevelAny() throws Exception {
  TextFormat.Parser limitedParser =
      TextFormat.Parser.newBuilder()
          .setRecursionLimit(2)
          .setTypeRegistry(
              TypeRegistry.newBuilder().add(TestRecursiveMessage.getDescriptor()).build())
          .build();

  // Expanded Any payload containing one nested message: accepted.
  String shallowInput =
      "[type.googleapis.com/protobuf_unittest.TestRecursiveMessage] { a { i: 0 } }";
  Any.Builder withinLimit = Any.newBuilder();
  limitedParser.merge(shallowInput, withinLimit);

  // Expanded Any payload containing two nested messages: rejected.
  String deepInput =
      "[type.googleapis.com/protobuf_unittest.TestRecursiveMessage] { a { a { i: 0 } } }";
  TextFormat.ParseException thrown = null;
  try {
    Any.Builder beyondLimit = Any.newBuilder();
    limitedParser.merge(deepInput, beyondLimit);
  } catch (TextFormat.ParseException e) {
    thrown = e;
  }
  if (thrown == null) {
    assertWithMessage("Parsing deep message should have failed").fail();
  }
  assertThat(thrown).hasMessageThat().contains("too deep");
}
}

Loading…
Cancel
Save