Make JSON UTF-8 boundary check inclusive of the largest possible UTF-8 character.

Acceptable UTF-8 codepoints are 0x0000-0x10FFFF. Previously we treated this as an inclusive-exclusive bound, but UTF-8 does include its topmost character.

PiperOrigin-RevId: 572111435
pull/14301/head
Protobuf Team Bot 1 year ago committed by Copybara-Service
parent d0d52fe3fa
commit e80b8ec620
  1. 2
      src/google/protobuf/json/internal/lexer.cc
  2. 9
      src/google/protobuf/json/internal/lexer_test.cc

@ -335,7 +335,7 @@ absl::StatusOr<size_t> JsonLexer::ParseUnicodeEscape(char out_utf8[4]) {
out_utf8[1] = ((rune >> 6) & 0x3f) | 0x80;
out_utf8[2] = ((rune >> 0) & 0x3f) | 0x80;
return 3;
} else if (rune < 0x10ffff) {
} else if (rune <= 0x10ffff) {
out_utf8[0] = ((rune >> 18) & 0x07) | 0xF0;
out_utf8[1] = ((rune >> 12) & 0x3f) | 0x80;
out_utf8[2] = ((rune >> 6) & 0x3f) | 0x80;

@ -294,6 +294,15 @@ TEST(LexerTest, SimpleString) {
});
}
// Parses a JSON string made of Unicode escapes and checks the resulting
// std::string against the expected UTF-8 bytes. The last escape pair covers
// the largest codepoint, U+10FFFF.
TEST(LexerTest, UTFBoundaries) {
// Escaped codepoints in the input and the bytes expected for each:
//   U+0001                        -> 01
//   U+07FF                        -> DF BF
//   U+FFFF                        -> EF BF BF
//   U+10FFFF (surrogates DBFF, DFFF) -> F4 8F BF BF
Do(R"json("\u0001\u07FF\uFFFF\uDBFF\uDFFF")json",
[](io::ZeroCopyInputStream* stream) {
EXPECT_THAT(Value::Parse(stream),
IsOkAndHolds(ValueIs<std::string>(
"\x01\xdf\xbf\xef\xbf\xbf\xf4\x8f\xbf\xbf")));
});
}
TEST(NonStandard, SingleQuoteString) {
DoLegacy(R"json('My String')json", [=](const Value& value) {
EXPECT_THAT(value, ValueIs<std::string>("My String"));

Loading…
Cancel
Save