Fix missing detection for invalid UTF-8 characters (#30406)

* Fix missing detection for invalid UTF-8 characters

* Add comments
pull/30317/head^2
Cheng-Yu Chung 2 years ago committed by GitHub
parent 15c5a7dfb7
commit 8d792b7721
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 5
      src/core/lib/json/json_reader.cc
  2. 1
      test/core/json/corpus/testcase-4743649675313152
  3. 2
      test/core/json/json_test.cc

@@ -148,7 +148,10 @@ bool JsonReader::StringAddChar(uint32_t c) {
utf8_bytes_remaining_ = 1;
} else if ((c & 0xf0) == 0xe0) {
utf8_bytes_remaining_ = 2;
-} else if ((c & 0xf8) == 0xf0) {
+} else if ((c & 0xf8) == 0xf0 && c <= 0xf4) {
+/// For the UTF-8 characters with length of 4 bytes, the range of the
+/// first byte is [0xf0, 0xf4]. Reference: Table 3-7 in
+/// https://www.unicode.org/versions/Unicode14.0.0/ch03.pdf
utf8_bytes_remaining_ = 3;
} else {
return false;

@@ -0,0 +1 @@
"<EFBFBD><EFBFBD><EFBFBD><EFBFBD>"

@@ -137,6 +137,8 @@ TEST(Json, Utf8) {
EXPECT_THAT("\"\xf0\x80\x80\x80\"", ContainsInvalidUtf8());
/// If the first byte is 0xf4, the range of second byte is [0x80, 0x8f].
EXPECT_THAT("\"\xf4\x90\x80\x80\"", ContainsInvalidUtf8());
+/// The range of the first bytes is [0xf0, 0xf4].
+EXPECT_THAT("\"\xf5\x80\x80\x80\"", ContainsInvalidUtf8());
}
TEST(Json, NestedEmptyContainers) {

Loading…
Cancel
Save