Added a test for UTF-8 parse checking and added missing error reporting.

5 years ago · 8e26a33bcb
parent 2c666bc8f6
commit 8e26a33bcb
3 changed files with 30 additions and 6 deletions
--- a/7
+++ b/7
@@ -765,6 +765,7 @@ cc_test(
        "upb/bindings/lua/upb.lua",
        ":descriptor_proto_lua",
        ":test_messages_proto3_proto_lua",
+        ":test_messages_proto2_proto_lua",
        ":test_proto_lua",
        "@com_google_protobuf//:conformance_proto",
        "@com_google_protobuf//:descriptor_proto",
@@ -807,6 +808,12 @@ lua_proto_library(
    deps = ["@com_google_protobuf//:test_messages_proto3_proto"],
 )

+lua_proto_library(
+    name = "test_messages_proto2_proto_lua",
+    testonly = 1,
+    deps = ["@com_google_protobuf//:test_messages_proto2_proto"],
+)
+
 # Test the CMake build #########################################################

 filegroup(
--- a/tests/bindings/lua/test_upb.lua
+++ b/tests/bindings/lua/test_upb.lua
@@ -3,6 +3,7 @@ local upb = require "lupb"
 local lunit = require "lunit"
 local upb_test = require "tests.test_pb"
 local test_messages_proto3 = require "google.protobuf.test_messages_proto3_pb"
+local test_messages_proto2 = require "google.protobuf.test_messages_proto2_pb"
 local descriptor = require "google.protobuf.descriptor_pb"

 if _VERSION >= 'Lua 5.2' then
@@ -69,6 +70,22 @@ function test_msg_map()
  assert_equal(12, msg2.map_int32_int32[6])
 end

+function test_utf8()
+  local proto2_msg = test_messages_proto2.TestAllTypesProto2()
+  proto2_msg.optional_string = "\xff"
+  local serialized = upb.encode(proto2_msg)
+
+  -- Decoding invalid UTF-8 succeeds in proto2.
+  upb.decode(test_messages_proto2.TestAllTypesProto2, serialized)
+
+  -- Decoding invalid UTF-8 fails in proto3.
+  assert_error_match("Error decoding protobuf", function()
+    upb.decode(test_messages_proto3.TestAllTypesProto3, serialized)
+  end)
+
+  -- TODO(haberman): should proto3 accessors also check UTF-8 at set time?
+end
+
 function test_string_double_map()
  msg = upb_test.MapTest()
  msg.map_string_double["one"] = 1.0
--- a/upb/decode.c
+++ b/upb/decode.c
@@ -157,7 +157,7 @@ static const char *decode_msg(upb_decstate *d, const char *ptr, upb_msg *msg,

 UPB_NORETURN static void decode_err(upb_decstate *d) { longjmp(d->err, 1); }

-bool decode_verifyutf8(const char *buf, int len) {
+void decode_verifyutf8(upb_decstate *d, const char *buf, int len) {
  static const uint8_t utf8_offset[] = {
      1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
      1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
@@ -179,16 +179,16 @@ bool decode_verifyutf8(const char *buf, int len) {
  while (i < len) {
    offset = utf8_offset[(uint8_t)buf[i]];
    if (offset == 0 || i + offset > len) {
-      return false;
+      decode_err(d);
    }
    for (j = i + 1; j < i + offset; j++) {
      if ((buf[j] & 0xc0) != 0x80) {
-        return false;
+        decode_err(d);
      }
    }
    i += offset;
  }
-  return i == len;
+  if (i != len) decode_err(d);
 }

 static bool decode_reserve(upb_decstate *d, upb_array *arr, size_t elem) {
@@ -336,7 +336,7 @@ static const char *decode_toarray(upb_decstate *d, const char *ptr,
      memcpy(mem, &val, 1 << op);
      return ptr;
    case OP_STRING:
-      decode_verifyutf8(val.str_val.data, val.str_val.size);
+      decode_verifyutf8(d, val.str_val.data, val.str_val.size);
      /* Fallthrough. */
    case OP_BYTES:
      /* Append bytes. */
@@ -473,7 +473,7 @@ static const char *decode_tomsg(upb_decstate *d, const char *ptr, upb_msg *msg,
      break;
    }
    case OP_STRING:
-      decode_verifyutf8(val.str_val.data, val.str_val.size);
+      decode_verifyutf8(d, val.str_val.data, val.str_val.size);
      /* Fallthrough. */
    case OP_BYTES:
      memcpy(mem, &val, sizeof(upb_strview));