Added a test for UTF-8 parse checking and added missing error reporting.

pull/13171/head
Joshua Haberman 4 years ago
parent 2c666bc8f6
commit 8e26a33bcb
  1. 7
      BUILD
  2. 17
      tests/bindings/lua/test_upb.lua
  3. 12
      upb/decode.c

@ -765,6 +765,7 @@ cc_test(
"upb/bindings/lua/upb.lua",
":descriptor_proto_lua",
":test_messages_proto3_proto_lua",
":test_messages_proto2_proto_lua",
":test_proto_lua",
"@com_google_protobuf//:conformance_proto",
"@com_google_protobuf//:descriptor_proto",
@ -807,6 +808,12 @@ lua_proto_library(
deps = ["@com_google_protobuf//:test_messages_proto3_proto"],
)
# Test-only Lua proto library built from the protobuf proto2 test messages.
lua_proto_library(
name = "test_messages_proto2_proto_lua",
testonly = 1,
deps = ["@com_google_protobuf//:test_messages_proto2_proto"],
)
# Test the CMake build #########################################################
filegroup(

@ -3,6 +3,7 @@ local upb = require "lupb"
local lunit = require "lunit"
local upb_test = require "tests.test_pb"
local test_messages_proto3 = require "google.protobuf.test_messages_proto3_pb"
local test_messages_proto2 = require "google.protobuf.test_messages_proto2_pb"
local descriptor = require "google.protobuf.descriptor_pb"
if _VERSION >= 'Lua 5.2' then
@ -69,6 +70,22 @@ function test_msg_map()
assert_equal(12, msg2.map_int32_int32[6])
end
-- Checks that decoding validates UTF-8 in string fields for proto3 messages
-- only: the same payload must decode as proto2 and be rejected as proto3.
function test_utf8()
  local proto2_msg = test_messages_proto2.TestAllTypesProto2()
  -- "\255" is the single byte 0xFF, which never occurs in valid UTF-8.
  -- The decimal escape is used because the hex form "\xff" only exists in
  -- Lua 5.2+; Lua 5.1 reads it as the literal text "xff", which is valid
  -- UTF-8 and would make this test meaningless there.
  proto2_msg.optional_string = "\255"
  local serialized = upb.encode(proto2_msg)

  -- Decoding invalid UTF-8 succeeds in proto2.
  upb.decode(test_messages_proto2.TestAllTypesProto2, serialized)

  -- Decoding invalid UTF-8 fails in proto3.
  assert_error_match("Error decoding protobuf", function()
    upb.decode(test_messages_proto3.TestAllTypesProto3, serialized)
  end)

  -- TODO(haberman): should proto3 accessors also check UTF-8 at set time?
end
function test_string_double_map()
msg = upb_test.MapTest()
msg.map_string_double["one"] = 1.0

@ -157,7 +157,7 @@ static const char *decode_msg(upb_decstate *d, const char *ptr, upb_msg *msg,
UPB_NORETURN static void decode_err(upb_decstate *d) { longjmp(d->err, 1); }
bool decode_verifyutf8(const char *buf, int len) {
void decode_verifyutf8(upb_decstate *d, const char *buf, int len) {
static const uint8_t utf8_offset[] = {
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
@ -179,16 +179,16 @@ bool decode_verifyutf8(const char *buf, int len) {
while (i < len) {
offset = utf8_offset[(uint8_t)buf[i]];
if (offset == 0 || i + offset > len) {
return false;
decode_err(d);
}
for (j = i + 1; j < i + offset; j++) {
if ((buf[j] & 0xc0) != 0x80) {
return false;
decode_err(d);
}
}
i += offset;
}
return i == len;
if (i != len) decode_err(d);
}
static bool decode_reserve(upb_decstate *d, upb_array *arr, size_t elem) {
@ -336,7 +336,7 @@ static const char *decode_toarray(upb_decstate *d, const char *ptr,
memcpy(mem, &val, 1 << op);
return ptr;
case OP_STRING:
decode_verifyutf8(val.str_val.data, val.str_val.size);
decode_verifyutf8(d, val.str_val.data, val.str_val.size);
/* Fallthrough. */
case OP_BYTES:
/* Append bytes. */
@ -473,7 +473,7 @@ static const char *decode_tomsg(upb_decstate *d, const char *ptr, upb_msg *msg,
break;
}
case OP_STRING:
decode_verifyutf8(val.str_val.data, val.str_val.size);
decode_verifyutf8(d, val.str_val.data, val.str_val.size);
/* Fallthrough. */
case OP_BYTES:
memcpy(mem, &val, sizeof(upb_strview));

Loading…
Cancel
Save