Added a test for UTF-8 parse checking and added missing error reporting.

pull/13171/head
Joshua Haberman 5 years ago
parent 2c666bc8f6
commit 8e26a33bcb
  1. 7
      BUILD
  2. 17
      tests/bindings/lua/test_upb.lua
  3. 12
      upb/decode.c

@ -765,6 +765,7 @@ cc_test(
"upb/bindings/lua/upb.lua", "upb/bindings/lua/upb.lua",
":descriptor_proto_lua", ":descriptor_proto_lua",
":test_messages_proto3_proto_lua", ":test_messages_proto3_proto_lua",
":test_messages_proto2_proto_lua",
":test_proto_lua", ":test_proto_lua",
"@com_google_protobuf//:conformance_proto", "@com_google_protobuf//:conformance_proto",
"@com_google_protobuf//:descriptor_proto", "@com_google_protobuf//:descriptor_proto",
@ -807,6 +808,12 @@ lua_proto_library(
deps = ["@com_google_protobuf//:test_messages_proto3_proto"], deps = ["@com_google_protobuf//:test_messages_proto3_proto"],
) )
# Test-only Lua proto library wrapping protobuf's proto2 test messages target.
lua_proto_library(
name = "test_messages_proto2_proto_lua",
testonly = 1,
deps = ["@com_google_protobuf//:test_messages_proto2_proto"],
)
# Test the CMake build ######################################################### # Test the CMake build #########################################################
filegroup( filegroup(

@ -3,6 +3,7 @@ local upb = require "lupb"
local lunit = require "lunit" local lunit = require "lunit"
local upb_test = require "tests.test_pb" local upb_test = require "tests.test_pb"
local test_messages_proto3 = require "google.protobuf.test_messages_proto3_pb" local test_messages_proto3 = require "google.protobuf.test_messages_proto3_pb"
local test_messages_proto2 = require "google.protobuf.test_messages_proto2_pb"
local descriptor = require "google.protobuf.descriptor_pb" local descriptor = require "google.protobuf.descriptor_pb"
if _VERSION >= 'Lua 5.2' then if _VERSION >= 'Lua 5.2' then
@ -69,6 +70,22 @@ function test_msg_map()
assert_equal(12, msg2.map_int32_int32[6]) assert_equal(12, msg2.map_int32_int32[6])
end end
-- Checks UTF-8 validation at decode time: a string field holding the single
-- byte 0xFF (never valid in UTF-8) is serialized from a proto2 message, then
-- decoded once as proto2 (must succeed) and once as proto3 (must raise).
function test_utf8()
  local proto2_msg = test_messages_proto2.TestAllTypesProto2()
  -- Use the decimal escape: "\xff" is only recognized by Lua 5.2+ / LuaJIT.
  -- On Lua 5.1 it would silently become the valid string "xff" and the
  -- proto3 decode below would not fail.
  proto2_msg.optional_string = "\255"

  local serialized = upb.encode(proto2_msg)

  -- Decoding invalid UTF-8 succeeds in proto2.
  upb.decode(test_messages_proto2.TestAllTypesProto2, serialized)

  -- Decoding invalid UTF-8 fails in proto3.
  assert_error_match("Error decoding protobuf", function()
    upb.decode(test_messages_proto3.TestAllTypesProto3, serialized)
  end)

  -- TODO(haberman): should proto3 accessors also check UTF-8 at set time?
end
function test_string_double_map() function test_string_double_map()
msg = upb_test.MapTest() msg = upb_test.MapTest()
msg.map_string_double["one"] = 1.0 msg.map_string_double["one"] = 1.0

@ -157,7 +157,7 @@ static const char *decode_msg(upb_decstate *d, const char *ptr, upb_msg *msg,
UPB_NORETURN static void decode_err(upb_decstate *d) { longjmp(d->err, 1); } UPB_NORETURN static void decode_err(upb_decstate *d) { longjmp(d->err, 1); }
bool decode_verifyutf8(const char *buf, int len) { void decode_verifyutf8(upb_decstate *d, const char *buf, int len) {
static const uint8_t utf8_offset[] = { static const uint8_t utf8_offset[] = {
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
@ -179,16 +179,16 @@ bool decode_verifyutf8(const char *buf, int len) {
while (i < len) { while (i < len) {
offset = utf8_offset[(uint8_t)buf[i]]; offset = utf8_offset[(uint8_t)buf[i]];
if (offset == 0 || i + offset > len) { if (offset == 0 || i + offset > len) {
return false; decode_err(d);
} }
for (j = i + 1; j < i + offset; j++) { for (j = i + 1; j < i + offset; j++) {
if ((buf[j] & 0xc0) != 0x80) { if ((buf[j] & 0xc0) != 0x80) {
return false; decode_err(d);
} }
} }
i += offset; i += offset;
} }
return i == len; if (i != len) decode_err(d);
} }
static bool decode_reserve(upb_decstate *d, upb_array *arr, size_t elem) { static bool decode_reserve(upb_decstate *d, upb_array *arr, size_t elem) {
@ -336,7 +336,7 @@ static const char *decode_toarray(upb_decstate *d, const char *ptr,
memcpy(mem, &val, 1 << op); memcpy(mem, &val, 1 << op);
return ptr; return ptr;
case OP_STRING: case OP_STRING:
decode_verifyutf8(val.str_val.data, val.str_val.size); decode_verifyutf8(d, val.str_val.data, val.str_val.size);
/* Fallthrough. */ /* Fallthrough. */
case OP_BYTES: case OP_BYTES:
/* Append bytes. */ /* Append bytes. */
@ -473,7 +473,7 @@ static const char *decode_tomsg(upb_decstate *d, const char *ptr, upb_msg *msg,
break; break;
} }
case OP_STRING: case OP_STRING:
decode_verifyutf8(val.str_val.data, val.str_val.size); decode_verifyutf8(d, val.str_val.data, val.str_val.size);
/* Fallthrough. */ /* Fallthrough. */
case OP_BYTES: case OP_BYTES:
memcpy(mem, &val, sizeof(upb_strview)); memcpy(mem, &val, sizeof(upb_strview));

Loading…
Cancel
Save