Merge pull request #302 from haberman/verify-utf8

Verify UTF-8 when parsing proto3 string fields.
pull/13171/head
Joshua Haberman 5 years ago committed by GitHub
commit ed86d98f53
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 7
      BUILD
  2. 72
      generated_for_cmake/google/protobuf/descriptor.upb.c
  3. 17
      tests/bindings/lua/test_upb.lua
  4. 56
      upb/decode.c
  5. 7
      upb/def.c
  6. 18
      upbc/generator.cc

@ -765,6 +765,7 @@ cc_test(
"upb/bindings/lua/upb.lua", "upb/bindings/lua/upb.lua",
":descriptor_proto_lua", ":descriptor_proto_lua",
":test_messages_proto3_proto_lua", ":test_messages_proto3_proto_lua",
":test_messages_proto2_proto_lua",
":test_proto_lua", ":test_proto_lua",
"@com_google_protobuf//:conformance_proto", "@com_google_protobuf//:conformance_proto",
"@com_google_protobuf//:descriptor_proto", "@com_google_protobuf//:descriptor_proto",
@ -807,6 +808,12 @@ lua_proto_library(
deps = ["@com_google_protobuf//:test_messages_proto3_proto"], deps = ["@com_google_protobuf//:test_messages_proto3_proto"],
) )
lua_proto_library(
name = "test_messages_proto2_proto_lua",
testonly = 1,
deps = ["@com_google_protobuf//:test_messages_proto2_proto"],
)
# Test the CMake build ######################################################### # Test the CMake build #########################################################
filegroup( filegroup(

@ -36,9 +36,9 @@ static const upb_msglayout *const google_protobuf_FileDescriptorProto_submsgs[6]
}; };
static const upb_msglayout_field google_protobuf_FileDescriptorProto__fields[12] = { static const upb_msglayout_field google_protobuf_FileDescriptorProto__fields[12] = {
{1, UPB_SIZE(4, 8), 1, 0, 9, 1}, {1, UPB_SIZE(4, 8), 1, 0, 12, 1},
{2, UPB_SIZE(12, 24), 2, 0, 9, 1}, {2, UPB_SIZE(12, 24), 2, 0, 12, 1},
{3, UPB_SIZE(36, 72), 0, 0, 9, 3}, {3, UPB_SIZE(36, 72), 0, 0, 12, 3},
{4, UPB_SIZE(40, 80), 0, 0, 11, 3}, {4, UPB_SIZE(40, 80), 0, 0, 11, 3},
{5, UPB_SIZE(44, 88), 0, 1, 11, 3}, {5, UPB_SIZE(44, 88), 0, 1, 11, 3},
{6, UPB_SIZE(48, 96), 0, 4, 11, 3}, {6, UPB_SIZE(48, 96), 0, 4, 11, 3},
@ -47,7 +47,7 @@ static const upb_msglayout_field google_protobuf_FileDescriptorProto__fields[12]
{9, UPB_SIZE(32, 64), 5, 5, 11, 1}, {9, UPB_SIZE(32, 64), 5, 5, 11, 1},
{10, UPB_SIZE(56, 112), 0, 0, 5, 3}, {10, UPB_SIZE(56, 112), 0, 0, 5, 3},
{11, UPB_SIZE(60, 120), 0, 0, 5, 3}, {11, UPB_SIZE(60, 120), 0, 0, 5, 3},
{12, UPB_SIZE(20, 40), 3, 0, 9, 1}, {12, UPB_SIZE(20, 40), 3, 0, 12, 1},
}; };
const upb_msglayout google_protobuf_FileDescriptorProto_msginit = { const upb_msglayout google_protobuf_FileDescriptorProto_msginit = {
@ -67,7 +67,7 @@ static const upb_msglayout *const google_protobuf_DescriptorProto_submsgs[8] = {
}; };
static const upb_msglayout_field google_protobuf_DescriptorProto__fields[10] = { static const upb_msglayout_field google_protobuf_DescriptorProto__fields[10] = {
{1, UPB_SIZE(4, 8), 1, 0, 9, 1}, {1, UPB_SIZE(4, 8), 1, 0, 12, 1},
{2, UPB_SIZE(16, 32), 0, 4, 11, 3}, {2, UPB_SIZE(16, 32), 0, 4, 11, 3},
{3, UPB_SIZE(20, 40), 0, 0, 11, 3}, {3, UPB_SIZE(20, 40), 0, 0, 11, 3},
{4, UPB_SIZE(24, 48), 0, 3, 11, 3}, {4, UPB_SIZE(24, 48), 0, 3, 11, 3},
@ -76,7 +76,7 @@ static const upb_msglayout_field google_protobuf_DescriptorProto__fields[10] = {
{7, UPB_SIZE(12, 24), 2, 5, 11, 1}, {7, UPB_SIZE(12, 24), 2, 5, 11, 1},
{8, UPB_SIZE(36, 72), 0, 6, 11, 3}, {8, UPB_SIZE(36, 72), 0, 6, 11, 3},
{9, UPB_SIZE(40, 80), 0, 2, 11, 3}, {9, UPB_SIZE(40, 80), 0, 2, 11, 3},
{10, UPB_SIZE(44, 88), 0, 0, 9, 3}, {10, UPB_SIZE(44, 88), 0, 0, 12, 3},
}; };
const upb_msglayout google_protobuf_DescriptorProto_msginit = { const upb_msglayout google_protobuf_DescriptorProto_msginit = {
@ -131,16 +131,16 @@ static const upb_msglayout *const google_protobuf_FieldDescriptorProto_submsgs[1
}; };
static const upb_msglayout_field google_protobuf_FieldDescriptorProto__fields[11] = { static const upb_msglayout_field google_protobuf_FieldDescriptorProto__fields[11] = {
{1, UPB_SIZE(36, 40), 6, 0, 9, 1}, {1, UPB_SIZE(36, 40), 6, 0, 12, 1},
{2, UPB_SIZE(44, 56), 7, 0, 9, 1}, {2, UPB_SIZE(44, 56), 7, 0, 12, 1},
{3, UPB_SIZE(24, 24), 3, 0, 5, 1}, {3, UPB_SIZE(24, 24), 3, 0, 5, 1},
{4, UPB_SIZE(8, 8), 1, 0, 14, 1}, {4, UPB_SIZE(8, 8), 1, 0, 14, 1},
{5, UPB_SIZE(16, 16), 2, 0, 14, 1}, {5, UPB_SIZE(16, 16), 2, 0, 14, 1},
{6, UPB_SIZE(52, 72), 8, 0, 9, 1}, {6, UPB_SIZE(52, 72), 8, 0, 12, 1},
{7, UPB_SIZE(60, 88), 9, 0, 9, 1}, {7, UPB_SIZE(60, 88), 9, 0, 12, 1},
{8, UPB_SIZE(76, 120), 11, 0, 11, 1}, {8, UPB_SIZE(76, 120), 11, 0, 11, 1},
{9, UPB_SIZE(28, 28), 4, 0, 5, 1}, {9, UPB_SIZE(28, 28), 4, 0, 5, 1},
{10, UPB_SIZE(68, 104), 10, 0, 9, 1}, {10, UPB_SIZE(68, 104), 10, 0, 12, 1},
{17, UPB_SIZE(32, 32), 5, 0, 8, 1}, {17, UPB_SIZE(32, 32), 5, 0, 8, 1},
}; };
@ -155,7 +155,7 @@ static const upb_msglayout *const google_protobuf_OneofDescriptorProto_submsgs[1
}; };
static const upb_msglayout_field google_protobuf_OneofDescriptorProto__fields[2] = { static const upb_msglayout_field google_protobuf_OneofDescriptorProto__fields[2] = {
{1, UPB_SIZE(4, 8), 1, 0, 9, 1}, {1, UPB_SIZE(4, 8), 1, 0, 12, 1},
{2, UPB_SIZE(12, 24), 2, 0, 11, 1}, {2, UPB_SIZE(12, 24), 2, 0, 11, 1},
}; };
@ -172,11 +172,11 @@ static const upb_msglayout *const google_protobuf_EnumDescriptorProto_submsgs[3]
}; };
static const upb_msglayout_field google_protobuf_EnumDescriptorProto__fields[5] = { static const upb_msglayout_field google_protobuf_EnumDescriptorProto__fields[5] = {
{1, UPB_SIZE(4, 8), 1, 0, 9, 1}, {1, UPB_SIZE(4, 8), 1, 0, 12, 1},
{2, UPB_SIZE(16, 32), 0, 2, 11, 3}, {2, UPB_SIZE(16, 32), 0, 2, 11, 3},
{3, UPB_SIZE(12, 24), 2, 1, 11, 1}, {3, UPB_SIZE(12, 24), 2, 1, 11, 1},
{4, UPB_SIZE(20, 40), 0, 0, 11, 3}, {4, UPB_SIZE(20, 40), 0, 0, 11, 3},
{5, UPB_SIZE(24, 48), 0, 0, 9, 3}, {5, UPB_SIZE(24, 48), 0, 0, 12, 3},
}; };
const upb_msglayout google_protobuf_EnumDescriptorProto_msginit = { const upb_msglayout google_protobuf_EnumDescriptorProto_msginit = {
@ -201,7 +201,7 @@ static const upb_msglayout *const google_protobuf_EnumValueDescriptorProto_subms
}; };
static const upb_msglayout_field google_protobuf_EnumValueDescriptorProto__fields[3] = { static const upb_msglayout_field google_protobuf_EnumValueDescriptorProto__fields[3] = {
{1, UPB_SIZE(8, 8), 2, 0, 9, 1}, {1, UPB_SIZE(8, 8), 2, 0, 12, 1},
{2, UPB_SIZE(4, 4), 1, 0, 5, 1}, {2, UPB_SIZE(4, 4), 1, 0, 5, 1},
{3, UPB_SIZE(16, 24), 3, 0, 11, 1}, {3, UPB_SIZE(16, 24), 3, 0, 11, 1},
}; };
@ -218,7 +218,7 @@ static const upb_msglayout *const google_protobuf_ServiceDescriptorProto_submsgs
}; };
static const upb_msglayout_field google_protobuf_ServiceDescriptorProto__fields[3] = { static const upb_msglayout_field google_protobuf_ServiceDescriptorProto__fields[3] = {
{1, UPB_SIZE(4, 8), 1, 0, 9, 1}, {1, UPB_SIZE(4, 8), 1, 0, 12, 1},
{2, UPB_SIZE(16, 32), 0, 0, 11, 3}, {2, UPB_SIZE(16, 32), 0, 0, 11, 3},
{3, UPB_SIZE(12, 24), 2, 1, 11, 1}, {3, UPB_SIZE(12, 24), 2, 1, 11, 1},
}; };
@ -234,9 +234,9 @@ static const upb_msglayout *const google_protobuf_MethodDescriptorProto_submsgs[
}; };
static const upb_msglayout_field google_protobuf_MethodDescriptorProto__fields[6] = { static const upb_msglayout_field google_protobuf_MethodDescriptorProto__fields[6] = {
{1, UPB_SIZE(4, 8), 3, 0, 9, 1}, {1, UPB_SIZE(4, 8), 3, 0, 12, 1},
{2, UPB_SIZE(12, 24), 4, 0, 9, 1}, {2, UPB_SIZE(12, 24), 4, 0, 12, 1},
{3, UPB_SIZE(20, 40), 5, 0, 9, 1}, {3, UPB_SIZE(20, 40), 5, 0, 12, 1},
{4, UPB_SIZE(28, 56), 6, 0, 11, 1}, {4, UPB_SIZE(28, 56), 6, 0, 11, 1},
{5, UPB_SIZE(1, 1), 1, 0, 8, 1}, {5, UPB_SIZE(1, 1), 1, 0, 8, 1},
{6, UPB_SIZE(2, 2), 2, 0, 8, 1}, {6, UPB_SIZE(2, 2), 2, 0, 8, 1},
@ -253,11 +253,11 @@ static const upb_msglayout *const google_protobuf_FileOptions_submsgs[1] = {
}; };
static const upb_msglayout_field google_protobuf_FileOptions__fields[21] = { static const upb_msglayout_field google_protobuf_FileOptions__fields[21] = {
{1, UPB_SIZE(28, 32), 11, 0, 9, 1}, {1, UPB_SIZE(28, 32), 11, 0, 12, 1},
{8, UPB_SIZE(36, 48), 12, 0, 9, 1}, {8, UPB_SIZE(36, 48), 12, 0, 12, 1},
{9, UPB_SIZE(8, 8), 1, 0, 14, 1}, {9, UPB_SIZE(8, 8), 1, 0, 14, 1},
{10, UPB_SIZE(16, 16), 2, 0, 8, 1}, {10, UPB_SIZE(16, 16), 2, 0, 8, 1},
{11, UPB_SIZE(44, 64), 13, 0, 9, 1}, {11, UPB_SIZE(44, 64), 13, 0, 12, 1},
{16, UPB_SIZE(17, 17), 3, 0, 8, 1}, {16, UPB_SIZE(17, 17), 3, 0, 8, 1},
{17, UPB_SIZE(18, 18), 4, 0, 8, 1}, {17, UPB_SIZE(18, 18), 4, 0, 8, 1},
{18, UPB_SIZE(19, 19), 5, 0, 8, 1}, {18, UPB_SIZE(19, 19), 5, 0, 8, 1},
@ -265,14 +265,14 @@ static const upb_msglayout_field google_protobuf_FileOptions__fields[21] = {
{23, UPB_SIZE(21, 21), 7, 0, 8, 1}, {23, UPB_SIZE(21, 21), 7, 0, 8, 1},
{27, UPB_SIZE(22, 22), 8, 0, 8, 1}, {27, UPB_SIZE(22, 22), 8, 0, 8, 1},
{31, UPB_SIZE(23, 23), 9, 0, 8, 1}, {31, UPB_SIZE(23, 23), 9, 0, 8, 1},
{36, UPB_SIZE(52, 80), 14, 0, 9, 1}, {36, UPB_SIZE(52, 80), 14, 0, 12, 1},
{37, UPB_SIZE(60, 96), 15, 0, 9, 1}, {37, UPB_SIZE(60, 96), 15, 0, 12, 1},
{39, UPB_SIZE(68, 112), 16, 0, 9, 1}, {39, UPB_SIZE(68, 112), 16, 0, 12, 1},
{40, UPB_SIZE(76, 128), 17, 0, 9, 1}, {40, UPB_SIZE(76, 128), 17, 0, 12, 1},
{41, UPB_SIZE(84, 144), 18, 0, 9, 1}, {41, UPB_SIZE(84, 144), 18, 0, 12, 1},
{42, UPB_SIZE(24, 24), 10, 0, 8, 1}, {42, UPB_SIZE(24, 24), 10, 0, 8, 1},
{44, UPB_SIZE(92, 160), 19, 0, 9, 1}, {44, UPB_SIZE(92, 160), 19, 0, 12, 1},
{45, UPB_SIZE(100, 176), 20, 0, 9, 1}, {45, UPB_SIZE(100, 176), 20, 0, 12, 1},
{999, UPB_SIZE(108, 192), 0, 0, 11, 3}, {999, UPB_SIZE(108, 192), 0, 0, 11, 3},
}; };
@ -402,12 +402,12 @@ static const upb_msglayout *const google_protobuf_UninterpretedOption_submsgs[1]
static const upb_msglayout_field google_protobuf_UninterpretedOption__fields[7] = { static const upb_msglayout_field google_protobuf_UninterpretedOption__fields[7] = {
{2, UPB_SIZE(56, 80), 0, 0, 11, 3}, {2, UPB_SIZE(56, 80), 0, 0, 11, 3},
{3, UPB_SIZE(32, 32), 4, 0, 9, 1}, {3, UPB_SIZE(32, 32), 4, 0, 12, 1},
{4, UPB_SIZE(8, 8), 1, 0, 4, 1}, {4, UPB_SIZE(8, 8), 1, 0, 4, 1},
{5, UPB_SIZE(16, 16), 2, 0, 3, 1}, {5, UPB_SIZE(16, 16), 2, 0, 3, 1},
{6, UPB_SIZE(24, 24), 3, 0, 1, 1}, {6, UPB_SIZE(24, 24), 3, 0, 1, 1},
{7, UPB_SIZE(40, 48), 5, 0, 12, 1}, {7, UPB_SIZE(40, 48), 5, 0, 12, 1},
{8, UPB_SIZE(48, 64), 6, 0, 9, 1}, {8, UPB_SIZE(48, 64), 6, 0, 12, 1},
}; };
const upb_msglayout google_protobuf_UninterpretedOption_msginit = { const upb_msglayout google_protobuf_UninterpretedOption_msginit = {
@ -417,7 +417,7 @@ const upb_msglayout google_protobuf_UninterpretedOption_msginit = {
}; };
static const upb_msglayout_field google_protobuf_UninterpretedOption_NamePart__fields[2] = { static const upb_msglayout_field google_protobuf_UninterpretedOption_NamePart__fields[2] = {
{1, UPB_SIZE(4, 8), 2, 0, 9, 2}, {1, UPB_SIZE(4, 8), 2, 0, 12, 2},
{2, UPB_SIZE(1, 1), 1, 0, 8, 2}, {2, UPB_SIZE(1, 1), 1, 0, 8, 2},
}; };
@ -444,9 +444,9 @@ const upb_msglayout google_protobuf_SourceCodeInfo_msginit = {
static const upb_msglayout_field google_protobuf_SourceCodeInfo_Location__fields[5] = { static const upb_msglayout_field google_protobuf_SourceCodeInfo_Location__fields[5] = {
{1, UPB_SIZE(20, 40), 0, 0, 5, _UPB_LABEL_PACKED}, {1, UPB_SIZE(20, 40), 0, 0, 5, _UPB_LABEL_PACKED},
{2, UPB_SIZE(24, 48), 0, 0, 5, _UPB_LABEL_PACKED}, {2, UPB_SIZE(24, 48), 0, 0, 5, _UPB_LABEL_PACKED},
{3, UPB_SIZE(4, 8), 1, 0, 9, 1}, {3, UPB_SIZE(4, 8), 1, 0, 12, 1},
{4, UPB_SIZE(12, 24), 2, 0, 9, 1}, {4, UPB_SIZE(12, 24), 2, 0, 12, 1},
{6, UPB_SIZE(28, 56), 0, 0, 9, 3}, {6, UPB_SIZE(28, 56), 0, 0, 12, 3},
}; };
const upb_msglayout google_protobuf_SourceCodeInfo_Location_msginit = { const upb_msglayout google_protobuf_SourceCodeInfo_Location_msginit = {
@ -471,7 +471,7 @@ const upb_msglayout google_protobuf_GeneratedCodeInfo_msginit = {
static const upb_msglayout_field google_protobuf_GeneratedCodeInfo_Annotation__fields[4] = { static const upb_msglayout_field google_protobuf_GeneratedCodeInfo_Annotation__fields[4] = {
{1, UPB_SIZE(20, 32), 0, 0, 5, _UPB_LABEL_PACKED}, {1, UPB_SIZE(20, 32), 0, 0, 5, _UPB_LABEL_PACKED},
{2, UPB_SIZE(12, 16), 3, 0, 9, 1}, {2, UPB_SIZE(12, 16), 3, 0, 12, 1},
{3, UPB_SIZE(4, 4), 1, 0, 5, 1}, {3, UPB_SIZE(4, 4), 1, 0, 5, 1},
{4, UPB_SIZE(8, 8), 2, 0, 5, 1}, {4, UPB_SIZE(8, 8), 2, 0, 5, 1},
}; };

@ -3,6 +3,7 @@ local upb = require "lupb"
local lunit = require "lunit" local lunit = require "lunit"
local upb_test = require "tests.test_pb" local upb_test = require "tests.test_pb"
local test_messages_proto3 = require "google.protobuf.test_messages_proto3_pb" local test_messages_proto3 = require "google.protobuf.test_messages_proto3_pb"
local test_messages_proto2 = require "google.protobuf.test_messages_proto2_pb"
local descriptor = require "google.protobuf.descriptor_pb" local descriptor = require "google.protobuf.descriptor_pb"
if _VERSION >= 'Lua 5.2' then if _VERSION >= 'Lua 5.2' then
@ -69,6 +70,22 @@ function test_msg_map()
assert_equal(12, msg2.map_int32_int32[6]) assert_equal(12, msg2.map_int32_int32[6])
end end
function test_utf8()
local proto2_msg = test_messages_proto2.TestAllTypesProto2()
proto2_msg.optional_string = "\xff"
local serialized = upb.encode(proto2_msg)
-- Decoding invalid UTF-8 succeeds in proto2.
upb.decode(test_messages_proto2.TestAllTypesProto2, serialized)
-- Decoding invalid UTF-8 fails in proto2.
assert_error_match("Error decoding protobuf", function()
upb.decode(test_messages_proto3.TestAllTypesProto3, serialized)
end)
-- TOOD(haberman): should proto3 accessors also check UTF-8 at set time?
end
function test_string_double_map() function test_string_double_map()
msg = upb_test.MapTest() msg = upb_test.MapTest()
msg.map_string_double["one"] = 1.0 msg.map_string_double["one"] = 1.0

@ -62,11 +62,13 @@ static const unsigned fixed64_ok = (1 << UPB_DTYPE_DOUBLE) |
(1 << UPB_DTYPE_SFIXED64); (1 << UPB_DTYPE_SFIXED64);
/* Op: an action to be performed for a wire-type/field-type combination. */ /* Op: an action to be performed for a wire-type/field-type combination. */
#define OP_SCALAR_LG2(n) (n) #define OP_SCALAR_LG2(n) (n) /* n in [0, 2, 3] => op in [0, 2, 3] */
#define OP_FIXPCK_LG2(n) (n + 4)
#define OP_VARPCK_LG2(n) (n + 8)
#define OP_STRING 4 #define OP_STRING 4
#define OP_SUBMSG 5 #define OP_BYTES 5
#define OP_SUBMSG 6
/* Ops above are scalar-only. Repeated fields can use any op. */
#define OP_FIXPCK_LG2(n) (n + 5) /* n in [2, 3] => op in [7, 8] */
#define OP_VARPCK_LG2(n) (n + 9) /* n in [0, 2, 3] => op in [9, 11, 12] */
static const int8_t varint_ops[19] = { static const int8_t varint_ops[19] = {
-1, /* field not found */ -1, /* field not found */
@ -104,7 +106,7 @@ static const int8_t delim_ops[37] = {
OP_STRING, /* STRING */ OP_STRING, /* STRING */
-1, /* GROUP */ -1, /* GROUP */
OP_SUBMSG, /* MESSAGE */ OP_SUBMSG, /* MESSAGE */
OP_STRING, /* BYTES */ OP_BYTES, /* BYTES */
-1, /* UINT32 */ -1, /* UINT32 */
-1, /* ENUM */ -1, /* ENUM */
-1, /* SFIXED32 */ -1, /* SFIXED32 */
@ -123,7 +125,7 @@ static const int8_t delim_ops[37] = {
OP_STRING, /* REPEATED STRING */ OP_STRING, /* REPEATED STRING */
OP_SUBMSG, /* REPEATED GROUP */ OP_SUBMSG, /* REPEATED GROUP */
OP_SUBMSG, /* REPEATED MESSAGE */ OP_SUBMSG, /* REPEATED MESSAGE */
OP_STRING, /* REPEATED BYTES */ OP_BYTES, /* REPEATED BYTES */
OP_VARPCK_LG2(2), /* REPEATED UINT32 */ OP_VARPCK_LG2(2), /* REPEATED UINT32 */
OP_VARPCK_LG2(2), /* REPEATED ENUM */ OP_VARPCK_LG2(2), /* REPEATED ENUM */
OP_FIXPCK_LG2(2), /* REPEATED SFIXED32 */ OP_FIXPCK_LG2(2), /* REPEATED SFIXED32 */
@ -155,6 +157,40 @@ static const char *decode_msg(upb_decstate *d, const char *ptr, upb_msg *msg,
UPB_NORETURN static void decode_err(upb_decstate *d) { longjmp(d->err, 1); } UPB_NORETURN static void decode_err(upb_decstate *d) { longjmp(d->err, 1); }
void decode_verifyutf8(upb_decstate *d, const char *buf, int len) {
static const uint8_t utf8_offset[] = {
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0,
};
int i, j;
uint8_t offset;
i = 0;
while (i < len) {
offset = utf8_offset[(uint8_t)buf[i]];
if (offset == 0 || i + offset > len) {
decode_err(d);
}
for (j = i + 1; j < i + offset; j++) {
if ((buf[j] & 0xc0) != 0x80) {
decode_err(d);
}
}
i += offset;
}
if (i != len) decode_err(d);
}
static bool decode_reserve(upb_decstate *d, upb_array *arr, size_t elem) { static bool decode_reserve(upb_decstate *d, upb_array *arr, size_t elem) {
bool need_realloc = arr->size - arr->len < elem; bool need_realloc = arr->size - arr->len < elem;
if (need_realloc && !_upb_array_realloc(arr, arr->len + elem, d->arena)) { if (need_realloc && !_upb_array_realloc(arr, arr->len + elem, d->arena)) {
@ -300,7 +336,10 @@ static const char *decode_toarray(upb_decstate *d, const char *ptr,
memcpy(mem, &val, 1 << op); memcpy(mem, &val, 1 << op);
return ptr; return ptr;
case OP_STRING: case OP_STRING:
/* Append string. */ decode_verifyutf8(d, val.str_val.data, val.str_val.size);
/* Fallthrough. */
case OP_BYTES:
/* Append bytes. */
mem = mem =
UPB_PTR_AT(_upb_array_ptr(arr), arr->len * sizeof(upb_strview), void); UPB_PTR_AT(_upb_array_ptr(arr), arr->len * sizeof(upb_strview), void);
arr->len++; arr->len++;
@ -434,6 +473,9 @@ static const char *decode_tomsg(upb_decstate *d, const char *ptr, upb_msg *msg,
break; break;
} }
case OP_STRING: case OP_STRING:
decode_verifyutf8(d, val.str_val.data, val.str_val.size);
/* Fallthrough. */
case OP_BYTES:
memcpy(mem, &val, sizeof(upb_strview)); memcpy(mem, &val, sizeof(upb_strview));
break; break;
case OP_SCALAR_LG2(3): case OP_SCALAR_LG2(3):

@ -1013,6 +1013,13 @@ static bool make_layout(const upb_symtab *symtab, const upb_msgdef *m) {
field->descriptortype = upb_fielddef_descriptortype(f); field->descriptortype = upb_fielddef_descriptortype(f);
field->label = upb_fielddef_label(f); field->label = upb_fielddef_label(f);
if (field->descriptortype == UPB_DTYPE_STRING &&
f->file->syntax == UPB_SYNTAX_PROTO2) {
/* See TableDescriptorType() in upbc/generator.cc for details and
* rationale. */
field->descriptortype = UPB_DTYPE_BYTES;
}
if (upb_fielddef_ismap(f)) { if (upb_fielddef_ismap(f)) {
field->label = _UPB_LABEL_MAP; field->label = _UPB_LABEL_MAP;
} else if (upb_fielddef_packed(f)) { } else if (upb_fielddef_packed(f)) {

@ -687,6 +687,22 @@ void WriteHeader(const protobuf::FileDescriptor* file, Output& output) {
ToPreproc(file->name())); ToPreproc(file->name()));
} }
int TableDescriptorType(const protobuf::FieldDescriptor* field) {
if (field->file()->syntax() == protobuf::FileDescriptor::SYNTAX_PROTO2 &&
field->type() == protobuf::FieldDescriptor::TYPE_STRING) {
// From the perspective of the binary encoder/decoder, proto2 string fields
// are identical to bytes fields. Only in proto3 do we check UTF-8 for
// string fields at parse time.
//
// If we ever use these tables for JSON encoding/decoding (for example by
// embedding field names on the side) we will have to revisit this, because
// string vs. bytes behavior is not affected by proto2 vs proto3.
return protobuf::FieldDescriptor::TYPE_BYTES;
} else {
return field->type();
}
}
void WriteSource(const protobuf::FileDescriptor* file, Output& output) { void WriteSource(const protobuf::FileDescriptor* file, Output& output) {
EmitFileWarning(file, output); EmitFileWarning(file, output);
@ -781,7 +797,7 @@ void WriteSource(const protobuf::FileDescriptor* file, Output& output) {
GetSizeInit(layout.GetFieldOffset(field)), GetSizeInit(layout.GetFieldOffset(field)),
presence, presence,
submsg_index, submsg_index,
field->type(), TableDescriptorType(field),
label); label);
} }
output("};\n\n"); output("};\n\n");

Loading…
Cancel
Save