Merge commit '6d7d35fa3968f00bc252e5314b962a9a3b44c67f' into upb-upgrade

pull/23727/head
Esun Kim 5 years ago
commit 3eaa178894
  1. 3
      third_party/upb/bazel/upb_proto_library.bzl
  2. 72
      third_party/upb/generated_for_cmake/google/protobuf/descriptor.upb.c
  3. 5
      third_party/upb/kokoro/ubuntu/build.sh
  4. 17
      third_party/upb/tests/bindings/lua/test_upb.lua
  5. 17
      third_party/upb/tests/test_cpp.cc
  6. 4
      third_party/upb/tests/test_generated_code.c
  7. 79
      third_party/upb/upb/decode.c
  8. 47
      third_party/upb/upb/def.c
  9. 4
      third_party/upb/upb/encode.c
  10. 6
      third_party/upb/upb/handlers.h
  11. 1
      third_party/upb/upb/json_decode.c
  12. 19
      third_party/upb/upb/json_encode.c
  13. 4
      third_party/upb/upb/msg.h
  14. 2
      third_party/upb/upb/port_def.inc
  15. 11
      third_party/upb/upb/table.c
  16. 9
      third_party/upb/upb/table.int.h
  17. 30
      third_party/upb/upb/upb.c
  18. 26
      third_party/upb/upb/upb.h
  19. 2
      third_party/upb/upb/upb.hpp
  20. 22
      third_party/upb/upbc/generator.cc

@ -121,6 +121,9 @@ _WrappedGeneratedSrcsInfo = provider(fields = ["srcs"])
_WrappedDefsGeneratedSrcsInfo = provider(fields = ["srcs"])
def _compile_upb_protos(ctx, proto_info, proto_sources, ext):
if len(proto_sources) == 0:
return GeneratedSrcsInfo(srcs = [], hdrs = [])
srcs = [_generate_output_file(ctx, name, ext + ".c") for name in proto_sources]
hdrs = [_generate_output_file(ctx, name, ext + ".h") for name in proto_sources]
transitive_sets = proto_info.transitive_descriptor_sets.to_list()

@ -36,9 +36,9 @@ static const upb_msglayout *const google_protobuf_FileDescriptorProto_submsgs[6]
};
static const upb_msglayout_field google_protobuf_FileDescriptorProto__fields[12] = {
{1, UPB_SIZE(4, 8), 1, 0, 9, 1},
{2, UPB_SIZE(12, 24), 2, 0, 9, 1},
{3, UPB_SIZE(36, 72), 0, 0, 9, 3},
{1, UPB_SIZE(4, 8), 1, 0, 12, 1},
{2, UPB_SIZE(12, 24), 2, 0, 12, 1},
{3, UPB_SIZE(36, 72), 0, 0, 12, 3},
{4, UPB_SIZE(40, 80), 0, 0, 11, 3},
{5, UPB_SIZE(44, 88), 0, 1, 11, 3},
{6, UPB_SIZE(48, 96), 0, 4, 11, 3},
@ -47,7 +47,7 @@ static const upb_msglayout_field google_protobuf_FileDescriptorProto__fields[12]
{9, UPB_SIZE(32, 64), 5, 5, 11, 1},
{10, UPB_SIZE(56, 112), 0, 0, 5, 3},
{11, UPB_SIZE(60, 120), 0, 0, 5, 3},
{12, UPB_SIZE(20, 40), 3, 0, 9, 1},
{12, UPB_SIZE(20, 40), 3, 0, 12, 1},
};
const upb_msglayout google_protobuf_FileDescriptorProto_msginit = {
@ -67,7 +67,7 @@ static const upb_msglayout *const google_protobuf_DescriptorProto_submsgs[8] = {
};
static const upb_msglayout_field google_protobuf_DescriptorProto__fields[10] = {
{1, UPB_SIZE(4, 8), 1, 0, 9, 1},
{1, UPB_SIZE(4, 8), 1, 0, 12, 1},
{2, UPB_SIZE(16, 32), 0, 4, 11, 3},
{3, UPB_SIZE(20, 40), 0, 0, 11, 3},
{4, UPB_SIZE(24, 48), 0, 3, 11, 3},
@ -76,7 +76,7 @@ static const upb_msglayout_field google_protobuf_DescriptorProto__fields[10] = {
{7, UPB_SIZE(12, 24), 2, 5, 11, 1},
{8, UPB_SIZE(36, 72), 0, 6, 11, 3},
{9, UPB_SIZE(40, 80), 0, 2, 11, 3},
{10, UPB_SIZE(44, 88), 0, 0, 9, 3},
{10, UPB_SIZE(44, 88), 0, 0, 12, 3},
};
const upb_msglayout google_protobuf_DescriptorProto_msginit = {
@ -131,16 +131,16 @@ static const upb_msglayout *const google_protobuf_FieldDescriptorProto_submsgs[1
};
static const upb_msglayout_field google_protobuf_FieldDescriptorProto__fields[11] = {
{1, UPB_SIZE(36, 40), 6, 0, 9, 1},
{2, UPB_SIZE(44, 56), 7, 0, 9, 1},
{1, UPB_SIZE(36, 40), 6, 0, 12, 1},
{2, UPB_SIZE(44, 56), 7, 0, 12, 1},
{3, UPB_SIZE(24, 24), 3, 0, 5, 1},
{4, UPB_SIZE(8, 8), 1, 0, 14, 1},
{5, UPB_SIZE(16, 16), 2, 0, 14, 1},
{6, UPB_SIZE(52, 72), 8, 0, 9, 1},
{7, UPB_SIZE(60, 88), 9, 0, 9, 1},
{6, UPB_SIZE(52, 72), 8, 0, 12, 1},
{7, UPB_SIZE(60, 88), 9, 0, 12, 1},
{8, UPB_SIZE(76, 120), 11, 0, 11, 1},
{9, UPB_SIZE(28, 28), 4, 0, 5, 1},
{10, UPB_SIZE(68, 104), 10, 0, 9, 1},
{10, UPB_SIZE(68, 104), 10, 0, 12, 1},
{17, UPB_SIZE(32, 32), 5, 0, 8, 1},
};
@ -155,7 +155,7 @@ static const upb_msglayout *const google_protobuf_OneofDescriptorProto_submsgs[1
};
static const upb_msglayout_field google_protobuf_OneofDescriptorProto__fields[2] = {
{1, UPB_SIZE(4, 8), 1, 0, 9, 1},
{1, UPB_SIZE(4, 8), 1, 0, 12, 1},
{2, UPB_SIZE(12, 24), 2, 0, 11, 1},
};
@ -172,11 +172,11 @@ static const upb_msglayout *const google_protobuf_EnumDescriptorProto_submsgs[3]
};
static const upb_msglayout_field google_protobuf_EnumDescriptorProto__fields[5] = {
{1, UPB_SIZE(4, 8), 1, 0, 9, 1},
{1, UPB_SIZE(4, 8), 1, 0, 12, 1},
{2, UPB_SIZE(16, 32), 0, 2, 11, 3},
{3, UPB_SIZE(12, 24), 2, 1, 11, 1},
{4, UPB_SIZE(20, 40), 0, 0, 11, 3},
{5, UPB_SIZE(24, 48), 0, 0, 9, 3},
{5, UPB_SIZE(24, 48), 0, 0, 12, 3},
};
const upb_msglayout google_protobuf_EnumDescriptorProto_msginit = {
@ -201,7 +201,7 @@ static const upb_msglayout *const google_protobuf_EnumValueDescriptorProto_subms
};
static const upb_msglayout_field google_protobuf_EnumValueDescriptorProto__fields[3] = {
{1, UPB_SIZE(8, 8), 2, 0, 9, 1},
{1, UPB_SIZE(8, 8), 2, 0, 12, 1},
{2, UPB_SIZE(4, 4), 1, 0, 5, 1},
{3, UPB_SIZE(16, 24), 3, 0, 11, 1},
};
@ -218,7 +218,7 @@ static const upb_msglayout *const google_protobuf_ServiceDescriptorProto_submsgs
};
static const upb_msglayout_field google_protobuf_ServiceDescriptorProto__fields[3] = {
{1, UPB_SIZE(4, 8), 1, 0, 9, 1},
{1, UPB_SIZE(4, 8), 1, 0, 12, 1},
{2, UPB_SIZE(16, 32), 0, 0, 11, 3},
{3, UPB_SIZE(12, 24), 2, 1, 11, 1},
};
@ -234,9 +234,9 @@ static const upb_msglayout *const google_protobuf_MethodDescriptorProto_submsgs[
};
static const upb_msglayout_field google_protobuf_MethodDescriptorProto__fields[6] = {
{1, UPB_SIZE(4, 8), 3, 0, 9, 1},
{2, UPB_SIZE(12, 24), 4, 0, 9, 1},
{3, UPB_SIZE(20, 40), 5, 0, 9, 1},
{1, UPB_SIZE(4, 8), 3, 0, 12, 1},
{2, UPB_SIZE(12, 24), 4, 0, 12, 1},
{3, UPB_SIZE(20, 40), 5, 0, 12, 1},
{4, UPB_SIZE(28, 56), 6, 0, 11, 1},
{5, UPB_SIZE(1, 1), 1, 0, 8, 1},
{6, UPB_SIZE(2, 2), 2, 0, 8, 1},
@ -253,11 +253,11 @@ static const upb_msglayout *const google_protobuf_FileOptions_submsgs[1] = {
};
static const upb_msglayout_field google_protobuf_FileOptions__fields[21] = {
{1, UPB_SIZE(28, 32), 11, 0, 9, 1},
{8, UPB_SIZE(36, 48), 12, 0, 9, 1},
{1, UPB_SIZE(28, 32), 11, 0, 12, 1},
{8, UPB_SIZE(36, 48), 12, 0, 12, 1},
{9, UPB_SIZE(8, 8), 1, 0, 14, 1},
{10, UPB_SIZE(16, 16), 2, 0, 8, 1},
{11, UPB_SIZE(44, 64), 13, 0, 9, 1},
{11, UPB_SIZE(44, 64), 13, 0, 12, 1},
{16, UPB_SIZE(17, 17), 3, 0, 8, 1},
{17, UPB_SIZE(18, 18), 4, 0, 8, 1},
{18, UPB_SIZE(19, 19), 5, 0, 8, 1},
@ -265,14 +265,14 @@ static const upb_msglayout_field google_protobuf_FileOptions__fields[21] = {
{23, UPB_SIZE(21, 21), 7, 0, 8, 1},
{27, UPB_SIZE(22, 22), 8, 0, 8, 1},
{31, UPB_SIZE(23, 23), 9, 0, 8, 1},
{36, UPB_SIZE(52, 80), 14, 0, 9, 1},
{37, UPB_SIZE(60, 96), 15, 0, 9, 1},
{39, UPB_SIZE(68, 112), 16, 0, 9, 1},
{40, UPB_SIZE(76, 128), 17, 0, 9, 1},
{41, UPB_SIZE(84, 144), 18, 0, 9, 1},
{36, UPB_SIZE(52, 80), 14, 0, 12, 1},
{37, UPB_SIZE(60, 96), 15, 0, 12, 1},
{39, UPB_SIZE(68, 112), 16, 0, 12, 1},
{40, UPB_SIZE(76, 128), 17, 0, 12, 1},
{41, UPB_SIZE(84, 144), 18, 0, 12, 1},
{42, UPB_SIZE(24, 24), 10, 0, 8, 1},
{44, UPB_SIZE(92, 160), 19, 0, 9, 1},
{45, UPB_SIZE(100, 176), 20, 0, 9, 1},
{44, UPB_SIZE(92, 160), 19, 0, 12, 1},
{45, UPB_SIZE(100, 176), 20, 0, 12, 1},
{999, UPB_SIZE(108, 192), 0, 0, 11, 3},
};
@ -402,12 +402,12 @@ static const upb_msglayout *const google_protobuf_UninterpretedOption_submsgs[1]
static const upb_msglayout_field google_protobuf_UninterpretedOption__fields[7] = {
{2, UPB_SIZE(56, 80), 0, 0, 11, 3},
{3, UPB_SIZE(32, 32), 4, 0, 9, 1},
{3, UPB_SIZE(32, 32), 4, 0, 12, 1},
{4, UPB_SIZE(8, 8), 1, 0, 4, 1},
{5, UPB_SIZE(16, 16), 2, 0, 3, 1},
{6, UPB_SIZE(24, 24), 3, 0, 1, 1},
{7, UPB_SIZE(40, 48), 5, 0, 12, 1},
{8, UPB_SIZE(48, 64), 6, 0, 9, 1},
{8, UPB_SIZE(48, 64), 6, 0, 12, 1},
};
const upb_msglayout google_protobuf_UninterpretedOption_msginit = {
@ -417,7 +417,7 @@ const upb_msglayout google_protobuf_UninterpretedOption_msginit = {
};
static const upb_msglayout_field google_protobuf_UninterpretedOption_NamePart__fields[2] = {
{1, UPB_SIZE(4, 8), 2, 0, 9, 2},
{1, UPB_SIZE(4, 8), 2, 0, 12, 2},
{2, UPB_SIZE(1, 1), 1, 0, 8, 2},
};
@ -444,9 +444,9 @@ const upb_msglayout google_protobuf_SourceCodeInfo_msginit = {
static const upb_msglayout_field google_protobuf_SourceCodeInfo_Location__fields[5] = {
{1, UPB_SIZE(20, 40), 0, 0, 5, _UPB_LABEL_PACKED},
{2, UPB_SIZE(24, 48), 0, 0, 5, _UPB_LABEL_PACKED},
{3, UPB_SIZE(4, 8), 1, 0, 9, 1},
{4, UPB_SIZE(12, 24), 2, 0, 9, 1},
{6, UPB_SIZE(28, 56), 0, 0, 9, 3},
{3, UPB_SIZE(4, 8), 1, 0, 12, 1},
{4, UPB_SIZE(12, 24), 2, 0, 12, 1},
{6, UPB_SIZE(28, 56), 0, 0, 12, 3},
};
const upb_msglayout google_protobuf_SourceCodeInfo_Location_msginit = {
@ -471,7 +471,7 @@ const upb_msglayout google_protobuf_GeneratedCodeInfo_msginit = {
static const upb_msglayout_field google_protobuf_GeneratedCodeInfo_Annotation__fields[4] = {
{1, UPB_SIZE(20, 32), 0, 0, 5, _UPB_LABEL_PACKED},
{2, UPB_SIZE(12, 16), 3, 0, 9, 1},
{2, UPB_SIZE(12, 16), 3, 0, 12, 1},
{3, UPB_SIZE(4, 4), 1, 0, 5, 1},
{4, UPB_SIZE(8, 8), 2, 0, 5, 1},
};

@ -31,4 +31,9 @@ if [[ $(uname) = "Linux" ]]; then
# For some reason kokoro doesn't have Clang available right now.
#CC=clang CXX=clang++ bazel test -c dbg --copt=-fsanitize=undefined --copt=-fno-sanitize=function,vptr --linkopt=-fsanitize=undefined --action_env=UBSAN_OPTIONS=halt_on_error=1:print_stacktrace=1 -- :all -:test_lua
fi
if which valgrind; then
bazel test --run_under='valgrind --leak-check=full --error-exitcode=1' :all -- -:test_conformance_upb -:cmake_build
fi

@ -3,6 +3,7 @@ local upb = require "lupb"
local lunit = require "lunit"
local upb_test = require "tests.test_pb"
local test_messages_proto3 = require "google.protobuf.test_messages_proto3_pb"
local test_messages_proto2 = require "google.protobuf.test_messages_proto2_pb"
local descriptor = require "google.protobuf.descriptor_pb"
if _VERSION >= 'Lua 5.2' then
@ -69,6 +70,22 @@ function test_msg_map()
assert_equal(12, msg2.map_int32_int32[6])
end
function test_utf8()
local proto2_msg = test_messages_proto2.TestAllTypesProto2()
proto2_msg.optional_string = "\xff"
local serialized = upb.encode(proto2_msg)
-- Decoding invalid UTF-8 succeeds in proto2.
upb.decode(test_messages_proto2.TestAllTypesProto2, serialized)
-- Decoding invalid UTF-8 fails in proto2.
assert_error_match("Error decoding protobuf", function()
upb.decode(test_messages_proto3.TestAllTypesProto3, serialized)
end)
-- TOOD(haberman): should proto3 accessors also check UTF-8 at set time?
end
function test_string_double_map()
msg = upb_test.MapTest()
msg.map_string_double["one"] = 1.0

@ -50,7 +50,7 @@ static const int kExpectedHandlerData = 1232323;
class StringBufTesterBase {
public:
static const int kFieldNumber = 3;
static constexpr int kFieldNumber = 3;
StringBufTesterBase() : seen_(false), handler_data_val_(0) {}
@ -286,7 +286,7 @@ class StartMsgTesterBase {
public:
// We don't need the FieldDef it will create, but the test harness still
// requires that we provide one.
static const int kFieldNumber = 3;
static constexpr int kFieldNumber = 3;
StartMsgTesterBase() : seen_(false), handler_data_val_(0) {}
@ -437,7 +437,7 @@ class StartMsgTesterBoolMethodWithHandlerData : public StartMsgTesterBase {
class Int32ValueTesterBase {
public:
static const int kFieldNumber = 1;
static constexpr int kFieldNumber = 1;
Int32ValueTesterBase() : seen_(false), val_(0), handler_data_val_(0) {}
@ -939,6 +939,17 @@ void TestArena() {
upb_arena_malloc(arena.ptr(), 1000000);
}
ASSERT(n == 0);
{
// Test fuse.
upb::Arena arena1;
upb::Arena arena2;
arena1.Fuse(arena2);
upb_arena_malloc(arena1.ptr(), 10000);
upb_arena_malloc(arena2.ptr(), 10000);
}
}
extern "C" {

@ -70,7 +70,7 @@ static void test_scalars() {
static void check_string_map_empty(
protobuf_test_messages_proto3_TestAllTypesProto3 *msg) {
size_t iter;
size_t iter = UPB_MAP_BEGIN;
ASSERT(
protobuf_test_messages_proto3_TestAllTypesProto3_map_string_string_size(
@ -212,7 +212,7 @@ static void test_string_map() {
static void check_int32_map_empty(
protobuf_test_messages_proto3_TestAllTypesProto3 *msg) {
size_t iter;
size_t iter = UPB_MAP_BEGIN;
ASSERT(
protobuf_test_messages_proto3_TestAllTypesProto3_map_int32_int32_size(

@ -62,11 +62,13 @@ static const unsigned fixed64_ok = (1 << UPB_DTYPE_DOUBLE) |
(1 << UPB_DTYPE_SFIXED64);
/* Op: an action to be performed for a wire-type/field-type combination. */
#define OP_SCALAR_LG2(n) (n)
#define OP_FIXPCK_LG2(n) (n + 4)
#define OP_VARPCK_LG2(n) (n + 8)
#define OP_SCALAR_LG2(n) (n) /* n in [0, 2, 3] => op in [0, 2, 3] */
#define OP_STRING 4
#define OP_SUBMSG 5
#define OP_BYTES 5
#define OP_SUBMSG 6
/* Ops above are scalar-only. Repeated fields can use any op. */
#define OP_FIXPCK_LG2(n) (n + 5) /* n in [2, 3] => op in [7, 8] */
#define OP_VARPCK_LG2(n) (n + 9) /* n in [0, 2, 3] => op in [9, 11, 12] */
static const int8_t varint_ops[19] = {
-1, /* field not found */
@ -104,7 +106,7 @@ static const int8_t delim_ops[37] = {
OP_STRING, /* STRING */
-1, /* GROUP */
OP_SUBMSG, /* MESSAGE */
OP_STRING, /* BYTES */
OP_BYTES, /* BYTES */
-1, /* UINT32 */
-1, /* ENUM */
-1, /* SFIXED32 */
@ -123,7 +125,7 @@ static const int8_t delim_ops[37] = {
OP_STRING, /* REPEATED STRING */
OP_SUBMSG, /* REPEATED GROUP */
OP_SUBMSG, /* REPEATED MESSAGE */
OP_STRING, /* REPEATED BYTES */
OP_BYTES, /* REPEATED BYTES */
OP_VARPCK_LG2(2), /* REPEATED UINT32 */
OP_VARPCK_LG2(2), /* REPEATED ENUM */
OP_FIXPCK_LG2(2), /* REPEATED SFIXED32 */
@ -143,8 +145,6 @@ typedef struct {
typedef union {
bool bool_val;
int32_t int32_val;
int64_t int64_val;
uint32_t uint32_val;
uint64_t uint64_val;
upb_strview str_val;
@ -155,6 +155,40 @@ static const char *decode_msg(upb_decstate *d, const char *ptr, upb_msg *msg,
UPB_NORETURN static void decode_err(upb_decstate *d) { longjmp(d->err, 1); }
void decode_verifyutf8(upb_decstate *d, const char *buf, int len) {
static const uint8_t utf8_offset[] = {
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0,
};
int i, j;
uint8_t offset;
i = 0;
while (i < len) {
offset = utf8_offset[(uint8_t)buf[i]];
if (offset == 0 || i + offset > len) {
decode_err(d);
}
for (j = i + 1; j < i + offset; j++) {
if ((buf[j] & 0xc0) != 0x80) {
decode_err(d);
}
}
i += offset;
}
if (i != len) decode_err(d);
}
static bool decode_reserve(upb_decstate *d, upb_array *arr, size_t elem) {
bool need_realloc = arr->size - arr->len < elem;
if (need_realloc && !_upb_array_realloc(arr, arr->len + elem, d->arena)) {
@ -209,14 +243,21 @@ static void decode_munge(int type, wireval *val) {
break;
case UPB_DESCRIPTOR_TYPE_SINT32: {
uint32_t n = val->uint32_val;
val->int32_val = (n >> 1) ^ -(int32_t)(n & 1);
val->uint32_val = (n >> 1) ^ -(int32_t)(n & 1);
break;
}
case UPB_DESCRIPTOR_TYPE_SINT64: {
uint64_t n = val->uint64_val;
val->int64_val = (n >> 1) ^ -(int64_t)(n & 1);
val->uint64_val = (n >> 1) ^ -(int64_t)(n & 1);
break;
}
case UPB_DESCRIPTOR_TYPE_INT32:
case UPB_DESCRIPTOR_TYPE_UINT32:
if (!_upb_isle()) {
/* The next stage will memcpy(dst, &val, 4) */
val->uint32_val = val->uint64_val;
}
break;
}
}
@ -300,7 +341,10 @@ static const char *decode_toarray(upb_decstate *d, const char *ptr,
memcpy(mem, &val, 1 << op);
return ptr;
case OP_STRING:
/* Append string. */
decode_verifyutf8(d, val.str_val.data, val.str_val.size);
/* Fallthrough. */
case OP_BYTES:
/* Append bytes. */
mem =
UPB_PTR_AT(_upb_array_ptr(arr), arr->len * sizeof(upb_strview), void);
arr->len++;
@ -389,7 +433,7 @@ static void decode_tomap(upb_decstate *d, upb_msg *msg,
if (entry->fields[1].descriptortype == UPB_DESCRIPTOR_TYPE_MESSAGE ||
entry->fields[1].descriptortype == UPB_DESCRIPTOR_TYPE_GROUP) {
/* Create proactively to handle the case where it doesn't appear. */
ent.v.val.val = (uint64_t)_upb_msg_new(entry->submsgs[0], d->arena);
ent.v.val = upb_value_ptr(_upb_msg_new(entry->submsgs[0], d->arena));
}
decode_tosubmsg(d, &ent.k, layout, field, val.str_val);
@ -434,6 +478,9 @@ static const char *decode_tomsg(upb_decstate *d, const char *ptr, upb_msg *msg,
break;
}
case OP_STRING:
decode_verifyutf8(d, val.str_val.data, val.str_val.size);
/* Fallthrough. */
case OP_BYTES:
memcpy(mem, &val, sizeof(upb_strview));
break;
case OP_SCALAR_LG2(3):
@ -477,14 +524,16 @@ static const char *decode_msg(upb_decstate *d, const char *ptr, upb_msg *msg,
break;
case UPB_WIRE_TYPE_32BIT:
if (d->limit - ptr < 4) decode_err(d);
memcpy(&val, ptr, 4);
memcpy(&val.uint32_val, ptr, 4);
val.uint32_val = _upb_be_swap32(val.uint32_val);
ptr += 4;
op = OP_SCALAR_LG2(2);
if (((1 << field->descriptortype) & fixed32_ok) == 0) goto unknown;
break;
case UPB_WIRE_TYPE_64BIT:
if (d->limit - ptr < 8) decode_err(d);
memcpy(&val, ptr, 8);
memcpy(&val.uint64_val, ptr, 8);
val.uint64_val = _upb_be_swap64(val.uint64_val);
ptr += 8;
op = OP_SCALAR_LG2(3);
if (((1 << field->descriptortype) & fixed64_ok) == 0) goto unknown;
@ -504,7 +553,7 @@ static const char *decode_msg(upb_decstate *d, const char *ptr, upb_msg *msg,
break;
}
case UPB_WIRE_TYPE_START_GROUP:
val.int32_val = field_number;
val.uint32_val = field_number;
op = OP_SUBMSG;
if (field->descriptortype != UPB_DTYPE_GROUP) goto unknown;
break;

@ -922,6 +922,22 @@ static uint32_t upb_msglayout_place(upb_msglayout *l, size_t size) {
return ret;
}
static int field_number_cmp(const void *p1, const void *p2) {
const upb_msglayout_field *f1 = p1;
const upb_msglayout_field *f2 = p2;
return f1->number - f2->number;
}
static void assign_layout_indices(const upb_msgdef *m, upb_msglayout_field *fields) {
int i;
int n = upb_msgdef_numfields(m);
for (i = 0; i < n; i++) {
upb_fielddef *f = (upb_fielddef*)upb_msgdef_itof(m, fields[i].number);
UPB_ASSERT(f);
f->layout_index = i;
}
}
/* This function is the dynamic equivalent of message_layout.{cc,h} in upbc.
* It computes a dynamic layout for all of the fields in |m|. */
static bool make_layout(const upb_symtab *symtab, const upb_msgdef *m) {
@ -997,16 +1013,19 @@ static bool make_layout(const upb_symtab *symtab, const upb_msgdef *m) {
field->descriptortype = upb_fielddef_descriptortype(f);
field->label = upb_fielddef_label(f);
if (field->descriptortype == UPB_DTYPE_STRING &&
f->file->syntax == UPB_SYNTAX_PROTO2) {
/* See TableDescriptorType() in upbc/generator.cc for details and
* rationale. */
field->descriptortype = UPB_DTYPE_BYTES;
}
if (upb_fielddef_ismap(f)) {
field->label = _UPB_LABEL_MAP;
} else if (upb_fielddef_packed(f)) {
field->label = _UPB_LABEL_PACKED;
}
/* TODO: we probably should sort the fields by field number to match the
* output of upbc, and to improve search speed for the table parser. */
f->layout_index = f->index_;
if (upb_fielddef_issubmsg(f)) {
const upb_msgdef *subm = upb_fielddef_msgsubdef(f);
field->submsg_index = submsg_count++;
@ -1079,6 +1098,10 @@ static bool make_layout(const upb_symtab *symtab, const upb_msgdef *m) {
* alignment. TODO: track overall alignment for real? */
l->size = UPB_ALIGN_UP(l->size, 8);
/* Sort fields by number. */
qsort(fields, upb_msgdef_numfields(m), sizeof(*fields), field_number_cmp);
assign_layout_indices(m, fields);
return true;
}
@ -1532,13 +1555,21 @@ static bool create_fielddef(
f->oneof = NULL;
}
if (google_protobuf_FieldDescriptorProto_has_options(field_proto)) {
options = google_protobuf_FieldDescriptorProto_options(field_proto);
f->lazy_ = google_protobuf_FieldOptions_lazy(options);
options = google_protobuf_FieldDescriptorProto_has_options(field_proto) ?
google_protobuf_FieldDescriptorProto_options(field_proto) : NULL;
if (options && google_protobuf_FieldOptions_has_packed(options)) {
f->packed_ = google_protobuf_FieldOptions_packed(options);
} else {
/* Repeated fields default to packed for proto3 only. */
f->packed_ = upb_fielddef_isprimitive(f) &&
f->label_ == UPB_LABEL_REPEATED && f->file->syntax == UPB_SYNTAX_PROTO3;
}
if (options) {
f->lazy_ = google_protobuf_FieldOptions_lazy(options);
} else {
f->lazy_ = false;
f->packed_ = false;
}
return true;

@ -77,12 +77,12 @@ static bool upb_put_bytes(upb_encstate *e, const void *data, size_t len) {
}
static bool upb_put_fixed64(upb_encstate *e, uint64_t val) {
/* TODO(haberman): byte-swap for big endian. */
val = _upb_be_swap64(val);
return upb_put_bytes(e, &val, sizeof(uint64_t));
}
static bool upb_put_fixed32(upb_encstate *e, uint32_t val) {
/* TODO(haberman): byte-swap for big endian. */
val = _upb_be_swap32(val);
return upb_put_bytes(e, &val, sizeof(uint32_t));
}

@ -106,7 +106,7 @@ typedef struct {
#define UPB_HANDLERATTR_INIT {NULL, NULL, NULL, false}
/* Bufhandle, data passed along with a buffer to indicate its provenance. */
typedef struct {
struct upb_bufhandle {
/* The beginning of the buffer. This may be different than the pointer
* passed to a StringBuf handler because the handler may receive data
* that is from the middle or end of a larger buffer. */
@ -133,7 +133,9 @@ typedef struct {
: NULL;
}
#endif
} upb_bufhandle;
};
typedef struct upb_bufhandle upb_bufhandle;
#define UPB_BUFHANDLE_INIT {NULL, 0, NULL, NULL}

@ -409,6 +409,7 @@ static upb_strview jsondec_string(jsondec *d) {
upb_strview ret;
ret.data = buf;
ret.size = end - buf;
*end = '\0'; /* Needed for possible strtod(). */
return ret;
}
case '\\':

@ -38,6 +38,14 @@ UPB_NORETURN static void jsonenc_err(jsonenc *e, const char *msg) {
longjmp(e->err, 1);
}
UPB_NORETURN static void jsonenc_errf(jsonenc *e, const char *fmt, ...) {
va_list argp;
va_start(argp, fmt);
upb_status_vseterrf(e->status, fmt, argp);
va_end(argp);
longjmp(e->err, 1);
}
static upb_arena *jsonenc_arena(jsonenc *e) {
/* Create lazily, since it's only needed for Any */
if (!e->arena) {
@ -279,7 +287,11 @@ static const upb_msgdef *jsonenc_getanymsg(jsonenc *e, upb_strview type_url) {
const char *ptr = end;
const upb_msgdef *ret;
if (!e->ext_pool || type_url.size == 0) goto badurl;
if (!e->ext_pool) {
jsonenc_err(e, "Tried to encode Any, but no symtab was provided");
}
if (type_url.size == 0) goto badurl;
while (true) {
if (--ptr == type_url.data) {
@ -295,13 +307,14 @@ static const upb_msgdef *jsonenc_getanymsg(jsonenc *e, upb_strview type_url) {
ret = upb_symtab_lookupmsg2(e->ext_pool, ptr, end - ptr);
if (!ret) {
jsonenc_err(e, "Couldn't find Any type");
jsonenc_errf(e, "Couldn't find Any type: %.*s", (int)(end - ptr), ptr);
}
return ret;
badurl:
jsonenc_err(e, "Bad type URL");
jsonenc_errf(
e, "Bad type URL: " UPB_STRVIEW_FORMAT, UPB_STRVIEW_ARGS(type_url));
}
static void jsonenc_any(jsonenc *e, const upb_msg *msg, const upb_msgdef *m) {

@ -324,7 +324,7 @@ UPB_INLINE upb_value _upb_map_tovalue(const void *val, size_t size,
if (size == UPB_MAPTYPE_STRING) {
upb_strview *strp = (upb_strview*)upb_arena_malloc(a, sizeof(*strp));
*strp = *(upb_strview*)val;
memcpy(&ret, &strp, sizeof(strp));
ret = upb_value_ptr(strp);
} else {
memcpy(&ret, val, size);
}
@ -455,7 +455,7 @@ UPB_INLINE void _upb_msg_map_set_value(void* msg, const void* val, size_t size)
/* This is like _upb_map_tovalue() except the entry already exists so we can
* reuse the allocated upb_strview for string fields. */
if (size == UPB_MAPTYPE_STRING) {
upb_strview *strp = (upb_strview*)ent->val.val;
upb_strview *strp = (upb_strview*)(uintptr_t)ent->val.val;
memcpy(strp, val, sizeof(*strp));
} else {
memcpy(&ent->val.val, val, size);

@ -141,7 +141,7 @@ int msvc_vsnprintf(char* s, size_t n, const char* format, va_list arg);
#elif defined _MSC_VER
#define UPB_ASSUME(expr) if (!(expr)) __assume(0)
#else
#define UPB_ASSUME(expr) do {} if (false && (expr))
#define UPB_ASSUME(expr) do {} while (false && (expr))
#endif
#else
#define UPB_ASSUME(expr) assert(expr)

@ -559,17 +559,6 @@ bool upb_inttable_remove(upb_inttable *t, uintptr_t key, upb_value *val) {
return success;
}
bool upb_inttable_push2(upb_inttable *t, upb_value val, upb_alloc *a) {
return upb_inttable_insert2(t, upb_inttable_count(t), val, a);
}
upb_value upb_inttable_pop(upb_inttable *t) {
upb_value val;
bool ok = upb_inttable_remove(t, upb_inttable_count(t) - 1, &val);
UPB_ASSERT(ok);
return val;
}
bool upb_inttable_insertptr2(upb_inttable *t, const void *key, upb_value val,
upb_alloc *a) {
return upb_inttable_insert2(t, (uintptr_t)key, val, a);

@ -326,15 +326,6 @@ UPB_INLINE bool upb_strtable_remove(upb_strtable *t, const char *key,
* invalidate iterators. */
bool upb_inttable_replace(upb_inttable *t, uintptr_t key, upb_value val);
/* Handy routines for treating an inttable like a stack. May not be mixed with
* other insert/remove calls. */
bool upb_inttable_push2(upb_inttable *t, upb_value val, upb_alloc *a);
upb_value upb_inttable_pop(upb_inttable *t);
UPB_INLINE bool upb_inttable_push(upb_inttable *t, upb_value val) {
return upb_inttable_push2(t, val, &upb_alloc_global);
}
/* Convenience routines for inttables with pointer keys. */
bool upb_inttable_insertptr2(upb_inttable *t, const void *key, upb_value val,
upb_alloc *a);

@ -106,15 +106,28 @@ struct upb_arena {
static const size_t memblock_reserve = UPB_ALIGN_UP(sizeof(mem_block), 16);
static upb_arena *arena_findroot(upb_arena *a) {
/* Path splitting keeps time complexity down, see:
* https://en.wikipedia.org/wiki/Disjoint-set_data_structure */
while (a->parent != a) {
upb_arena *next = a->parent;
a->parent = next->parent;
a = next;
}
return a;
}
static void upb_arena_addblock(upb_arena *a, void *ptr, size_t size) {
mem_block *block = ptr;
upb_arena *root = arena_findroot(a);
block->next = a->freelist;
/* The block is for arena |a|, but should appear in the freelist of |root|. */
block->next = root->freelist;
block->size = (uint32_t)size;
block->cleanups = 0;
a->freelist = block;
root->freelist = block;
a->last_size = block->size;
if (!a->freelist_tail) a->freelist_tail = block;
if (!root->freelist_tail) root->freelist_tail = block;
a->head.ptr = UPB_PTR_AT(block, memblock_reserve, char);
a->head.end = UPB_PTR_AT(block, size, char);
@ -149,17 +162,6 @@ static void *upb_arena_doalloc(upb_alloc *alloc, void *ptr, size_t oldsize,
return upb_arena_realloc(a, ptr, oldsize, size);
}
static upb_arena *arena_findroot(upb_arena *a) {
/* Path splitting keeps time complexity down, see:
* https://en.wikipedia.org/wiki/Disjoint-set_data_structure */
while (a->parent != a) {
upb_arena *next = a->parent;
a->parent = next->parent;
a = next;
}
return a;
}
/* Public Arena API ***********************************************************/
upb_arena *arena_initslow(void *mem, size_t n, upb_alloc *alloc) {

@ -273,6 +273,32 @@ typedef enum {
#define UPB_MAP_BEGIN ((size_t)-1)
UPB_INLINE bool _upb_isle(void) {
int x = 1;
return *(char*)&x == 1;
}
UPB_INLINE uint32_t _upb_be_swap32(uint32_t val) {
if (_upb_isle()) {
return val;
} else {
return ((val & 0xff) << 24) | ((val & 0xff00) << 8) |
((val & 0xff0000ULL) >> 8) | ((val & 0xff000000ULL) >> 24);
}
}
UPB_INLINE uint64_t _upb_be_swap64(uint64_t val) {
if (_upb_isle()) {
return val;
} else {
return ((val & 0xff) << 56) | ((val & 0xff00) << 40) |
((val & 0xff0000) << 24) | ((val & 0xff000000) << 8) |
((val & 0xff00000000ULL) >> 8) | ((val & 0xff0000000000ULL) >> 24) |
((val & 0xff000000000000ULL) >> 40) |
((val & 0xff00000000000000ULL) >> 56);
}
}
#include "upb/port_undef.inc"
#ifdef __cplusplus

@ -60,6 +60,8 @@ class Arena {
});
}
void Fuse(Arena& other) { upb_arena_fuse(ptr(), other.ptr()); }
private:
std::unique_ptr<upb_arena, decltype(&upb_arena_free)> ptr_;
};

@ -281,7 +281,9 @@ std::string FieldDefault(const protobuf::FieldDescriptor* field) {
case protobuf::FieldDescriptor::CPPTYPE_BOOL:
return field->default_value_bool() ? "true" : "false";
case protobuf::FieldDescriptor::CPPTYPE_ENUM:
return EnumValueSymbol(field->default_value_enum());
// Use a number instead of a symbolic name so that we don't require
// this enum's header to be included.
return absl::StrCat(field->default_value_enum()->number());
}
ABSL_ASSERT(false);
return "XXX";
@ -685,6 +687,22 @@ void WriteHeader(const protobuf::FileDescriptor* file, Output& output) {
ToPreproc(file->name()));
}
int TableDescriptorType(const protobuf::FieldDescriptor* field) {
if (field->file()->syntax() == protobuf::FileDescriptor::SYNTAX_PROTO2 &&
field->type() == protobuf::FieldDescriptor::TYPE_STRING) {
// From the perspective of the binary encoder/decoder, proto2 string fields
// are identical to bytes fields. Only in proto3 do we check UTF-8 for
// string fields at parse time.
//
// If we ever use these tables for JSON encoding/decoding (for example by
// embedding field names on the side) we will have to revisit this, because
// string vs. bytes behavior is not affected by proto2 vs proto3.
return protobuf::FieldDescriptor::TYPE_BYTES;
} else {
return field->type();
}
}
void WriteSource(const protobuf::FileDescriptor* file, Output& output) {
EmitFileWarning(file, output);
@ -779,7 +797,7 @@ void WriteSource(const protobuf::FileDescriptor* file, Output& output) {
GetSizeInit(layout.GetFieldOffset(field)),
presence,
submsg_index,
field->type(),
TableDescriptorType(field),
label);
}
output("};\n\n");

Loading…
Cancel
Save