Merge branch 'master' into fast-table

pull/13171/head
Joshua Haberman 4 years ago
commit 7a1835d7c3
  1. 2
      bazel/upb_proto_library.bzl
  2. 2
      bazel/workspace_deps.bzl
  3. 7
      tests/bindings/lua/test_upb.lua
  4. 10
      upb/decode.int.h
  5. 22
      upb/decode_fast.c
  6. 34
      upb/json_decode.c
  7. 5
      upb/json_encode.c
  8. 3
      upb/msg.c
  9. 22
      upb/port_def.inc
  10. 3
      upb/port_undef.inc
  11. 14
      upb/upb.c
  12. 20
      upb/upb.h
  13. 47
      upbc/generator.cc

@ -9,7 +9,7 @@ load("@rules_proto//proto:defs.bzl", "ProtoInfo") # copybara:strip_for_google3
# Generic support code ######################################################### # Generic support code #########################################################
_is_bazel = not hasattr(native, "genmpm") _is_bazel = True # copybara:replace_for_google3 _is_bazel = False
def _get_real_short_path(file): def _get_real_short_path(file):
# For some reason, files from other archives have short paths that look like: # For some reason, files from other archives have short paths that look like:

@ -16,7 +16,7 @@ def upb_deps():
git_repository, git_repository,
name = "com_google_protobuf", name = "com_google_protobuf",
remote = "https://github.com/protocolbuffers/protobuf.git", remote = "https://github.com/protocolbuffers/protobuf.git",
commit = "5f5efe50c5bef20042645b51a697f58b0704ac89", # Need to use Git until proto3 optional is released commit = "c8f76331abf682c289fa79f05b2ee39cc7bf5a48", # Need to use Git until proto3 optional is released
) )
maybe( maybe(

@ -481,6 +481,13 @@ function test_numeric_map()
end end
end end
-- Stress test: builds a payload made of the two-byte sequence 0x38 0x00
-- repeated 1000 times and decodes it as TestAllTypesProto3 1000 times over.
-- The decoded messages are discarded; the test passes as long as no decode
-- call raises an error.
-- NOTE(review): judging by the test name, these bytes are presumably meant to
-- land in the message's unknown-field storage -- confirm against the schema.
function test_unknown()
  local payload = string.rep("\x38\x00", 1000)
  for _ = 1, 1000 do
    upb.decode(test_messages_proto3.TestAllTypesProto3, payload)
  end
end
function test_foo() function test_foo()
local symtab = upb.SymbolTable() local symtab = upb.SymbolTable()
local filename = "external/com_google_protobuf/descriptor_proto-descriptor-set.proto.bin" local filename = "external/com_google_protobuf/descriptor_proto-descriptor-set.proto.bin"

@ -22,20 +22,18 @@ const char *fastdecode_dispatch(upb_decstate *d, const char *ptr, upb_msg *msg,
const char *fastdecode_err(upb_decstate *d); const char *fastdecode_err(upb_decstate *d);
/* Returns true when the span between the decoder arena's current head pointer
 * and the end of its head block is at least `bytes` bytes long. */
UPB_INLINE bool decode_arenahas(upb_decstate *d, size_t bytes) {
  size_t remaining = (size_t)(d->arena.head.end - d->arena.head.ptr);
  return remaining >= bytes;
}
UPB_INLINE UPB_INLINE
upb_msg *decode_newmsg_ceil(upb_decstate *d, const upb_msglayout *l, upb_msg *decode_newmsg_ceil(upb_decstate *d, const upb_msglayout *l,
int msg_ceil_bytes) { int msg_ceil_bytes) {
size_t size = l->size + sizeof(upb_msg_internal); size_t size = l->size + sizeof(upb_msg_internal);
char *msg_data; char *msg_data;
if (UPB_LIKELY(msg_ceil_bytes > 0 && decode_arenahas(d, msg_ceil_bytes))) { if (UPB_LIKELY(msg_ceil_bytes > 0 && _upb_arenahas(&d->arena, msg_ceil_bytes))) {
UPB_ASSERT(size <= (size_t)msg_ceil_bytes); UPB_ASSERT(size <= (size_t)msg_ceil_bytes);
msg_data = d->arena.head.ptr; msg_data = d->arena.head.ptr;
memset(msg_data, 0, msg_ceil_bytes);
d->arena.head.ptr += size; d->arena.head.ptr += size;
UPB_UNPOISON_MEMORY_REGION(msg_data, msg_ceil_bytes);
memset(msg_data, 0, msg_ceil_bytes);
UPB_POISON_MEMORY_REGION(msg_data + size, msg_ceil_bytes - size);
} else { } else {
msg_data = (char*)upb_arena_malloc(&d->arena, size); msg_data = (char*)upb_arena_malloc(&d->arena, size);
memset(msg_data, 0, size); memset(msg_data, 0, size);

@ -90,16 +90,15 @@ static void *fastdecode_getfield_ofs(upb_decstate *d, const char *ptr,
if (UPB_LIKELY(!*arr_p)) { if (UPB_LIKELY(!*arr_p)) {
const size_t initial_len = 8; const size_t initial_len = 8;
size_t need = (valbytes * initial_len) + sizeof(upb_array); size_t need = (valbytes * initial_len) + sizeof(upb_array);
if (UPB_UNLIKELY(!decode_arenahas(d, need))) { if (!hasbit_is_idx && UPB_UNLIKELY(!_upb_arenahas(&d->arena, need))) {
return NULL; return NULL;
} }
arr = (void*)d->arena.head.ptr; arr = upb_arena_malloc(&d->arena, need);
field = arr + 1; field = arr + 1;
arr->data = _upb_array_tagptr(field, elem_size_lg2); arr->data = _upb_array_tagptr(field, elem_size_lg2);
*arr_p = arr; *arr_p = arr;
arr->size = initial_len; arr->size = initial_len;
*end = (char*)field + (arr->size * valbytes); *end = (char*)field + (arr->size * valbytes);
d->arena.head.ptr += need;
} else { } else {
arr = *arr_p; arr = *arr_p;
field = _upb_array_ptr(arr); field = _upb_array_ptr(arr);
@ -311,11 +310,6 @@ static const char *fastdecode_submsg(UPB_PARSE_PARAMS, int tagbytes,
submsg = fastdecode_getfield_ofs(d, ptr, msg, &data, &hasbits, &arr, &end, submsg = fastdecode_getfield_ofs(d, ptr, msg, &data, &hasbits, &arr, &end,
sizeof(upb_msg *), card, true); sizeof(upb_msg *), card, true);
if (card == CARD_r) {
if (UPB_UNLIKELY(!submsg)) {
RETURN_GENERIC("need array resize\n");
}
}
if (card == CARD_s) { if (card == CARD_s) {
*(uint32_t*)msg |= hasbits >> 16; *(uint32_t*)msg |= hasbits >> 16;
hasbits = 0; hasbits = 0;
@ -332,15 +326,11 @@ again:
size_t new_size = old_size * 2; size_t new_size = old_size * 2;
size_t new_bytes = new_size * sizeof(upb_msg*); size_t new_bytes = new_size * sizeof(upb_msg*);
char *old_ptr = _upb_array_ptr(arr); char *old_ptr = _upb_array_ptr(arr);
if (UPB_UNLIKELY(!decode_arenahas(d, new_bytes))) { char *new_ptr = upb_arena_realloc(&d->arena, old_ptr, old_bytes, new_bytes);
goto repeated_generic;
}
memcpy(d->arena.head.ptr, old_ptr, old_bytes);
arr->size = new_size; arr->size = new_size;
arr->data = _upb_array_tagptr(d->arena.head.ptr, 3); arr->data = _upb_array_tagptr(new_ptr, 3);
submsg = (void*)(d->arena.head.ptr + (old_size * sizeof(upb_msg*))); submsg = (void*)(new_ptr + (old_size * sizeof(upb_msg*)));
end = (void*)(d->arena.head.ptr + (new_size * sizeof(upb_msg*))); end = (void*)(new_ptr + (new_size * sizeof(upb_msg*)));
d->arena.head.ptr += new_bytes;
} }
} }

@ -42,6 +42,19 @@ static bool jsondec_streql(upb_strview str, const char *lit) {
return str.size == strlen(lit) && memcmp(str.data, lit, str.size) == 0; return str.size == strlen(lit) && memcmp(str.data, lit, str.size) == 0;
} }
/* Returns true iff `f` is an enum-typed field whose enum's full name is
 * exactly "google.protobuf.NullValue". */
static bool jsondec_isnullvalue(const upb_fielddef *f) {
  const char *enum_name;
  if (upb_fielddef_type(f) != UPB_TYPE_ENUM) {
    return false;
  }
  /* Only look up the enum definition once we know the field is an enum. */
  enum_name = upb_enumdef_fullname(upb_fielddef_enumsubdef(f));
  return strcmp(enum_name, "google.protobuf.NullValue") == 0;
}
/* Returns true when `f` is either a message field whose message type is the
 * UPB_WELLKNOWN_VALUE well-known type, or a field accepted by
 * jsondec_isnullvalue(). */
static bool jsondec_isvalue(const upb_fielddef *f) {
  if (upb_fielddef_type(f) == UPB_TYPE_MESSAGE &&
      upb_msgdef_wellknowntype(upb_fielddef_msgsubdef(f)) ==
          UPB_WELLKNOWN_VALUE) {
    return true;
  }
  return jsondec_isnullvalue(f);
}
UPB_NORETURN static void jsondec_err(jsondec *d, const char *msg) { UPB_NORETURN static void jsondec_err(jsondec *d, const char *msg) {
upb_status_seterrf(d->status, "Error parsing JSON @%d:%d: %s", d->line, upb_status_seterrf(d->status, "Error parsing JSON @%d:%d: %s", d->line,
(int)(d->ptr - d->line_begin), msg); (int)(d->ptr - d->line_begin), msg);
@ -769,7 +782,8 @@ static upb_msgval jsondec_strfield(jsondec *d, const upb_fielddef *f) {
} }
static upb_msgval jsondec_enum(jsondec *d, const upb_fielddef *f) { static upb_msgval jsondec_enum(jsondec *d, const upb_fielddef *f) {
if (jsondec_peek(d) == JD_STRING) { switch (jsondec_peek(d)) {
case JD_STRING: {
const upb_enumdef *e = upb_fielddef_enumsubdef(f); const upb_enumdef *e = upb_fielddef_enumsubdef(f);
upb_strview str = jsondec_string(d); upb_strview str = jsondec_string(d);
upb_msgval val; upb_msgval val;
@ -782,7 +796,17 @@ static upb_msgval jsondec_enum(jsondec *d, const upb_fielddef *f) {
} }
} }
return val; return val;
} else { }
case JD_NULL: {
if (jsondec_isnullvalue(f)) {
upb_msgval val;
jsondec_null(d);
val.int32_val = 0;
return val;
}
}
/* Fallthrough. */
default:
return jsondec_int(d, f); return jsondec_int(d, f);
} }
} }
@ -867,12 +891,6 @@ static upb_msgval jsondec_msg(jsondec *d, const upb_fielddef *f) {
return val; return val;
} }
/* Returns true when `f` is a message field whose message type is the
 * UPB_WELLKNOWN_VALUE well-known type. */
static bool jsondec_isvalue(const upb_fielddef *f) {
  if (upb_fielddef_type(f) != UPB_TYPE_MESSAGE) {
    return false;
  }
  /* The message sub-definition is only consulted for message fields. */
  return upb_msgdef_wellknowntype(upb_fielddef_msgsubdef(f)) ==
         UPB_WELLKNOWN_VALUE;
}
static void jsondec_field(jsondec *d, upb_msg *msg, const upb_msgdef *m) { static void jsondec_field(jsondec *d, upb_msg *msg, const upb_msgdef *m) {
upb_strview name; upb_strview name;
const upb_fielddef *f; const upb_fielddef *f;

@ -167,6 +167,10 @@ static void jsonenc_duration(jsonenc *e, const upb_msg *msg, const upb_msgdef *m
static void jsonenc_enum(int32_t val, const upb_fielddef *f, jsonenc *e) { static void jsonenc_enum(int32_t val, const upb_fielddef *f, jsonenc *e) {
const upb_enumdef *e_def = upb_fielddef_enumsubdef(f); const upb_enumdef *e_def = upb_fielddef_enumsubdef(f);
if (strcmp(upb_enumdef_fullname(e_def), "google.protobuf.NullValue") == 0) {
jsonenc_putstr(e, "null");
} else {
const char *name = upb_enumdef_iton(e_def, val); const char *name = upb_enumdef_iton(e_def, val);
if (name) { if (name) {
@ -175,6 +179,7 @@ static void jsonenc_enum(int32_t val, const upb_fielddef *f, jsonenc *e) {
jsonenc_printf(e, "%" PRId32, val); jsonenc_printf(e, "%" PRId32, val);
} }
} }
}
static void jsonenc_bytes(jsonenc *e, upb_strview str) { static void jsonenc_bytes(jsonenc *e, upb_strview str) {
/* This is the regular base64, not the "web-safe" version. */ /* This is the regular base64, not the "web-safe" version. */

@ -36,11 +36,12 @@ bool _upb_msg_addunknown(upb_msg *msg, const char *data, size_t len,
in->unknown->len = 0; in->unknown->len = 0;
} else if (in->unknown->size - in->unknown->len < len) { } else if (in->unknown->size - in->unknown->len < len) {
size_t need = in->unknown->len + len; size_t need = in->unknown->len + len;
size_t size = in->unknown->size;; size_t size = in->unknown->size;
while (size < need) size *= 2; while (size < need) size *= 2;
in->unknown = upb_arena_realloc( in->unknown = upb_arena_realloc(
arena, in->unknown, in->unknown->size + overhead, size + overhead); arena, in->unknown, in->unknown->size + overhead, size + overhead);
if (!in->unknown) return false; if (!in->unknown) return false;
in->unknown->size = size;
} }
memcpy(UPB_PTR_AT(in->unknown + 1, in->unknown->len, char), data, len); memcpy(UPB_PTR_AT(in->unknown + 1, in->unknown->len, char), data, len);
in->unknown->len += len; in->unknown->len += len;

@ -181,3 +181,25 @@ int msvc_vsnprintf(char* s, size_t n, const char* format, va_list arg);
#else #else
#define UPB_NAN (0.0 / 0.0) #define UPB_NAN (0.0 / 0.0)
#endif #endif
#if defined(__SANITIZE_ADDRESS__)
#define UPB_ASAN 1
#ifdef __cplusplus
extern "C" {
#endif
void __asan_poison_memory_region(void const volatile *addr, size_t size);
void __asan_unpoison_memory_region(void const volatile *addr, size_t size);
#ifdef __cplusplus
} /* extern "C" */
#endif
#define UPB_POISON_MEMORY_REGION(addr, size) \
__asan_poison_memory_region((addr), (size))
#define UPB_UNPOISON_MEMORY_REGION(addr, size) \
__asan_unpoison_memory_region((addr), (size))
#else
#define UPB_ASAN 0
#define UPB_POISON_MEMORY_REGION(addr, size) \
((void)(addr), (void)(size))
#define UPB_UNPOISON_MEMORY_REGION(addr, size) \
((void)(addr), (void)(size))
#endif

@ -26,3 +26,6 @@
#undef _upb_snprintf #undef _upb_snprintf
#undef _upb_vsnprintf #undef _upb_vsnprintf
#undef _upb_va_copy #undef _upb_va_copy
#undef UPB_POISON_MEMORY_REGION
#undef UPB_UNPOISON_MEMORY_REGION
#undef UPB_ASAN

@ -114,7 +114,7 @@ static void upb_arena_addblock(upb_arena *a, upb_arena *root, void *ptr,
a->head.end = UPB_PTR_AT(block, size, char); a->head.end = UPB_PTR_AT(block, size, char);
a->cleanups = &block->cleanups; a->cleanups = &block->cleanups;
/* TODO(haberman): ASAN poison. */ UPB_POISON_MEMORY_REGION(a->head.ptr, a->head.end - a->head.ptr);
} }
static bool upb_arena_allocblock(upb_arena *a, size_t size) { static bool upb_arena_allocblock(upb_arena *a, size_t size) {
@ -127,14 +127,9 @@ static bool upb_arena_allocblock(upb_arena *a, size_t size) {
return true; return true;
} }
/* Returns true when the arena's head block has at least `size` bytes left
 * between its current pointer and its end.  The arena is viewed through its
 * _upb_arena_head prefix. */
static bool arena_has(upb_arena *a, size_t size) {
  _upb_arena_head *head = (_upb_arena_head*)a;
  size_t available = (size_t)(head->end - head->ptr);
  return available >= size;
}
void *_upb_arena_slowmalloc(upb_arena *a, size_t size) { void *_upb_arena_slowmalloc(upb_arena *a, size_t size) {
if (!upb_arena_allocblock(a, size)) return NULL; /* Out of memory. */ if (!upb_arena_allocblock(a, size)) return NULL; /* Out of memory. */
UPB_ASSERT(arena_has(a, size)); UPB_ASSERT(_upb_arenahas(a, size));
return upb_arena_malloc(a, size); return upb_arena_malloc(a, size);
} }
@ -229,14 +224,15 @@ void upb_arena_free(upb_arena *a) {
bool upb_arena_addcleanup(upb_arena *a, void *ud, upb_cleanup_func *func) { bool upb_arena_addcleanup(upb_arena *a, void *ud, upb_cleanup_func *func) {
cleanup_ent *ent; cleanup_ent *ent;
if (!a->cleanups || !arena_has(a, sizeof(cleanup_ent))) { if (!a->cleanups || !_upb_arenahas(a, sizeof(cleanup_ent))) {
if (!upb_arena_allocblock(a, 128)) return false; /* Out of memory. */ if (!upb_arena_allocblock(a, 128)) return false; /* Out of memory. */
UPB_ASSERT(arena_has(a, sizeof(cleanup_ent))); UPB_ASSERT(_upb_arenahas(a, sizeof(cleanup_ent)));
} }
a->head.end -= sizeof(cleanup_ent); a->head.end -= sizeof(cleanup_ent);
ent = (cleanup_ent*)a->head.end; ent = (cleanup_ent*)a->head.end;
(*a->cleanups)++; (*a->cleanups)++;
UPB_UNPOISON_MEMORY_REGION(ent, sizeof(cleanup_ent));
ent->cleanup = func; ent->cleanup = func;
ent->ud = ud; ent->ud = ud;

@ -161,17 +161,35 @@ void *_upb_arena_slowmalloc(upb_arena *a, size_t size);
UPB_INLINE upb_alloc *upb_arena_alloc(upb_arena *a) { return (upb_alloc*)a; } UPB_INLINE upb_alloc *upb_arena_alloc(upb_arena *a) { return (upb_alloc*)a; }
/* Returns true when at least `size` bytes remain between the arena head's
 * current pointer and its end.  The arena is accessed through its
 * _upb_arena_head prefix. */
UPB_INLINE bool _upb_arenahas(upb_arena *a, size_t size) {
  _upb_arena_head *head = (_upb_arena_head*)a;
  size_t available = (size_t)(head->end - head->ptr);
  return available >= size;
}
UPB_INLINE void *upb_arena_malloc(upb_arena *a, size_t size) { UPB_INLINE void *upb_arena_malloc(upb_arena *a, size_t size) {
_upb_arena_head *h = (_upb_arena_head*)a; _upb_arena_head *h = (_upb_arena_head*)a;
void* ret; void* ret;
size = UPB_ALIGN_MALLOC(size); size = UPB_ALIGN_MALLOC(size);
if (UPB_UNLIKELY((size_t)(h->end - h->ptr) < size)) { if (UPB_UNLIKELY(!_upb_arenahas(a, size))) {
return _upb_arena_slowmalloc(a, size); return _upb_arena_slowmalloc(a, size);
} }
ret = h->ptr; ret = h->ptr;
h->ptr += size; h->ptr += size;
UPB_UNPOISON_MEMORY_REGION(ret, size);
#if UPB_ASAN
{
size_t guard_size = 32;
if (_upb_arenahas(a, guard_size)) {
h->ptr += guard_size;
} else {
h->ptr = h->end;
}
}
#endif
return ret; return ret;
} }

@ -705,26 +705,39 @@ int TableDescriptorType(const protobuf::FieldDescriptor* field) {
} }
struct SubmsgArray { struct SubmsgArray {
std::vector<const protobuf::Descriptor*> messages; public:
absl::flat_hash_map<const protobuf::Descriptor*, int> indexes; SubmsgArray(const protobuf::Descriptor* message) : message_(message) {
};
SubmsgArray GetSubmsgArray(const protobuf::Descriptor* message) {
SubmsgArray ret;
MessageLayout layout(message); MessageLayout layout(message);
std::vector<const protobuf::FieldDescriptor*> sorted_submsgs = std::vector<const protobuf::FieldDescriptor*> sorted_submsgs =
SortedSubmessages(message); SortedSubmessages(message);
int i = 0; int i = 0;
for (auto submsg : sorted_submsgs) { for (auto submsg : sorted_submsgs) {
if (ret.indexes.find(submsg->message_type()) != ret.indexes.end()) { if (indexes_.find(submsg->message_type()) != indexes_.end()) {
continue; continue;
} }
ret.messages.push_back(submsg->message_type()); submsgs_.push_back(submsg->message_type());
ret.indexes[submsg->message_type()] = i++; indexes_[submsg->message_type()] = i++;
} }
return ret;
} }
const std::vector<const protobuf::Descriptor*>& submsgs() const {
return submsgs_;
}
int GetIndex(const protobuf::FieldDescriptor* field) {
(void)message_;
assert(field->containing_type() == message_);
auto it = indexes_.find(field->message_type());
assert(it != indexes_.end());
return it->second;
}
private:
const protobuf::Descriptor* message_;
std::vector<const protobuf::Descriptor*> submsgs_;
absl::flat_hash_map<const protobuf::Descriptor*, int> indexes_;
};
typedef std::pair<std::string, MessageLayout::Size> TableEntry; typedef std::pair<std::string, MessageLayout::Size> TableEntry;
void TryFillTableEntry(const protobuf::Descriptor* message, void TryFillTableEntry(const protobuf::Descriptor* message,
@ -807,8 +820,8 @@ void TryFillTableEntry(const protobuf::Descriptor* message,
data.size64 = ((uint64_t)offset.size64 << 48) | expected_tag; data.size64 = ((uint64_t)offset.size64 << 48) | expected_tag;
if (field->type() == protobuf::FieldDescriptor::TYPE_MESSAGE) { if (field->type() == protobuf::FieldDescriptor::TYPE_MESSAGE) {
SubmsgArray submsg_array = GetSubmsgArray(message); SubmsgArray submsg_array(message);
uint64_t idx = submsg_array.indexes[field->message_type()]; uint64_t idx = submsg_array.GetIndex(field);
data.size32 |= idx << 16 | hasbit_index << 32; data.size32 |= idx << 16 | hasbit_index << 32;
data.size64 |= idx << 16 | hasbit_index << 32; data.size64 |= idx << 16 | hasbit_index << 32;
} else { } else {
@ -879,17 +892,17 @@ void WriteSource(const protobuf::FileDescriptor* file, Output& output) {
std::string fields_array_ref = "NULL"; std::string fields_array_ref = "NULL";
std::string submsgs_array_ref = "NULL"; std::string submsgs_array_ref = "NULL";
MessageLayout layout(message); MessageLayout layout(message);
SubmsgArray submsg_array = GetSubmsgArray(message); SubmsgArray submsg_array(message);
if (!submsg_array.messages.empty()) { if (!submsg_array.submsgs().empty()) {
// TODO(haberman): could save a little bit of space by only generating a // TODO(haberman): could save a little bit of space by only generating a
// "submsgs" array for every strongly-connected component. // "submsgs" array for every strongly-connected component.
std::string submsgs_array_name = msgname + "_submsgs"; std::string submsgs_array_name = msgname + "_submsgs";
submsgs_array_ref = "&" + submsgs_array_name + "[0]"; submsgs_array_ref = "&" + submsgs_array_name + "[0]";
output("static const upb_msglayout *const $0[$1] = {\n", output("static const upb_msglayout *const $0[$1] = {\n",
submsgs_array_name, submsg_array.messages.size()); submsgs_array_name, submsg_array.submsgs().size());
for (auto submsg : submsg_array.messages) { for (auto submsg : submsg_array.submsgs()) {
output(" &$0,\n", MessageInit(submsg)); output(" &$0,\n", MessageInit(submsg));
} }
@ -908,7 +921,7 @@ void WriteSource(const protobuf::FileDescriptor* file, Output& output) {
std::string presence = "0"; std::string presence = "0";
if (field->cpp_type() == protobuf::FieldDescriptor::CPPTYPE_MESSAGE) { if (field->cpp_type() == protobuf::FieldDescriptor::CPPTYPE_MESSAGE) {
submsg_index = submsg_array.indexes[field->message_type()]; submsg_index = submsg_array.GetIndex(field);
} }
if (MessageLayout::HasHasbit(field)) { if (MessageLayout::HasHasbit(field)) {

Loading…
Cancel
Save