Merge branch 'master' into fast-table

pull/13171/head
Joshua Haberman 4 years ago
commit 7a1835d7c3
  1. 2
      bazel/upb_proto_library.bzl
  2. 2
      bazel/workspace_deps.bzl
  3. 7
      tests/bindings/lua/test_upb.lua
  4. 10
      upb/decode.int.h
  5. 22
      upb/decode_fast.c
  6. 34
      upb/json_decode.c
  7. 5
      upb/json_encode.c
  8. 3
      upb/msg.c
  9. 22
      upb/port_def.inc
  10. 3
      upb/port_undef.inc
  11. 14
      upb/upb.c
  12. 20
      upb/upb.h
  13. 47
      upbc/generator.cc

@ -9,7 +9,7 @@ load("@rules_proto//proto:defs.bzl", "ProtoInfo") # copybara:strip_for_google3
# Generic support code #########################################################
_is_bazel = not hasattr(native, "genmpm")
_is_bazel = True # copybara:replace_for_google3 _is_bazel = False
def _get_real_short_path(file):
# For some reason, files from other archives have short paths that look like:

@ -16,7 +16,7 @@ def upb_deps():
git_repository,
name = "com_google_protobuf",
remote = "https://github.com/protocolbuffers/protobuf.git",
commit = "5f5efe50c5bef20042645b51a697f58b0704ac89", # Need to use Git until proto3 optional is released
commit = "c8f76331abf682c289fa79f05b2ee39cc7bf5a48", # Need to use Git until proto3 optional is released
)
maybe(

@ -481,6 +481,13 @@ function test_numeric_map()
end
end
-- Decodes the same 2000-byte payload (the two bytes "\x38\x00" repeated 1000
-- times) as a TestAllTypesProto3 message, 1000 times over. The test makes no
-- assertions: it passes as long as none of the decode calls raises an error.
-- NOTE(review): judging by the test name, the payload is presumably data the
-- message type does not recognise -- confirm against the schema.
function test_unknown()
  local payload = string.rep("\x38\x00", 1000)
  for _ = 1, 1000 do
    -- The decoded message is intentionally discarded.
    upb.decode(test_messages_proto3.TestAllTypesProto3, payload)
  end
end
function test_foo()
local symtab = upb.SymbolTable()
local filename = "external/com_google_protobuf/descriptor_proto-descriptor-set.proto.bin"

@ -22,20 +22,18 @@ const char *fastdecode_dispatch(upb_decstate *d, const char *ptr, upb_msg *msg,
const char *fastdecode_err(upb_decstate *d);
/* Returns true when at least `bytes` bytes lie between the decoder arena's
 * head.ptr and head.end, i.e. when the request fits in what is left of the
 * arena's current region. */
UPB_INLINE bool decode_arenahas(upb_decstate *d, size_t bytes) {
  const size_t remaining = (size_t)(d->arena.head.end - d->arena.head.ptr);
  return remaining >= bytes;
}
UPB_INLINE
upb_msg *decode_newmsg_ceil(upb_decstate *d, const upb_msglayout *l,
int msg_ceil_bytes) {
size_t size = l->size + sizeof(upb_msg_internal);
char *msg_data;
if (UPB_LIKELY(msg_ceil_bytes > 0 && decode_arenahas(d, msg_ceil_bytes))) {
if (UPB_LIKELY(msg_ceil_bytes > 0 && _upb_arenahas(&d->arena, msg_ceil_bytes))) {
UPB_ASSERT(size <= (size_t)msg_ceil_bytes);
msg_data = d->arena.head.ptr;
memset(msg_data, 0, msg_ceil_bytes);
d->arena.head.ptr += size;
UPB_UNPOISON_MEMORY_REGION(msg_data, msg_ceil_bytes);
memset(msg_data, 0, msg_ceil_bytes);
UPB_POISON_MEMORY_REGION(msg_data + size, msg_ceil_bytes - size);
} else {
msg_data = (char*)upb_arena_malloc(&d->arena, size);
memset(msg_data, 0, size);

@ -90,16 +90,15 @@ static void *fastdecode_getfield_ofs(upb_decstate *d, const char *ptr,
if (UPB_LIKELY(!*arr_p)) {
const size_t initial_len = 8;
size_t need = (valbytes * initial_len) + sizeof(upb_array);
if (UPB_UNLIKELY(!decode_arenahas(d, need))) {
if (!hasbit_is_idx && UPB_UNLIKELY(!_upb_arenahas(&d->arena, need))) {
return NULL;
}
arr = (void*)d->arena.head.ptr;
arr = upb_arena_malloc(&d->arena, need);
field = arr + 1;
arr->data = _upb_array_tagptr(field, elem_size_lg2);
*arr_p = arr;
arr->size = initial_len;
*end = (char*)field + (arr->size * valbytes);
d->arena.head.ptr += need;
} else {
arr = *arr_p;
field = _upb_array_ptr(arr);
@ -311,11 +310,6 @@ static const char *fastdecode_submsg(UPB_PARSE_PARAMS, int tagbytes,
submsg = fastdecode_getfield_ofs(d, ptr, msg, &data, &hasbits, &arr, &end,
sizeof(upb_msg *), card, true);
if (card == CARD_r) {
if (UPB_UNLIKELY(!submsg)) {
RETURN_GENERIC("need array resize\n");
}
}
if (card == CARD_s) {
*(uint32_t*)msg |= hasbits >> 16;
hasbits = 0;
@ -332,15 +326,11 @@ again:
size_t new_size = old_size * 2;
size_t new_bytes = new_size * sizeof(upb_msg*);
char *old_ptr = _upb_array_ptr(arr);
if (UPB_UNLIKELY(!decode_arenahas(d, new_bytes))) {
goto repeated_generic;
}
memcpy(d->arena.head.ptr, old_ptr, old_bytes);
char *new_ptr = upb_arena_realloc(&d->arena, old_ptr, old_bytes, new_bytes);
arr->size = new_size;
arr->data = _upb_array_tagptr(d->arena.head.ptr, 3);
submsg = (void*)(d->arena.head.ptr + (old_size * sizeof(upb_msg*)));
end = (void*)(d->arena.head.ptr + (new_size * sizeof(upb_msg*)));
d->arena.head.ptr += new_bytes;
arr->data = _upb_array_tagptr(new_ptr, 3);
submsg = (void*)(new_ptr + (old_size * sizeof(upb_msg*)));
end = (void*)(new_ptr + (new_size * sizeof(upb_msg*)));
}
}

@ -42,6 +42,19 @@ static bool jsondec_streql(upb_strview str, const char *lit) {
return str.size == strlen(lit) && memcmp(str.data, lit, str.size) == 0;
}
/* Returns true when `f` is an enum field whose enum type has the full name
 * "google.protobuf.NullValue". Non-enum fields are rejected before the enum
 * definition is looked up. */
static bool jsondec_isnullvalue(const upb_fielddef *f) {
  const char *enum_name;
  if (upb_fielddef_type(f) != UPB_TYPE_ENUM) return false;
  enum_name = upb_enumdef_fullname(upb_fielddef_enumsubdef(f));
  return strcmp(enum_name, "google.protobuf.NullValue") == 0;
}
/* Returns true when `f` is either a message field whose message type is the
 * UPB_WELLKNOWN_VALUE well-known type, or a field accepted by
 * jsondec_isnullvalue(). */
static bool jsondec_isvalue(const upb_fielddef *f) {
  if (upb_fielddef_type(f) == UPB_TYPE_MESSAGE &&
      upb_msgdef_wellknowntype(upb_fielddef_msgsubdef(f)) ==
          UPB_WELLKNOWN_VALUE) {
    return true;
  }
  return jsondec_isnullvalue(f);
}
UPB_NORETURN static void jsondec_err(jsondec *d, const char *msg) {
upb_status_seterrf(d->status, "Error parsing JSON @%d:%d: %s", d->line,
(int)(d->ptr - d->line_begin), msg);
@ -769,7 +782,8 @@ static upb_msgval jsondec_strfield(jsondec *d, const upb_fielddef *f) {
}
static upb_msgval jsondec_enum(jsondec *d, const upb_fielddef *f) {
if (jsondec_peek(d) == JD_STRING) {
switch (jsondec_peek(d)) {
case JD_STRING: {
const upb_enumdef *e = upb_fielddef_enumsubdef(f);
upb_strview str = jsondec_string(d);
upb_msgval val;
@ -782,7 +796,17 @@ static upb_msgval jsondec_enum(jsondec *d, const upb_fielddef *f) {
}
}
return val;
} else {
}
case JD_NULL: {
if (jsondec_isnullvalue(f)) {
upb_msgval val;
jsondec_null(d);
val.int32_val = 0;
return val;
}
}
/* Fallthrough. */
default:
return jsondec_int(d, f);
}
}
@ -867,12 +891,6 @@ static upb_msgval jsondec_msg(jsondec *d, const upb_fielddef *f) {
return val;
}
/* Returns true when `f` is a message field whose message type is the
 * UPB_WELLKNOWN_VALUE well-known type. The sub-message definition is only
 * consulted once the field is known to be a message field. */
static bool jsondec_isvalue(const upb_fielddef *f) {
  if (upb_fielddef_type(f) != UPB_TYPE_MESSAGE) return false;
  return upb_msgdef_wellknowntype(upb_fielddef_msgsubdef(f)) ==
         UPB_WELLKNOWN_VALUE;
}
static void jsondec_field(jsondec *d, upb_msg *msg, const upb_msgdef *m) {
upb_strview name;
const upb_fielddef *f;

@ -167,6 +167,10 @@ static void jsonenc_duration(jsonenc *e, const upb_msg *msg, const upb_msgdef *m
static void jsonenc_enum(int32_t val, const upb_fielddef *f, jsonenc *e) {
const upb_enumdef *e_def = upb_fielddef_enumsubdef(f);
if (strcmp(upb_enumdef_fullname(e_def), "google.protobuf.NullValue") == 0) {
jsonenc_putstr(e, "null");
} else {
const char *name = upb_enumdef_iton(e_def, val);
if (name) {
@ -175,6 +179,7 @@ static void jsonenc_enum(int32_t val, const upb_fielddef *f, jsonenc *e) {
jsonenc_printf(e, "%" PRId32, val);
}
}
}
static void jsonenc_bytes(jsonenc *e, upb_strview str) {
/* This is the regular base64, not the "web-safe" version. */

@ -36,11 +36,12 @@ bool _upb_msg_addunknown(upb_msg *msg, const char *data, size_t len,
in->unknown->len = 0;
} else if (in->unknown->size - in->unknown->len < len) {
size_t need = in->unknown->len + len;
size_t size = in->unknown->size;;
size_t size = in->unknown->size;
while (size < need) size *= 2;
in->unknown = upb_arena_realloc(
arena, in->unknown, in->unknown->size + overhead, size + overhead);
if (!in->unknown) return false;
in->unknown->size = size;
}
memcpy(UPB_PTR_AT(in->unknown + 1, in->unknown->len, char), data, len);
in->unknown->len += len;

@ -181,3 +181,25 @@ int msvc_vsnprintf(char* s, size_t n, const char* format, va_list arg);
#else
#define UPB_NAN (0.0 / 0.0)
#endif
/* AddressSanitizer support.
 *
 * UPB_ASAN is 1 when this translation unit is built with AddressSanitizer and
 * 0 otherwise. GCC announces ASan by defining __SANITIZE_ADDRESS__, while
 * Clang announces it through __has_feature(address_sanitizer); both are
 * checked so that Clang ASan builds do not silently get the no-op macros.
 *
 * UPB_POISON_MEMORY_REGION(addr, size) / UPB_UNPOISON_MEMORY_REGION(addr, size)
 * forward to the ASan runtime when UPB_ASAN is 1. Otherwise they evaluate
 * their arguments once and do nothing else. */
#if defined(__SANITIZE_ADDRESS__)
#define UPB_ASAN 1
#elif defined(__has_feature)
/* Nested so that compilers without __has_feature never parse the call. */
#if __has_feature(address_sanitizer)
#define UPB_ASAN 1
#endif
#endif
#ifndef UPB_ASAN
#define UPB_ASAN 0
#endif

#if UPB_ASAN
#ifdef __cplusplus
extern "C" {
#endif
/* Declared here instead of including the sanitizer interface header. */
void __asan_poison_memory_region(void const volatile *addr, size_t size);
void __asan_unpoison_memory_region(void const volatile *addr, size_t size);
#ifdef __cplusplus
}  /* extern "C" */
#endif
#define UPB_POISON_MEMORY_REGION(addr, size) \
  __asan_poison_memory_region((addr), (size))
#define UPB_UNPOISON_MEMORY_REGION(addr, size) \
  __asan_unpoison_memory_region((addr), (size))
#else
#define UPB_POISON_MEMORY_REGION(addr, size) \
  ((void)(addr), (void)(size))
#define UPB_UNPOISON_MEMORY_REGION(addr, size) \
  ((void)(addr), (void)(size))
#endif

@ -26,3 +26,6 @@
#undef _upb_snprintf
#undef _upb_vsnprintf
#undef _upb_va_copy
#undef UPB_POISON_MEMORY_REGION
#undef UPB_UNPOISON_MEMORY_REGION
#undef UPB_ASAN

@ -114,7 +114,7 @@ static void upb_arena_addblock(upb_arena *a, upb_arena *root, void *ptr,
a->head.end = UPB_PTR_AT(block, size, char);
a->cleanups = &block->cleanups;
/* TODO(haberman): ASAN poison. */
UPB_POISON_MEMORY_REGION(a->head.ptr, a->head.end - a->head.ptr);
}
static bool upb_arena_allocblock(upb_arena *a, size_t size) {
@ -127,14 +127,9 @@ static bool upb_arena_allocblock(upb_arena *a, size_t size) {
return true;
}
/* Returns true when the arena, viewed through its _upb_arena_head, has at
 * least `size` bytes between its `ptr` and `end` pointers. */
static bool arena_has(upb_arena *a, size_t size) {
  _upb_arena_head *head = (_upb_arena_head*)a;
  size_t avail = (size_t)(head->end - head->ptr);
  return avail >= size;
}
void *_upb_arena_slowmalloc(upb_arena *a, size_t size) {
if (!upb_arena_allocblock(a, size)) return NULL; /* Out of memory. */
UPB_ASSERT(arena_has(a, size));
UPB_ASSERT(_upb_arenahas(a, size));
return upb_arena_malloc(a, size);
}
@ -229,14 +224,15 @@ void upb_arena_free(upb_arena *a) {
bool upb_arena_addcleanup(upb_arena *a, void *ud, upb_cleanup_func *func) {
cleanup_ent *ent;
if (!a->cleanups || !arena_has(a, sizeof(cleanup_ent))) {
if (!a->cleanups || !_upb_arenahas(a, sizeof(cleanup_ent))) {
if (!upb_arena_allocblock(a, 128)) return false; /* Out of memory. */
UPB_ASSERT(arena_has(a, sizeof(cleanup_ent)));
UPB_ASSERT(_upb_arenahas(a, sizeof(cleanup_ent)));
}
a->head.end -= sizeof(cleanup_ent);
ent = (cleanup_ent*)a->head.end;
(*a->cleanups)++;
UPB_UNPOISON_MEMORY_REGION(ent, sizeof(cleanup_ent));
ent->cleanup = func;
ent->ud = ud;

@ -161,17 +161,35 @@ void *_upb_arena_slowmalloc(upb_arena *a, size_t size);
/* Returns `a` reinterpreted as a upb_alloc pointer; this is a plain pointer
 * cast and performs no allocation. */
UPB_INLINE upb_alloc *upb_arena_alloc(upb_arena *a) {
  return (upb_alloc*)a;
}
/* Returns true when the arena, viewed through its _upb_arena_head, has at
 * least `size` bytes between its `ptr` and `end` pointers. Nothing is
 * allocated and the arena is not modified. */
UPB_INLINE bool _upb_arenahas(upb_arena *a, size_t size) {
  _upb_arena_head *head = (_upb_arena_head*)a;
  size_t avail = (size_t)(head->end - head->ptr);
  return avail >= size;
}
UPB_INLINE void *upb_arena_malloc(upb_arena *a, size_t size) {
_upb_arena_head *h = (_upb_arena_head*)a;
void* ret;
size = UPB_ALIGN_MALLOC(size);
if (UPB_UNLIKELY((size_t)(h->end - h->ptr) < size)) {
if (UPB_UNLIKELY(!_upb_arenahas(a, size))) {
return _upb_arena_slowmalloc(a, size);
}
ret = h->ptr;
h->ptr += size;
UPB_UNPOISON_MEMORY_REGION(ret, size);
#if UPB_ASAN
{
size_t guard_size = 32;
if (_upb_arenahas(a, guard_size)) {
h->ptr += guard_size;
} else {
h->ptr = h->end;
}
}
#endif
return ret;
}

@ -705,26 +705,39 @@ int TableDescriptorType(const protobuf::FieldDescriptor* field) {
}
struct SubmsgArray {
std::vector<const protobuf::Descriptor*> messages;
absl::flat_hash_map<const protobuf::Descriptor*, int> indexes;
};
SubmsgArray GetSubmsgArray(const protobuf::Descriptor* message) {
SubmsgArray ret;
public:
SubmsgArray(const protobuf::Descriptor* message) : message_(message) {
MessageLayout layout(message);
std::vector<const protobuf::FieldDescriptor*> sorted_submsgs =
SortedSubmessages(message);
int i = 0;
for (auto submsg : sorted_submsgs) {
if (ret.indexes.find(submsg->message_type()) != ret.indexes.end()) {
if (indexes_.find(submsg->message_type()) != indexes_.end()) {
continue;
}
ret.messages.push_back(submsg->message_type());
ret.indexes[submsg->message_type()] = i++;
submsgs_.push_back(submsg->message_type());
indexes_[submsg->message_type()] = i++;
}
return ret;
}
const std::vector<const protobuf::Descriptor*>& submsgs() const {
return submsgs_;
}
int GetIndex(const protobuf::FieldDescriptor* field) {
(void)message_;
assert(field->containing_type() == message_);
auto it = indexes_.find(field->message_type());
assert(it != indexes_.end());
return it->second;
}
private:
const protobuf::Descriptor* message_;
std::vector<const protobuf::Descriptor*> submsgs_;
absl::flat_hash_map<const protobuf::Descriptor*, int> indexes_;
};
typedef std::pair<std::string, MessageLayout::Size> TableEntry;
void TryFillTableEntry(const protobuf::Descriptor* message,
@ -807,8 +820,8 @@ void TryFillTableEntry(const protobuf::Descriptor* message,
data.size64 = ((uint64_t)offset.size64 << 48) | expected_tag;
if (field->type() == protobuf::FieldDescriptor::TYPE_MESSAGE) {
SubmsgArray submsg_array = GetSubmsgArray(message);
uint64_t idx = submsg_array.indexes[field->message_type()];
SubmsgArray submsg_array(message);
uint64_t idx = submsg_array.GetIndex(field);
data.size32 |= idx << 16 | hasbit_index << 32;
data.size64 |= idx << 16 | hasbit_index << 32;
} else {
@ -879,17 +892,17 @@ void WriteSource(const protobuf::FileDescriptor* file, Output& output) {
std::string fields_array_ref = "NULL";
std::string submsgs_array_ref = "NULL";
MessageLayout layout(message);
SubmsgArray submsg_array = GetSubmsgArray(message);
SubmsgArray submsg_array(message);
if (!submsg_array.messages.empty()) {
if (!submsg_array.submsgs().empty()) {
// TODO(haberman): could save a little bit of space by only generating a
// "submsgs" array for every strongly-connected component.
std::string submsgs_array_name = msgname + "_submsgs";
submsgs_array_ref = "&" + submsgs_array_name + "[0]";
output("static const upb_msglayout *const $0[$1] = {\n",
submsgs_array_name, submsg_array.messages.size());
submsgs_array_name, submsg_array.submsgs().size());
for (auto submsg : submsg_array.messages) {
for (auto submsg : submsg_array.submsgs()) {
output(" &$0,\n", MessageInit(submsg));
}
@ -908,7 +921,7 @@ void WriteSource(const protobuf::FileDescriptor* file, Output& output) {
std::string presence = "0";
if (field->cpp_type() == protobuf::FieldDescriptor::CPPTYPE_MESSAGE) {
submsg_index = submsg_array.indexes[field->message_type()];
submsg_index = submsg_array.GetIndex(field);
}
if (MessageLayout::HasHasbit(field)) {

Loading…
Cancel
Save