Updated to new protobuf version, and added support for packed=false. (#264)

* WIP.

* Passes most tests.

* A few fixes.

* A few optimizations.

* Some more optimization.

* Update Protobuf to v3.11.4 and Abseil to LTS 2020-02-25

* Use longjmp instead of explicit error checks at every level.

* Used macros for better documentation of ops.

* Fixed bug with map parsing. All tests are passing except a few conformance tests.

* Fixed remaining bugs, all conformance tests pass.

Also ported all of upb to a single UPB_PTR_AT() macro instead of
having multiple .c files define their own.

* Formatted with clang-format.

* Fixes to compile on Linux.

* A few more compile fixes.

* Script to benchmark changes.

* Fixed parenthesis bug in op calculation.

* Updated generated descriptor files.

* WIP.

* Removed trailing enum to fix the Linux build.

* Respect packed=false to fix conformance failures in new protobuf version.

* Small simplification.

* Fixes to decoder.

* Removed stray comment.

Co-authored-by: Yannic Bonenberger <contact@yannic-bonenberger.com>
pull/13171/head
Joshua Haberman 5 years ago committed by GitHub
parent 08b6d2d6fd
commit 378cbbc3cc
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 25
      bazel/workspace_deps.bzl
  2. 6
      generated_for_cmake/google/protobuf/descriptor.upb.c
  3. 13
      upb/decode.c
  4. 4
      upb/def.c
  5. 68
      upb/encode.c
  6. 11
      upb/msg.h
  7. 11
      upbc/generator.cc

@ -7,16 +7,31 @@ def upb_deps():
maybe(
git_repository,
name = "com_google_absl",
commit = "070f6e47b33a2909d039e620c873204f78809492",
commit = "df3ea785d8c30a9503321a3d35ee7d35808f190d", # LTS 2020-02-25
remote = "https://github.com/abseil/abseil-cpp.git",
shallow_since = "1541627663 -0500",
shallow_since = "1583355457 -0500"
)
maybe(
git_repository,
http_archive,
name = "com_google_protobuf",
remote = "https://github.com/protocolbuffers/protobuf.git",
commit = "d41002663fd04325ead28439dfd5ce2822b0d6fb",
sha256 = "a79d19dcdf9139fa4b81206e318e33d245c4c9da1ffed21c87288ed4380426f9",
strip_prefix = "protobuf-3.11.4",
urls = [
"https://mirror.bazel.build/github.com/protocolbuffers/protobuf/archive/v3.11.4.tar.gz",
"https://github.com/protocolbuffers/protobuf/archive/v3.11.4.tar.gz",
],
)
maybe(
http_archive,
name = "rules_python",
sha256 = "e5470e92a18aa51830db99a4d9c492cc613761d5bdb7131c04bd92b9834380f6",
strip_prefix = "rules_python-4b84ad270387a7c439ebdccfd530e2339601ef27",
urls = [
"https://mirror.bazel.build/github.com/bazelbuild/rules_python/archive/4b84ad270387a7c439ebdccfd530e2339601ef27.tar.gz",
"https://github.com/bazelbuild/rules_python/archive/4b84ad270387a7c439ebdccfd530e2339601ef27.tar.gz",
],
)
maybe(

@ -441,8 +441,8 @@ const upb_msglayout google_protobuf_SourceCodeInfo_msginit = {
};
static const upb_msglayout_field google_protobuf_SourceCodeInfo_Location__fields[5] = {
{1, UPB_SIZE(20, 40), 0, 0, 5, 3},
{2, UPB_SIZE(24, 48), 0, 0, 5, 3},
{1, UPB_SIZE(20, 40), 0, 0, 5, _UPB_LABEL_PACKED},
{2, UPB_SIZE(24, 48), 0, 0, 5, _UPB_LABEL_PACKED},
{3, UPB_SIZE(4, 8), 1, 0, 9, 1},
{4, UPB_SIZE(12, 24), 2, 0, 9, 1},
{6, UPB_SIZE(28, 56), 0, 0, 9, 3},
@ -469,7 +469,7 @@ const upb_msglayout google_protobuf_GeneratedCodeInfo_msginit = {
};
static const upb_msglayout_field google_protobuf_GeneratedCodeInfo_Annotation__fields[4] = {
{1, UPB_SIZE(20, 32), 0, 0, 5, 3},
{1, UPB_SIZE(20, 32), 0, 0, 5, _UPB_LABEL_PACKED},
{2, UPB_SIZE(12, 16), 3, 0, 9, 1},
{3, UPB_SIZE(4, 4), 1, 0, 5, 1},
{4, UPB_SIZE(8, 8), 2, 0, 5, 1},

@ -368,6 +368,7 @@ static void decode_tomap(upb_decstate *d, upb_msg *msg,
upb_map **map_p = UPB_PTR_AT(msg, field->offset, upb_map *);
upb_map *map = *map_p;
upb_map_entry ent;
const upb_msglayout *entry = layout->submsgs[field->submsg_index];
if (!map) {
/* Lazily create map. */
@ -384,6 +385,13 @@ static void decode_tomap(upb_decstate *d, upb_msg *msg,
/* Parse map entry. */
memset(&ent, 0, sizeof(ent));
if (entry->fields[1].descriptortype == UPB_DESCRIPTOR_TYPE_MESSAGE ||
entry->fields[1].descriptortype == UPB_DESCRIPTOR_TYPE_GROUP) {
/* Create proactively to handle the case where it doesn't appear. */
ent.v.val.val = (uint64_t)_upb_msg_new(entry->submsgs[0], d->arena);
}
decode_tosubmsg(d, &ent.k, layout, field, val.str_val);
/* Insert into map. */
@ -482,7 +490,7 @@ static const char *decode_msg(upb_decstate *d, const char *ptr, upb_msg *msg,
case UPB_WIRE_TYPE_DELIMITED: {
uint32_t size;
int ndx = field->descriptortype;
if (field->label == UPB_LABEL_REPEATED) ndx += 18;
if (_upb_isrepeated(field)) ndx += 18;
ptr = decode_varint32(d, ptr, d->limit, &size);
if (size >= INT32_MAX || (size_t)(d->limit - ptr) < size) {
decode_err(d); /* Length overflow. */
@ -509,9 +517,10 @@ static const char *decode_msg(upb_decstate *d, const char *ptr, upb_msg *msg,
/* Parse, using op for dispatch. */
switch (field->label) {
case UPB_LABEL_REPEATED:
case _UPB_LABEL_PACKED:
ptr = decode_toarray(d, ptr, msg, layout, field, val, op);
break;
case UPB_LABEL_MAP:
case _UPB_LABEL_MAP:
decode_tomap(d, msg, layout, field, val);
break;
default:

@ -952,7 +952,9 @@ static bool make_layout(const upb_symtab *symtab, const upb_msgdef *m) {
field->label = upb_fielddef_label(f);
if (upb_fielddef_ismap(f)) {
field->label = UPB_LABEL_MAP;
field->label = _UPB_LABEL_MAP;
} else if (upb_fielddef_packed(f)) {
field->label = _UPB_LABEL_PACKED;
}
/* TODO: we probably should sort the fields by field number to match the

@ -128,10 +128,20 @@ static bool upb_put_tag(upb_encstate *e, int field_number, int wire_type) {
}
static bool upb_put_fixedarray(upb_encstate *e, const upb_array *arr,
size_t size) {
size_t bytes = arr->len * size;
const void* data = _upb_array_constptr(arr);
return upb_put_bytes(e, data, bytes) && upb_put_varint(e, bytes);
size_t elem_size, uint32_t tag) {
size_t bytes = arr->len * elem_size;
const char* data = _upb_array_constptr(arr);
const char* ptr = data + bytes - elem_size;
if (tag) {
while (true) {
CHK(upb_put_bytes(e, ptr, elem_size) && upb_put_varint(e, tag));
if (ptr == data) break;
ptr -= elem_size;
}
return true;
} else {
return upb_put_bytes(e, data, bytes) && upb_put_varint(e, bytes);
}
}
bool upb_encode_message(upb_encstate *e, const char *msg,
@ -217,38 +227,46 @@ static bool upb_encode_array(upb_encstate *e, const char *field_mem,
const upb_msglayout *m,
const upb_msglayout_field *f) {
const upb_array *arr = *(const upb_array**)field_mem;
bool packed = f->label == _UPB_LABEL_PACKED;
if (arr == NULL || arr->len == 0) {
return true;
}
#define VARINT_CASE(ctype, encode) { \
const ctype *start = _upb_array_constptr(arr); \
const ctype *ptr = start + arr->len; \
size_t pre_len = e->limit - e->ptr; \
do { \
ptr--; \
CHK(upb_put_varint(e, encode)); \
} while (ptr != start); \
CHK(upb_put_varint(e, e->limit - e->ptr - pre_len)); \
} \
break; \
do { ; } while(0)
#define VARINT_CASE(ctype, encode) \
{ \
const ctype *start = _upb_array_constptr(arr); \
const ctype *ptr = start + arr->len; \
size_t pre_len = e->limit - e->ptr; \
uint32_t tag = packed ? 0 : (f->number << 3) | UPB_WIRE_TYPE_VARINT; \
do { \
ptr--; \
CHK(upb_put_varint(e, encode)); \
if (tag) CHK(upb_put_varint(e, tag)); \
} while (ptr != start); \
if (!tag) CHK(upb_put_varint(e, e->limit - e->ptr - pre_len)); \
} \
break; \
do { \
; \
} while (0)
#define TAG(wire_type) (packed ? 0 : (f->number << 3 | wire_type))
switch (f->descriptortype) {
case UPB_DESCRIPTOR_TYPE_DOUBLE:
CHK(upb_put_fixedarray(e, arr, sizeof(double)));
CHK(upb_put_fixedarray(e, arr, sizeof(double), TAG(UPB_WIRE_TYPE_64BIT)));
break;
case UPB_DESCRIPTOR_TYPE_FLOAT:
CHK(upb_put_fixedarray(e, arr, sizeof(float)));
CHK(upb_put_fixedarray(e, arr, sizeof(float), TAG(UPB_WIRE_TYPE_32BIT)));
break;
case UPB_DESCRIPTOR_TYPE_SFIXED64:
case UPB_DESCRIPTOR_TYPE_FIXED64:
CHK(upb_put_fixedarray(e, arr, sizeof(uint64_t)));
CHK(upb_put_fixedarray(e, arr, sizeof(uint64_t), TAG(UPB_WIRE_TYPE_64BIT)));
break;
case UPB_DESCRIPTOR_TYPE_FIXED32:
case UPB_DESCRIPTOR_TYPE_SFIXED32:
CHK(upb_put_fixedarray(e, arr, sizeof(uint32_t)));
CHK(upb_put_fixedarray(e, arr, sizeof(uint32_t), TAG(UPB_WIRE_TYPE_32BIT)));
break;
case UPB_DESCRIPTOR_TYPE_INT64:
case UPB_DESCRIPTOR_TYPE_UINT64:
@ -305,9 +323,9 @@ do { ; } while(0)
}
#undef VARINT_CASE
/* We encode all primitive arrays as packed, regardless of what was specified
* in the .proto file. Could special case 1-sized arrays. */
CHK(upb_put_tag(e, f->number, UPB_WIRE_TYPE_DELIMITED));
if (packed) {
CHK(upb_put_tag(e, f->number, UPB_WIRE_TYPE_DELIMITED));
}
return true;
}
@ -361,9 +379,9 @@ bool upb_encode_message(upb_encstate *e, const char *msg,
for (i = m->field_count - 1; i >= 0; i--) {
const upb_msglayout_field *f = &m->fields[i];
if (f->label == UPB_LABEL_REPEATED) {
if (_upb_isrepeated(f)) {
CHK(upb_encode_array(e, msg + f->offset, m, f));
} else if (f->label == UPB_LABEL_MAP) {
} else if (f->label == _UPB_LABEL_MAP) {
CHK(upb_encode_map(e, msg + f->offset, m, f));
} else {
bool skip_empty = false;

@ -30,10 +30,11 @@ typedef void upb_msg;
* members are public so generated code can initialize them, but users MUST NOT
* read or write any of its members. */
/* This isn't a real label according to descriptor.proto, but in the table we
* use this for map fields instead of UPB_LABEL_REPEATED. */
/* These aren't real labels according to descriptor.proto, but in the table we
* use these for map/packed fields instead of UPB_LABEL_REPEATED. */
enum {
UPB_LABEL_MAP = 4
_UPB_LABEL_MAP = 4,
_UPB_LABEL_PACKED = 7 /* Low 3 bits are common with UPB_LABEL_REPEATED. */
};
typedef struct {
@ -104,6 +105,10 @@ UPB_INLINE bool _upb_has_oneof_field(const void *msg, size_t case_ofs, int32_t n
return *PTR_AT(msg, case_ofs, int32_t) == num;
}
/* Returns true if |field|'s label, masked to its low two bits (label & 3),
 * equals UPB_LABEL_REPEATED.
 *
 * Because only the low two bits are compared, a label of 7 (the value given
 * to _UPB_LABEL_PACKED) yields the same masked value as a label of 3, while a
 * label of 4 (the value given to _UPB_LABEL_MAP) masks to 0.
 * NOTE(review): this presumes UPB_LABEL_REPEATED == 3, so that packed fields
 * count as repeated and map fields do not -- confirm against its definition. */
UPB_INLINE bool _upb_isrepeated(const upb_msglayout_field *field) {
return (field->label & 3) == UPB_LABEL_REPEATED;
}
/** upb_array *****************************************************************/
/* Our internal representation for repeated fields. */

@ -755,8 +755,15 @@ void WriteSource(const protobuf::FileDescriptor* file, Output& output) {
assert(case_offset.size64 != 0);
presence = GetSizeInit(case_offset);
}
// Sync '4' with UPB_LABEL_MAP in upb/msg.h.
int label = field->is_map() ? 4 : field->label();
std::string label;
if (field->is_map()) {
label = "_UPB_LABEL_MAP";
} else if (field->is_packed()) {
label = "_UPB_LABEL_PACKED";
} else {
label = absl::StrCat(field->label());
}
output(" {$0, $1, $2, $3, $4, $5},\n",
field->number(),

Loading…
Cancel
Save