Merge pull request #427 from haberman/proto2enum

Implemented proto2's unfortunate enum behavior
pull/13171/head
Joshua Haberman 3 years ago committed by GitHub
commit b5b32357b3
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 2
      benchmarks/compare.py
  2. 73
      cmake/google/protobuf/descriptor.upb.c
  3. 7
      cmake/google/protobuf/descriptor.upb.h
  4. 192
      upb/decode.c
  5. 115
      upb/def.c
  6. 14
      upb/msg_internal.h
  7. 74
      upb/msg_test.cc
  8. 21
      upb/msg_test.proto
  9. 191
      upbc/protoc-gen-upb.cc

@ -1,4 +1,4 @@
#!/usr/bin/python
#!/usr/bin/python3
#
# Copyright (c) 2009-2021, Google LLC
# All rights reserved.

@ -126,16 +126,18 @@ const upb_msglayout google_protobuf_ExtensionRangeOptions_msginit = {
UPB_SIZE(8, 8), 1, _UPB_MSGEXT_EXTENDABLE, 0, 255,
};
static const upb_msglayout_sub google_protobuf_FieldDescriptorProto_submsgs[1] = {
static const upb_msglayout_sub google_protobuf_FieldDescriptorProto_submsgs[3] = {
{.submsg = &google_protobuf_FieldOptions_msginit},
{.subenum = &google_protobuf_FieldDescriptorProto_Label_enuminit},
{.subenum = &google_protobuf_FieldDescriptorProto_Type_enuminit},
};
static const upb_msglayout_field google_protobuf_FieldDescriptorProto__fields[11] = {
{1, UPB_SIZE(24, 24), 1, 0, 12, _UPB_MODE_SCALAR | (_UPB_REP_STRVIEW << _UPB_REP_SHIFT)},
{2, UPB_SIZE(32, 40), 2, 0, 12, _UPB_MODE_SCALAR | (_UPB_REP_STRVIEW << _UPB_REP_SHIFT)},
{3, UPB_SIZE(12, 12), 3, 0, 5, _UPB_MODE_SCALAR | (_UPB_REP_4BYTE << _UPB_REP_SHIFT)},
{4, UPB_SIZE(4, 4), 4, 0, 14, _UPB_MODE_SCALAR | (_UPB_REP_4BYTE << _UPB_REP_SHIFT)},
{5, UPB_SIZE(8, 8), 5, 0, 14, _UPB_MODE_SCALAR | (_UPB_REP_4BYTE << _UPB_REP_SHIFT)},
{4, UPB_SIZE(4, 4), 4, 1, 14, _UPB_MODE_SCALAR | (_UPB_REP_4BYTE << _UPB_REP_SHIFT)},
{5, UPB_SIZE(8, 8), 5, 2, 14, _UPB_MODE_SCALAR | (_UPB_REP_4BYTE << _UPB_REP_SHIFT)},
{6, UPB_SIZE(40, 56), 6, 0, 12, _UPB_MODE_SCALAR | (_UPB_REP_STRVIEW << _UPB_REP_SHIFT)},
{7, UPB_SIZE(48, 72), 7, 0, 12, _UPB_MODE_SCALAR | (_UPB_REP_STRVIEW << _UPB_REP_SHIFT)},
{8, UPB_SIZE(64, 104), 8, 0, 11, _UPB_MODE_SCALAR | (_UPB_REP_PTR << _UPB_REP_SHIFT)},
@ -248,14 +250,15 @@ const upb_msglayout google_protobuf_MethodDescriptorProto_msginit = {
UPB_SIZE(32, 64), 6, _UPB_MSGEXT_NONE, 6, 255,
};
static const upb_msglayout_sub google_protobuf_FileOptions_submsgs[1] = {
static const upb_msglayout_sub google_protobuf_FileOptions_submsgs[2] = {
{.submsg = &google_protobuf_UninterpretedOption_msginit},
{.subenum = &google_protobuf_FileOptions_OptimizeMode_enuminit},
};
static const upb_msglayout_field google_protobuf_FileOptions__fields[21] = {
{1, UPB_SIZE(20, 24), 1, 0, 12, _UPB_MODE_SCALAR | (_UPB_REP_STRVIEW << _UPB_REP_SHIFT)},
{8, UPB_SIZE(28, 40), 2, 0, 12, _UPB_MODE_SCALAR | (_UPB_REP_STRVIEW << _UPB_REP_SHIFT)},
{9, UPB_SIZE(4, 4), 3, 0, 14, _UPB_MODE_SCALAR | (_UPB_REP_4BYTE << _UPB_REP_SHIFT)},
{9, UPB_SIZE(4, 4), 3, 1, 14, _UPB_MODE_SCALAR | (_UPB_REP_4BYTE << _UPB_REP_SHIFT)},
{10, UPB_SIZE(8, 8), 4, 0, 8, _UPB_MODE_SCALAR | (_UPB_REP_1BYTE << _UPB_REP_SHIFT)},
{11, UPB_SIZE(36, 56), 5, 0, 12, _UPB_MODE_SCALAR | (_UPB_REP_STRVIEW << _UPB_REP_SHIFT)},
{16, UPB_SIZE(9, 9), 6, 0, 8, _UPB_MODE_SCALAR | (_UPB_REP_1BYTE << _UPB_REP_SHIFT)},
@ -300,16 +303,18 @@ const upb_msglayout google_protobuf_MessageOptions_msginit = {
UPB_SIZE(16, 16), 5, _UPB_MSGEXT_EXTENDABLE, 3, 255,
};
static const upb_msglayout_sub google_protobuf_FieldOptions_submsgs[1] = {
static const upb_msglayout_sub google_protobuf_FieldOptions_submsgs[3] = {
{.submsg = &google_protobuf_UninterpretedOption_msginit},
{.subenum = &google_protobuf_FieldOptions_CType_enuminit},
{.subenum = &google_protobuf_FieldOptions_JSType_enuminit},
};
static const upb_msglayout_field google_protobuf_FieldOptions__fields[7] = {
{1, UPB_SIZE(4, 4), 1, 0, 14, _UPB_MODE_SCALAR | (_UPB_REP_4BYTE << _UPB_REP_SHIFT)},
{1, UPB_SIZE(4, 4), 1, 1, 14, _UPB_MODE_SCALAR | (_UPB_REP_4BYTE << _UPB_REP_SHIFT)},
{2, UPB_SIZE(12, 12), 2, 0, 8, _UPB_MODE_SCALAR | (_UPB_REP_1BYTE << _UPB_REP_SHIFT)},
{3, UPB_SIZE(13, 13), 3, 0, 8, _UPB_MODE_SCALAR | (_UPB_REP_1BYTE << _UPB_REP_SHIFT)},
{5, UPB_SIZE(14, 14), 4, 0, 8, _UPB_MODE_SCALAR | (_UPB_REP_1BYTE << _UPB_REP_SHIFT)},
{6, UPB_SIZE(8, 8), 5, 0, 14, _UPB_MODE_SCALAR | (_UPB_REP_4BYTE << _UPB_REP_SHIFT)},
{6, UPB_SIZE(8, 8), 5, 2, 14, _UPB_MODE_SCALAR | (_UPB_REP_4BYTE << _UPB_REP_SHIFT)},
{10, UPB_SIZE(15, 15), 6, 0, 8, _UPB_MODE_SCALAR | (_UPB_REP_1BYTE << _UPB_REP_SHIFT)},
{999, UPB_SIZE(16, 16), 0, 0, 11, _UPB_MODE_ARRAY | (_UPB_REP_PTR << _UPB_REP_SHIFT)},
};
@ -380,13 +385,14 @@ const upb_msglayout google_protobuf_ServiceOptions_msginit = {
UPB_SIZE(8, 16), 2, _UPB_MSGEXT_EXTENDABLE, 0, 255,
};
static const upb_msglayout_sub google_protobuf_MethodOptions_submsgs[1] = {
static const upb_msglayout_sub google_protobuf_MethodOptions_submsgs[2] = {
{.submsg = &google_protobuf_UninterpretedOption_msginit},
{.subenum = &google_protobuf_MethodOptions_IdempotencyLevel_enuminit},
};
static const upb_msglayout_field google_protobuf_MethodOptions__fields[3] = {
{33, UPB_SIZE(8, 8), 1, 0, 8, _UPB_MODE_SCALAR | (_UPB_REP_1BYTE << _UPB_REP_SHIFT)},
{34, UPB_SIZE(4, 4), 2, 0, 14, _UPB_MODE_SCALAR | (_UPB_REP_4BYTE << _UPB_REP_SHIFT)},
{34, UPB_SIZE(4, 4), 2, 1, 14, _UPB_MODE_SCALAR | (_UPB_REP_4BYTE << _UPB_REP_SHIFT)},
{999, UPB_SIZE(12, 16), 0, 0, 11, _UPB_MODE_ARRAY | (_UPB_REP_PTR << _UPB_REP_SHIFT)},
};
@ -512,10 +518,57 @@ static const upb_msglayout *messages_layout[27] = {
&google_protobuf_GeneratedCodeInfo_Annotation_msginit,
};
const upb_enumlayout google_protobuf_FieldDescriptorProto_Label_enuminit = {
NULL,
0xeULL,
0,
};
const upb_enumlayout google_protobuf_FieldDescriptorProto_Type_enuminit = {
NULL,
0x7fffeULL,
0,
};
const upb_enumlayout google_protobuf_FieldOptions_CType_enuminit = {
NULL,
0x7ULL,
0,
};
const upb_enumlayout google_protobuf_FieldOptions_JSType_enuminit = {
NULL,
0x7ULL,
0,
};
const upb_enumlayout google_protobuf_FileOptions_OptimizeMode_enuminit = {
NULL,
0xeULL,
0,
};
const upb_enumlayout google_protobuf_MethodOptions_IdempotencyLevel_enuminit = {
NULL,
0x7ULL,
0,
};
static const upb_enumlayout *enums_layout[6] = {
&google_protobuf_FieldDescriptorProto_Label_enuminit,
&google_protobuf_FieldDescriptorProto_Type_enuminit,
&google_protobuf_FieldOptions_CType_enuminit,
&google_protobuf_FieldOptions_JSType_enuminit,
&google_protobuf_FileOptions_OptimizeMode_enuminit,
&google_protobuf_MethodOptions_IdempotencyLevel_enuminit,
};
const upb_msglayout_file google_protobuf_descriptor_proto_upb_file_layout = {
messages_layout,
enums_layout,
NULL,
27,
6,
0,
};

@ -154,6 +154,13 @@ typedef enum {
} google_protobuf_MethodOptions_IdempotencyLevel;
extern const upb_enumlayout google_protobuf_FieldDescriptorProto_Label_enuminit;
extern const upb_enumlayout google_protobuf_FieldDescriptorProto_Type_enuminit;
extern const upb_enumlayout google_protobuf_FieldOptions_CType_enuminit;
extern const upb_enumlayout google_protobuf_FieldOptions_JSType_enuminit;
extern const upb_enumlayout google_protobuf_FileOptions_OptimizeMode_enuminit;
extern const upb_enumlayout google_protobuf_MethodOptions_IdempotencyLevel_enuminit;
/* google.protobuf.FileDescriptorSet */
UPB_INLINE google_protobuf_FileDescriptorSet *google_protobuf_FileDescriptorSet_new(upb_arena *arena) {

@ -101,12 +101,14 @@ static const unsigned FIXED64_OK_MASK = (1 << UPB_DTYPE_DOUBLE) |
#define OP_MSGSET_ITEM -2
#define OP_MSGSET_TYPEID -3
#define OP_SCALAR_LG2(n) (n) /* n in [0, 2, 3] => op in [0, 2, 3] */
#define OP_ENUM 1
#define OP_STRING 4
#define OP_BYTES 5
#define OP_SUBMSG 6
/* Ops above are scalar-only. Repeated fields can use any op. */
/* Scalar fields use only ops above. Repeated fields can use any op. */
#define OP_FIXPCK_LG2(n) (n + 5) /* n in [2, 3] => op in [7, 8] */
#define OP_VARPCK_LG2(n) (n + 9) /* n in [0, 2, 3] => op in [9, 11, 12] */
#define OP_PACKED_ENUM 13
static const int8_t varint_ops[] = {
OP_UNKNOWN, /* field not found */
@ -123,7 +125,7 @@ static const int8_t varint_ops[] = {
OP_UNKNOWN, /* MESSAGE */
OP_UNKNOWN, /* BYTES */
OP_SCALAR_LG2(2), /* UINT32 */
OP_SCALAR_LG2(2), /* ENUM */
OP_ENUM, /* ENUM */
OP_UNKNOWN, /* SFIXED32 */
OP_UNKNOWN, /* SFIXED64 */
OP_SCALAR_LG2(2), /* SINT32 */
@ -169,7 +171,7 @@ static const int8_t delim_ops[] = {
OP_SUBMSG, /* REPEATED MESSAGE */
OP_BYTES, /* REPEATED BYTES */
OP_VARPCK_LG2(2), /* REPEATED UINT32 */
OP_VARPCK_LG2(2), /* REPEATED ENUM */
OP_PACKED_ENUM, /* REPEATED ENUM */
OP_FIXPCK_LG2(2), /* REPEATED SFIXED32 */
OP_FIXPCK_LG2(3), /* REPEATED SFIXED64 */
OP_VARPCK_LG2(2), /* REPEATED SINT32 */
@ -384,6 +386,136 @@ static const char *decode_togroup(upb_decstate *d, const char *ptr,
return decode_group(d, ptr, submsg, subl, field->number);
}
static char *encode_varint32(uint32_t val, char *ptr) {
do {
uint8_t byte = val & 0x7fU;
val >>= 7;
if (val) byte |= 0x80U;
*(ptr++) = byte;
} while (val);
return ptr;
}
UPB_NOINLINE
static bool decode_checkenum_slow(upb_decstate *d, const char *ptr, upb_msg *msg,
const upb_enumlayout *e,
const upb_msglayout_field *field, uint32_t v) {
// OPT: binary search long lists?
int n = e->value_count;
for (int i = 0; i < n; i++) {
if ((uint32_t)e->values[i] == v) return true;
}
// Unrecognized enum goes into unknown fields.
// For packed fields the tag could be arbitrarily far in the past, so we
// just re-encode the tag here.
char buf[20];
char *end = buf;
uint32_t tag = ((uint32_t)field->number << 3) | UPB_WIRE_TYPE_VARINT;
end = encode_varint32(tag, end);
end = encode_varint32(v, end);
if (!_upb_msg_addunknown(msg, buf, end - buf, &d->arena)) {
decode_err(d);
}
return false;
}
UPB_FORCEINLINE
static bool decode_checkenum(upb_decstate *d, const char *ptr, upb_msg *msg,
const upb_enumlayout *e,
const upb_msglayout_field *field, wireval *val) {
uint32_t v = val->uint32_val;
if (UPB_LIKELY(v < 64) && UPB_LIKELY(((1ULL << v) & e->mask))) return true;
return decode_checkenum_slow(d, ptr, msg, e, field, v);
}
UPB_NOINLINE
static const char *decode_enum_toarray(upb_decstate *d, const char *ptr,
upb_msg *msg, upb_array *arr,
const upb_msglayout_sub *subs,
const upb_msglayout_field *field,
wireval *val) {
const upb_enumlayout *e = subs[field->submsg_index].subenum;
if (!decode_checkenum(d, ptr, msg, e, field, val)) return ptr;
void *mem = UPB_PTR_AT(_upb_array_ptr(arr), arr->len * 4, void);
arr->len++;
memcpy(mem, val, 4);
return ptr;
}
#include <stdio.h>
UPB_FORCEINLINE
static const char *decode_fixed_packed(upb_decstate *d, const char *ptr,
upb_array *arr, wireval *val,
const upb_msglayout_field *field,
int lg2) {
int mask = (1 << lg2) - 1;
size_t count = val->size >> lg2;
if ((val->size & mask) != 0) {
return decode_err(d); /* Length isn't a round multiple of elem size. */
}
decode_reserve(d, arr, count);
void *mem = UPB_PTR_AT(_upb_array_ptr(arr), arr->len << lg2, void);
arr->len += count;
// Note: if/when the decoder supports multi-buffer input, we will need to
// handle buffer seams here.
memcpy(mem, ptr, val->size);
return ptr + val->size;
}
UPB_FORCEINLINE
static const char *decode_varint_packed(upb_decstate *d, const char *ptr,
upb_array *arr, wireval *val,
const upb_msglayout_field *field,
int lg2) {
int scale = 1 << lg2;
int saved_limit = decode_pushlimit(d, ptr, val->size);
char *out = UPB_PTR_AT(_upb_array_ptr(arr), arr->len << lg2, void);
while (!decode_isdone(d, &ptr)) {
wireval elem;
ptr = decode_varint64(d, ptr, &elem.uint64_val);
decode_munge(field->descriptortype, &elem);
if (decode_reserve(d, arr, 1)) {
out = UPB_PTR_AT(_upb_array_ptr(arr), arr->len << lg2, void);
}
arr->len++;
memcpy(out, &elem, scale);
out += scale;
}
decode_poplimit(d, ptr, saved_limit);
return ptr;
}
UPB_NOINLINE
static const char *decode_enum_packed(upb_decstate *d, const char *ptr,
upb_msg *msg, upb_array *arr,
const upb_msglayout_sub *subs,
const upb_msglayout_field *field,
wireval *val) {
const upb_enumlayout *e = subs[field->submsg_index].subenum;
int saved_limit = decode_pushlimit(d, ptr, val->size);
char *out = UPB_PTR_AT(_upb_array_ptr(arr), arr->len * 4, void);
while (!decode_isdone(d, &ptr)) {
wireval elem;
ptr = decode_varint64(d, ptr, &elem.uint64_val);
if (!decode_checkenum(d, ptr, msg, e, field, &elem)) {
continue;
}
if (decode_reserve(d, arr, 1)) {
out = UPB_PTR_AT(_upb_array_ptr(arr), arr->len * 4, void);
}
arr->len++;
memcpy(out, &elem, 4);
out += 4;
}
decode_poplimit(d, ptr, saved_limit);
return ptr;
}
static const char *decode_toarray(upb_decstate *d, const char *ptr,
upb_msg *msg,
const upb_msglayout_sub *subs,
@ -433,42 +565,18 @@ static const char *decode_toarray(upb_decstate *d, const char *ptr,
}
}
case OP_FIXPCK_LG2(2):
case OP_FIXPCK_LG2(3): {
/* Fixed packed. */
int lg2 = op - OP_FIXPCK_LG2(0);
int mask = (1 << lg2) - 1;
size_t count = val->size >> lg2;
if ((val->size & mask) != 0) {
return decode_err(d); /* Length isn't a round multiple of elem size. */
}
decode_reserve(d, arr, count);
mem = UPB_PTR_AT(_upb_array_ptr(arr), arr->len << lg2, void);
arr->len += count;
memcpy(mem, ptr, val->size); /* XXX: ptr boundary. */
return ptr + val->size;
}
case OP_FIXPCK_LG2(3):
return decode_fixed_packed(d, ptr, arr, val, field,
op - OP_FIXPCK_LG2(0));
case OP_VARPCK_LG2(0):
case OP_VARPCK_LG2(2):
case OP_VARPCK_LG2(3): {
/* Varint packed. */
int lg2 = op - OP_VARPCK_LG2(0);
int scale = 1 << lg2;
int saved_limit = decode_pushlimit(d, ptr, val->size);
char *out = UPB_PTR_AT(_upb_array_ptr(arr), arr->len << lg2, void);
while (!decode_isdone(d, &ptr)) {
wireval elem;
ptr = decode_varint64(d, ptr, &elem.uint64_val);
decode_munge(field->descriptortype, &elem);
if (decode_reserve(d, arr, 1)) {
out = UPB_PTR_AT(_upb_array_ptr(arr), arr->len << lg2, void);
}
arr->len++;
memcpy(out, &elem, scale);
out += scale;
}
decode_poplimit(d, ptr, saved_limit);
return ptr;
}
case OP_VARPCK_LG2(3):
return decode_varint_packed(d, ptr, arr, val, field,
op - OP_VARPCK_LG2(0));
case OP_ENUM:
return decode_enum_toarray(d, ptr, msg, arr, subs, field, val);
case OP_PACKED_ENUM:
return decode_enum_packed(d, ptr, msg, arr, subs, field, val);
default:
UPB_UNREACHABLE();
}
@ -516,6 +624,12 @@ static const char *decode_tomsg(upb_decstate *d, const char *ptr, upb_msg *msg,
void *mem = UPB_PTR_AT(msg, field->offset, void);
int type = field->descriptortype;
if (UPB_UNLIKELY(op == OP_ENUM) &&
!decode_checkenum(d, ptr, msg, subs[field->submsg_index].subenum, field,
val)) {
return ptr;
}
/* Set presence if necessary. */
if (field->presence > 0) {
_upb_sethas_field(msg, field);
@ -552,6 +666,7 @@ static const char *decode_tomsg(upb_decstate *d, const char *ptr, upb_msg *msg,
case OP_SCALAR_LG2(3):
memcpy(mem, val, 8);
break;
case OP_ENUM:
case OP_SCALAR_LG2(2):
memcpy(mem, val, 4);
break;
@ -798,13 +913,12 @@ static const char *decode_msg(upb_decstate *d, const char *ptr, upb_msg *msg,
field_number = tag >> 3;
wire_type = tag & 7;
field = decode_findfield(d, layout, field_number, &last_field_index);
if (wire_type == UPB_WIRE_TYPE_END_GROUP) {
d->end_group = field_number;
return ptr;
}
field = decode_findfield(d, layout, field_number, &last_field_index);
ptr = decode_wireval(d, ptr, field, wire_type, &val, &op);
if (op >= 0) {

@ -120,6 +120,7 @@ struct upb_msgdef {
struct upb_enumdef {
const google_protobuf_EnumOptions *opts;
const upb_enumlayout *layout; // Only for proto2.
const upb_filedef *file;
const upb_msgdef *containing_type; // Could be merged with "file".
const char *full_name;
@ -1664,6 +1665,11 @@ static void make_layout(symtab_addctx *ctx, const upb_msgdef *m) {
if (upb_fielddef_issubmsg(f)) {
field->submsg_index = sublayout_count++;
subs[field->submsg_index].submsg = upb_fielddef_msgsubdef(f)->layout;
} else if (upb_fielddef_type(f) == UPB_TYPE_ENUM &&
f->file->syntax == UPB_SYNTAX_PROTO2) {
field->submsg_index = sublayout_count++;
subs[field->submsg_index].subenum = upb_fielddef_enumsubdef(f)->layout;
UPB_ASSERT(subs[field->submsg_index].subenum);
}
if (upb_fielddef_label(f) == UPB_LABEL_REQUIRED) {
@ -2357,6 +2363,83 @@ static void create_service(
}
}
static int count_bits_debug(uint64_t x) {
// For assertions only, speed does not matter.
int n = 0;
while (x) {
if (x & 1) n++;
x >>= 1;
}
return n;
}
upb_enumlayout *create_enumlayout(symtab_addctx *ctx, const upb_enumdef *e) {
int n = 0;
uint64_t mask = 0;
for (int i = 0; i < e->value_count; i++) {
uint32_t val = (uint32_t)e->values[i].number;
if (val < 64) {
mask |= 1 << val;
} else {
n++;
}
}
int32_t *values = symtab_alloc(ctx, sizeof(*values) * n);
if (n) {
int32_t *p = values;
// Add values outside the bitmask range to the list, as described in the
// comments for upb_enumlayout.
for (int i = 0; i < e->value_count; i++) {
int32_t val = e->values[i].number;
if ((uint32_t)val >= 64) {
*p++ = val;
}
}
UPB_ASSERT(p == values + n);
}
UPB_ASSERT(upb_inttable_count(&e->iton) == n + count_bits_debug(mask));
upb_enumlayout *layout = symtab_alloc(ctx, sizeof(*layout));
layout->value_count = n;
layout->mask = mask;
layout->values = values;
return layout;
}
static void create_enumvaldef(
symtab_addctx *ctx, const char *prefix,
const google_protobuf_EnumValueDescriptorProto *val_proto, upb_enumdef *e,
int i) {
upb_enumvaldef *val = (upb_enumvaldef *)&e->values[i];
upb_strview name = google_protobuf_EnumValueDescriptorProto_name(val_proto);
upb_value v = upb_value_constptr(val);
val->enum_ = e; /* Must happen prior to symtab_add(). */
val->full_name = makefullname(ctx, prefix, name);
val->number = google_protobuf_EnumValueDescriptorProto_number(val_proto);
symtab_add(ctx, val->full_name, pack_def(val, UPB_DEFTYPE_ENUMVAL));
SET_OPTIONS(val->opts, EnumValueDescriptorProto, EnumValueOptions, val_proto);
if (i == 0 && e->file->syntax == UPB_SYNTAX_PROTO3 && val->number != 0) {
symtab_errf(ctx, "for proto3, the first enum value must be zero (%s)",
e->full_name);
}
CHK_OOM(upb_strtable_insert(&e->ntoi, name.data, name.size, v, ctx->arena));
// Multiple enumerators can have the same number, first one wins.
if (!upb_inttable_lookup(&e->iton, val->number, NULL)) {
CHK_OOM(upb_inttable_insert(&e->iton, val->number, v, ctx->arena));
}
}
static void create_enumdef(
symtab_addctx *ctx, const char *prefix,
const google_protobuf_EnumDescriptorProto *enum_proto,
@ -2392,32 +2475,22 @@ static void create_enumdef(
SET_OPTIONS(e->opts, EnumDescriptorProto, EnumOptions, enum_proto);
for (i = 0; i < n; i++) {
const google_protobuf_EnumValueDescriptorProto *val_proto = values[i];
upb_enumvaldef *val = (upb_enumvaldef*)&e->values[i];
upb_strview name = google_protobuf_EnumValueDescriptorProto_name(val_proto);
upb_value v = upb_value_constptr(val);
val->enum_ = e; /* Must happen prior to symtab_add(). */
val->full_name = makefullname(ctx, prefix, name);
val->number = google_protobuf_EnumValueDescriptorProto_number(val_proto);
symtab_add(ctx, val->full_name, pack_def(val, UPB_DEFTYPE_ENUMVAL));
SET_OPTIONS(val->opts, EnumValueDescriptorProto, EnumValueOptions, val_proto);
if (i == 0 && e->file->syntax == UPB_SYNTAX_PROTO3 && val->number != 0) {
symtab_errf(ctx, "for proto3, the first enum value must be zero (%s)",
e->full_name);
create_enumvaldef(ctx, prefix, values[i], e, i);
}
CHK_OOM(upb_strtable_insert(&e->ntoi, name.data, name.size, v, ctx->arena));
upb_inttable_compact(&e->iton, ctx->arena);
// Multiple enumerators can have the same number, first one wins.
if (!upb_inttable_lookup(&e->iton, val->number, NULL)) {
CHK_OOM(upb_inttable_insert(&e->iton, val->number, v, ctx->arena));
if (e->file->syntax == UPB_SYNTAX_PROTO2) {
if (ctx->layout) {
UPB_ASSERT(ctx->enum_count < ctx->layout->enum_count);
e->layout = ctx->layout->enums[ctx->enum_count++];
UPB_ASSERT(n == e->layout->value_count + count_bits_debug(e->layout->mask));
} else {
e->layout = create_enumlayout(ctx, e);
}
} else {
e->layout = NULL;
}
upb_inttable_compact(&e->iton, ctx->arena);
}
static void create_msgdef(symtab_addctx *ctx, const char *prefix,

@ -66,8 +66,8 @@ enum {
typedef struct {
uint32_t number;
uint16_t offset;
int16_t presence; /* If >0, hasbit_index. If <0, ~oneof_index. */
uint16_t submsg_index; /* undefined if descriptortype != MESSAGE or GROUP. */
int16_t presence; // If >0, hasbit_index. If <0, ~oneof_index
uint16_t submsg_index; // undefined if descriptortype != MESSAGE/GROUP/ENUM
uint8_t descriptortype;
uint8_t mode; /* upb_fieldmode | upb_labelflags |
(upb_rep << _UPB_REP_SHIFT) */
@ -130,9 +130,15 @@ typedef struct {
_upb_field_parser *field_parser;
} _upb_fasttable_entry;
typedef struct {
const int32_t *values; // List of values <0 or >63
uint64_t mask; // Bits are set for acceptable value 0 <= x < 64
int value_count;
} upb_enumlayout;
typedef union {
const struct upb_msglayout *submsg;
// TODO: const upb_enumlayout *subenum;
const upb_enumlayout *subenum;
} upb_msglayout_sub;
typedef enum {
@ -179,8 +185,10 @@ typedef struct {
typedef struct {
const upb_msglayout **msgs;
const upb_enumlayout **enums;
const upb_msglayout_ext **exts;
int msg_count;
int enum_count;
int ext_count;
} upb_msglayout_file;

@ -26,6 +26,7 @@
*/
#include "gtest/gtest.h"
#include "gmock/gmock.h"
#include "src/google/protobuf/test_messages_proto3.upb.h"
#include "upb/def.hpp"
#include "upb/json_decode.h"
@ -99,7 +100,9 @@ TEST(MessageTest, Extensions) {
}
void VerifyMessageSet(const upb_test_TestMessageSet *mset_msg) {
EXPECT_TRUE(upb_test_MessageSetMember_has_message_set_extension(mset_msg));
bool has = upb_test_MessageSetMember_has_message_set_extension(mset_msg);
EXPECT_TRUE(has);
if (!has) return;
const upb_test_MessageSetMember *member =
upb_test_MessageSetMember_message_set_extension(mset_msg);
EXPECT_TRUE(member != nullptr);
@ -152,3 +155,72 @@ TEST(MessageTest, MessageSet) {
<< status.error_message();
VerifyMessageSet(ext_msg3);
}
TEST(MessageTest, Proto2Enum) {
upb::Arena arena;
upb_test_Proto2FakeEnumMessage *fake_msg = upb_test_Proto2FakeEnumMessage_new(arena.ptr());
upb_test_Proto2FakeEnumMessage_set_optional_enum(fake_msg, 999);
int32_t *vals = upb_test_Proto2FakeEnumMessage_resize_repeated_enum(
fake_msg, 6, arena.ptr());
vals[0] = upb_test_Proto2EnumMessage_ZERO;
vals[1] = 7; // Unknown small.
vals[2] = upb_test_Proto2EnumMessage_SMALL;
vals[3] = 888; // Unknown large.
vals[4] = upb_test_Proto2EnumMessage_LARGE;
vals[5] = upb_test_Proto2EnumMessage_NEGATIVE;
vals = upb_test_Proto2FakeEnumMessage_resize_packed_enum(
fake_msg, 6, arena.ptr());
vals[0] = upb_test_Proto2EnumMessage_ZERO;
vals[1] = 7; // Unknown small.
vals[2] = upb_test_Proto2EnumMessage_SMALL;
vals[3] = 888; // Unknown large.
vals[4] = upb_test_Proto2EnumMessage_LARGE;
vals[5] = upb_test_Proto2EnumMessage_NEGATIVE;
size_t size;
char *pb =
upb_test_Proto2FakeEnumMessage_serialize(fake_msg, arena.ptr(), &size);
// Parsing as enums puts unknown values into unknown fields.
upb_test_Proto2EnumMessage *enum_msg =
upb_test_Proto2EnumMessage_parse(pb, size, arena.ptr());
ASSERT_TRUE(enum_msg != nullptr);
EXPECT_EQ(false, upb_test_Proto2EnumMessage_has_optional_enum(enum_msg));
const int32_t *vals_const =
upb_test_Proto2EnumMessage_repeated_enum(enum_msg, &size);
EXPECT_EQ(4, size); // Two unknown values moved to the unknown field set.
// Parsing back into the fake message shows the original data, except the
// repeated enum is rearranged.
pb = upb_test_Proto2EnumMessage_serialize(enum_msg, arena.ptr(), &size);
upb_test_Proto2FakeEnumMessage *fake_msg2 =
upb_test_Proto2FakeEnumMessage_parse(pb, size, arena.ptr());
EXPECT_EQ(true, upb_test_Proto2FakeEnumMessage_has_optional_enum(fake_msg2));
EXPECT_EQ(999, upb_test_Proto2FakeEnumMessage_optional_enum(fake_msg2));
int32_t expected[] = {
upb_test_Proto2EnumMessage_ZERO,
upb_test_Proto2EnumMessage_SMALL,
upb_test_Proto2EnumMessage_LARGE,
upb_test_Proto2EnumMessage_NEGATIVE,
7,
888,
};
vals_const =
upb_test_Proto2FakeEnumMessage_repeated_enum(fake_msg2, &size);
EXPECT_EQ(6, size);
EXPECT_THAT(std::vector<int32_t>(vals_const, vals_const + size),
::testing::ElementsAreArray(expected));
vals_const =
upb_test_Proto2FakeEnumMessage_packed_enum(fake_msg2, &size);
EXPECT_EQ(6, size);
EXPECT_THAT(std::vector<int32_t>(vals_const, vals_const + size),
::testing::ElementsAreArray(expected));
}

@ -58,3 +58,24 @@ message MessageSetMember {
optional MessageSetMember message_set_extension = 4;
}
}
message Proto2EnumMessage {
enum Proto2TestEnum {
ZERO = 0;
NEGATIVE = -1;
SMALL = 15;
LARGE = 12345;
}
optional Proto2TestEnum optional_enum = 1;
repeated Proto2TestEnum repeated_enum = 2;
repeated Proto2TestEnum packed_enum = 3 [packed = true];
}
// The same fields as Proto2EnumMessage, but with int32 fields so we can fake
// wire format.
message Proto2FakeEnumMessage {
optional int32 optional_enum = 1;
repeated int32 repeated_enum = 2;
repeated int32 packed_enum = 3 [packed = true];
}

@ -51,6 +51,10 @@ std::string MessageInit(const protobuf::Descriptor* descriptor) {
return MessageName(descriptor) + "_msginit";
}
std::string EnumInit(const protobuf::EnumDescriptor* descriptor) {
return ToCIdent(descriptor->full_name()) + "_enuminit";
}
std::string ExtensionIdentBase(const protobuf::FieldDescriptor* ext) {
assert(ext->is_extension());
std::string ext_scope;
@ -65,8 +69,9 @@ std::string ExtensionLayout(const google::protobuf::FieldDescriptor* ext) {
return absl::StrCat(ExtensionIdentBase(ext), "_", ext->name(), "_ext");
}
const char *kMessagesInit = "messages_layout";
const char *kEnumsInit = "enums_layout";
const char *kExtensionsInit = "extensions_layout";
const char *kMessagesInit = "messages_layout";
void AddEnums(const protobuf::Descriptor* message,
std::vector<const protobuf::EnumDescriptor*>* enums) {
@ -179,6 +184,24 @@ std::vector<const protobuf::FieldDescriptor*> SortedSubmessages(
return ret;
}
std::vector<const protobuf::FieldDescriptor*> SortedSubEnums(
const protobuf::Descriptor* message) {
std::vector<const protobuf::FieldDescriptor*> ret;
for (int i = 0; i < message->field_count(); i++) {
if (message->field(i)->cpp_type() ==
protobuf::FieldDescriptor::CPPTYPE_ENUM) {
ret.push_back(message->field(i));
}
}
std::sort(ret.begin(), ret.end(),
[](const protobuf::FieldDescriptor* a,
const protobuf::FieldDescriptor* b) {
return a->enum_type()->full_name() <
b->enum_type()->full_name();
});
return ret;
}
std::string EnumValueSymbol(const protobuf::EnumValueDescriptor* value) {
return ToCIdent(value->full_name());
}
@ -721,6 +744,14 @@ void WriteHeader(const protobuf::FileDescriptor* file, Output& output) {
output("\n");
if (file->syntax() == protobuf::FileDescriptor::SYNTAX_PROTO2) {
for (const auto* enumdesc : this_file_enums) {
output("extern const upb_enumlayout $0;\n", EnumInit(enumdesc));
}
}
output("\n");
for (auto message : this_file_messages) {
GenerateMessageInHeader(message, output);
}
@ -787,45 +818,73 @@ int TableDescriptorType(const protobuf::FieldDescriptor* field) {
// embedding field names on the side) we will have to revisit this, because
// string vs. bytes behavior is not affected by proto2 vs proto3.
return protobuf::FieldDescriptor::TYPE_BYTES;
} else if (field->enum_type() &&
field->enum_type()->file()->syntax() ==
protobuf::FileDescriptor::SYNTAX_PROTO3) {
// From the perspective of the binary decoder, proto3 enums are identical to
// int32 fields. Only in proto2 do we check enum values to make sure they
// are defined in the enum.
return protobuf::FieldDescriptor::TYPE_INT32;
} else {
return field->type();
}
}
struct SubmsgArray {
struct SubLayoutArray {
public:
SubmsgArray(const protobuf::Descriptor* message) : message_(message) {
MessageLayout layout(message);
std::vector<const protobuf::FieldDescriptor*> sorted_submsgs =
SortedSubmessages(message);
int i = 0;
for (auto submsg : sorted_submsgs) {
if (indexes_.find(submsg->message_type()) != indexes_.end()) {
continue;
}
submsgs_.push_back(submsg->message_type());
indexes_[submsg->message_type()] = i++;
}
}
SubLayoutArray(const protobuf::Descriptor* message);
const std::vector<const protobuf::Descriptor*>& submsgs() const {
return submsgs_;
}
int GetIndex(const protobuf::FieldDescriptor* field) {
(void)message_;
assert(field->containing_type() == message_);
auto it = indexes_.find(field->message_type());
const std::vector<const protobuf::EnumDescriptor*>& subenums() const {
return subenums_;
}
int total_count() const { return submsgs_.size() + subenums_.size(); }
int GetIndex(const void *sub) {
auto it = indexes_.find(sub);
assert(it != indexes_.end());
return it->second;
}
private:
const protobuf::Descriptor* message_;
std::vector<const protobuf::Descriptor*> submsgs_;
absl::flat_hash_map<const protobuf::Descriptor*, int> indexes_;
std::vector<const protobuf::EnumDescriptor*> subenums_;
absl::flat_hash_map<const void*, int> indexes_;
};
SubLayoutArray::SubLayoutArray(const protobuf::Descriptor* message) {
MessageLayout layout(message);
std::vector<const protobuf::FieldDescriptor*> sorted_submsgs =
SortedSubmessages(message);
int i = 0;
for (const auto* submsg : sorted_submsgs) {
if (!indexes_.try_emplace(submsg->message_type(), i).second) {
// Already present.
continue;
}
submsgs_.push_back(submsg->message_type());
i++;
}
std::vector<const protobuf::FieldDescriptor*> sorted_subenums =
SortedSubEnums(message);
for (const auto* field : sorted_subenums) {
if (field->file()->syntax() != protobuf::FileDescriptor::SYNTAX_PROTO2) {
continue;
}
if (!indexes_.try_emplace(field->enum_type(), i).second) {
// Already present.
continue;
}
subenums_.push_back(field->enum_type());
i++;
}
}
typedef std::pair<std::string, uint64_t> TableEntry;
uint64_t GetEncodedTag(const protobuf::FieldDescriptor* field) {
@ -861,8 +920,13 @@ bool TryFillTableEntry(const protobuf::Descriptor* message,
case protobuf::FieldDescriptor::TYPE_BOOL:
type = "b1";
break;
case protobuf::FieldDescriptor::TYPE_INT32:
case protobuf::FieldDescriptor::TYPE_ENUM:
if (field->file()->syntax() == protobuf::FileDescriptor::SYNTAX_PROTO2) {
// We don't have the means to test proto2 enum fields for valid values.
return false;
}
ABSL_FALLTHROUGH_INTENDED;
case protobuf::FieldDescriptor::TYPE_INT32:
case protobuf::FieldDescriptor::TYPE_UINT32:
type = "v4";
break;
@ -957,8 +1021,8 @@ bool TryFillTableEntry(const protobuf::Descriptor* message,
}
if (field->cpp_type() == protobuf::FieldDescriptor::CPPTYPE_MESSAGE) {
SubmsgArray submsg_array(message);
uint64_t idx = submsg_array.GetIndex(field);
SubLayoutArray sublayout_array(message);
uint64_t idx = sublayout_array.GetIndex(field->message_type());
if (idx > 255) return false;
data |= idx << 16;
@ -1083,19 +1147,22 @@ void WriteMessage(const protobuf::Descriptor* message, Output& output,
uint8_t dense_below = 0;
const int dense_below_max = std::numeric_limits<decltype(dense_below)>::max();
MessageLayout layout(message);
SubmsgArray submsg_array(message);
SubLayoutArray sublayout_array(message);
if (!submsg_array.submsgs().empty()) {
if (sublayout_array.total_count()) {
// TODO(haberman): could save a little bit of space by only generating a
// "submsgs" array for every strongly-connected component.
std::string submsgs_array_name = msg_name + "_submsgs";
submsgs_array_ref = "&" + submsgs_array_name + "[0]";
output("static const upb_msglayout_sub $0[$1] = {\n",
submsgs_array_name, submsg_array.submsgs().size());
submsgs_array_name, sublayout_array.total_count());
for (auto submsg : submsg_array.submsgs()) {
for (const auto* submsg : sublayout_array.submsgs()) {
output(" {.submsg = &$0},\n", MessageInit(submsg));
}
for (const auto* subenum : sublayout_array.subenums()) {
output(" {.subenum = &$0},\n", EnumInit(subenum));
}
output("};\n\n");
}
@ -1109,7 +1176,7 @@ void WriteMessage(const protobuf::Descriptor* message, Output& output,
fields_array_name, field_number_order.size());
for (int i = 0; i < static_cast<int>(field_number_order.size()); i++) {
auto field = field_number_order[i];
int submsg_index = 0;
int sublayout_index = 0;
if (i < dense_below_max && field->number() == i + 1 &&
(i == 0 || field_number_order[i - 1]->number() == i)) {
@ -1117,10 +1184,14 @@ void WriteMessage(const protobuf::Descriptor* message, Output& output,
}
if (field->cpp_type() == protobuf::FieldDescriptor::CPPTYPE_MESSAGE) {
submsg_index = submsg_array.GetIndex(field);
sublayout_index = sublayout_array.GetIndex(field->message_type());
} else if (field->enum_type() &&
field->enum_type()->file()->syntax() ==
protobuf::FileDescriptor::SYNTAX_PROTO2) {
sublayout_index = sublayout_array.GetIndex(field->enum_type());
}
WriteMessageField(field, layout, submsg_index, output);
WriteMessageField(field, layout, sublayout_index, output);
}
output("};\n\n");
}
@ -1168,6 +1239,61 @@ void WriteMessage(const protobuf::Descriptor* message, Output& output,
output("};\n\n");
}
int WriteEnums(const protobuf::FileDescriptor* file, Output& output) {
if (file->syntax() != protobuf::FileDescriptor::SYNTAX_PROTO2) {
return 0;
}
std::vector<const protobuf::EnumDescriptor*> this_file_enums =
SortedEnums(file);
std::string values_init = "NULL";
for (const auto* e : this_file_enums) {
uint64_t mask = 0;
absl::flat_hash_set<int32_t> values;
for (int i = 0; i < e->value_count(); i++) {
int32_t number = e->value(i)->number();
if (static_cast<uint32_t>(number) < 64) {
mask |= 1 << number;
} else {
values.insert(number);
}
}
std::vector<int32_t> values_vec(values.begin(), values.end());
std::sort(values_vec.begin(), values_vec.end());
if (!values_vec.empty()) {
values_init = EnumInit(e) + "_values";
output("static const int32_t $0[$1] = {\n", values_init,
values_vec.size());
for (auto value : values_vec) {
output(" $0,\n", value);
}
output("};\n\n");
}
output("const upb_enumlayout $0 = {\n", EnumInit(e));
output(" $0,\n", values_init);
output(" 0x$0ULL,\n", absl::Hex(mask));
output(" $0,\n", values_vec.size());
output("};\n\n");
}
if (!this_file_enums.empty()) {
output("static const upb_enumlayout *$0[$1] = {\n", kEnumsInit,
this_file_enums.size());
for (const auto* e : this_file_enums) {
output(" &$0,\n", EnumInit(e));
}
output("};\n");
output("\n");
}
return this_file_enums.size();
}
void WriteExtension(const protobuf::FieldDescriptor* ext, Output& output) {
output("const upb_msglayout_ext $0 = {\n ", ExtensionLayout(ext));
WriteField(ext, "0", "0", 0, output);
@ -1264,11 +1390,14 @@ void WriteSource(const protobuf::FileDescriptor* file, Output& output,
int msg_count = WriteMessages(file, output, fasttable_enabled);
int ext_count = WriteExtensions(file, output);
int enum_count = WriteEnums(file, output);
output("const upb_msglayout_file $0 = {\n", FileLayoutName(file));
output(" $0,\n", msg_count ? kMessagesInit : "NULL");
output(" $0,\n", enum_count ? kEnumsInit : "NULL");
output(" $0,\n", ext_count ? kExtensionsInit : "NULL");
output(" $0,\n", msg_count);
output(" $0,\n", enum_count);
output(" $0,\n", ext_count);
output("};\n\n");

Loading…
Cancel
Save