Treat unlinked sub-messages in the MiniTable as unknown

This is an observable behavior change in the decoder.  After submitting this CL, clients of the decoder can assume that any unlinked sub-messages will be treated as unknown, rather than crashing.

Unlinked sub-messages must never have values present in the message.  We can verify this with asserts.  Since the values are never set, the encoder should never encounter data for any unlinked sub-message.

```
name                                           old cpu/op  new cpu/op  delta
BM_ArenaOneAlloc                                 18.3ns ± 9%  17.9ns ± 2%    ~     (p=0.690 n=5+5)
BM_ArenaInitialBlockOneAlloc                     6.40ns ± 1%  6.68ns ±10%    ~     (p=0.730 n=4+5)
BM_LoadAdsDescriptor_Upb<NoLayout>               5.09ms ± 2%  5.03ms ± 3%    ~     (p=0.222 n=5+5)
BM_LoadAdsDescriptor_Upb<WithLayout>             5.45ms ± 3%  5.43ms ± 1%    ~     (p=0.905 n=5+4)
BM_LoadAdsDescriptor_Proto2<NoLayout>            10.9ms ± 1%  10.8ms ± 1%  -1.09%  (p=0.016 n=5+4)
BM_LoadAdsDescriptor_Proto2<WithLayout>          11.3ms ± 9%  11.1ms ± 3%    ~     (p=0.841 n=5+5)
BM_Parse_Upb_FileDesc<UseArena, Copy>            11.2µs ± 3%  11.3µs ± 3%    ~     (p=0.222 n=5+5)
BM_Parse_Upb_FileDesc<UseArena, Alias>           10.3µs ± 5%  10.5µs ± 5%    ~     (p=0.310 n=5+5)
BM_Parse_Upb_FileDesc<InitBlock, Copy>           11.4µs ±18%  11.0µs ± 2%    ~     (p=1.000 n=5+5)
BM_Parse_Upb_FileDesc<InitBlock, Alias>          10.5µs ±17%  10.6µs ±19%    ~     (p=0.421 n=5+5)
BM_Parse_Proto2<FileDesc, NoArena, Copy>         20.5µs ± 2%  20.2µs ± 2%    ~     (p=0.222 n=5+5)
BM_Parse_Proto2<FileDesc, UseArena, Copy>        10.8µs ± 2%  10.9µs ± 4%    ~     (p=0.841 n=5+5)
BM_Parse_Proto2<FileDesc, InitBlock, Copy>       10.5µs ± 3%  10.6µs ± 3%    ~     (p=0.690 n=5+5)
BM_Parse_Proto2<FileDescSV, InitBlock, Alias>    9.22µs ± 2%  9.23µs ± 3%    ~     (p=1.000 n=5+5)
BM_SerializeDescriptor_Proto2                    6.05µs ± 3%  5.90µs ± 3%    ~     (p=0.222 n=5+5)
BM_SerializeDescriptor_Upb                       10.2µs ± 3%  10.6µs ±14%    ~     (p=0.841 n=5+5)

name                                           old time/op             new time/op             delta
BM_ArenaOneAlloc                                 18.3ns ± 9%             17.9ns ± 2%    ~             (p=0.841 n=5+5)
BM_ArenaInitialBlockOneAlloc                     6.42ns ± 1%             6.69ns ±10%    ~             (p=0.730 n=4+5)
BM_LoadAdsDescriptor_Upb<NoLayout>               5.10ms ± 2%             5.05ms ± 3%    ~             (p=0.222 n=5+5)
BM_LoadAdsDescriptor_Upb<WithLayout>             5.47ms ± 3%             5.45ms ± 1%    ~             (p=0.905 n=5+4)
BM_LoadAdsDescriptor_Proto2<NoLayout>            10.9ms ± 1%             10.8ms ± 1%  -1.11%          (p=0.016 n=5+4)
BM_LoadAdsDescriptor_Proto2<WithLayout>          11.4ms ± 9%             11.1ms ± 3%    ~             (p=0.841 n=5+5)
BM_Parse_Upb_FileDesc<UseArena, Copy>            11.2µs ± 3%             11.3µs ± 3%    ~             (p=0.222 n=5+5)
BM_Parse_Upb_FileDesc<UseArena, Alias>           10.3µs ± 5%             10.5µs ± 5%    ~             (p=0.151 n=5+5)
BM_Parse_Upb_FileDesc<InitBlock, Copy>           11.5µs ±18%             11.0µs ± 2%    ~             (p=1.000 n=5+5)
BM_Parse_Upb_FileDesc<InitBlock, Alias>          10.5µs ±17%             10.7µs ±19%    ~             (p=0.421 n=5+5)
BM_Parse_Proto2<FileDesc, NoArena, Copy>         20.6µs ± 2%             20.3µs ± 2%    ~             (p=0.222 n=5+5)
BM_Parse_Proto2<FileDesc, UseArena, Copy>        10.9µs ± 2%             10.9µs ± 4%    ~             (p=0.841 n=5+5)
BM_Parse_Proto2<FileDesc, InitBlock, Copy>       10.6µs ± 3%             10.6µs ± 3%    ~             (p=0.690 n=5+5)
BM_Parse_Proto2<FileDescSV, InitBlock, Alias>    9.24µs ± 2%             9.25µs ± 3%    ~             (p=1.000 n=5+5)
BM_SerializeDescriptor_Proto2                    6.07µs ± 3%             5.91µs ± 3%    ~             (p=0.222 n=5+5)
BM_SerializeDescriptor_Upb                       10.3µs ± 3%             10.6µs ±14%    ~             (p=0.841 n=5+5)

name                                           old INSTRUCTIONS/op     new INSTRUCTIONS/op     delta
BM_ArenaOneAlloc                                    201 ± 0%                201 ± 0%    ~             (p=0.841 n=5+5)
BM_ArenaInitialBlockOneAlloc                       69.0 ± 0%               69.0 ± 0%    ~     (all samples are equal)
BM_LoadAdsDescriptor_Upb<NoLayout>                33.9M ± 0%              34.1M ± 0%  +0.66%          (p=0.008 n=5+5)
BM_LoadAdsDescriptor_Upb<WithLayout>              35.6M ± 0%              35.8M ± 0%  +0.64%          (p=0.008 n=5+5)
BM_LoadAdsDescriptor_Proto2<NoLayout>             70.8M ± 0%              70.8M ± 0%    ~             (p=0.548 n=5+5)
BM_LoadAdsDescriptor_Proto2<WithLayout>           71.6M ± 0%              71.6M ± 0%    ~             (p=0.151 n=5+5)
BM_Parse_Upb_FileDesc<UseArena, Copy>              137k ± 0%               141k ± 0%  +2.87%          (p=0.008 n=5+5)
BM_Parse_Upb_FileDesc<UseArena, Alias>             125k ± 0%               128k ± 0%  +2.83%          (p=0.008 n=5+5)
BM_Parse_Upb_FileDesc<InitBlock, Copy>             135k ± 0%               139k ± 0%  +2.89%          (p=0.008 n=5+5)
BM_Parse_Upb_FileDesc<InitBlock, Alias>            124k ± 0%               127k ± 0%  +2.85%          (p=0.016 n=5+4)
BM_Parse_Proto2<FileDesc, NoArena, Copy>           201k ± 0%               201k ± 0%    ~             (p=0.222 n=5+5)
BM_Parse_Proto2<FileDesc, UseArena, Copy>          107k ± 0%               107k ± 0%    ~             (p=1.000 n=5+5)
BM_Parse_Proto2<FileDesc, InitBlock, Copy>         105k ± 0%               105k ± 0%    ~             (p=0.286 n=5+4)
BM_Parse_Proto2<FileDescSV, InitBlock, Alias>     86.5k ± 0%              86.5k ± 0%    ~             (p=0.222 n=5+5)
BM_SerializeDescriptor_Proto2                     60.3k ± 0%              60.3k ± 0%    ~             (p=0.071 n=5+5)
BM_SerializeDescriptor_Upb                         111k ± 0%               111k ± 0%    ~             (p=0.841 n=5+5)

name                                           old CYCLES/op           new CYCLES/op           delta
BM_ArenaOneAlloc                                   60.0 ± 7%               58.8 ± 0%  -2.15%          (p=0.016 n=5+5)
BM_ArenaInitialBlockOneAlloc                       21.0 ± 0%               21.0 ± 0%    ~             (p=1.000 n=5+5)
BM_LoadAdsDescriptor_Upb<NoLayout>                16.9M ± 0%              16.9M ± 0%    ~             (p=0.056 n=5+5)
BM_LoadAdsDescriptor_Upb<WithLayout>              17.9M ± 1%              18.0M ± 1%    ~             (p=0.095 n=5+5)
BM_LoadAdsDescriptor_Proto2<NoLayout>             35.9M ± 1%              35.8M ± 1%    ~             (p=0.421 n=5+5)
BM_LoadAdsDescriptor_Proto2<WithLayout>           36.5M ± 0%              36.5M ± 0%    ~             (p=0.841 n=5+5)
BM_Parse_Upb_FileDesc<UseArena, Copy>             37.2k ± 0%              37.3k ± 0%    ~             (p=0.222 n=5+5)
BM_Parse_Upb_FileDesc<UseArena, Alias>            34.1k ± 0%              34.7k ± 0%  +1.66%          (p=0.008 n=5+5)
BM_Parse_Upb_FileDesc<InitBlock, Copy>            36.4k ± 0%              36.7k ± 0%  +0.83%          (p=0.008 n=5+5)
BM_Parse_Upb_FileDesc<InitBlock, Alias>           33.3k ± 1%              34.1k ± 1%  +2.39%          (p=0.008 n=5+5)
BM_Parse_Proto2<FileDesc, NoArena, Copy>          68.1k ± 1%              68.0k ± 1%    ~             (p=0.421 n=5+5)
BM_Parse_Proto2<FileDesc, UseArena, Copy>         36.0k ± 1%              36.1k ± 1%    ~             (p=0.841 n=5+5)
BM_Parse_Proto2<FileDesc, InitBlock, Copy>        35.3k ± 1%              35.5k ± 1%    ~             (p=0.151 n=5+5)
BM_Parse_Proto2<FileDescSV, InitBlock, Alias>     30.7k ± 0%              30.9k ± 1%    ~             (p=0.151 n=5+5)
BM_SerializeDescriptor_Proto2                     20.3k ± 2%              19.7k ± 3%    ~             (p=0.151 n=5+5)
BM_SerializeDescriptor_Upb                        33.6k ± 0%              33.7k ± 2%    ~             (p=1.000 n=5+5)

name                                           old allocs/op           new allocs/op           delta
BM_ArenaOneAlloc                                   1.19 ± 0%               1.19 ± 0%    ~     (all samples are equal)
BM_ArenaInitialBlockOneAlloc                       0.19 ± 0%               0.19 ± 0%    ~     (all samples are equal)
BM_LoadAdsDescriptor_Upb<NoLayout>                6.00k ± 0%              6.00k ± 0%    ~     (all samples are equal)
BM_LoadAdsDescriptor_Upb<WithLayout>              5.99k ± 0%              5.99k ± 0%    ~     (all samples are equal)
BM_LoadAdsDescriptor_Proto2<NoLayout>             77.8k ± 0%              77.8k ± 0%    ~     (all samples are equal)
BM_LoadAdsDescriptor_Proto2<WithLayout>           79.0k ± 0%              79.0k ± 0%    ~     (all samples are equal)
BM_Parse_Upb_FileDesc<UseArena, Copy>              7.19 ± 0%               7.19 ± 0%    ~     (all samples are equal)
BM_Parse_Upb_FileDesc<UseArena, Alias>             7.19 ± 0%               7.19 ± 0%    ~     (all samples are equal)
BM_Parse_Upb_FileDesc<InitBlock, Copy>             0.19 ± 0%               0.19 ± 0%    ~     (all samples are equal)
BM_Parse_Upb_FileDesc<InitBlock, Alias>            0.19 ± 0%               0.19 ± 0%    ~     (all samples are equal)
BM_Parse_Proto2<FileDesc, NoArena, Copy>            765 ± 0%                765 ± 0%    ~     (all samples are equal)
BM_Parse_Proto2<FileDesc, UseArena, Copy>          10.2 ± 0%               10.2 ± 0%    ~     (all samples are equal)
BM_Parse_Proto2<FileDesc, InitBlock, Copy>         1.19 ± 0%               1.19 ± 0%    ~     (all samples are equal)
BM_Parse_Proto2<FileDescSV, InitBlock, Alias>      1.19 ± 0%               1.19 ± 0%    ~     (all samples are equal)
BM_SerializeDescriptor_Proto2                      0.19 ± 0%               0.19 ± 0%    ~     (all samples are equal)
BM_SerializeDescriptor_Upb                         0.19 ± 0%               0.19 ± 0%    ~     (all samples are equal)

name                                           old peak-mem(Bytes)/op  new peak-mem(Bytes)/op  delta
BM_ArenaOneAlloc                                    344 ± 0%                344 ± 0%    ~     (all samples are equal)
BM_ArenaInitialBlockOneAlloc                        112 ± 0%                112 ± 0%    ~     (all samples are equal)
BM_LoadAdsDescriptor_Upb<NoLayout>                9.64M ± 0%              9.64M ± 0%    ~     (all samples are equal)
BM_LoadAdsDescriptor_Upb<WithLayout>              9.70M ± 0%              9.70M ± 0%    ~     (all samples are equal)
BM_LoadAdsDescriptor_Proto2<NoLayout>             6.38M ± 0%              6.38M ± 0%    ~     (all samples are equal)
BM_LoadAdsDescriptor_Proto2<WithLayout>           6.44M ± 0%              6.44M ± 0%    ~     (all samples are equal)
BM_Parse_Upb_FileDesc<UseArena, Copy>             36.5k ± 0%              36.5k ± 0%    ~     (all samples are equal)
BM_Parse_Upb_FileDesc<UseArena, Alias>            36.5k ± 0%              36.5k ± 0%    ~     (all samples are equal)
BM_Parse_Upb_FileDesc<InitBlock, Copy>              112 ± 0%                112 ± 0%    ~     (all samples are equal)
BM_Parse_Upb_FileDesc<InitBlock, Alias>             112 ± 0%                112 ± 0%    ~     (all samples are equal)
BM_Parse_Proto2<FileDesc, NoArena, Copy>          35.8k ± 0%              35.8k ± 0%    ~     (all samples are equal)
BM_Parse_Proto2<FileDesc, UseArena, Copy>         40.8k ± 0%              40.8k ± 0%    ~     (all samples are equal)
BM_Parse_Proto2<FileDesc, InitBlock, Copy>          112 ± 0%                112 ± 0%    ~     (all samples are equal)
BM_Parse_Proto2<FileDescSV, InitBlock, Alias>       112 ± 0%                112 ± 0%    ~     (all samples are equal)
BM_SerializeDescriptor_Proto2                       112 ± 0%                112 ± 0%    ~     (all samples are equal)
BM_SerializeDescriptor_Upb                          112 ± 0%                112 ± 0%    ~     (all samples are equal)

name                                           old speed               new speed               delta
BM_LoadAdsDescriptor_Upb<NoLayout>              147MB/s ± 2%            148MB/s ± 3%    ~             (p=0.222 n=5+5)
BM_LoadAdsDescriptor_Upb<WithLayout>            137MB/s ± 3%            137MB/s ± 1%    ~             (p=0.905 n=5+4)
BM_LoadAdsDescriptor_Proto2<NoLayout>          68.6MB/s ± 1%           69.3MB/s ± 1%  +1.10%          (p=0.016 n=5+4)
BM_LoadAdsDescriptor_Proto2<WithLayout>        66.0MB/s ± 9%           67.4MB/s ± 3%    ~             (p=0.841 n=5+5)
BM_Parse_Upb_FileDesc<UseArena, Copy>           675MB/s ± 3%            667MB/s ± 3%    ~             (p=0.222 n=5+5)
BM_Parse_Upb_FileDesc<UseArena, Alias>          730MB/s ± 5%            718MB/s ± 5%    ~             (p=0.310 n=5+5)
BM_Parse_Upb_FileDesc<InitBlock, Copy>          663MB/s ±16%            685MB/s ± 2%    ~             (p=1.000 n=5+5)
BM_Parse_Upb_FileDesc<InitBlock, Alias>         723MB/s ±15%            712MB/s ±16%    ~             (p=0.421 n=5+5)
BM_Parse_Proto2<FileDesc, NoArena, Copy>        367MB/s ± 2%            372MB/s ± 2%    ~             (p=0.222 n=5+5)
BM_Parse_Proto2<FileDesc, UseArena, Copy>       694MB/s ± 2%            691MB/s ± 4%    ~             (p=0.841 n=5+5)
BM_Parse_Proto2<FileDesc, InitBlock, Copy>      714MB/s ± 3%            709MB/s ± 3%    ~             (p=0.690 n=5+5)
BM_Parse_Proto2<FileDescSV, InitBlock, Alias>   816MB/s ± 2%            816MB/s ± 3%    ~             (p=1.000 n=5+5)
BM_SerializeDescriptor_Proto2                  1.24GB/s ± 3%           1.28GB/s ± 3%    ~             (p=0.222 n=5+5)
BM_SerializeDescriptor_Upb                      734MB/s ± 3%            713MB/s ±13%    ~             (p=0.841 n=5+5)
```

PiperOrigin-RevId: 477770562
pull/13171/head
Joshua Haberman 2 years ago committed by Copybara-Service
parent 1e3deb013d
commit d5bd55cde1
  1. 25
      upb/decode.c
  2. 10
      upb/mini_table.h
  3. 8
      upb/mini_table_accessors.h
  4. 4
      upb/mini_table_accessors_test.cc
  5. 8
      upb/reflection/message.c

@ -940,7 +940,8 @@ int _upb_Decoder_GetVarintOp(const upb_MiniTable_Field* field) {
return kVarintOps[field->descriptortype];
}
int _upb_Decoder_GetDelimitedOp(const upb_MiniTable_Field* field) {
int _upb_Decoder_GetDelimitedOp(const upb_MiniTable* mt,
const upb_MiniTable_Field* field) {
enum { kRepeatedBase = 19 };
static const int8_t kDelimitedOps[] = {
@ -991,13 +992,24 @@ int _upb_Decoder_GetDelimitedOp(const upb_MiniTable_Field* field) {
int ndx = field->descriptortype;
if (upb_FieldMode_Get(field) == kUpb_FieldMode_Array) ndx += kRepeatedBase;
return kDelimitedOps[ndx];
int op = kDelimitedOps[ndx];
// If sub-message is not linked, treat as unknown.
if (op == kUpb_DecodeOp_SubMessage &&
!(field->mode & kUpb_LabelFlags_IsExtension)) {
const upb_MiniTable_Sub* sub = &mt->subs[field->submsg_index];
if (!sub->submsg) {
op = kUpb_DecodeOp_UnknownField;
}
}
return op;
}
UPB_FORCEINLINE
static const char* _upb_Decoder_DecodeWireValue(
upb_Decoder* d, const char* ptr, const upb_MiniTable_Field* field,
int wire_type, wireval* val, int* op) {
upb_Decoder* d, const char* ptr, const upb_MiniTable* mt,
const upb_MiniTable_Field* field, int wire_type, wireval* val, int* op) {
static const unsigned kFixed32OkMask = (1 << kUpb_FieldType_Float) |
(1 << kUpb_FieldType_Fixed32) |
(1 << kUpb_FieldType_SFixed32);
@ -1030,7 +1042,7 @@ static const char* _upb_Decoder_DecodeWireValue(
return ptr + 8;
case kUpb_WireType_Delimited:
ptr = upb_Decoder_DecodeSize(d, ptr, &val->size);
*op = _upb_Decoder_GetDelimitedOp(field);
*op = _upb_Decoder_GetDelimitedOp(mt, field);
return ptr;
case kUpb_WireType_StartGroup:
val->uint32_val = field->number;
@ -1189,7 +1201,8 @@ static const char* _upb_Decoder_DecodeMessage(upb_Decoder* d, const char* ptr,
}
field = _upb_Decoder_FindField(d, layout, field_number, &last_field_index);
ptr = _upb_Decoder_DecodeWireValue(d, ptr, field, wire_type, &val, &op);
ptr = _upb_Decoder_DecodeWireValue(d, ptr, layout, field, wire_type, &val,
&op);
if (op >= 0) {
ptr = _upb_Decoder_DecodeKnownField(d, ptr, msg, layout, field, op, &val);

@ -135,9 +135,19 @@ typedef enum {
upb_MiniTable* upb_MiniTable_Build(const char* data, size_t len,
upb_MiniTablePlatform platform,
upb_Arena* arena, upb_Status* status);
// Links a sub-message field to a MiniTable for that sub-message. If a
// sub-message field is not linked, it will be treated as an unknown field
// during parsing, and setting the field will not be allowed. It is possible
// to link the message field later, at which point it will no longer be treated
// as unknown. However there is no synchronization for this operation, which
// means parallel mutation requires external synchronization.
void upb_MiniTable_SetSubMessage(upb_MiniTable* table,
upb_MiniTable_Field* field,
const upb_MiniTable* sub);
// Links an enum field to a MiniTable for that enum. All enum fields must
// be linked prior to parsing.
void upb_MiniTable_SetSubEnum(upb_MiniTable* table, upb_MiniTable_Field* field,
const upb_MiniTable_Enum* sub);

@ -192,10 +192,12 @@ UPB_INLINE const upb_Message* upb_MiniTable_GetMessage(
}
UPB_INLINE void upb_MiniTable_SetMessage(upb_Message* msg,
const upb_MiniTable* mini_table,
const upb_MiniTable_Field* field,
upb_Message* sub_message) {
UPB_ASSERT(field->descriptortype == kUpb_FieldType_Message ||
field->descriptortype == kUpb_FieldType_Group);
UPB_ASSERT(mini_table->subs[field->submsg_index].submsg);
_upb_MiniTable_SetPresence(msg, field);
*UPB_PTR_AT(msg, field->offset, const upb_Message*) = sub_message;
}
@ -207,8 +209,10 @@ UPB_INLINE upb_Message* upb_MiniTable_GetMutableMessage(
field->descriptortype == kUpb_FieldType_Group);
upb_Message* sub_message = *UPB_PTR_AT(msg, field->offset, upb_Message*);
if (!sub_message) {
sub_message =
_upb_Message_New(mini_table->subs[field->submsg_index].submsg, arena);
const upb_MiniTable* sub_mini_table =
mini_table->subs[field->submsg_index].submsg;
UPB_ASSERT(sub_mini_table);
sub_message = _upb_Message_New(sub_mini_table, arena);
*UPB_PTR_AT(msg, field->offset, upb_Message*) = sub_message;
_upb_MiniTable_SetPresence(msg, field);
}

@ -289,7 +289,9 @@ TEST(GeneratedCode, SubMessage) {
upb_Message* new_nested_message =
protobuf_test_messages_proto2_TestAllTypesProto2_NestedMessage_new(arena);
upb_MiniTable_SetInt32(new_nested_message, nested_message_a_field, 123);
upb_MiniTable_SetMessage(msg, optional_message_field, new_nested_message);
upb_MiniTable_SetMessage(
msg, &protobuf_test_messages_proto2_TestAllTypesProto2_msg_init,
optional_message_field, new_nested_message);
upb_Message* mutable_message = upb_MiniTable_GetMutableMessage(
msg, &protobuf_test_messages_proto2_TestAllTypesProto2_msg_init,

@ -183,6 +183,14 @@ bool upb_Message_Set(upb_Message* msg, const upb_FieldDef* f,
memcpy(&ext->data, &val, sizeof(val));
} else {
const upb_MiniTable_Field* field = upb_FieldDef_MiniTable(f);
// Building reflection should always cause all sub-message types to be
// linked, but double-check here just for extra assurance.
UPB_ASSERT(!upb_FieldDef_IsSubMessage(f) ||
upb_MessageDef_MiniTable(upb_FieldDef_ContainingType(f))
->subs[field->submsg_index]
.submsg);
char* mem = UPB_PTR_AT(msg, field->offset, char);
memcpy(mem, &val, get_field_size(field));
if (field->presence > 0) {

Loading…
Cancel
Save