Protocol Buffers - Google's data interchange format (grpc依赖) https://developers.google.com/protocol-buffers/
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

1206 lines
43 KiB

/*
* Copyright (c) 2009-2021, Google LLC
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Google LLC nor the
* names of its contributors may be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY DIRECT,
* INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "upb/mini_table.h"
#include <inttypes.h>
#include <setjmp.h>
#include "upb/arena.h"
#include "upb/msg_internal.h"
#include "upb/upb.h"
// Must be last.
#include "upb/port_def.inc"
// Field type codes as they appear in the encoded mini-table string.
// upb_MtDataEncoder_PutField() maps upb_FieldType values onto these codes and
// upb_MiniTable_SetField() maps them back.
typedef enum {
kUpb_EncodedType_Double = 0,
kUpb_EncodedType_Float = 1,
kUpb_EncodedType_Fixed32 = 2,
kUpb_EncodedType_Fixed64 = 3,
kUpb_EncodedType_SFixed32 = 4,
kUpb_EncodedType_SFixed64 = 5,
kUpb_EncodedType_Int32 = 6,
kUpb_EncodedType_UInt32 = 7,
kUpb_EncodedType_SInt32 = 8,
kUpb_EncodedType_Int64 = 9,
kUpb_EncodedType_UInt64 = 10,
kUpb_EncodedType_SInt64 = 11,
kUpb_EncodedType_Enum = 12,
kUpb_EncodedType_Bool = 13,
kUpb_EncodedType_Bytes = 14,
kUpb_EncodedType_String = 15,
kUpb_EncodedType_Group = 16,
kUpb_EncodedType_Message = 17,
// Added to one of the codes above when the field is repeated.
kUpb_EncodedType_RepeatedBase = 20,
} upb_EncodedType;
// Bit flags for per-field modifiers in the encoded form. They are produced by
// upb_MtDataEncoder_PutField() and interpreted by upb_MtDecoder_ModifyField().
typedef enum {
// The field's packedness is the opposite of the message-level default.
kUpb_EncodedFieldModifier_FlipPacked = 1 << 0,
kUpb_EncodedFieldModifier_IsClosedEnum = 1 << 1,
// upb only.
kUpb_EncodedFieldModifier_IsProto3Singular = 1 << 2,
kUpb_EncodedFieldModifier_IsRequired = 1 << 3,
} upb_EncodedFieldModifier;
// Raw character values that delimit the regions of the encoded string. Each
// Min/Max pair is an inclusive range of characters.
enum {
// Field type characters.
kUpb_EncodedValue_MinField = ' ',
kUpb_EncodedValue_MaxField = 'K',
// Digits of a modifier varint.
kUpb_EncodedValue_MinModifier = 'L',
kUpb_EncodedValue_MaxModifier = '[',
// Written before the first oneof; the decoder switches to oneof parsing
// when it sees this character.
kUpb_EncodedValue_End = '^',
// Digits of a skip varint (a gap in field numbers or enum values).
kUpb_EncodedValue_MinSkip = '_',
kUpb_EncodedValue_MaxSkip = '~',
// Separates one oneof from the next. Note this is the same character as
// kUpb_EncodedValue_MaxSkip; the two are tested in different parsing loops.
kUpb_EncodedValue_OneofSeparator = '~',
// Separates field numbers inside a single oneof.
kUpb_EncodedValue_FieldSeparator = '|',
// Digits of a oneof field-number varint.
kUpb_EncodedValue_MinOneofField = ' ',
kUpb_EncodedValue_MaxOneofField = 'b',
kUpb_EncodedValue_MaxEnumMask = 'A',
};
// Converts a base-92 digit value (0..91) to its printable character.
//
// The alphabet is the printable ASCII range ' '..'~' with '"', '\'' and '\\'
// removed. `ch` must be in [0, 92); this is only checked via UPB_ASSERT.
char upb_ToBase92(int8_t ch) {
  // 92 digits, in increasing character order.
  static const char kAlphabet[] =
      " !#$%&()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ"
      "[]^_`abcdefghijklmnopqrstuvwxyz{|}~";
  UPB_ASSERT(ch >= 0 && ch < 92);
  return kAlphabet[ch];
}
// Converts a printable character back to its base-92 digit value (0..91).
//
// Returns -1 for any character outside ' '..'~' and for the three printable
// characters that are not part of the alphabet ('"', '\'' and '\\').
char upb_FromBase92(uint8_t ch) {
  // Indexed by (ch - ' '); -1 marks characters that have no digit value.
  static const int8_t kDigitOf[] = {
      0,  1,  -1, 2,  3,  4,  5,  -1, 6,  7,  8,  9,  10, 11, 12, 13,
      14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
      30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45,
      46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, -1, 58, 59, 60,
      61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76,
      77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91,
  };
  const bool printable = ch >= ' ' && ch <= '~';
  return printable ? kDigitOf[ch - ' '] : -1;
}
// Returns true unless `type` is one of the types that can never be packed:
// string, bytes, message or group.
bool upb_IsTypePackable(upb_FieldType type) {
  // clang-format off
  static const unsigned kUnpackableTypes =
      (1 << kUpb_FieldType_String) |
      (1 << kUpb_FieldType_Bytes) |
      (1 << kUpb_FieldType_Message) |
      (1 << kUpb_FieldType_Group);
  // clang-format on
  const unsigned type_bit = 1 << type;
  return (type_bit & kUnpackableTypes) == 0;
}
/** upb_MtDataEncoder *********************************************************/
// Encoder state while an enum is being written (between
// upb_MtDataEncoder_StartEnum() and upb_MtDataEncoder_EndEnum()).
typedef struct {
// Pending values not yet written, as a bitmask: bit N stands for the value
// (last_written_value + N). Emitted and cleared by FlushDenseEnumMask().
uint64_t present_values_mask;
// Base value for present_values_mask. Advanced by 5 on every flush and by
// the skip amount whenever a skip is written.
uint32_t last_written_value;
} upb_MtDataEncoderInternal_EnumState;
// Encoder state while a message is being written.
typedef struct {
// Modifiers passed to upb_MtDataEncoder_StartMessage(); consulted by
// PutField() to determine the message's default packedness.
uint64_t msg_modifiers;
// Number of the most recent field passed to PutField() (0 before the first).
uint32_t last_field_num;
// Tracks which separator, if any, the next oneof call has to emit.
enum {
kUpb_OneofState_NotStarted,
kUpb_OneofState_StartedOneof,
kUpb_OneofState_EmittedOneofField,
} oneof_state;
} upb_MtDataEncoderInternal_MsgState;
// Private encoder state, stored in place inside upb_MtDataEncoder::internal
// (see upb_MtDataEncoder_GetInternal()).
typedef struct {
char* buf_start; // Only for checking kUpb_MtDataEncoder_MinSize.
// Only one member is in use at a time: enum_state while encoding an enum,
// msg_state while encoding a message.
union {
upb_MtDataEncoderInternal_EnumState enum_state;
upb_MtDataEncoderInternal_MsgState msg_state;
} state;
} upb_MtDataEncoderInternal;
// Returns the private state embedded in `e`, recording `buf_start` as the
// start of the buffer that the current public call writes into.
static upb_MtDataEncoderInternal* upb_MtDataEncoder_GetInternal(
    upb_MtDataEncoder* e, char* buf_start) {
  // The private state must fit in the opaque storage of the public struct.
  UPB_ASSERT(sizeof(upb_MtDataEncoderInternal) <= sizeof(e->internal));
  upb_MtDataEncoderInternal* state = (upb_MtDataEncoderInternal*)e->internal;
  state->buf_start = buf_start;
  return state;
}
// Writes the base-92 character for digit value `ch` at `ptr`.
//
// Returns the position after the written character, or NULL when `ptr` has
// already reached e->end.
static char* upb_MtDataEncoder_Put(upb_MtDataEncoder* e, char* ptr, char ch) {
  const upb_MtDataEncoderInternal* state =
      (const upb_MtDataEncoderInternal*)e->internal;
  // A single public call must stay within kUpb_MtDataEncoder_MinSize bytes.
  UPB_ASSERT(ptr - state->buf_start < kUpb_MtDataEncoder_MinSize);
  if (ptr == e->end) return NULL;
  ptr[0] = upb_ToBase92(ch);
  return ptr + 1;
}
// Writes `val` as a little-endian varint whose digits are restricted to the
// character range [min, max]. At least one digit is always written.
//
// Returns the advanced write pointer, or NULL if the buffer ran out.
static char* upb_MtDataEncoder_PutBase92Varint(upb_MtDataEncoder* e, char* ptr,
                                               uint32_t val, int min, int max) {
  const int lowest_digit = upb_FromBase92(min);
  const int highest_digit = upb_FromBase92(max);
  // Number of value bits carried by each digit.
  const int shift = _upb_Log2Ceiling(highest_digit - lowest_digit + 1);
  UPB_ASSERT(shift <= 6);
  const uint32_t mask = (1 << shift) - 1;
  for (;;) {
    const uint32_t bits = val & mask;
    ptr = upb_MtDataEncoder_Put(e, ptr, bits + lowest_digit);
    if (!ptr) return NULL;
    val >>= shift;
    if (!val) return ptr;
  }
}
3 years ago
// Writes `mod` as a modifier varint. A zero `mod` writes nothing and returns
// `ptr` unchanged; otherwise returns the advanced pointer, or NULL if the
// buffer ran out.
char* upb_MtDataEncoder_PutModifier(upb_MtDataEncoder* e, char* ptr,
                                    uint64_t mod) {
  if (!mod) return ptr;
  return upb_MtDataEncoder_PutBase92Varint(e, ptr, mod,
                                           kUpb_EncodedValue_MinModifier,
                                           kUpb_EncodedValue_MaxModifier);
}
// Begins encoding a message: resets the per-message encoder state and writes
// the message modifiers (nothing is written when `msg_mod` is zero).
//
// Returns the advanced write pointer, or NULL if the buffer ran out.
char* upb_MtDataEncoder_StartMessage(upb_MtDataEncoder* e, char* ptr,
                                     uint64_t msg_mod) {
  upb_MtDataEncoderInternal* in = upb_MtDataEncoder_GetInternal(e, ptr);
  in->state.msg_state.msg_modifiers = msg_mod;
  in->state.msg_state.last_field_num = 0;
  in->state.msg_state.oneof_state = kUpb_OneofState_NotStarted;
  return upb_MtDataEncoder_PutModifier(e, ptr, msg_mod);
}
// Appends the encoding of one field at `ptr`.
//
// Fields must be passed in strictly increasing field-number order: a
// `field_num` that is not greater than the previous one returns NULL. A gap
// in field numbers is written as a skip varint before the type character,
// and the type character is followed by a modifier varint when any encoded
// modifier bit is set.
//
// Returns the advanced write pointer, or NULL on the ordering error above or
// when the buffer runs out.
char* upb_MtDataEncoder_PutField(upb_MtDataEncoder* e, char* ptr,
upb_FieldType type, uint32_t field_num,
uint64_t field_mod) {
// Maps upb_FieldType to the type code used in the encoded form.
static const char kUpb_TypeToEncoded[] = {
[kUpb_FieldType_Double] = kUpb_EncodedType_Double,
[kUpb_FieldType_Float] = kUpb_EncodedType_Float,
[kUpb_FieldType_Int64] = kUpb_EncodedType_Int64,
[kUpb_FieldType_UInt64] = kUpb_EncodedType_UInt64,
[kUpb_FieldType_Int32] = kUpb_EncodedType_Int32,
[kUpb_FieldType_Fixed64] = kUpb_EncodedType_Fixed64,
[kUpb_FieldType_Fixed32] = kUpb_EncodedType_Fixed32,
[kUpb_FieldType_Bool] = kUpb_EncodedType_Bool,
[kUpb_FieldType_String] = kUpb_EncodedType_String,
[kUpb_FieldType_Group] = kUpb_EncodedType_Group,
[kUpb_FieldType_Message] = kUpb_EncodedType_Message,
[kUpb_FieldType_Bytes] = kUpb_EncodedType_Bytes,
[kUpb_FieldType_UInt32] = kUpb_EncodedType_UInt32,
[kUpb_FieldType_Enum] = kUpb_EncodedType_Enum,
[kUpb_FieldType_SFixed32] = kUpb_EncodedType_SFixed32,
[kUpb_FieldType_SFixed64] = kUpb_EncodedType_SFixed64,
[kUpb_FieldType_SInt32] = kUpb_EncodedType_SInt32,
[kUpb_FieldType_SInt64] = kUpb_EncodedType_SInt64,
};
upb_MtDataEncoderInternal* in = upb_MtDataEncoder_GetInternal(e, ptr);
if (field_num <= in->state.msg_state.last_field_num) return NULL;
if (in->state.msg_state.last_field_num + 1 != field_num) {
// Put skip.
UPB_ASSERT(field_num > in->state.msg_state.last_field_num);
uint32_t skip = field_num - in->state.msg_state.last_field_num;
ptr = upb_MtDataEncoder_PutBase92Varint(
e, ptr, skip, kUpb_EncodedValue_MinSkip, kUpb_EncodedValue_MaxSkip);
if (!ptr) return NULL;
}
in->state.msg_state.last_field_num = field_num;
uint32_t encoded_modifiers = 0;
// Put field type.
// An enum without the IsClosedEnum modifier is encoded as a plain int32.
if (type == kUpb_FieldType_Enum &&
!(field_mod & kUpb_FieldModifier_IsClosedEnum)) {
type = kUpb_FieldType_Int32;
}
// NOTE(review): `type` indexes the table without a bounds check.
int encoded_type = kUpb_TypeToEncoded[type];
if (field_mod & kUpb_FieldModifier_IsRepeated) {
// Repeated fields shift the type number up (unlike other modifiers which
// are bit flags).
encoded_type += kUpb_EncodedType_RepeatedBase;
if (upb_IsTypePackable(type)) {
// Only record packedness when it differs from the message-level default.
bool field_is_packed = field_mod & kUpb_FieldModifier_IsPacked;
bool default_is_packed = in->state.msg_state.msg_modifiers &
kUpb_MessageModifier_DefaultIsPacked;
if (field_is_packed != default_is_packed) {
encoded_modifiers |= kUpb_EncodedFieldModifier_FlipPacked;
}
}
}
ptr = upb_MtDataEncoder_Put(e, ptr, encoded_type);
if (!ptr) return NULL;
if (field_mod & kUpb_FieldModifier_IsProto3Singular) {
encoded_modifiers |= kUpb_EncodedFieldModifier_IsProto3Singular;
}
if (field_mod & kUpb_FieldModifier_IsRequired) {
encoded_modifiers |= kUpb_EncodedFieldModifier_IsRequired;
}
// Writes nothing when no modifier bit is set.
return upb_MtDataEncoder_PutModifier(e, ptr, encoded_modifiers);
}
// Begins a new oneof. The first oneof of a message is introduced by the End
// marker; every later one is introduced by the oneof separator.
//
// Returns the advanced write pointer, or NULL if the buffer ran out.
char* upb_MtDataEncoder_StartOneof(upb_MtDataEncoder* e, char* ptr) {
  upb_MtDataEncoderInternal* in = upb_MtDataEncoder_GetInternal(e, ptr);
  const bool is_first_oneof =
      in->state.msg_state.oneof_state == kUpb_OneofState_NotStarted;
  const int marker = is_first_oneof ? kUpb_EncodedValue_End
                                    : kUpb_EncodedValue_OneofSeparator;
  ptr = upb_MtDataEncoder_Put(e, ptr, upb_FromBase92(marker));
  in->state.msg_state.oneof_state = kUpb_OneofState_StartedOneof;
  return ptr;
}
// Adds field number `field_num` to the oneof currently being written. A field
// separator is written first when the oneof already has a member.
//
// Returns the advanced write pointer, or NULL if the buffer ran out.
char* upb_MtDataEncoder_PutOneofField(upb_MtDataEncoder* e, char* ptr,
                                      uint32_t field_num) {
  upb_MtDataEncoderInternal* in = upb_MtDataEncoder_GetInternal(e, ptr);
  const bool needs_separator =
      in->state.msg_state.oneof_state == kUpb_OneofState_EmittedOneofField;
  if (needs_separator) {
    ptr = upb_MtDataEncoder_Put(
        e, ptr, upb_FromBase92(kUpb_EncodedValue_FieldSeparator));
    if (ptr == NULL) return NULL;
  }
  in->state.msg_state.oneof_state = kUpb_OneofState_EmittedOneofField;
  // Field numbers use six bits per digit (digit values 0..63).
  return upb_MtDataEncoder_PutBase92Varint(e, ptr, field_num, upb_ToBase92(0),
                                           upb_ToBase92(63));
}
// Resets the encoder state in preparation for encoding an enum.
void upb_MtDataEncoder_StartEnum(upb_MtDataEncoder* e) {
  upb_MtDataEncoderInternal_EnumState* es =
      &upb_MtDataEncoder_GetInternal(e, NULL)->state.enum_state;
  es->present_values_mask = 0;
  es->last_written_value = 0;
}
// Writes the pending value mask as a single character, clears it, and
// advances the base value by the five values that one mask covers.
//
// Returns the advanced write pointer, or NULL if the buffer ran out (the
// state is updated either way).
static char* upb_MtDataEncoder_FlushDenseEnumMask(upb_MtDataEncoder* e,
                                                  char* ptr) {
  upb_MtDataEncoderInternal_EnumState* es =
      &((upb_MtDataEncoderInternal*)e->internal)->state.enum_state;
  char* next = upb_MtDataEncoder_Put(e, ptr, es->present_values_mask);
  es->present_values_mask = 0;
  es->last_written_value += 5;
  return next;
}
// Adds the value `val` to the enum being encoded.
//
// Values are accumulated in a bitmask relative to the last written base value
// and must not be smaller than that base (checked via UPB_ASSERT only). When
// `val` is 5 or more past the base, any pending mask is flushed first, and a
// remaining gap of 5 or more is written as a skip varint.
//
// Returns the (possibly advanced) write pointer, or NULL if the buffer ran
// out.
char* upb_MtDataEncoder_PutEnumValue(upb_MtDataEncoder* e, char* ptr,
uint32_t val) {
// TODO(b/229641772): optimize this encoding.
upb_MtDataEncoderInternal* in = upb_MtDataEncoder_GetInternal(e, ptr);
UPB_ASSERT(val >= in->state.enum_state.last_written_value);
uint32_t delta = val - in->state.enum_state.last_written_value;
// `val` does not fit in the current mask: emit the mask, which also moves
// the base forward by 5.
if (delta >= 5 && in->state.enum_state.present_values_mask) {
ptr = upb_MtDataEncoder_FlushDenseEnumMask(e, ptr);
if (!ptr) {
return NULL;
}
delta -= 5;
}
// Still too far away: skip directly to `val`, which becomes the new base.
if (delta >= 5) {
// NOTE(review): unlike the flush above, a NULL result here is not checked
// before the state is updated; the NULL is still returned to the caller.
ptr = upb_MtDataEncoder_PutBase92Varint(
e, ptr, delta, kUpb_EncodedValue_MinSkip, kUpb_EncodedValue_MaxSkip);
in->state.enum_state.last_written_value += delta;
delta = 0;
}
// No bit at or above `delta` may already be set in the mask.
UPB_ASSERT((in->state.enum_state.present_values_mask >> delta) == 0);
in->state.enum_state.present_values_mask |= 1ULL << delta;
return ptr;
}
// Finishes an enum by flushing any values still pending in the mask.
//
// Returns `ptr` unchanged when nothing is pending; otherwise the advanced
// write pointer, or NULL if the buffer ran out.
char* upb_MtDataEncoder_EndEnum(upb_MtDataEncoder* e, char* ptr) {
  upb_MtDataEncoderInternal* in = upb_MtDataEncoder_GetInternal(e, ptr);
  return in->state.enum_state.present_values_mask
             ? upb_MtDataEncoder_FlushDenseEnumMask(e, ptr)
             : ptr;
}
// Linearly searches `table` for the field with the given number.
//
// Returns a pointer to the first matching field, or NULL if there is none.
const upb_MiniTable_Field* upb_MiniTable_FindFieldByNumber(
    const upb_MiniTable* table, uint32_t number) {
  const int count = table->field_count;
  int i = 0;
  while (i < count && table->fields[i].number != number) {
    i++;
  }
  return i < count ? &table->fields[i] : NULL;
}
/** Data decoder **************************************************************/
// Kind of storage a layout item stands for.
// Note: we sort by this number when calculating layout order (see
// upb_MtDecoder_CompareFields()), so the order of the enumerators matters.
typedef enum {
kUpb_LayoutItemType_OneofCase, // Oneof case.
kUpb_LayoutItemType_OneofField, // Oneof field data.
kUpb_LayoutItemType_Field, // Non-oneof field data.
kUpb_LayoutItemType_Max = kUpb_LayoutItemType_Field,
} upb_LayoutItemType;
// All-ones uint16_t. As a upb_LayoutItem::field_index it means "this oneof has
// no fields yet"; stored in a field's offset it terminates the oneof's linked
// list of fields.
#define kUpb_LayoutItem_IndexSentinel ((uint16_t)-1)
// One unit of storage to be placed in the message layout.
typedef struct {
// Index of the corresponding field. When this is a oneof field, the field's
// offset will be the index of the next field in a linked list.
uint16_t field_index;
// Byte offset within the message, filled in by upb_MtDecoder_AssignOffsets().
uint16_t offset;
// Storage representation; determines the size and alignment of the item.
upb_FieldRep rep;
upb_LayoutItemType type;
} upb_LayoutItem;
// Growable array of layout items (grown by upb_MtDecoder_PushItem()).
typedef struct {
upb_LayoutItem* data;
size_t size; // Number of items in use.
size_t capacity; // Number of items `data` has room for.
} upb_LayoutItemVector;
// State of one mini-table decoding operation.
typedef struct {
// One past the last input character; set by upb_MtDecoder_Parse().
const char* end;
// Table being built. The parser treats a NULL table as the extension case.
upb_MiniTable* table;
// Field array being filled in (also installed as table->fields).
upb_MiniTable_Field* fields;
upb_MiniTablePlatform platform;
// Layout items collected for sorting and offset assignment.
upb_LayoutItemVector vec;
upb_Arena* arena;
// Receives the message when an error is raised.
upb_Status* status;
// When building enums.
upb_MiniTable_Enum* enum_table;
uint32_t enum_value_count;
uint32_t enum_data_count;
uint32_t enum_data_capacity;
// Jump target used by upb_MtDecoder_ErrorFormat() to abort decoding.
jmp_buf err;
} upb_MtDecoder;
// Records a printf-style error message in d->status, prefixed with
// "Error building mini table: ", then aborts decoding by jumping to d->err.
// Never returns.
UPB_PRINTF(2, 3)
UPB_NORETURN static void upb_MtDecoder_ErrorFormat(upb_MtDecoder* d,
                                                   const char* fmt, ...) {
  va_list args;
  va_start(args, fmt);
  upb_Status_SetErrorMessage(d->status, "Error building mini table: ");
  upb_Status_VAppendErrorFormat(d->status, fmt, args);
  va_end(args);
  UPB_LONGJMP(d->err, 1);
}
// Raises an "Out of memory" decoding error (which does not return) when `ptr`
// is NULL; otherwise does nothing.
static void upb_MtDecoder_CheckOutOfMemory(upb_MtDecoder* d, const void* ptr) {
  if (ptr != NULL) return;
  upb_MtDecoder_ErrorFormat(d, "Out of memory");
}
// In each field's offset, we temporarily store a presence classifier until
// real offsets are assigned:
enum PresenceClass {
kNoPresence = 0,
kHasbitPresence = 1,
kRequiredPresence = 2,
kOneofBase = 3,
// Values >= kOneofBase indicate that this field is in a oneof. The value is
// either (index of the next field in this oneof's linked list + kOneofBase)
// or kUpb_LayoutItem_IndexSentinel for the last field in the list.
// NOTE(review): an earlier version of this comment described negative values
// referring to a oneof number; no visible code stores such values - confirm.
};
// Decodes a little-endian varint whose digits lie in the character range
// [min, max]. The first digit, `first_ch`, has already been consumed by the
// caller; further digits are read from `ptr` for as long as the next
// character is inside the range and d->end has not been reached.
//
// Stores the decoded value in *out_val and returns the position of the first
// character that is not part of the varint. Raises an "Overlong varint"
// decoding error when the accumulated shift reaches 32 bits.
static const char* upb_MiniTable_DecodeBase92Varint(upb_MtDecoder* d,
const char* ptr,
char first_ch, uint8_t min,
uint8_t max,
uint32_t* out_val) {
uint32_t val = 0;
uint32_t shift = 0;
// NOTE(review): the encoder computes its shift from (max - min + 1), this
// decoder from (max - min). Presumably they agree because every range used
// spans a power-of-two number of digits - confirm against _upb_Log2Ceiling.
const int bits_per_char =
_upb_Log2Ceiling(upb_FromBase92(max) - upb_FromBase92(min));
char ch = first_ch;
while (1) {
uint32_t bits = upb_FromBase92(ch) - upb_FromBase92(min);
val |= bits << shift;
// Stop at end of input or at the first character outside [min, max].
if (ptr == d->end || *ptr < min || max < *ptr) {
*out_val = val;
return ptr;
}
ch = *ptr++;
shift += bits_per_char;
if (shift >= 32) upb_MtDecoder_ErrorFormat(d, "Overlong varint");
}
}
3 years ago
// Returns true if `field` needs an entry in the table's `subs` array, which
// is the case for message, group and enum fields.
//
// Side effect: a string field is rewritten to a bytes field when the message
// does not carry the ValidateUtf8 modifier.
static bool upb_MiniTable_HasSub(upb_MiniTable_Field* field,
                                 uint64_t msg_modifiers) {
  switch (field->descriptortype) {
    case kUpb_FieldType_Message:
    case kUpb_FieldType_Group:
    case kUpb_FieldType_Enum:
      return true;
    case kUpb_FieldType_String:
      if (!(msg_modifiers & kUpb_MessageModifier_ValidateUtf8)) {
        field->descriptortype = kUpb_FieldType_Bytes;
      }
      return false;
    default:
      return false;
  }
}
// Returns true if `field` is an array whose element type can be packed.
static bool upb_MtDecoder_FieldIsPackable(upb_MiniTable_Field* field) {
  if (!(field->mode & kUpb_FieldMode_Array)) return false;
  return upb_IsTypePackable(field->descriptortype);
}
3 years ago
// Sets the type of `field` and derives the properties that depend on it:
//  - submsg_index: the next value of *sub_count (which is then incremented)
//    for fields that need a sub-table, 0 if `sub_count` is NULL, and
//    kUpb_NoSub for all other fields;
//  - the IsPacked flag, for packable fields of messages whose default is
//    packed.
static void upb_MiniTable_SetTypeAndSub(upb_MiniTable_Field* field,
                                        upb_FieldType type, uint32_t* sub_count,
                                        uint64_t msg_modifiers) {
  field->descriptortype = type;

  // Note: upb_MiniTable_HasSub() may rewrite descriptortype (string -> bytes).
  if (!upb_MiniTable_HasSub(field, msg_modifiers)) {
    field->submsg_index = kUpb_NoSub;
  } else if (sub_count) {
    field->submsg_index = (*sub_count)++;
  } else {
    field->submsg_index = 0;
  }

  const bool default_is_packed =
      (msg_modifiers & kUpb_MessageModifier_DefaultIsPacked) != 0;
  if (default_is_packed && upb_MtDecoder_FieldIsPackable(field)) {
    field->mode |= kUpb_LabelFlags_IsPacked;
  }
}
// Initializes `field` from the encoded type character `ch`.
//
// Characters at or above the one for kUpb_EncodedType_RepeatedBase denote a
// repeated field: the field becomes an array with pointer representation and
// no presence. All other characters denote a scalar whose representation
// comes from the type and which starts out with hasbit presence. The presence
// class is stored temporarily in field->offset.
//
// Raises an "Invalid field type" decoding error when the character does not
// map to a known type.
static void upb_MiniTable_SetField(upb_MtDecoder* d, uint8_t ch,
                                   upb_MiniTable_Field* field,
                                   uint64_t msg_modifiers,
                                   uint32_t* sub_count) {
  // Storage representation for each encoded scalar type.
  static const char kUpb_EncodedToFieldRep[] = {
      [kUpb_EncodedType_Double] = kUpb_FieldRep_8Byte,
      [kUpb_EncodedType_Float] = kUpb_FieldRep_4Byte,
      [kUpb_EncodedType_Int64] = kUpb_FieldRep_8Byte,
      [kUpb_EncodedType_UInt64] = kUpb_FieldRep_8Byte,
      [kUpb_EncodedType_Int32] = kUpb_FieldRep_4Byte,
      [kUpb_EncodedType_Fixed64] = kUpb_FieldRep_8Byte,
      [kUpb_EncodedType_Fixed32] = kUpb_FieldRep_4Byte,
      [kUpb_EncodedType_Bool] = kUpb_FieldRep_1Byte,
      [kUpb_EncodedType_String] = kUpb_FieldRep_StringView,
      [kUpb_EncodedType_Group] = kUpb_FieldRep_Pointer,
      [kUpb_EncodedType_Message] = kUpb_FieldRep_Pointer,
      [kUpb_EncodedType_Bytes] = kUpb_FieldRep_StringView,
      [kUpb_EncodedType_UInt32] = kUpb_FieldRep_4Byte,
      [kUpb_EncodedType_Enum] = kUpb_FieldRep_4Byte,
      [kUpb_EncodedType_SFixed32] = kUpb_FieldRep_4Byte,
      [kUpb_EncodedType_SFixed64] = kUpb_FieldRep_8Byte,
      [kUpb_EncodedType_SInt32] = kUpb_FieldRep_4Byte,
      [kUpb_EncodedType_SInt64] = kUpb_FieldRep_8Byte,
  };

  // upb_FieldType for each encoded type.
  static const char kUpb_EncodedToType[] = {
      [kUpb_EncodedType_Double] = kUpb_FieldType_Double,
      [kUpb_EncodedType_Float] = kUpb_FieldType_Float,
      [kUpb_EncodedType_Int64] = kUpb_FieldType_Int64,
      [kUpb_EncodedType_UInt64] = kUpb_FieldType_UInt64,
      [kUpb_EncodedType_Int32] = kUpb_FieldType_Int32,
      [kUpb_EncodedType_Fixed64] = kUpb_FieldType_Fixed64,
      [kUpb_EncodedType_Fixed32] = kUpb_FieldType_Fixed32,
      [kUpb_EncodedType_Bool] = kUpb_FieldType_Bool,
      [kUpb_EncodedType_String] = kUpb_FieldType_String,
      [kUpb_EncodedType_Group] = kUpb_FieldType_Group,
      [kUpb_EncodedType_Message] = kUpb_FieldType_Message,
      [kUpb_EncodedType_Bytes] = kUpb_FieldType_Bytes,
      [kUpb_EncodedType_UInt32] = kUpb_FieldType_UInt32,
      [kUpb_EncodedType_Enum] = kUpb_FieldType_Enum,
      [kUpb_EncodedType_SFixed32] = kUpb_FieldType_SFixed32,
      [kUpb_EncodedType_SFixed64] = kUpb_FieldType_SFixed64,
      [kUpb_EncodedType_SInt32] = kUpb_FieldType_SInt32,
      [kUpb_EncodedType_SInt64] = kUpb_FieldType_SInt64,
  };

  int8_t type = upb_FromBase92(ch);
  if (ch >= upb_ToBase92(kUpb_EncodedType_RepeatedBase)) {
    type -= kUpb_EncodedType_RepeatedBase;
    field->mode = kUpb_FieldMode_Array;
    field->mode |= kUpb_FieldRep_Pointer << kUpb_FieldRep_Shift;
    field->offset = kNoPresence;
  } else {
    // A negative `type` converts to a huge unsigned value here, so invalid
    // characters are rejected by the same comparison.
    if (type >= sizeof(kUpb_EncodedToFieldRep)) {
      upb_MtDecoder_ErrorFormat(d, "Invalid field type: %d", (int)type);
      UPB_UNREACHABLE();
    }
    field->mode = kUpb_FieldMode_Scalar;
    field->mode |= kUpb_EncodedToFieldRep[type] << kUpb_FieldRep_Shift;
    field->offset = kHasbitPresence;
  }
  if (type >= sizeof(kUpb_EncodedToType)) {
    upb_MtDecoder_ErrorFormat(d, "Invalid field type: %d", (int)type);
    UPB_UNREACHABLE();
  }
  upb_MiniTable_SetTypeAndSub(field, kUpb_EncodedToType[type], sub_count,
                              msg_modifiers);
}
3 years ago
// Applies the decoded `field_modifiers` bits to `field`.
//
//  - FlipPacked toggles the IsPacked flag; it is an error on a field that is
//    not packable.
//  - IsProto3Singular changes the presence class stored in field->offset to
//    kNoPresence, IsRequired changes it to kRequiredPresence. Both are only
//    valid on a field that currently has hasbit presence, and they are
//    mutually exclusive.
//
// Violations raise a decoding error (which does not return). The
// `message_modifiers` parameter is not used by the body.
static void upb_MtDecoder_ModifyField(upb_MtDecoder* d,
uint32_t message_modifiers,
uint32_t field_modifiers,
upb_MiniTable_Field* field) {
if (field_modifiers & kUpb_EncodedFieldModifier_FlipPacked) {
if (!upb_MtDecoder_FieldIsPackable(field)) {
upb_MtDecoder_ErrorFormat(
d, "Cannot flip packed on unpackable field %" PRIu32, field->number);
UPB_UNREACHABLE();
}
field->mode ^= kUpb_LabelFlags_IsPacked;
}
bool singular = field_modifiers & kUpb_EncodedFieldModifier_IsProto3Singular;
bool required = field_modifiers & kUpb_EncodedFieldModifier_IsRequired;
// Validate.
// Repeated fields were given kNoPresence by upb_MiniTable_SetField(), so
// they fail this check.
if ((singular || required) && field->offset != kHasbitPresence) {
upb_MtDecoder_ErrorFormat(
d, "Invalid modifier(s) for repeated field %" PRIu32, field->number);
UPB_UNREACHABLE();
}
if (singular && required) {
upb_MtDecoder_ErrorFormat(
d, "Field %" PRIu32 " cannot be both singular and required",
field->number);
UPB_UNREACHABLE();
}
if (singular) field->offset = kNoPresence;
if (required) {
field->offset = kRequiredPresence;
}
}
// Appends `item` to the decoder's layout item vector, growing the vector
// (to at least 8 items, otherwise doubling) when it is full.
//
// Raises an "Out of memory" decoding error if the vector cannot be grown. In
// that case d->vec still refers to the old, valid buffer, so whoever owns the
// buffer can still free or reuse it.
static void upb_MtDecoder_PushItem(upb_MtDecoder* d, upb_LayoutItem item) {
  if (d->vec.size == d->vec.capacity) {
    size_t new_cap = UPB_MAX(8, d->vec.size * 2);
    // realloc() leaves the original block allocated on failure, so only
    // replace the stored pointer once the call is known to have succeeded.
    upb_LayoutItem* grown =
        realloc(d->vec.data, new_cap * sizeof(*d->vec.data));
    upb_MtDecoder_CheckOutOfMemory(d, grown);
    d->vec.data = grown;
    d->vec.capacity = new_cap;
  }
  d->vec.data[d->vec.size++] = item;
}
// Pushes the two layout items that make up one oneof: one for the shared
// field data (using the representation accumulated in `item`) and one for the
// 4-byte oneof case. Raises an "Empty oneof" decoding error if no field was
// added to the oneof.
static void upb_MtDecoder_PushOneof(upb_MtDecoder* d, upb_LayoutItem item) {
  if (item.field_index == kUpb_LayoutItem_IndexSentinel) {
    upb_MtDecoder_ErrorFormat(d, "Empty oneof");
    UPB_UNREACHABLE();
  }

  // The list head was stored biased by kOneofBase; recover the field index.
  upb_LayoutItem data_item = item;
  data_item.field_index = item.field_index - kOneofBase;
  data_item.type = kUpb_LayoutItemType_OneofField;

  upb_LayoutItem case_item = data_item;
  case_item.rep = kUpb_FieldRep_4Byte;  // Field Number.
  case_item.type = kUpb_LayoutItemType_OneofCase;

  upb_MtDecoder_PushItem(d, data_item);
  upb_MtDecoder_PushItem(d, case_item);
}
// Returns the size in bytes of a field with representation `rep` on the
// target `platform`. Pointers and string views are half as large on the
// 32-bit platform as on any other platform value.
size_t upb_MtDecoder_SizeOfRep(upb_FieldRep rep,
upb_MiniTablePlatform platform) {
static const uint8_t kRepToSize32[] = {
[kUpb_FieldRep_1Byte] = 1, [kUpb_FieldRep_4Byte] = 4,
[kUpb_FieldRep_Pointer] = 4, [kUpb_FieldRep_StringView] = 8,
[kUpb_FieldRep_8Byte] = 8,
};
static const uint8_t kRepToSize64[] = {
[kUpb_FieldRep_1Byte] = 1, [kUpb_FieldRep_4Byte] = 4,
[kUpb_FieldRep_Pointer] = 8, [kUpb_FieldRep_StringView] = 16,
[kUpb_FieldRep_8Byte] = 8,
};
// Sanity check: the table selected by UPB_SIZE must match the actual size
// of upb_StringView in this build.
UPB_ASSERT(sizeof(upb_StringView) ==
UPB_SIZE(kRepToSize32, kRepToSize64)[kUpb_FieldRep_StringView]);
return platform == kUpb_MiniTablePlatform_32Bit ? kRepToSize32[rep]
: kRepToSize64[rep];
}
// Returns the alignment in bytes required by a field with representation
// `rep` on the target `platform`.
size_t upb_MtDecoder_AlignOfRep(upb_FieldRep rep,
upb_MiniTablePlatform platform) {
static const uint8_t kRepToAlign32[] = {
[kUpb_FieldRep_1Byte] = 1, [kUpb_FieldRep_4Byte] = 4,
[kUpb_FieldRep_Pointer] = 4, [kUpb_FieldRep_StringView] = 4,
[kUpb_FieldRep_8Byte] = 8,
};
static const uint8_t kRepToAlign64[] = {
[kUpb_FieldRep_1Byte] = 1, [kUpb_FieldRep_4Byte] = 4,
[kUpb_FieldRep_Pointer] = 8, [kUpb_FieldRep_StringView] = 8,
[kUpb_FieldRep_8Byte] = 8,
};
// Sanity check: the table selected by UPB_SIZE must match the actual
// alignment of upb_StringView in this build.
UPB_ASSERT(UPB_ALIGN_OF(upb_StringView) ==
UPB_SIZE(kRepToAlign32, kRepToAlign64)[kUpb_FieldRep_StringView]);
return platform == kUpb_MiniTablePlatform_32Bit ? kRepToAlign32[rep]
: kRepToAlign64[rep];
}
// Decodes one field number of a oneof (its first character, `first_ch`, has
// already been consumed) and adds the field to the oneof described by `item`.
//
// The field must already exist in the table and must still have plain hasbit
// presence; otherwise a decoding error is raised. On success the oneof's
// representation is widened if this field is larger, and the field is
// prepended to the oneof's linked list.
//
// Returns the position after the decoded field number.
static const char* upb_MtDecoder_DecodeOneofField(upb_MtDecoder* d,
const char* ptr,
char first_ch,
upb_LayoutItem* item) {
uint32_t field_num;
ptr = upb_MiniTable_DecodeBase92Varint(
d, ptr, first_ch, kUpb_EncodedValue_MinOneofField,
kUpb_EncodedValue_MaxOneofField, &field_num);
// The lookup returns a const pointer; the field is modified below.
upb_MiniTable_Field* f =
(void*)upb_MiniTable_FindFieldByNumber(d->table, field_num);
if (!f) {
upb_MtDecoder_ErrorFormat(d,
"Couldn't add field number %" PRIu32
" to oneof, no such field number.",
field_num);
UPB_UNREACHABLE();
}
// Any other presence class means the field is repeated, required, singular,
// or already linked into a oneof.
if (f->offset != kHasbitPresence) {
upb_MtDecoder_ErrorFormat(
d,
"Cannot add repeated, required, or singular field %" PRIu32
" to oneof.",
field_num);
UPB_UNREACHABLE();
}
// Oneof storage must be large enough to accommodate the largest member.
int rep = f->mode >> kUpb_FieldRep_Shift;
if (upb_MtDecoder_SizeOfRep(rep, d->platform) >
upb_MtDecoder_SizeOfRep(item->rep, d->platform)) {
item->rep = rep;
}
// Prepend this field to the linked list.
// The old head (or the sentinel) becomes this field's "next" link, and the
// new head is this field's index biased by kOneofBase.
f->offset = item->field_index;
item->field_index = (f - d->fields) + kOneofBase;
return ptr;
}
// Decodes the oneof section, which extends to the end of the input, and
// pushes layout items for every oneof found in it.
//
// Returns the position after the last consumed character.
static const char* upb_MtDecoder_DecodeOneofs(upb_MtDecoder* d,
                                              const char* ptr) {
  upb_LayoutItem item = {.rep = 0,
                         .field_index = kUpb_LayoutItem_IndexSentinel};
  while (ptr < d->end) {
    const char ch = *ptr++;
    switch (ch) {
      case kUpb_EncodedValue_FieldSeparator:
        // Separates two fields of the same oneof; nothing to do.
        break;
      case kUpb_EncodedValue_OneofSeparator:
        // The current oneof is complete; start collecting the next one.
        upb_MtDecoder_PushOneof(d, item);
        item.field_index = kUpb_LayoutItem_IndexSentinel;
        break;
      default:
        ptr = upb_MtDecoder_DecodeOneofField(d, ptr, ch, &item);
        break;
    }
  }
  // The last oneof is not followed by a separator.
  upb_MtDecoder_PushOneof(d, item);
  return ptr;
}
// Decodes a modifier varint (its first character, `first_ch`, has already
// been consumed) and applies it.
//
// After a field has been seen the modifier applies to `last_field`; before
// any field it is the message-level modifier and is stored in
// *msg_modifiers. Message-level modifiers are an error when there is no table
// (the extension case).
//
// Returns the position after the modifier.
static const char* upb_MtDecoder_ParseModifier(upb_MtDecoder* d,
                                               const char* ptr, char first_ch,
                                               upb_MiniTable_Field* last_field,
                                               uint64_t* msg_modifiers) {
  uint32_t mod;
  ptr = upb_MiniTable_DecodeBase92Varint(d, ptr, first_ch,
                                         kUpb_EncodedValue_MinModifier,
                                         kUpb_EncodedValue_MaxModifier, &mod);
  if (last_field) {
    upb_MtDecoder_ModifyField(d, *msg_modifiers, mod, last_field);
    return ptr;
  }
  if (!d->table) {
    upb_MtDecoder_ErrorFormat(d, "Extensions cannot have message modifiers");
    UPB_UNREACHABLE();
  }
  *msg_modifiers = mod;
  return ptr;
}
// Allocates the table's `subs` array with room for `sub_count` entries from
// the decoder's arena. Raises an "Out of memory" decoding error on failure.
static void upb_MtDecoder_AllocateSubs(upb_MtDecoder* d, uint32_t sub_count) {
  upb_MiniTable* table = d->table;
  table->subs =
      upb_Arena_Malloc(d->arena, sizeof(*table->subs) * sub_count);
  upb_MtDecoder_CheckOutOfMemory(d, table->subs);
}
// Parses `len` characters of encoded data starting at `ptr`, filling in the
// array `fields` (whose elements are `field_size` bytes apart).
//
// Every field character adds one field and increments *field_count. Modifier
// characters apply to the preceding field or, before the first field, to the
// whole message. Skip characters advance the running field number, and the
// End character hands the rest of the input to the oneof decoder. Characters
// that match none of these classes are ignored.
//
// When d->table is NULL (the extension case) parsing stops at the second
// field character and a pointer to that character is returned; otherwise the
// position after the last consumed character is returned. Malformed input
// raises a decoding error (which does not return).
static const char* upb_MtDecoder_Parse(upb_MtDecoder* d, const char* ptr,
size_t len, void* fields,
size_t field_size, uint16_t* field_count,
uint32_t* sub_count) {
uint64_t msg_modifiers = 0;
uint32_t last_field_number = 0;
upb_MiniTable_Field* last_field = NULL;
// dense_below is recorded at the first skip, or at the end of the input if
// there is no skip. It is not recorded at all when there is no table.
bool need_dense_below = d->table != NULL;
d->end = UPB_PTRADD(ptr, len);
while (ptr < d->end) {
char ch = *ptr++;
if (ch <= kUpb_EncodedValue_MaxField) {
// Without a table only a single field is parsed.
if (!d->table && last_field) return --ptr;
upb_MiniTable_Field* field = fields;
*field_count += 1;
fields = (char*)fields + field_size;
field->number = ++last_field_number;
last_field = field;
upb_MiniTable_SetField(d, ch, field, msg_modifiers, sub_count);
} else if (kUpb_EncodedValue_MinModifier <= ch &&
ch <= kUpb_EncodedValue_MaxModifier) {
ptr = upb_MtDecoder_ParseModifier(d, ptr, ch, last_field, &msg_modifiers);
// msg_modifiers can only be non-zero when d->table is non-NULL, because
// ParseModifier rejects message modifiers when there is no table.
if (msg_modifiers & kUpb_MessageModifier_IsExtendable) {
d->table->ext |= kUpb_ExtMode_Extendable;
}
} else if (ch == kUpb_EncodedValue_End) {
if (!d->table) {
upb_MtDecoder_ErrorFormat(d, "Extensions cannot have oneofs.");
UPB_UNREACHABLE();
}
// Consumes the remainder of the input.
ptr = upb_MtDecoder_DecodeOneofs(d, ptr);
} else if (kUpb_EncodedValue_MinSkip <= ch &&
ch <= kUpb_EncodedValue_MaxSkip) {
if (need_dense_below) {
d->table->dense_below = d->table->field_count;
need_dense_below = false;
}
uint32_t skip;
ptr = upb_MiniTable_DecodeBase92Varint(d, ptr, ch,
kUpb_EncodedValue_MinSkip,
kUpb_EncodedValue_MaxSkip, &skip);
last_field_number += skip;
last_field_number--; // Next field seen will increment.
}
}
if (need_dense_below) {
d->table->dense_below = d->table->field_count;
}
return ptr;
}
// Parses the encoded message `data` of length `len` into d->table: allocates
// the field array, fills it in, returns the unused tail of the allocation to
// the arena and allocates the `subs` array. Errors are raised as decoding
// errors (which do not return).
static void upb_MtDecoder_ParseMessage(upb_MtDecoder* d, const char* data,
                                       size_t len) {
  // Every field takes at least one input character, so `len` is an upper
  // bound on the number of fields. The excess is given back below.
  const size_t max_bytes = sizeof(*d->fields) * len;
  d->fields = upb_Arena_Malloc(d->arena, max_bytes);
  upb_MtDecoder_CheckOutOfMemory(d, d->fields);

  upb_MiniTable* table = d->table;
  uint32_t sub_count = 0;
  table->field_count = 0;
  table->fields = d->fields;
  upb_MtDecoder_Parse(d, data, len, d->fields, sizeof(*d->fields),
                      &table->field_count, &sub_count);

  const size_t used_bytes = sizeof(*d->fields) * table->field_count;
  upb_Arena_ShrinkLast(d->arena, d->fields, max_bytes, used_bytes);
  table->fields = d->fields;
  upb_MtDecoder_AllocateSubs(d, sub_count);
}
// qsort() comparison function for upb_LayoutItem.
//
// Each item is packed into a single integer key, built from (most significant
// first) its rep, its type and its field index, and the keys are compared.
// Two distinct items are expected never to produce the same key, so the
// function only ever returns -1 or 1.
int upb_MtDecoder_CompareFields(const void* _a, const void* _b) {
  const upb_LayoutItem* a = _a;
  const upb_LayoutItem* b = _b;
  // Currently we just sort by:
  //  1. rep (smallest fields first)
  //  2. type (oneof cases first)
  //  3. field_index (smallest numbers first)
  // The main goal of this is to reduce space lost to padding.
  // Later we may have more subtle reasons to prefer a different ordering.
  //
  // NOTE(review): kUpb_LayoutItemType_Max is 2. If _upb_Log2Ceiling(2) is 1,
  // type value 2 spills into the lowest rep bit of the key, so the ordering
  // is not strictly rep-major - confirm whether that is intended.
  const int rep_bits = _upb_Log2Ceiling(kUpb_FieldRep_Max);
  const int type_bits = _upb_Log2Ceiling(kUpb_LayoutItemType_Max);
  const int idx_bits = (sizeof(a->field_index) * 8);
  UPB_ASSERT(idx_bits + rep_bits + type_bits < 32);
  // Arguments and expansion are fully parenthesized so that the macro stays
  // correct for any argument expression and in any surrounding expression.
#define UPB_COMBINE(rep, ty, idx) \
  (((((rep) << type_bits) | (ty)) << idx_bits) | (idx))
  uint32_t a_packed = UPB_COMBINE(a->rep, a->type, a->field_index);
  uint32_t b_packed = UPB_COMBINE(b->rep, b->type, b->field_index);
  UPB_ASSERT(a_packed != b_packed);
#undef UPB_COMBINE
  return a_packed < b_packed ? -1 : 1;
}
// Adds a layout item for every field that is not part of a oneof (items for
// oneofs were already pushed while decoding them) and then sorts all items
// into layout order. Always returns true.
static bool upb_MtDecoder_SortLayoutItems(upb_MtDecoder* d) {
  const int field_count = d->table->field_count;
  for (int idx = 0; idx < field_count; idx++) {
    const upb_MiniTable_Field* f = &d->fields[idx];
    // Offsets at or above kOneofBase are oneof list links, not presence
    // classes.
    if (f->offset < kOneofBase) {
      upb_LayoutItem item = {.field_index = idx,
                             .rep = f->mode >> kUpb_FieldRep_Shift,
                             .type = kUpb_LayoutItemType_Field};
      upb_MtDecoder_PushItem(d, item);
    }
  }

  if (d->vec.size != 0) {
    qsort(d->vec.data, d->vec.size, sizeof(*d->vec.data),
          upb_MtDecoder_CompareFields);
  }
  return true;
}
// Returns n / d rounded up to the next integer. `d` must be non-zero.
static size_t upb_MiniTable_DivideRoundUp(size_t n, size_t d) {
  const size_t biased = n + (d - 1);
  return biased / d;
}
// Assigns hasbit indexes to the fields of `ret`, based on the presence class
// temporarily stored in each field's offset, and initializes ret->size to the
// number of bytes taken up by the hasbits.
//
// Required fields get the lowest indexes (and are counted in
// ret->required_count), followed by the other fields with hasbit presence.
// Fields without presence get presence 0.
static void upb_MtDecoder_AssignHasbits(upb_MiniTable* ret) {
  int n = ret->field_count;
  int last_hasbit = 0;  // 0 cannot be used.

  // First assign required fields, which must have the lowest hasbits.
  for (int i = 0; i < n; i++) {
    upb_MiniTable_Field* field = (upb_MiniTable_Field*)&ret->fields[i];
    if (field->offset == kRequiredPresence) {
      field->presence = ++last_hasbit;
    } else if (field->offset == kNoPresence) {
      field->presence = 0;
    }
  }
  ret->required_count = last_hasbit;

  // Next assign non-required hasbit fields.
  for (int i = 0; i < n; i++) {
    upb_MiniTable_Field* field = (upb_MiniTable_Field*)&ret->fields[i];
    if (field->offset == kHasbitPresence) {
      field->presence = ++last_hasbit;
    }
  }

  // Bit 0 is reserved, hence the + 1 when converting bits to bytes.
  ret->size = last_hasbit ? upb_MiniTable_DivideRoundUp(last_hasbit + 1, 8) : 0;
}
// Reserves storage for one item of representation `rep` at the end of the
// message: aligns the current message size up as required, grows the size by
// the item's size and returns the item's offset.
size_t upb_MtDecoder_Place(upb_MtDecoder* d, upb_FieldRep rep) {
  const size_t item_align = upb_MtDecoder_AlignOfRep(rep, d->platform);
  const size_t item_size = upb_MtDecoder_SizeOfRep(rep, d->platform);
  const size_t offset = UPB_ALIGN_UP(d->table->size, item_align);
  d->table->size = offset + item_size;
  return offset;
}
3 years ago
// Places every layout item in the message and writes the resulting offsets
// into the fields.
//
// For oneofs, every member field receives the same data offset, and its
// presence is set to the bitwise complement of the oneof case offset. The
// final message size is rounded up to a multiple of 8.
static void upb_MtDecoder_AssignOffsets(upb_MtDecoder* d) {
  upb_LayoutItem* end = UPB_PTRADD(d->vec.data, d->vec.size);

  // Compute offsets.
  for (upb_LayoutItem* item = d->vec.data; item < end; item++) {
    item->offset = upb_MtDecoder_Place(d, item->rep);
  }

  // Assign oneof case offsets.  We must do these first, since assigning
  // actual offsets will overwrite the links of the linked list.
  for (upb_LayoutItem* item = d->vec.data; item < end; item++) {
    if (item->type != kUpb_LayoutItemType_OneofCase) continue;
    upb_MiniTable_Field* f = &d->fields[item->field_index];
    while (true) {
      f->presence = ~item->offset;
      if (f->offset == kUpb_LayoutItem_IndexSentinel) break;
      UPB_ASSERT(f->offset - kOneofBase < d->table->field_count);
      f = &d->fields[f->offset - kOneofBase];
    }
  }

  // Assign offsets.
  for (upb_LayoutItem* item = d->vec.data; item < end; item++) {
    upb_MiniTable_Field* f = &d->fields[item->field_index];
    switch (item->type) {
      case kUpb_LayoutItemType_OneofField:
        // Walk the oneof's list, reading each link before overwriting it
        // with the shared data offset.
        while (true) {
          uint16_t next_offset = f->offset;
          f->offset = item->offset;
          if (next_offset == kUpb_LayoutItem_IndexSentinel) break;
          f = &d->fields[next_offset - kOneofBase];
        }
        break;
      case kUpb_LayoutItemType_Field:
        f->offset = item->offset;
        break;
      default:
        break;
    }
  }

  // The fasttable parser (supported on 64-bit only) depends on this being a
  // multiple of 8 in order to satisfy UPB_MALLOC_ALIGN, which is also 8.
  //
  // On 32-bit we could potentially make this smaller, but there is no
  // compelling reason to optimize this right now.
  d->table->size = UPB_ALIGN_UP(d->table->size, 8);
}
// Builds a mini table from the encoded description in `data`/`len`, allocating
// the table itself from `arena`.
//
// `*buf` / `*buf_size` describe a scratch buffer (size in bytes) that backs
// the decoder's layout item vector.  On return, on both the success and the
// error path, they are updated to the buffer the decoder ended up with and its
// size in bytes, so the caller can reuse or release it.
//
// Returns the new table, or NULL if decoding failed (the error path is entered
// through UPB_SETJMP on decoder.err).
upb_MiniTable* upb_MiniTable_BuildWithBuf(const char* data, size_t len,
                                          upb_MiniTablePlatform platform,
                                          upb_Arena* arena, void** buf,
                                          size_t* buf_size,
                                          upb_Status* status) {
  upb_MtDecoder decoder = {
      .platform = platform,
      .vec =
          {
              .data = *buf,
              // The vector's capacity is counted in elements, not bytes.
              .capacity = *buf_size / sizeof(*decoder.vec.data),
              .size = 0,
          },
      .arena = arena,
      .status = status,
      .table = upb_Arena_Malloc(arena, sizeof(*decoder.table)),
  };

  if (UPB_SETJMP(decoder.err)) {
    decoder.table = NULL;
    goto done;
  }

  // Checked only after the jump target is armed, so an allocation failure is
  // reported through the same error path as any other decode error.
  upb_MtDecoder_CheckOutOfMemory(&decoder, decoder.table);

  decoder.table->size = 0;
  decoder.table->field_count = 0;
  decoder.table->ext = kUpb_ExtMode_NonExtendable;
  decoder.table->dense_below = 0;
  // -1 (not 0) disables fasttable parsing for this table.
  decoder.table->table_mask = -1;
  decoder.table->required_count = 0;

  upb_MtDecoder_ParseMessage(&decoder, data, len);
  upb_MtDecoder_AssignHasbits(decoder.table);
  upb_MtDecoder_SortLayoutItems(&decoder);
  upb_MtDecoder_AssignOffsets(&decoder);

done:
  *buf = decoder.vec.data;
  // Convert the element capacity back into a byte count; this is the inverse
  // of the division performed when the vector was initialized above.
  *buf_size = decoder.vec.capacity * sizeof(*decoder.vec.data);
  return decoder.table;
}
// Builds an empty mini table flagged as a MessageSet
// (kUpb_ExtMode_IsMessageSet), allocated from `arena`.
//
// `platform` is accepted for signature parity with the other builders; it is
// not consulted here because the table has no fields to lay out.
//
// Returns NULL if the arena allocation fails.
upb_MiniTable* upb_MiniTable_BuildMessageSet(upb_MiniTablePlatform platform,
                                             upb_Arena* arena) {
  upb_MiniTable* ret = upb_Arena_Malloc(arena, sizeof(*ret));
  if (!ret) return NULL;

  ret->size = 0;
  ret->field_count = 0;
  ret->ext = kUpb_ExtMode_IsMessageSet;
  ret->dense_below = 0;
  // -1 (not 0) disables fasttable parsing for this table.
  ret->table_mask = -1;
  ret->required_count = 0;
  // The table has no fields or sub-tables.  Set the pointers explicitly so
  // they never hold whatever bytes the arena allocation happened to contain.
  ret->subs = NULL;
  ret->fields = NULL;
  return ret;
}
// Builds the two-field mini table that describes a map entry: field number 1
// is the key (`key_type`) and field number 2 is the value (`value_type`).
//
// If `value_is_proto3_enum` is true the value is treated as
// kUpb_FieldType_Int32.  When the (possibly adjusted) value type is a message,
// group or enum, a single sub-table slot is allocated for it; the slot's
// contents are not initialized here.
//
// Both fields are laid out as consecutive slots of the size of
// kUpb_FieldRep_StringView on `platform`, regardless of their actual type.
//
// Returns NULL if any arena allocation fails.
upb_MiniTable* upb_MiniTable_BuildMapEntry(upb_FieldType key_type,
                                           upb_FieldType value_type,
                                           bool value_is_proto3_enum,
                                           upb_MiniTablePlatform platform,
                                           upb_Arena* arena) {
  upb_MiniTable* ret = upb_Arena_Malloc(arena, sizeof(*ret));
  upb_MiniTable_Field* fields = upb_Arena_Malloc(arena, sizeof(*fields) * 2);
  if (!ret || !fields) return NULL;

  upb_MiniTable_Sub* subs = NULL;
  if (value_is_proto3_enum) value_type = kUpb_FieldType_Int32;
  if (value_type == kUpb_FieldType_Message ||
      value_type == kUpb_FieldType_Group || value_type == kUpb_FieldType_Enum) {
    subs = upb_Arena_Malloc(arena, sizeof(*subs));
    if (!subs) return NULL;
  }

  size_t field_size =
      upb_MtDecoder_SizeOfRep(kUpb_FieldRep_StringView, platform);

  fields[0].number = 1;
  fields[1].number = 2;
  fields[0].mode = kUpb_FieldMode_Scalar;
  fields[1].mode = kUpb_FieldMode_Scalar;
  fields[0].presence = 0;
  fields[1].presence = 0;
  fields[0].offset = 0;
  fields[1].offset = field_size;

  upb_MiniTable_SetTypeAndSub(&fields[0], key_type, NULL, 0);
  upb_MiniTable_SetTypeAndSub(&fields[1], value_type, NULL, 0);

  ret->size = UPB_ALIGN_UP(2 * field_size, 8);
  ret->field_count = 2;
  ret->ext = kUpb_ExtMode_NonExtendable | kUpb_ExtMode_IsMapEntry;
  ret->dense_below = 2;
  // -1 (not 0) disables fasttable parsing for this table.
  ret->table_mask = -1;
  ret->required_count = 0;
  ret->subs = subs;
  ret->fields = fields;
  return ret;
}
// Returns the number of bytes needed for a upb_MiniTable_Enum followed by
// `count` 32-bit data words.
static size_t upb_MiniTable_EnumSize(size_t count) {
  const size_t data_bytes = count * sizeof(uint32_t);
  return sizeof(upb_MiniTable_Enum) + data_bytes;
}
// Appends `val` as the next data word of the enum table under construction,
// growing the table first when it is full.
//
// Growth doubles the capacity (to at least 2) and reallocates from the
// decoder's arena, so d->enum_table may change; the current table pointer is
// returned and callers must use it instead of any pointer they held before.
// An allocation failure is reported through upb_MtDecoder_CheckOutOfMemory.
static upb_MiniTable_Enum* _upb_MiniTable_AddEnumDataMember(upb_MtDecoder* d,
                                                            uint32_t val) {
  const bool table_is_full = d->enum_data_count == d->enum_data_capacity;
  if (table_is_full) {
    const size_t bytes_before = upb_MiniTable_EnumSize(d->enum_data_capacity);
    d->enum_data_capacity = UPB_MAX(2, d->enum_data_capacity * 2);
    const size_t bytes_after = upb_MiniTable_EnumSize(d->enum_data_capacity);
    d->enum_table =
        upb_Arena_Realloc(d->arena, d->enum_table, bytes_before, bytes_after);
    upb_MtDecoder_CheckOutOfMemory(d, d->enum_table);
  }

  upb_MiniTable_Enum* current = d->enum_table;
  current->data[d->enum_data_count] = val;
  d->enum_data_count++;
  return current;
}
// Records `val` as a member of the enum table under construction.
//
// A value is stored in one of two ways:
//   * as a bit in the leading bitmask words (bit `val % 32` of word
//     `val / 32`), extending the mask 32 bits at a time as needed; or
//   * as an explicit entry appended after the mask words, counted by
//     table->value_count.
//
// Once any explicit entry exists, every later value is also stored explicitly.
// A value also becomes explicit when it is greater than 512 and fewer than
// `val / 32` values have been seen so far (including this one).
static void upb_MiniTable_BuildEnumValue(upb_MtDecoder* d, uint32_t val) {
  upb_MiniTable_Enum* table = d->enum_table;
  d->enum_value_count++;
  if (table->value_count || (val > 512 && d->enum_value_count < val / 32)) {
    if (table->value_count == 0) {
      // Switching to explicit entries: at this point the data words written
      // so far must be exactly the mask words.
      UPB_ASSERT(d->enum_data_count == table->mask_limit / 32);
    }
    // Adding a member may reallocate the table, so pick up the new pointer.
    table = _upb_MiniTable_AddEnumDataMember(d, val);
    table->value_count++;
  } else {
    uint32_t new_mask_limit = ((val / 32) + 1) * 32;
    while (table->mask_limit < new_mask_limit) {
      table = _upb_MiniTable_AddEnumDataMember(d, 0);
      table->mask_limit += 32;
    }
    table->data[val / 32] |= 1ULL << (val % 32);
  }
}
// Builds an enum mini table from the encoded description in `data`/`len`,
// allocating it from `arena`.
//
// The input is consumed one character at a time:
//   * a character <= kUpb_EncodedValue_MaxEnumMask decodes to a 5-bit mask;
//     each set bit adds the corresponding value, and the running base value
//     advances by 5;
//   * a character in [kUpb_EncodedValue_MinSkip, kUpb_EncodedValue_MaxSkip]
//     starts a varint whose decoded value is added to the running base;
//   * any other character is an error.
//
// Returns the table, or NULL on error (an unexpected character, or any
// failure reported through the decoder's UPB_SETJMP error path, including
// out-of-memory).
upb_MiniTable_Enum* upb_MiniTable_BuildEnum(const char* data, size_t len,
                                            upb_Arena* arena,
                                            upb_Status* status) {
  upb_MtDecoder d = {
      .enum_table = upb_Arena_Malloc(arena, upb_MiniTable_EnumSize(2)),
      .enum_value_count = 0,
      .enum_data_count = 0,
      // Matches the two data words allocated above, so that the two initial
      // mask words are stored without triggering a reallocation.
      .enum_data_capacity = 2,
      .status = status,
      .end = UPB_PTRADD(data, len),
      .arena = arena,
  };

  if (UPB_SETJMP(d.err)) {
    return NULL;
  }

  // Checked only after the jump target is armed.
  upb_MtDecoder_CheckOutOfMemory(&d, d.enum_table);

  // Guarantee at least 64 bits of mask without checking mask size.
  d.enum_table->mask_limit = 64;
  d.enum_table = _upb_MiniTable_AddEnumDataMember(&d, 0);
  d.enum_table = _upb_MiniTable_AddEnumDataMember(&d, 0);

  d.enum_table->value_count = 0;

  const char* ptr = data;
  uint32_t base = 0;

  while (ptr < d.end) {
    char ch = *ptr++;
    if (ch <= kUpb_EncodedValue_MaxEnumMask) {
      uint32_t mask = upb_FromBase92(ch);
      for (int i = 0; i < 5; i++, base++, mask >>= 1) {
        if (mask & 1) upb_MiniTable_BuildEnumValue(&d, base);
      }
    } else if (kUpb_EncodedValue_MinSkip <= ch &&
               ch <= kUpb_EncodedValue_MaxSkip) {
      uint32_t skip;
      ptr = upb_MiniTable_DecodeBase92Varint(&d, ptr, ch,
                                             kUpb_EncodedValue_MinSkip,
                                             kUpb_EncodedValue_MaxSkip, &skip);
      base += skip;
    } else {
      upb_Status_SetErrorFormat(status, "Unexpected character: %c", ch);
      return NULL;
    }
  }

  return d.enum_table;
}
// Fills in `ext` from the encoded extension description in `data`/`len`.
//
// The field stored in `ext` is marked as an extension and its offset and
// presence are reset to 0.  If `extendee` is a MessageSet, the extension must
// be a non-repeated sub-message; otherwise NULL is returned.  On success
// `ext->extendee` and `ext->sub` are set from the arguments.
//
// Returns the pointer produced by upb_MtDecoder_Parse on success, or NULL on
// any error (including errors reported through the decoder's UPB_SETJMP
// error path).
const char* upb_MiniTable_BuildExtension(const char* data, size_t len,
                                         upb_MiniTable_Extension* ext,
                                         const upb_MiniTable* extendee,
                                         upb_MiniTable_Sub sub,
                                         upb_Status* status) {
  // No arena or table: the single field is decoded directly into `ext`.
  upb_MtDecoder decoder = {
      .arena = NULL,
      .status = status,
      .table = NULL,
  };

  if (UPB_SETJMP(decoder.err)) {
    return NULL;
  }

  uint16_t count = 0;
  const char* ret =
      upb_MtDecoder_Parse(&decoder, data, len, ext, sizeof(*ext), &count, NULL);
  if (!ret) return NULL;

  upb_MiniTable_Field* f = &ext->field;

  f->mode |= kUpb_LabelFlags_IsExtension;
  f->offset = 0;
  f->presence = 0;

  if (extendee->ext & kUpb_ExtMode_IsMessageSet) {
    // Extensions of MessageSet must be messages.
    if (!upb_IsSubMessage(f)) return NULL;

    // Extensions of MessageSet must be non-repeating.
    if ((f->mode & kUpb_FieldMode_Mask) == kUpb_FieldMode_Array) return NULL;
  }

  ext->extendee = extendee;
  ext->sub = sub;

  return ret;
}
// Convenience wrapper around upb_MiniTable_BuildWithBuf for callers that do
// not want to manage a reusable scratch buffer.
//
// Starts with no buffer, lets the builder hand back whatever buffer it ended
// up with, and releases that buffer with free() before returning (so the
// buffer is presumably obtained from the malloc family -- confirm against the
// decoder's vector growth code).
//
// Returns the built table, or NULL on failure.
upb_MiniTable* upb_MiniTable_Build(const char* data, size_t len,
                                   upb_MiniTablePlatform platform,
                                   upb_Arena* arena, upb_Status* status) {
  size_t scratch_size = 0;
  void* scratch = NULL;

  upb_MiniTable* const table = upb_MiniTable_BuildWithBuf(
      data, len, platform, arena, &scratch, &scratch_size, status);

  free(scratch);
  return table;
}
// Links `sub` as the sub-message table of `field`, which must be one of the
// fields of `table` (checked with UPB_ASSERT).
//
// If `sub` is a map entry table, the field's mode is replaced so that the
// field is treated as a map with pointer representation.  The sub-table slot
// written is the one selected by field->submsg_index.
void upb_MiniTable_SetSubMessage(upb_MiniTable* table,
                                 upb_MiniTable_Field* field,
                                 const upb_MiniTable* sub) {
  UPB_ASSERT((uintptr_t)field >= (uintptr_t)table->fields &&
             (uintptr_t)(table->fields + table->field_count) >
                 (uintptr_t)field);

  const bool sub_is_map_entry = (sub->ext & kUpb_ExtMode_IsMapEntry) != 0;
  if (sub_is_map_entry) {
    field->mode =
        kUpb_FieldMode_Map | (kUpb_FieldRep_Pointer << kUpb_FieldRep_Shift);
  }

  // The cast to void* lets the slot be written through a non-const pointer.
  upb_MiniTable_Sub* slot = (void*)(table->subs + field->submsg_index);
  slot->submsg = sub;
}
3 years ago
// Links `sub` as the enum table of `field`, which must be one of the fields
// of `table` (checked with UPB_ASSERT).  The sub-table slot written is the one
// selected by field->submsg_index.
void upb_MiniTable_SetSubEnum(upb_MiniTable* table, upb_MiniTable_Field* field,
                              const upb_MiniTable_Enum* sub) {
  UPB_ASSERT((uintptr_t)field >= (uintptr_t)table->fields &&
             (uintptr_t)(table->fields + table->field_count) >
                 (uintptr_t)field);

  // The cast to void* lets the slot be written through a non-const pointer.
  upb_MiniTable_Sub* slot = (void*)(table->subs + field->submsg_index);
  slot->subenum = sub;
}