add version/tag chars to the start of all mini descriptors

The version/tag character is verified during decoding. The specific values are just placeholders for now.

PiperOrigin-RevId: 481009599
pull/13171/head
Eric Salo 2 years ago committed by Copybara-Service
parent df34b04658
commit 36ce2fa7d1
  1. 83
      upb/mini_table.c
  2. 4
      upb/mini_table.h
  3. 5
      upb/mini_table.hpp
  4. 4
      upb/mini_table_test.cc
  5. 3
      upb/reflection/enum_def.c

@ -83,6 +83,12 @@ enum {
kUpb_EncodedValue_MaxEnumMask = 'A',
};
// Version tag characters. The encoder writes one of these as the first
// character of a mini descriptor, and the matching decoder entry point rejects
// non-empty input that does not start with the expected tag.
enum {
  kUpb_EncodedVersion_EnumV1 = '!',       // enum mini descriptors
  kUpb_EncodedVersion_ExtensionV1 = '#',  // extension mini descriptors
  kUpb_EncodedVersion_MessageV1 = '$',    // message mini descriptors
};
char upb_ToBase92(int8_t ch) {
static const char kUpb_ToBase92[] = {
' ', '!', '#', '$', '%', '&', '(', ')', '*', '+', ',', '-', '.', '/',
@ -156,14 +162,19 @@ static upb_MtDataEncoderInternal* upb_MtDataEncoder_GetInternal(
return ret;
}
static char* upb_MtDataEncoder_Put(upb_MtDataEncoder* e, char* ptr, char ch) {
// Appends the single character `ch` to the output verbatim, with no Base92
// mapping applied.
//
// Returns the position just past the written character, or NULL when `ptr`
// has already reached `e->end` and nothing can be written.
static char* upb_MtDataEncoder_PutRaw(upb_MtDataEncoder* e, char* ptr,
                                      char ch) {
  upb_MtDataEncoderInternal* in = (upb_MtDataEncoderInternal*)e->internal;
  // Sanity check: the write offset from the start of the current buffer must
  // stay below kUpb_MtDataEncoder_MinSize.
  UPB_ASSERT(ptr - in->buf_start < kUpb_MtDataEncoder_MinSize);
  if (ptr == e->end) return NULL;
  // Exactly one store per call. Only one byte of room was checked above, and
  // any Base92 mapping is the caller's job (see upb_MtDataEncoder_Put), so
  // the character must not be written a second time in encoded form.
  *ptr++ = ch;
  return ptr;
}
// Maps `ch` through upb_ToBase92() and appends the resulting character.
// Returns whatever upb_MtDataEncoder_PutRaw() returns for that write.
static char* upb_MtDataEncoder_Put(upb_MtDataEncoder* e, char* ptr, char ch) {
  const char encoded = upb_ToBase92(ch);
  return upb_MtDataEncoder_PutRaw(e, ptr, encoded);
}
static char* upb_MtDataEncoder_PutBase92Varint(upb_MtDataEncoder* e, char* ptr,
uint32_t val, int min, int max) {
int shift = _upb_Log2Ceiling(upb_FromBase92(max) - upb_FromBase92(min) + 1);
@ -195,6 +206,10 @@ char* upb_MtDataEncoder_EncodeExtension(upb_MtDataEncoder* e, char* ptr,
in->state.msg_state.msg_modifiers = 0;
in->state.msg_state.last_field_num = 0;
in->state.msg_state.oneof_state = kUpb_OneofState_NotStarted;
ptr = upb_MtDataEncoder_PutRaw(e, ptr, kUpb_EncodedVersion_ExtensionV1);
if (!ptr) return NULL;
return upb_MtDataEncoder_PutField(e, ptr, type, field_num, field_mod);
}
@ -204,6 +219,10 @@ char* upb_MtDataEncoder_StartMessage(upb_MtDataEncoder* e, char* ptr,
in->state.msg_state.msg_modifiers = msg_mod;
in->state.msg_state.last_field_num = 0;
in->state.msg_state.oneof_state = kUpb_OneofState_NotStarted;
ptr = upb_MtDataEncoder_PutRaw(e, ptr, kUpb_EncodedVersion_MessageV1);
if (!ptr) return NULL;
return upb_MtDataEncoder_PutModifier(e, ptr, msg_mod);
}
@ -303,10 +322,12 @@ char* upb_MtDataEncoder_PutOneofField(upb_MtDataEncoder* e, char* ptr,
return ptr;
}
void upb_MtDataEncoder_StartEnum(upb_MtDataEncoder* e) {
upb_MtDataEncoderInternal* in = upb_MtDataEncoder_GetInternal(e, NULL);
// Begins encoding an enum mini descriptor.
//
// Resets the encoder's enum state (present-values mask and last written
// value), then emits the enum version tag as the first output character.
// Returns the result of upb_MtDataEncoder_PutRaw() for that tag.
char* upb_MtDataEncoder_StartEnum(upb_MtDataEncoder* e, char* ptr) {
  upb_MtDataEncoderInternal* const internal =
      upb_MtDataEncoder_GetInternal(e, ptr);

  internal->state.enum_state.last_written_value = 0;
  internal->state.enum_state.present_values_mask = 0;

  char* const next =
      upb_MtDataEncoder_PutRaw(e, ptr, kUpb_EncodedVersion_EnumV1);
  return next;
}
static char* upb_MtDataEncoder_FlushDenseEnumMask(upb_MtDataEncoder* e,
@ -1021,6 +1042,16 @@ upb_MiniTable* upb_MiniTable_BuildWithBuf(const char* data, size_t len,
goto done;
}
// If the string is non-empty then it must begin with a version tag.
if (len) {
if (*data != kUpb_EncodedVersion_MessageV1) {
upb_MtDecoder_ErrorFormat(&decoder, "Invalid message version: %c", *data);
UPB_UNREACHABLE();
}
data++;
len--;
}
upb_MtDecoder_CheckOutOfMemory(&decoder, decoder.table);
decoder.table->size = 0;
@ -1141,7 +1172,7 @@ static void upb_MiniTable_BuildEnumValue(upb_MtDecoder* d, uint32_t val) {
upb_MiniTable_Enum* upb_MiniTable_BuildEnum(const char* data, size_t len,
upb_Arena* arena,
upb_Status* status) {
upb_MtDecoder d = {
upb_MtDecoder decoder = {
.enum_table = upb_Arena_Malloc(arena, upb_MiniTable_EnumSize(2)),
.enum_value_count = 0,
.enum_data_count = 0,
@ -1151,33 +1182,41 @@ upb_MiniTable_Enum* upb_MiniTable_BuildEnum(const char* data, size_t len,
.arena = arena,
};
if (UPB_SETJMP(d.err)) {
return NULL;
if (UPB_SETJMP(decoder.err)) return NULL;
// If the string is non-empty then it must begin with a version tag.
if (len) {
if (*data != kUpb_EncodedVersion_EnumV1) {
upb_MtDecoder_ErrorFormat(&decoder, "Invalid enum version: %c", *data);
UPB_UNREACHABLE();
}
data++;
len--;
}
upb_MtDecoder_CheckOutOfMemory(&d, d.enum_table);
upb_MtDecoder_CheckOutOfMemory(&decoder, decoder.enum_table);
// Guarantee at least 64 bits of mask without checking mask size.
d.enum_table->mask_limit = 64;
d.enum_table = _upb_MiniTable_AddEnumDataMember(&d, 0);
d.enum_table = _upb_MiniTable_AddEnumDataMember(&d, 0);
decoder.enum_table->mask_limit = 64;
decoder.enum_table = _upb_MiniTable_AddEnumDataMember(&decoder, 0);
decoder.enum_table = _upb_MiniTable_AddEnumDataMember(&decoder, 0);
d.enum_table->value_count = 0;
decoder.enum_table->value_count = 0;
const char* ptr = data;
uint32_t base = 0;
while (ptr < d.end) {
while (ptr < decoder.end) {
char ch = *ptr++;
if (ch <= kUpb_EncodedValue_MaxEnumMask) {
uint32_t mask = upb_FromBase92(ch);
for (int i = 0; i < 5; i++, base++, mask >>= 1) {
if (mask & 1) upb_MiniTable_BuildEnumValue(&d, base);
if (mask & 1) upb_MiniTable_BuildEnumValue(&decoder, base);
}
} else if (kUpb_EncodedValue_MinSkip <= ch &&
ch <= kUpb_EncodedValue_MaxSkip) {
uint32_t skip;
ptr = upb_MiniTable_DecodeBase92Varint(&d, ptr, ch,
ptr = upb_MiniTable_DecodeBase92Varint(&decoder, ptr, ch,
kUpb_EncodedValue_MinSkip,
kUpb_EncodedValue_MaxSkip, &skip);
base += skip;
@ -1187,7 +1226,7 @@ upb_MiniTable_Enum* upb_MiniTable_BuildEnum(const char* data, size_t len,
}
}
return d.enum_table;
return decoder.enum_table;
}
const char* upb_MiniTable_BuildExtension(const char* data, size_t len,
@ -1201,8 +1240,16 @@ const char* upb_MiniTable_BuildExtension(const char* data, size_t len,
.table = NULL,
};
if (UPB_SETJMP(decoder.err)) {
return NULL;
if (UPB_SETJMP(decoder.err)) return NULL;
// If the string is non-empty then it must begin with a version tag.
if (len) {
if (*data != kUpb_EncodedVersion_ExtensionV1) {
upb_MtDecoder_ErrorFormat(&decoder, "Invalid ext version: %c", *data);
UPB_UNREACHABLE();
}
data++;
len--;
}
uint16_t count = 0;

@ -114,9 +114,9 @@ char* upb_MtDataEncoder_StartOneof(upb_MtDataEncoder* e, char* ptr);
char* upb_MtDataEncoder_PutOneofField(upb_MtDataEncoder* e, char* ptr,
uint32_t field_num);
// Encodes the set of values for a given enum. The values must be given in
// Encodes the set of values for a given enum. The values must be given in
// order (after casting to uint32_t), and repeats are not allowed.
void upb_MtDataEncoder_StartEnum(upb_MtDataEncoder* e);
char* upb_MtDataEncoder_StartEnum(upb_MtDataEncoder* e, char* ptr);
char* upb_MtDataEncoder_PutEnumValue(upb_MtDataEncoder* e, char* ptr,
uint32_t val);
char* upb_MtDataEncoder_EndEnum(upb_MtDataEncoder* e, char* ptr);

@ -65,7 +65,10 @@ class MtDataEncoder {
});
}
void StartEnum() { upb_MtDataEncoder_StartEnum(&encoder_); }
// Starts an enum encoding by running upb_MtDataEncoder_StartEnum() through
// appender_, and returns appender_'s result.
bool StartEnum() {
  auto start_enum = [=](char* buf) {
    return upb_MtDataEncoder_StartEnum(&encoder_, buf);
  };
  return appender_(start_enum);
}
bool PutEnumValue(uint32_t enum_value) {
return appender_([=](char* buf) {

@ -218,7 +218,7 @@ TEST(MiniTableEnumTest, Enum) {
upb::Arena arena;
upb::MtDataEncoder e;
e.StartEnum();
ASSERT_TRUE(e.StartEnum());
absl::flat_hash_set<int32_t> values;
for (int i = 0; i < 256; i++) {
values.insert(i * 2);
@ -256,7 +256,7 @@ TEST(MiniTableEnumTest, PositiveAndNegative) {
upb::Arena arena;
upb::MtDataEncoder e;
e.StartEnum();
ASSERT_TRUE(e.StartEnum());
absl::flat_hash_set<int32_t> values;
for (int i = 0; i < 100; i++) {
values.insert(i);

@ -158,7 +158,8 @@ bool upb_EnumDef_MiniDescriptorEncode(const upb_EnumDef* e, upb_Arena* a,
if (!sorted) return false;
}
upb_MtDataEncoder_StartEnum(&s.e);
if (!_upb_DescState_Grow(&s, a)) return false;
s.ptr = upb_MtDataEncoder_StartEnum(&s.e, s.ptr);
// Duplicate values are allowed but we only encode each value once.
uint32_t previous = 0;

Loading…
Cancel
Save