Protocol Buffers - Google's data interchange format (gRPC dependency) https://developers.google.com/protocol-buffers/
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 

649 lines
21 KiB

// Protocol Buffers - Google's data interchange format
// Copyright 2023 Google LLC. All rights reserved.
//
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file or at
// https://developers.google.com/open-source/licenses/bsd
// We encode backwards, to avoid pre-computing lengths (one-pass encode).
#include "upb/wire/encode.h"
#include <setjmp.h>
#include <stdbool.h>
#include <stdint.h>
#include <string.h>
#include "upb/base/descriptor_constants.h"
#include "upb/base/string_view.h"
#include "upb/hash/common.h"
#include "upb/hash/str_table.h"
#include "upb/mem/arena.h"
#include "upb/message/array.h"
#include "upb/message/internal/accessors.h"
#include "upb/message/internal/array.h"
#include "upb/message/internal/extension.h"
#include "upb/message/internal/map.h"
#include "upb/message/internal/map_entry.h"
#include "upb/message/internal/map_sorter.h"
#include "upb/message/map.h"
#include "upb/message/message.h"
#include "upb/message/tagged_ptr.h"
#include "upb/mini_table/extension.h"
#include "upb/mini_table/field.h"
#include "upb/mini_table/internal/field.h"
#include "upb/mini_table/internal/message.h"
#include "upb/mini_table/message.h"
#include "upb/mini_table/sub.h"
#include "upb/wire/internal/constants.h"
#include "upb/wire/internal/swap.h"
#include "upb/wire/types.h"
// Must be last.
#include "upb/port/def.inc"
// Upper bound on the encoded size of one varint: a 64-bit value split into
// 7-bit groups needs at most ceil(64 / 7) = 10 bytes.
#define UPB_PB_VARINT_MAX_LEN 10
// Serializes `val` into `buf` as a varint: 7 payload bits per byte, least
// significant group first, with the high bit set on every byte except the
// last. Returns the number of bytes written (at least 1). The caller must
// guarantee that `buf` has room for the result.
UPB_NOINLINE
static size_t encode_varint64(uint64_t val, char* buf) {
  char* out = buf;
  for (;;) {
    const uint8_t low7 = (uint8_t)(val & 0x7fU);
    val >>= 7;
    if (val == 0) {
      // Final group: continuation bit stays clear.
      *out++ = low7;
      break;
    }
    *out++ = (uint8_t)(low7 | 0x80U);
  }
  return (size_t)(out - buf);
}
// ZigZag-encodes a signed 32-bit integer so that values of small magnitude
// (positive or negative) map to small unsigned values:
// 0 -> 0, -1 -> 1, 1 -> 2, -2 -> 3, ...
static uint32_t encode_zz32(int32_t n) {
  // All ones when n is negative, all zeros otherwise (relies on an
  // arithmetic right shift of the signed value).
  const uint32_t sign_fill = (uint32_t)(n >> 31);
  const uint32_t doubled = (uint32_t)n << 1;
  return doubled ^ sign_fill;
}
// ZigZag-encodes a signed 64-bit integer so that values of small magnitude
// (positive or negative) map to small unsigned values:
// 0 -> 0, -1 -> 1, 1 -> 2, -2 -> 3, ...
static uint64_t encode_zz64(int64_t n) {
  // All ones when n is negative, all zeros otherwise (relies on an
  // arithmetic right shift of the signed value).
  const uint64_t sign_fill = (uint64_t)(n >> 63);
  const uint64_t doubled = (uint64_t)n << 1;
  return doubled ^ sign_fill;
}
// Mutable state threaded through a single encode operation.
typedef struct {
// Result code; encode_err() stores the failure here before it longjmps.
upb_EncodeStatus status;
// Jump target used by encode_err(); armed in upb_Encoder_Encode().
jmp_buf err;
// Arena from which the output buffer is (re)allocated.
upb_Arena* arena;
// The output buffer spans [buf, limit). Data is written back to front, so
// the bytes produced so far occupy [ptr, limit) and ptr moves toward buf.
char *buf, *ptr, *limit;
// The `options` argument of upb_Encode(), tested against
// kUpb_EncodeOption_* flags.
int options;
// Remaining nesting budget; decremented while a sub-message or group is
// being encoded, and reaching zero raises MaxDepthExceeded.
int depth;
// Sorter state used when kUpb_EncodeOption_Deterministic is set.
_upb_mapsorter sorter;
} upb_encstate;
// Returns the smallest power of two that is >= `bytes`, with a floor of 128.
//
// If no such power of two is representable in size_t (i.e. `bytes` exceeds
// the largest power of two that fits), `bytes` itself is returned. The
// previous implementation doubled unconditionally, which wrapped to zero and
// looped forever for such inputs. Callers only need a result >= `bytes`, so
// handing the oversized request through lets the allocator reject it.
static size_t upb_roundup_pow2(size_t bytes) {
  size_t ret = 128;
  while (ret < bytes) {
    if (ret > SIZE_MAX / 2) {
      // Doubling would overflow size_t.
      return bytes;
    }
    ret *= 2;
  }
  return ret;
}
// Aborts the current encode: records the failure status `s` (which must not
// be kUpb_EncodeStatus_Ok) in the encoder state and jumps to e->err. Never
// returns to the caller.
UPB_NORETURN static void encode_err(upb_encstate* e, upb_EncodeStatus s) {
UPB_ASSERT(s != kUpb_EncodeStatus_Ok);
// The status must be stored before jumping; the code after the setjmp point
// reads it.
e->status = s;
UPB_LONGJMP(e->err, 1);
}
// Slow path of encode_reserve(): enlarges the output buffer so that `bytes`
// more bytes fit in front of the data already written, then claims those
// bytes by moving e->ptr back. Raises kUpb_EncodeStatus_OutOfMemory via
// encode_err() if the arena cannot satisfy the request.
UPB_NOINLINE
static void encode_growbuffer(upb_encstate* e, size_t bytes) {
  const size_t written = e->limit - e->ptr;
  const size_t old_capacity = e->limit - e->buf;
  const size_t new_capacity = upb_roundup_pow2(bytes + written);

  char* grown =
      upb_Arena_Realloc(e->arena, e->buf, old_capacity, new_capacity);
  if (!grown) encode_err(e, kUpb_EncodeStatus_OutOfMemory);

  char* const new_limit = grown + new_capacity;

  // The encoder fills the buffer from the back, so the existing contents must
  // sit at the end of the new allocation, whereas realloc() leaves them at
  // the beginning.
  // TODO: This copies the data twice; a realloc() variant that copies to the
  // end of the new buffer would avoid that.
  if (old_capacity > 0) {
    memmove(new_limit - old_capacity, e->buf, old_capacity);
  }

  e->buf = grown;
  e->limit = new_limit;
  // Skip back over the data already written, then over the newly reserved
  // `bytes`.
  e->ptr = new_limit - written - bytes;
}
/* Claims "bytes" bytes immediately in front of the data written so far and
 * leaves e->ptr pointing at them. Grows the buffer first when there is not
 * enough free space; an allocation failure does not return here but is
 * reported through encode_err() from encode_growbuffer(). */
UPB_FORCEINLINE
static void encode_reserve(upb_encstate* e, size_t bytes) {
  const size_t headroom = (size_t)(e->ptr - e->buf);
  if (headroom >= bytes) {
    e->ptr -= bytes;
  } else {
    /* encode_growbuffer() also moves e->ptr back by "bytes". */
    encode_growbuffer(e, bytes);
  }
}
/* Copies "len" bytes from "data" into the output, directly in front of what
 * has been written so far. Reserving the space and moving e->ptr are handled
 * here. */
static void encode_bytes(upb_encstate* e, const void* data, size_t len) {
  /* Nothing to do for an empty write. Skipping it also avoids calling
   * memcpy() with a possibly-NULL pointer, which is undefined behavior even
   * when the length is zero. */
  if (len != 0) {
    encode_reserve(e, len);
    memcpy(e->ptr, data, len);
  }
}
// Writes `val` as an 8-byte fixed-width field.
// NOTE(review): _upb_BigEndian_Swap64() presumably byte-swaps only on
// big-endian hosts so the output is little-endian -- confirm in
// upb/wire/internal/swap.h.
static void encode_fixed64(upb_encstate* e, uint64_t val) {
  const uint64_t wire = _upb_BigEndian_Swap64(val);
  encode_bytes(e, &wire, sizeof(wire));
}
// Writes `val` as a 4-byte fixed-width field.
// NOTE(review): _upb_BigEndian_Swap32() presumably byte-swaps only on
// big-endian hosts so the output is little-endian -- confirm in
// upb/wire/internal/swap.h.
static void encode_fixed32(upb_encstate* e, uint32_t val) {
  const uint32_t wire = _upb_BigEndian_Swap32(val);
  encode_bytes(e, &wire, sizeof(wire));
}
// General-purpose varint writer (the fast path lives in encode_varint()).
// Reserves the maximum varint size, encodes at the start of that region, then
// slides the result to the end of the region so it sits flush against the
// data already written. Only the bytes actually used stay claimed.
UPB_NOINLINE
static void encode_longvarint(upb_encstate* e, uint64_t val) {
  encode_reserve(e, UPB_PB_VARINT_MAX_LEN);

  char* const scratch = e->ptr;
  const size_t used = encode_varint64(val, scratch);
  char* const dest = scratch + (UPB_PB_VARINT_MAX_LEN - used);

  // Source and destination may overlap, hence memmove().
  memmove(dest, scratch, used);
  e->ptr = dest;
}
// Writes `val` as a varint in front of the data written so far.
// Values below 128 need a single byte and are stored directly when at least
// one free byte is available; everything else goes through
// encode_longvarint().
UPB_FORCEINLINE
static void encode_varint(upb_encstate* e, uint64_t val) {
  if (val >= 128 || e->ptr == e->buf) {
    encode_longvarint(e, val);
    return;
  }
  e->ptr -= 1;
  e->ptr[0] = (char)val;
}
// Writes a double as a 64-bit fixed-width field by reinterpreting its object
// representation as an unsigned integer.
static void encode_double(upb_encstate* e, double d) {
  uint64_t bits;
  UPB_ASSERT(sizeof(d) == sizeof(bits));
  // memcpy() is the well-defined way to reinterpret the bytes.
  memcpy(&bits, &d, sizeof(bits));
  encode_fixed64(e, bits);
}
// Writes a float as a 32-bit fixed-width field by reinterpreting its object
// representation as an unsigned integer.
static void encode_float(upb_encstate* e, float d) {
  uint32_t bits;
  UPB_ASSERT(sizeof(d) == sizeof(bits));
  // memcpy() is the well-defined way to reinterpret the bytes.
  memcpy(&bits, &d, sizeof(bits));
  encode_fixed32(e, bits);
}
// Writes a field tag: the field number shifted left by three bits, combined
// with the wire type in the low three bits, encoded as a varint.
static void encode_tag(upb_encstate* e, uint32_t field_number,
                       uint8_t wire_type) {
  const uint32_t tag = (field_number << 3) | wire_type;
  encode_varint(e, tag);
}
// Writes every element of `arr` as a fixed-width value of `elem_size` bytes
// (4 or 8).
//
// A non-zero `tag` is written in front of each element (non-packed encoding);
// a zero `tag` emits the bare elements only. Elements are visited last to
// first because the output is built back to front. When no per-element tag is
// needed and _upb_IsLittleEndian() reports true, the whole array is copied in
// one call.
//
// The array must not be empty; the caller is expected to have checked that.
static void encode_fixedarray(upb_encstate* e, const upb_Array* arr,
                              size_t elem_size, uint32_t tag) {
  const size_t total_bytes = arr->size * elem_size;
  const char* const first = _upb_array_constptr(arr);

  if (!tag && _upb_IsLittleEndian()) {
    encode_bytes(e, first, total_bytes);
    return;
  }

  const char* cur = first + total_bytes - elem_size;
  for (;;) {
    if (elem_size == 4) {
      uint32_t elem32;
      memcpy(&elem32, cur, sizeof(elem32));
      elem32 = _upb_BigEndian_Swap32(elem32);
      encode_bytes(e, &elem32, elem_size);
    } else {
      UPB_ASSERT(elem_size == 8);
      uint64_t elem64;
      memcpy(&elem64, cur, sizeof(elem64));
      elem64 = _upb_BigEndian_Swap64(elem64);
      encode_bytes(e, &elem64, elem_size);
    }
    if (tag) encode_varint(e, tag);
    if (cur == first) break;
    cur -= elem_size;
  }
}
// Forward declaration: encode_message() and the field encoders below call
// each other recursively when encoding nested messages.
static void encode_message(upb_encstate* e, const upb_Message* msg,
const upb_MiniTable* m, size_t* size);
// Encodes the message referenced by `tagged` and stores the number of bytes
// it produced in `*size`. When upb_TaggedMessagePtr_IsEmpty() reports true,
// the message is encoded against the empty MiniTable instead of `m`.
static void encode_TaggedMessagePtr(upb_encstate* e,
                                    upb_TaggedMessagePtr tagged,
                                    const upb_MiniTable* m, size_t* size) {
  const upb_MiniTable* table = upb_TaggedMessagePtr_IsEmpty(tagged)
                                   ? UPB_PRIVATE(_upb_MiniTable_Empty)()
                                   : m;
  encode_message(e, _upb_TaggedMessagePtr_GetMessage(tagged), table, size);
}
// Encodes one non-repeated field value stored at `_field_mem`, followed by
// its tag. Output is built back to front, so the payload is written first and
// the tag last; in the finished buffer the tag therefore precedes the payload.
//
// `subs` provides the sub-message MiniTable for group/message fields (indexed
// by the field's submsg_index) and `f` selects the encoding through its
// descriptor type. A zero sub-message pointer produces no output at all, not
// even a tag. Raises kUpb_EncodeStatus_MaxDepthExceeded through encode_err()
// when the nesting budget in e->depth runs out.
static void encode_scalar(upb_encstate* e, const void* _field_mem,
const upb_MiniTableSub* subs,
const upb_MiniTableField* f) {
const char* field_mem = _field_mem;
int wire_type;
// CASE(ctype, type, wtype, encodeval): loads the field as `ctype` into `val`,
// writes `encodeval` with encode_<type>(), records `wtype` as the wire type
// and breaks out of the switch.
#define CASE(ctype, type, wtype, encodeval) \
{ \
ctype val = *(ctype*)field_mem; \
encode_##type(e, encodeval); \
wire_type = wtype; \
break; \
}
switch (f->UPB_PRIVATE(descriptortype)) {
case kUpb_FieldType_Double:
CASE(double, double, kUpb_WireType_64Bit, val);
case kUpb_FieldType_Float:
CASE(float, float, kUpb_WireType_32Bit, val);
case kUpb_FieldType_Int64:
case kUpb_FieldType_UInt64:
CASE(uint64_t, varint, kUpb_WireType_Varint, val);
case kUpb_FieldType_UInt32:
CASE(uint32_t, varint, kUpb_WireType_Varint, val);
case kUpb_FieldType_Int32:
case kUpb_FieldType_Enum:
// The (int64_t) cast sign-extends, so negative values are written as full
// 64-bit varints.
CASE(int32_t, varint, kUpb_WireType_Varint, (int64_t)val);
case kUpb_FieldType_SFixed64:
case kUpb_FieldType_Fixed64:
CASE(uint64_t, fixed64, kUpb_WireType_64Bit, val);
case kUpb_FieldType_Fixed32:
case kUpb_FieldType_SFixed32:
CASE(uint32_t, fixed32, kUpb_WireType_32Bit, val);
case kUpb_FieldType_Bool:
CASE(bool, varint, kUpb_WireType_Varint, val);
case kUpb_FieldType_SInt32:
CASE(int32_t, varint, kUpb_WireType_Varint, encode_zz32(val));
case kUpb_FieldType_SInt64:
CASE(int64_t, varint, kUpb_WireType_Varint, encode_zz64(val));
case kUpb_FieldType_String:
case kUpb_FieldType_Bytes: {
// Length-delimited: contents first, then the length prefix in front of them.
upb_StringView view = *(upb_StringView*)field_mem;
encode_bytes(e, view.data, view.size);
encode_varint(e, view.size);
wire_type = kUpb_WireType_Delimited;
break;
}
case kUpb_FieldType_Group: {
size_t size;
upb_TaggedMessagePtr submsg = *(upb_TaggedMessagePtr*)field_mem;
const upb_MiniTable* subm =
upb_MiniTableSub_Message(subs[f->UPB_PRIVATE(submsg_index)]);
// Unset sub-message: emit nothing.
if (submsg == 0) {
return;
}
if (--e->depth == 0) encode_err(e, kUpb_EncodeStatus_MaxDepthExceeded);
// The end-group tag is written first so that it follows the group body in
// the finished buffer; the start-group tag is emitted after the switch.
encode_tag(e, f->number, kUpb_WireType_EndGroup);
encode_TaggedMessagePtr(e, submsg, subm, &size);
wire_type = kUpb_WireType_StartGroup;
// Restore the nesting budget consumed above.
e->depth++;
break;
}
case kUpb_FieldType_Message: {
size_t size;
upb_TaggedMessagePtr submsg = *(upb_TaggedMessagePtr*)field_mem;
const upb_MiniTable* subm =
upb_MiniTableSub_Message(subs[f->UPB_PRIVATE(submsg_index)]);
// Unset sub-message: emit nothing.
if (submsg == 0) {
return;
}
if (--e->depth == 0) encode_err(e, kUpb_EncodeStatus_MaxDepthExceeded);
// Body first, then its byte length as the length prefix.
encode_TaggedMessagePtr(e, submsg, subm, &size);
encode_varint(e, size);
wire_type = kUpb_WireType_Delimited;
// Restore the nesting budget consumed above.
e->depth++;
break;
}
default:
UPB_UNREACHABLE();
}
#undef CASE
// Written last, so it ends up in front of the payload.
encode_tag(e, f->number, wire_type);
}
// Encodes the repeated field `f` of `msg`. A NULL or empty array produces no
// output.
//
// Elements are visited last to first because the output is built back to
// front. Numeric element types honor the field's packed setting: when packed,
// the bare elements are written and then wrapped in a single length-delimited
// record; otherwise every element is preceded by its own tag. String, bytes,
// group and message elements always get one tagged record each and return
// before the packed epilogue.
//
// `subs` provides the sub-message MiniTable for group/message elements.
// Raises kUpb_EncodeStatus_MaxDepthExceeded through encode_err() when the
// nesting budget in e->depth runs out.
static void encode_array(upb_encstate* e, const upb_Message* msg,
                         const upb_MiniTableSub* subs,
                         const upb_MiniTableField* f) {
  const upb_Array* arr = *UPB_PTR_AT(msg, f->offset, upb_Array*);
  bool packed = upb_MiniTableField_IsPacked(f);
  // Bytes already in the buffer; used to measure the packed payload below.
  size_t pre_len = e->limit - e->ptr;

  if (arr == NULL || arr->size == 0) {
    return;
  }

// VARINT_CASE(ctype, encode): walks the array backwards as `ctype`, writing
// `encode` (an expression in terms of `ptr`) as a varint for each element,
// preceded by the field tag when the field is not packed. Ends with `break`.
#define VARINT_CASE(ctype, encode)                                       \
  {                                                                      \
    const ctype* start = _upb_array_constptr(arr);                       \
    const ctype* ptr = start + arr->size;                                \
    uint32_t tag = packed ? 0 : (f->number << 3) | kUpb_WireType_Varint; \
    do {                                                                 \
      ptr--;                                                             \
      encode_varint(e, encode);                                          \
      if (tag) encode_varint(e, tag);                                    \
    } while (ptr != start);                                              \
  }                                                                      \
  break;

// TAG(wire_type): per-element tag for non-packed fields, 0 for packed ones.
#define TAG(wire_type) (packed ? 0 : (f->number << 3 | wire_type))

  switch (f->UPB_PRIVATE(descriptortype)) {
    case kUpb_FieldType_Double:
      encode_fixedarray(e, arr, sizeof(double), TAG(kUpb_WireType_64Bit));
      break;
    case kUpb_FieldType_Float:
      encode_fixedarray(e, arr, sizeof(float), TAG(kUpb_WireType_32Bit));
      break;
    case kUpb_FieldType_SFixed64:
    case kUpb_FieldType_Fixed64:
      encode_fixedarray(e, arr, sizeof(uint64_t), TAG(kUpb_WireType_64Bit));
      break;
    case kUpb_FieldType_Fixed32:
    case kUpb_FieldType_SFixed32:
      encode_fixedarray(e, arr, sizeof(uint32_t), TAG(kUpb_WireType_32Bit));
      break;
    case kUpb_FieldType_Int64:
    case kUpb_FieldType_UInt64:
      VARINT_CASE(uint64_t, *ptr);
    case kUpb_FieldType_UInt32:
      VARINT_CASE(uint32_t, *ptr);
    case kUpb_FieldType_Int32:
    case kUpb_FieldType_Enum:
      // The (int64_t) cast sign-extends, so negative values are written as
      // full 64-bit varints.
      VARINT_CASE(int32_t, (int64_t)*ptr);
    case kUpb_FieldType_Bool:
      VARINT_CASE(bool, *ptr);
    case kUpb_FieldType_SInt32:
      VARINT_CASE(int32_t, encode_zz32(*ptr));
    case kUpb_FieldType_SInt64:
      VARINT_CASE(int64_t, encode_zz64(*ptr));
    case kUpb_FieldType_String:
    case kUpb_FieldType_Bytes: {
      const upb_StringView* start = _upb_array_constptr(arr);
      const upb_StringView* ptr = start + arr->size;
      do {
        ptr--;
        // Contents, then length prefix, then tag (reverse of wire order).
        encode_bytes(e, ptr->data, ptr->size);
        encode_varint(e, ptr->size);
        encode_tag(e, f->number, kUpb_WireType_Delimited);
      } while (ptr != start);
      return;
    }
    case kUpb_FieldType_Group: {
      const upb_TaggedMessagePtr* start = _upb_array_constptr(arr);
      const upb_TaggedMessagePtr* ptr = start + arr->size;
      const upb_MiniTable* subm =
          upb_MiniTableSub_Message(subs[f->UPB_PRIVATE(submsg_index)]);
      // One level of nesting budget covers all elements of the array.
      if (--e->depth == 0) encode_err(e, kUpb_EncodeStatus_MaxDepthExceeded);
      do {
        size_t size;
        ptr--;
        // End tag, body, start tag (reverse of wire order).
        encode_tag(e, f->number, kUpb_WireType_EndGroup);
        encode_TaggedMessagePtr(e, *ptr, subm, &size);
        encode_tag(e, f->number, kUpb_WireType_StartGroup);
      } while (ptr != start);
      e->depth++;
      return;
    }
    case kUpb_FieldType_Message: {
      const upb_TaggedMessagePtr* start = _upb_array_constptr(arr);
      const upb_TaggedMessagePtr* ptr = start + arr->size;
      const upb_MiniTable* subm =
          upb_MiniTableSub_Message(subs[f->UPB_PRIVATE(submsg_index)]);
      // One level of nesting budget covers all elements of the array.
      if (--e->depth == 0) encode_err(e, kUpb_EncodeStatus_MaxDepthExceeded);
      do {
        size_t size;
        ptr--;
        // Body, then length prefix, then tag (reverse of wire order).
        encode_TaggedMessagePtr(e, *ptr, subm, &size);
        encode_varint(e, size);
        encode_tag(e, f->number, kUpb_WireType_Delimited);
      } while (ptr != start);
      e->depth++;
      return;
    }
  }

// Both helper macros are local to this function; undefine them so they do
// not leak into the rest of the translation unit.
#undef VARINT_CASE
#undef TAG

  if (packed) {
    // Wrap everything written since entry in one length-delimited record.
    encode_varint(e, e->limit - e->ptr - pre_len);
    encode_tag(e, f->number, kUpb_WireType_Delimited);
  }
}
// Encodes one map entry as a length-delimited record tagged with field
// `number`. `layout` is the MiniTable of the entry message: its field 0
// describes the key and field 1 the value. The value is written before the
// key because the output is built back to front.
static void encode_mapentry(upb_encstate* e, uint32_t number,
                            const upb_MiniTable* layout,
                            const upb_MapEntry* ent) {
  const size_t bytes_before = e->limit - e->ptr;
  const upb_MiniTableField* entry_fields = layout->UPB_PRIVATE(fields);

  encode_scalar(e, &ent->data.v, layout->UPB_PRIVATE(subs), &entry_fields[1]);
  encode_scalar(e, &ent->data.k, layout->UPB_PRIVATE(subs), &entry_fields[0]);

  // The buffer may have been reallocated meanwhile, so the entry length is
  // measured as the growth of the written region rather than by comparing
  // pointers.
  const size_t entry_len = (size_t)(e->limit - e->ptr) - bytes_before;
  encode_varint(e, entry_len);
  encode_tag(e, number, kUpb_WireType_Delimited);
}
// Encodes the map field `f` of `msg`, one record per entry. A NULL map
// produces no output. With kUpb_EncodeOption_Deterministic the entries are
// visited in the order produced by the map sorter; otherwise they are visited
// in the table's own iteration order.
static void encode_map(upb_encstate* e, const upb_Message* msg,
                       const upb_MiniTableSub* subs,
                       const upb_MiniTableField* f) {
  const upb_Map* map = *UPB_PTR_AT(msg, f->offset, const upb_Map*);
  const upb_MiniTable* layout =
      upb_MiniTableSub_Message(subs[f->UPB_PRIVATE(submsg_index)]);
  UPB_ASSERT(layout->UPB_PRIVATE(field_count) == 2);
  if (!map) return;

  if ((e->options & kUpb_EncodeOption_Deterministic) == 0) {
    // Unordered walk over the underlying string table.
    upb_StringView raw_key;
    upb_value raw_val;
    intptr_t pos = UPB_STRTABLE_BEGIN;
    while (upb_strtable_next2(&map->table, &raw_key, &raw_val, &pos)) {
      upb_MapEntry entry;
      _upb_map_fromkey(raw_key, &entry.data.k, map->key_size);
      _upb_map_fromvalue(raw_val, &entry.data.v, map->val_size);
      encode_mapentry(e, f->number, layout, &entry);
    }
    return;
  }

  // Deterministic walk: the sorter is told the key's descriptor type.
  _upb_sortedmap sorted;
  _upb_mapsorter_pushmap(
      &e->sorter, layout->UPB_PRIVATE(fields)[0].UPB_PRIVATE(descriptortype),
      map, &sorted);
  upb_MapEntry entry;
  while (_upb_sortedmap_next(&e->sorter, map, &sorted, &entry)) {
    encode_mapentry(e, f->number, layout, &entry);
  }
  _upb_mapsorter_popmap(&e->sorter, &sorted);
}
// Decides whether field `f` of `msg` should be written. The sign of
// f->presence selects the rule:
//   > 0  the field has a hasbit (proto2 presence); encode when it is set.
//   < 0  the field belongs to a oneof; encode when the oneof case equals
//        this field's number.
//   = 0  proto3 presence or map/array; encode when the stored representation
//        is non-zero (or, for strings, non-empty).
// `e` and `subs` are not used.
static bool encode_shouldencode(upb_encstate* e, const upb_Message* msg,
                                const upb_MiniTableSub* subs,
                                const upb_MiniTableField* f) {
  if (f->presence > 0) {
    return _upb_Message_GetHasbitByField(msg, f);
  }
  if (f->presence < 0) {
    return _upb_Message_GetOneofCase(msg, f) == f->number;
  }

  // No explicit presence: compare the raw storage against zero. memcpy() is
  // used for the loads rather than dereferencing a cast pointer.
  const void* slot = UPB_PTR_AT(msg, f->offset, void);
  switch (UPB_PRIVATE(_upb_MiniTableField_GetRep)(f)) {
    case kUpb_FieldRep_StringView: {
      const upb_StringView* view = (const upb_StringView*)slot;
      return view->size != 0;
    }
    case kUpb_FieldRep_8Byte: {
      uint64_t raw64;
      memcpy(&raw64, slot, 8);
      return raw64 != 0;
    }
    case kUpb_FieldRep_4Byte: {
      uint32_t raw32;
      memcpy(&raw32, slot, 4);
      return raw32 != 0;
    }
    case kUpb_FieldRep_1Byte: {
      char raw8;
      memcpy(&raw8, slot, 1);
      return raw8 != 0;
    }
    default:
      UPB_UNREACHABLE();
  }
}
// Dispatches on the field's mode (scalar, array or map) to the matching
// encoder. For scalars the address of the field's storage inside `msg` is
// passed on; the array and map encoders receive `msg` itself.
static void encode_field(upb_encstate* e, const upb_Message* msg,
                         const upb_MiniTableSub* subs,
                         const upb_MiniTableField* field) {
  const int mode = UPB_PRIVATE(_upb_MiniTableField_Mode)(field);

  if (mode == kUpb_FieldMode_Array) {
    encode_array(e, msg, subs, field);
  } else if (mode == kUpb_FieldMode_Map) {
    encode_map(e, msg, subs, field);
  } else if (mode == kUpb_FieldMode_Scalar) {
    encode_scalar(e, UPB_PTR_AT(msg, field->offset, void), subs, field);
  } else {
    UPB_UNREACHABLE();
  }
}
// Encodes one extension as a MessageSet item. The calls below run in reverse
// of the final byte order because the output is built back to front; the
// finished bytes read:
//   start-group(Item), TypeId tag, extension number,
//   Message tag, length, message body, end-group(Item).
static void encode_msgset_item(upb_encstate* e,
const upb_Message_Extension* ext) {
size_t size;
encode_tag(e, kUpb_MsgSet_Item, kUpb_WireType_EndGroup);
// Message body, then its length prefix and tag.
encode_message(e, ext->data.ptr,
upb_MiniTableExtension_GetSubMessage(ext->ext), &size);
encode_varint(e, size);
encode_tag(e, kUpb_MsgSet_Message, kUpb_WireType_Delimited);
// The extension's field number is stored as the item's type id.
encode_varint(e, upb_MiniTableExtension_Number(ext->ext));
encode_tag(e, kUpb_MsgSet_TypeId, kUpb_WireType_Varint);
encode_tag(e, kUpb_MsgSet_Item, kUpb_WireType_StartGroup);
}
// Encodes a single extension. When `is_message_set` is true it is written in
// MessageSet item form; otherwise it is encoded like a regular field, using
// the extension's own field descriptor and sub-table and treating the
// extension's data as the field storage.
static void encode_ext(upb_encstate* e, const upb_Message_Extension* ext,
                       bool is_message_set) {
  if (UPB_UNLIKELY(is_message_set)) {
    encode_msgset_item(e, ext);
    return;
  }
  encode_field(e, &ext->data, &ext->ext->UPB_PRIVATE(sub),
               &ext->ext->UPB_PRIVATE(field));
}
// Encodes `msg` according to MiniTable `m` and stores the number of bytes
// produced in `*size`.
//
// Because the output is built back to front, the pieces are written in this
// order: unknown fields (unless kUpb_EncodeOption_SkipUnknown), extensions
// (if the message is extendable), then the regular fields from last to first.
// With kUpb_EncodeOption_CheckRequired, a message whose required fields are
// not all present raises kUpb_EncodeStatus_MissingRequired via encode_err().
static void encode_message(upb_encstate* e, const upb_Message* msg,
                           const upb_MiniTable* m, size_t* size) {
  const size_t bytes_before = e->limit - e->ptr;

  // Required-field check: the first 8 bytes of the message are compared
  // against the table's required mask.
  if ((e->options & kUpb_EncodeOption_CheckRequired) &&
      m->UPB_PRIVATE(required_count)) {
    uint64_t head;
    memcpy(&head, msg, 8);
    head = _upb_BigEndian_Swap64(head);
    if (UPB_PRIVATE(_upb_MiniTable_RequiredMask)(m) & ~head) {
      encode_err(e, kUpb_EncodeStatus_MissingRequired);
    }
  }

  // Unknown fields are copied through verbatim.
  if (!(e->options & kUpb_EncodeOption_SkipUnknown)) {
    size_t unknown_len;
    const char* unknown_data = upb_Message_GetUnknown(msg, &unknown_len);
    if (unknown_data) {
      encode_bytes(e, unknown_data, unknown_len);
    }
  }

  /* Encode all extensions together. Unlike C++, no attempt is made to keep
   * them in field number order relative to normal fields or even to each
   * other. */
  if (m->UPB_PRIVATE(ext) != kUpb_ExtMode_NonExtendable) {
    const bool as_message_set =
        m->UPB_PRIVATE(ext) == kUpb_ExtMode_IsMessageSet;
    size_t n_exts;
    const upb_Message_Extension* exts = _upb_Message_Getexts(msg, &n_exts);
    if (n_exts != 0) {
      if (!(e->options & kUpb_EncodeOption_Deterministic)) {
        for (size_t i = 0; i < n_exts; i++) {
          encode_ext(e, &exts[i], as_message_set);
        }
      } else {
        // NOTE(review): the result of _upb_mapsorter_pushexts() is not
        // checked here -- confirm whether it can fail.
        _upb_sortedmap sorted;
        _upb_mapsorter_pushexts(&e->sorter, exts, n_exts, &sorted);
        while (_upb_sortedmap_nextext(&e->sorter, &sorted, &exts)) {
          encode_ext(e, exts, as_message_set);
        }
        _upb_mapsorter_popmap(&e->sorter, &sorted);
      }
    }
  }

  // Regular fields, last to first.
  for (size_t i = m->UPB_PRIVATE(field_count); i > 0; i--) {
    const upb_MiniTableField* f = &m->UPB_PRIVATE(fields)[i - 1];
    if (encode_shouldencode(e, msg, m->UPB_PRIVATE(subs), f)) {
      encode_field(e, msg, m->UPB_PRIVATE(subs), f);
    }
  }

  *size = (size_t)(e->limit - e->ptr) - bytes_before;
}
// Runs the encode with an error handler installed and reports the result.
//
// On success, *buf points at the encoded bytes and *size holds their count;
// for an empty result *buf points at a static dummy byte so that it is still
// non-NULL. On failure (an encode_err() call anywhere below), *buf is NULL and
// *size is 0. The map sorter is destroyed in both cases. Returns the status
// stored in the encoder.
static upb_EncodeStatus upb_Encoder_Encode(upb_encstate* const encoder,
const void* const msg,
const upb_MiniTable* const l,
char** const buf,
size_t* const size) {
// Unfortunately we must continue to perform hackery here because there are
// code paths which blindly copy the returned pointer without bothering to
// check for errors until much later (b/235839510). So we still set *buf to
// NULL on error and we still set it to non-NULL on a successful empty result.
if (UPB_SETJMP(encoder->err) == 0) {
encode_message(encoder, msg, l, size);
// Total output size: everything between the write cursor and the buffer end.
*size = encoder->limit - encoder->ptr;
if (*size == 0) {
// Empty output: hand back a valid, non-NULL address.
static char ch;
*buf = &ch;
} else {
UPB_ASSERT(encoder->ptr);
*buf = encoder->ptr;
}
} else {
// Reached through UPB_LONGJMP in encode_err(), which has already stored the
// failure status.
UPB_ASSERT(encoder->status != kUpb_EncodeStatus_Ok);
*buf = NULL;
*size = 0;
}
_upb_mapsorter_destroy(&encoder->sorter);
return encoder->status;
}
// Encodes `msg`, described by MiniTable `l`, into memory allocated from
// `arena`.
//
// `options` carries kUpb_EncodeOption_* flags; its bits from 16 upward hold
// the maximum nesting depth, with 0 selecting
// kUpb_WireFormat_DefaultDepthLimit. The location and length of the encoded
// data are returned through `buf` and `size`. On failure *buf is NULL and
// *size is 0. Returns the resulting encode status.
upb_EncodeStatus upb_Encode(const void* msg, const upb_MiniTable* l,
                            int options, upb_Arena* arena, char** buf,
                            size_t* size) {
  const unsigned requested_depth = (unsigned)options >> 16;

  upb_encstate e;
  e.status = kUpb_EncodeStatus_Ok;
  e.arena = arena;
  e.options = options;

  // Start with no buffer at all; the first write allocates one.
  e.buf = NULL;
  e.ptr = NULL;
  e.limit = NULL;

  if (requested_depth != 0) {
    e.depth = (int)requested_depth;
  } else {
    e.depth = kUpb_WireFormat_DefaultDepthLimit;
  }

  _upb_mapsorter_init(&e.sorter);
  return upb_Encoder_Encode(&e, msg, l, buf, size);
}