|
|
|
/*
|
|
|
|
* Copyright (c) 2009-2021, Google LLC
|
|
|
|
* All rights reserved.
|
|
|
|
*
|
|
|
|
* Redistribution and use in source and binary forms, with or without
|
|
|
|
* modification, are permitted provided that the following conditions are met:
|
|
|
|
* * Redistributions of source code must retain the above copyright
|
|
|
|
* notice, this list of conditions and the following disclaimer.
|
|
|
|
* * Redistributions in binary form must reproduce the above copyright
|
|
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
|
|
* documentation and/or other materials provided with the distribution.
|
|
|
|
* * Neither the name of Google LLC nor the
|
|
|
|
* names of its contributors may be used to endorse or promote products
|
|
|
|
* derived from this software without specific prior written permission.
|
|
|
|
*
|
|
|
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
|
|
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
|
|
* ARE DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY DIRECT,
|
|
|
|
* INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
|
|
|
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
|
|
|
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
|
|
|
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
|
|
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
|
|
|
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include "upb/def.h"
|
|
|
|
|
|
|
|
#include <ctype.h>
|
|
|
|
#include <errno.h>
|
|
|
|
#include <setjmp.h>
|
|
|
|
#include <stdlib.h>
|
|
|
|
#include <string.h>
|
|
|
|
|
|
|
|
#include "google/protobuf/descriptor.upb.h"
|
|
|
|
#include "upb/reflection.h"
|
|
|
|
|
|
|
|
/* Must be last. */
|
|
|
|
#include "upb/port_def.inc"
|
|
|
|
|
|
|
|
/* Length-prefixed string. `len` holds the byte count and `str` marks where the
 * null-terminated character data begins; the data continues past the end of
 * the declared one-element array. */
typedef struct {
  size_t len;
  char str[1]; /* First byte of the string data; the rest follows. */
} str_t;
|
|
|
|
|
|
|
|
/* The upb core does not generally have a concept of default instances. However
|
|
|
|
* for descriptor options we make an exception since the max size is known and
|
|
|
|
* modest (<200 bytes). All types can share a default instance since it is
|
|
|
|
* initialized to zeroes.
|
|
|
|
*
|
|
|
|
* We have to allocate an extra pointer for upb's internal metadata. */
|
|
|
|
/* Zero-filled backing storage for the shared default options instance:
 * _UPB_MAXOPT_SIZE bytes preceded by one pointer-sized slot. */
static const char opt_default_buf[_UPB_MAXOPT_SIZE + sizeof(void*)] = {0};
/* Address of the default options instance, just past the leading slot. */
static const char* opt_default = opt_default_buf + sizeof(void*);
|
|
|
|
|
|
|
|
struct upb_FieldDef {
|
|
|
|
const google_protobuf_FieldOptions* opts;
|
|
|
|
const upb_FileDef* file;
|
|
|
|
const upb_MessageDef* msgdef;
|
|
|
|
const char* full_name;
|
|
|
|
const char* json_name;
|
|
|
|
union {
|
|
|
|
int64_t sint;
|
|
|
|
uint64_t uint;
|
|
|
|
double dbl;
|
|
|
|
float flt;
|
|
|
|
bool boolean;
|
|
|
|
str_t* str;
|
|
|
|
} defaultval;
|
|
|
|
union {
|
|
|
|
const upb_OneofDef* oneof;
|
|
|
|
const upb_MessageDef* extension_scope;
|
|
|
|
} scope;
|
|
|
|
union {
|
|
|
|
const upb_MessageDef* msgdef;
|
|
|
|
const upb_EnumDef* enumdef;
|
|
|
|
const google_protobuf_FieldDescriptorProto* unresolved;
|
|
|
|
} sub;
|
|
|
|
uint32_t number_;
|
|
|
|
uint16_t index_;
|
|
|
|
uint16_t layout_index; /* Index into msgdef->layout->fields or file->exts */
|
|
|
|
bool has_default;
|
|
|
|
bool is_extension_;
|
|
|
|
bool packed_;
|
|
|
|
bool proto3_optional_;
|
|
|
|
bool has_json_name_;
|
|
|
|
upb_FieldType type_;
|
|
|
|
upb_Label label_;
|
|
|
|
};
|
|
|
|
|
|
|
|
struct upb_ExtensionRange {
|
|
|
|
const google_protobuf_ExtensionRangeOptions* opts;
|
|
|
|
int32_t start;
|
|
|
|
int32_t end;
|
|
|
|
};
|
|
|
|
|
|
|
|
struct upb_MessageDef {
|
|
|
|
const google_protobuf_MessageOptions* opts;
|
|
|
|
const upb_MiniTable* layout;
|
|
|
|
const upb_FileDef* file;
|
|
|
|
const upb_MessageDef* containing_type;
|
|
|
|
const char* full_name;
|
|
|
|
|
|
|
|
/* Tables for looking up fields by number and name. */
|
|
|
|
upb_inttable itof;
|
|
|
|
upb_strtable ntof;
|
|
|
|
|
|
|
|
/* All nested defs.
|
|
|
|
* MEM: We could save some space here by putting nested defs in a contigous
|
|
|
|
* region and calculating counts from offets or vice-versa. */
|
|
|
|
const upb_FieldDef* fields;
|
|
|
|
const upb_OneofDef* oneofs;
|
|
|
|
const upb_ExtensionRange* ext_ranges;
|
|
|
|
const upb_MessageDef* nested_msgs;
|
|
|
|
const upb_EnumDef* nested_enums;
|
|
|
|
const upb_FieldDef* nested_exts;
|
|
|
|
int field_count;
|
|
|
|
int real_oneof_count;
|
|
|
|
int oneof_count;
|
|
|
|
int ext_range_count;
|
|
|
|
int nested_msg_count;
|
|
|
|
int nested_enum_count;
|
|
|
|
int nested_ext_count;
|
|
|
|
bool in_message_set;
|
|
|
|
upb_WellKnown well_known_type;
|
|
|
|
};
|
|
|
|
|
|
|
|
struct upb_EnumDef {
|
|
|
|
const google_protobuf_EnumOptions* opts;
|
|
|
|
const upb_MiniTable_Enum* layout; // Only for proto2.
|
|
|
|
const upb_FileDef* file;
|
|
|
|
const upb_MessageDef* containing_type; // Could be merged with "file".
|
|
|
|
const char* full_name;
|
|
|
|
upb_strtable ntoi;
|
|
|
|
upb_inttable iton;
|
|
|
|
const upb_EnumValueDef* values;
|
|
|
|
int value_count;
|
|
|
|
int32_t defaultval;
|
|
|
|
};
|
|
|
|
|
|
|
|
struct upb_EnumValueDef {
|
|
|
|
const google_protobuf_EnumValueOptions* opts;
|
|
|
|
const upb_EnumDef* parent;
|
|
|
|
const char* full_name;
|
|
|
|
int32_t number;
|
|
|
|
};
|
|
|
|
|
|
|
|
struct upb_OneofDef {
|
|
|
|
const google_protobuf_OneofOptions* opts;
|
|
|
|
const upb_MessageDef* parent;
|
|
|
|
const char* full_name;
|
|
|
|
int field_count;
|
|
|
|
bool synthetic;
|
|
|
|
const upb_FieldDef** fields;
|
|
|
|
upb_strtable ntof;
|
|
|
|
upb_inttable itof;
|
|
|
|
};
|
|
|
|
|
|
|
|
struct upb_FileDef {
|
|
|
|
const google_protobuf_FileOptions* opts;
|
|
|
|
const char* name;
|
|
|
|
const char* package;
|
|
|
|
|
|
|
|
const upb_FileDef** deps;
|
|
|
|
const int32_t* public_deps;
|
|
|
|
const int32_t* weak_deps;
|
|
|
|
const upb_MessageDef* top_lvl_msgs;
|
|
|
|
const upb_EnumDef* top_lvl_enums;
|
|
|
|
const upb_FieldDef* top_lvl_exts;
|
|
|
|
const upb_ServiceDef* services;
|
|
|
|
const upb_MiniTable_Extension** ext_layouts;
|
|
|
|
const upb_DefPool* symtab;
|
|
|
|
|
|
|
|
int dep_count;
|
|
|
|
int public_dep_count;
|
|
|
|
int weak_dep_count;
|
|
|
|
int top_lvl_msg_count;
|
|
|
|
int top_lvl_enum_count;
|
|
|
|
int top_lvl_ext_count;
|
|
|
|
int service_count;
|
|
|
|
int ext_count; /* All exts in the file. */
|
|
|
|
upb_Syntax syntax;
|
|
|
|
};
|
|
|
|
|
|
|
|
struct upb_MethodDef {
|
|
|
|
const google_protobuf_MethodOptions* opts;
|
|
|
|
upb_ServiceDef* service;
|
|
|
|
const char* full_name;
|
|
|
|
const upb_MessageDef* input_type;
|
|
|
|
const upb_MessageDef* output_type;
|
|
|
|
bool client_streaming;
|
|
|
|
bool server_streaming;
|
|
|
|
};
|
|
|
|
|
|
|
|
struct upb_ServiceDef {
|
|
|
|
const google_protobuf_ServiceOptions* opts;
|
|
|
|
const upb_FileDef* file;
|
|
|
|
const char* full_name;
|
|
|
|
upb_MethodDef* methods;
|
|
|
|
int method_count;
|
|
|
|
int index;
|
|
|
|
};
|
|
|
|
|
|
|
|
struct upb_DefPool {
|
|
|
|
upb_Arena* arena;
|
|
|
|
upb_strtable syms; /* full_name -> packed def ptr */
|
|
|
|
upb_strtable files; /* file_name -> upb_FileDef* */
|
|
|
|
upb_inttable exts; /* upb_MiniTable_Extension* -> upb_FieldDef* */
|
|
|
|
upb_ExtensionRegistry* extreg;
|
|
|
|
size_t bytes_loaded;
|
|
|
|
};
|
|
|
|
|
|
|
|
/* Inside a symtab we store tagged pointers to specific def types. */
|
|
|
|
/* Type tags kept in the low bits of a tagged def pointer. The numeric values
 * are reused across tables, so a tag is only meaningful relative to the table
 * the pointer came from. */
typedef enum {
  UPB_DEFTYPE_MASK = 7, /* Bits of the pointer that hold the tag. */

  /* Only inside symtab table. */
  UPB_DEFTYPE_EXT = 0,
  UPB_DEFTYPE_MSG = 1,
  UPB_DEFTYPE_ENUM = 2,
  UPB_DEFTYPE_ENUMVAL = 3,
  UPB_DEFTYPE_SERVICE = 4,

  /* Only inside message table. */
  UPB_DEFTYPE_FIELD = 0,
  UPB_DEFTYPE_ONEOF = 1,
  UPB_DEFTYPE_FIELD_JSONNAME = 2,

  /* Only inside file table. */
  UPB_DEFTYPE_FILE = 0,
  UPB_DEFTYPE_LAYOUT = 1
} upb_deftype_t;
|
|
|
|
|
|
|
|
/* Numeric value 0; presumably marks a field whose type has not been given --
 * TODO confirm against the code that consumes it. */
#define FIELD_TYPE_UNSPECIFIED 0
|
|
|
|
|
|
|
|
/* Extracts the type tag (the bits under UPB_DEFTYPE_MASK) from a tagged def
 * pointer stored in `v`. */
static upb_deftype_t deftype(upb_value v) {
  const uintptr_t tagged = (uintptr_t)upb_value_getconstptr(v);
  return tagged & UPB_DEFTYPE_MASK;
}
|
|
|
|
|
|
|
|
/* Returns the untagged pointer held in `v` if its tag equals `type`, or NULL
 * when the tag is different. */
static const void* unpack_def(upb_value v, upb_deftype_t type) {
  const uintptr_t tagged = (uintptr_t)upb_value_getconstptr(v);
  if ((tagged & UPB_DEFTYPE_MASK) != type) {
    return NULL;
  }
  /* Clear the tag bits to recover the original address. */
  return (const void*)(tagged & ~UPB_DEFTYPE_MASK);
}
|
|
|
|
|
|
|
|
/* Builds a tagged pointer value by OR-ing `type` into the low bits of `ptr`.
 * Asserts that those bits of `ptr` are clear beforehand. */
static upb_value pack_def(const void* ptr, upb_deftype_t type) {
  const uintptr_t addr = (uintptr_t)ptr;
  UPB_ASSERT((addr & UPB_DEFTYPE_MASK) == 0);
  return upb_value_constptr((const void*)(addr | type));
}
|
|
|
|
|
|
|
|
/* isalpha() etc. from <ctype.h> are locale-dependent, which we don't want. */
|
|
|
|
/* Returns true when `c` lies in the inclusive range [low, high]. */
static bool upb_isbetween(uint8_t c, uint8_t low, uint8_t high) {
  return !(c < low || c > high);
}
|
|
|
|
|
|
|
|
/* Sets bit 0x20 of `ch`, which lower-cases an ASCII letter. A letter result
 * means the input was a letter; any other result carries no meaning, since
 * non-letters may have been transformed unpredictably. */
static char upb_ascii_lower(char ch) {
  const int case_bit = 0x20;
  return (char)(ch | case_bit);
}
|
|
|
|
|
|
|
|
/* Returns true for an ASCII letter (either case) or an underscore. */
static bool upb_isletter(char c) {
  if (c == '_') {
    return true;
  }
  return upb_isbetween(upb_ascii_lower(c), 'a', 'z');
}
|
|
|
|
|
|
|
|
/* Returns true for anything upb_isletter() accepts, plus ASCII digits. */
static bool upb_isalphanum(char c) {
  if (upb_isletter(c)) {
    return true;
  }
  return upb_isbetween(c, '0', '9');
}
|
|
|
|
|
|
|
|
/* Returns the part of `fullname` after its last '.', or `fullname` itself when
 * it contains no '.'. A NULL input yields NULL. The result points into the
 * caller's string; nothing is copied. */
static const char* shortdefname(const char* fullname) {
  if (!fullname) {
    return NULL;
  }
  const char* last_dot = strrchr(fullname, '.');
  return last_dot ? last_dot + 1 : fullname;
}
|
|
|
|
|
|
|
|
/* All submessage fields are lower than all other fields.
|
|
|
|
* Secondly, fields are increasing in order. */
|
|
|
|
/* Sort key for a field: its number, with bit 30 set for every field that is
 * not a submessage. Submessage fields therefore rank below all others, and
 * fields within each group rank by increasing number. */
uint32_t field_rank(const upb_FieldDef* f) {
  const uint32_t non_submsg_bit = 1 << 30;
  const uint32_t number = upb_FieldDef_Number(f);
  UPB_ASSERT(number < non_submsg_bit);
  return upb_FieldDef_IsSubMessage(f) ? number : (number | non_submsg_bit);
}
|
|
|
|
|
|
|
|
int cmp_fields(const void* p1, const void* p2) {
|
|
|
|
const upb_FieldDef* f1 = *(upb_FieldDef* const*)p1;
|
|
|
|
const upb_FieldDef* f2 = *(upb_FieldDef* const*)p2;
|
|
|
|
return field_rank(f1) - field_rank(f2);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Records an "out of memory" error message on `status`. */
static void upb_Status_setoom(upb_Status* status) {
  static const char kMessage[] = "out of memory";
  upb_Status_SetErrorMessage(status, kMessage);
}
|
|
|
|
|
|
|
|
/* Sets m->well_known_type from the message's full name. Names that match no
 * entry below, and a NULL name, get kUpb_WellKnown_Unspecified. */
static void assign_msg_wellknowntype(upb_MessageDef* m) {
  static const struct {
    const char* full_name;
    upb_WellKnown type;
  } kWellKnownTypes[] = {
      {"google.protobuf.Any", kUpb_WellKnown_Any},
      {"google.protobuf.FieldMask", kUpb_WellKnown_FieldMask},
      {"google.protobuf.Duration", kUpb_WellKnown_Duration},
      {"google.protobuf.Timestamp", kUpb_WellKnown_Timestamp},
      {"google.protobuf.DoubleValue", kUpb_WellKnown_DoubleValue},
      {"google.protobuf.FloatValue", kUpb_WellKnown_FloatValue},
      {"google.protobuf.Int64Value", kUpb_WellKnown_Int64Value},
      {"google.protobuf.UInt64Value", kUpb_WellKnown_UInt64Value},
      {"google.protobuf.Int32Value", kUpb_WellKnown_Int32Value},
      {"google.protobuf.UInt32Value", kUpb_WellKnown_UInt32Value},
      {"google.protobuf.BoolValue", kUpb_WellKnown_BoolValue},
      {"google.protobuf.StringValue", kUpb_WellKnown_StringValue},
      {"google.protobuf.BytesValue", kUpb_WellKnown_BytesValue},
      {"google.protobuf.Value", kUpb_WellKnown_Value},
      {"google.protobuf.ListValue", kUpb_WellKnown_ListValue},
      {"google.protobuf.Struct", kUpb_WellKnown_Struct},
  };
  const size_t count = sizeof(kWellKnownTypes) / sizeof(kWellKnownTypes[0]);
  const char* name = upb_MessageDef_FullName(m);

  /* Start from "unspecified"; only an exact name match overrides it. */
  m->well_known_type = kUpb_WellKnown_Unspecified;
  if (name == NULL) {
    return;
  }

  for (size_t i = 0; i < count; i++) {
    if (strcmp(name, kWellKnownTypes[i].full_name) == 0) {
      m->well_known_type = kWellKnownTypes[i].type;
      return;
    }
  }
}
|
|
|
|
|
|
|
|
/* upb_EnumDef ****************************************************************/
|
|
|
|
|
|
|
|
const google_protobuf_EnumOptions* upb_EnumDef_Options(const upb_EnumDef* e) {
|
|
|
|
return e->opts;
|
|
|
|
}
|
|
|
|
|
|
|
|
bool upb_EnumDef_HasOptions(const upb_EnumDef* e) {
|
|
|
|
return e->opts != (void*)opt_default;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Returns the enum's stored full name. */
const char* upb_EnumDef_FullName(const upb_EnumDef* e) {
  return e->full_name;
}
|
|
|
|
|
|
|
|
const char* upb_EnumDef_Name(const upb_EnumDef* e) {
|
|
|
|
return shortdefname(e->full_name);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Returns the file def stored on the enum. */
const upb_FileDef* upb_EnumDef_File(const upb_EnumDef* e) {
  return e->file;
}
|
|
|
|
|
|
|
|
const upb_MessageDef* upb_EnumDef_ContainingType(const upb_EnumDef* e) {
|
|
|
|
return e->containing_type;
|
|
|
|
}
|
|
|
|
|
|
|
|
int32_t upb_EnumDef_Default(const upb_EnumDef* e) {
|
|
|
|
UPB_ASSERT(upb_EnumDef_FindValueByNumber(e, e->defaultval));
|
|
|
|
return e->defaultval;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Returns the number of values in the enum. */
int upb_EnumDef_ValueCount(const upb_EnumDef* e) {
  return e->value_count;
}
|
|
|
|
|
|
|
|
const upb_EnumValueDef* upb_EnumDef_FindValueByNameWithSize(
|
|
|
|
const upb_EnumDef* def, const char* name, size_t len) {
|
|
|
|
upb_value v;
|
|
|
|
return upb_strtable_lookup2(&def->ntoi, name, len, &v)
|
|
|
|
? upb_value_getconstptr(v)
|
|
|
|
: NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
const upb_EnumValueDef* upb_EnumDef_FindValueByNumber(const upb_EnumDef* def,
|
|
|
|
int32_t num) {
|
|
|
|
upb_value v;
|
|
|
|
return upb_inttable_lookup(&def->iton, num, &v) ? upb_value_getconstptr(v)
|
|
|
|
: NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
bool upb_EnumDef_CheckNumber(const upb_EnumDef* e, int32_t num) {
|
|
|
|
// We could use upb_EnumDef_FindValueByNumber(e, num) != NULL, but we expect
|
|
|
|
// this to be faster (especially for small numbers).
|
|
|
|
return upb_MiniTable_Enum_CheckValue(e->layout, num);
|
|
|
|
}
|
|
|
|
|
|
|
|
const upb_EnumValueDef* upb_EnumDef_Value(const upb_EnumDef* e, int i) {
|
|
|
|
UPB_ASSERT(0 <= i && i < e->value_count);
|
|
|
|
return &e->values[i];
|
|
|
|
}
|
|
|
|
|
|
|
|
/* upb_EnumValueDef
|
|
|
|
* *************************************************************/
|
|
|
|
|
|
|
|
const google_protobuf_EnumValueOptions* upb_EnumValueDef_Options(
|
|
|
|
const upb_EnumValueDef* e) {
|
|
|
|
return e->opts;
|
|
|
|
}
|
|
|
|
|
|
|
|
bool upb_EnumValueDef_HasOptions(const upb_EnumValueDef* e) {
|
|
|
|
return e->opts != (void*)opt_default;
|
|
|
|
}
|
|
|
|
|
|
|
|
const upb_EnumDef* upb_EnumValueDef_Enum(const upb_EnumValueDef* ev) {
|
|
|
|
return ev->parent;
|
|
|
|
}
|
|
|
|
|
|
|
|
const char* upb_EnumValueDef_FullName(const upb_EnumValueDef* ev) {
|
|
|
|
return ev->full_name;
|
|
|
|
}
|
|
|
|
|
|
|
|
const char* upb_EnumValueDef_Name(const upb_EnumValueDef* ev) {
|
|
|
|
return shortdefname(ev->full_name);
|
|
|
|
}
|
|
|
|
|
|
|
|
int32_t upb_EnumValueDef_Number(const upb_EnumValueDef* ev) {
|
|
|
|
return ev->number;
|
|
|
|
}
|
|
|
|
|
|
|
|
uint32_t upb_EnumValueDef_Index(const upb_EnumValueDef* ev) {
|
|
|
|
// Compute index in our parent's array.
|
|
|
|
return ev - ev->parent->values;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* upb_ExtensionRange
|
|
|
|
* ***************************************************************/
|
|
|
|
|
|
|
|
const google_protobuf_ExtensionRangeOptions* upb_ExtensionRange_Options(
|
|
|
|
const upb_ExtensionRange* r) {
|
|
|
|
return r->opts;
|
|
|
|
}
|
|
|
|
|
|
|
|
bool upb_ExtensionRange_HasOptions(const upb_ExtensionRange* r) {
|
|
|
|
return r->opts != (void*)opt_default;
|
|
|
|
}
|
|
|
|
|
|
|
|
int32_t upb_ExtensionRange_Start(const upb_ExtensionRange* e) {
|
|
|
|
return e->start;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Returns the stored end number of the extension range. */
int32_t upb_ExtensionRange_End(const upb_ExtensionRange* e) {
  return e->end;
}
|
|
|
|
|
|
|
|
/* upb_FieldDef ***************************************************************/
|
|
|
|
|
|
|
|
const google_protobuf_FieldOptions* upb_FieldDef_Options(
|
|
|
|
const upb_FieldDef* f) {
|
|
|
|
return f->opts;
|
|
|
|
}
|
|
|
|
|
|
|
|
bool upb_FieldDef_HasOptions(const upb_FieldDef* f) {
|
|
|
|
return f->opts != (void*)opt_default;
|
|
|
|
}
|
|
|
|
|
|
|
|
const char* upb_FieldDef_FullName(const upb_FieldDef* f) {
|
|
|
|
return f->full_name;
|
|
|
|
}
|
|
|
|
|
|
|
|
upb_CType upb_FieldDef_CType(const upb_FieldDef* f) {
|
|
|
|
switch (f->type_) {
|
|
|
|
case upb_FieldType_Double:
|
|
|
|
return kUpb_CType_Double;
|
|
|
|
case upb_FieldType_Float:
|
|
|
|
return kUpb_CType_Float;
|
|
|
|
case upb_FieldType_Int64:
|
|
|
|
case upb_FieldType_SInt64:
|
|
|
|
case upb_FieldType_SFixed64:
|
|
|
|
return kUpb_CType_Int64;
|
|
|
|
case upb_FieldType_Int32:
|
|
|
|
case upb_FieldType_SFixed32:
|
|
|
|
case upb_FieldType_SInt32:
|
|
|
|
return kUpb_CType_Int32;
|
|
|
|
case upb_FieldType_UInt64:
|
|
|
|
case upb_FieldType_Fixed64:
|
|
|
|
return kUpb_CType_UInt64;
|
|
|
|
case upb_FieldType_UInt32:
|
|
|
|
case upb_FieldType_Fixed32:
|
|
|
|
return kUpb_CType_UInt32;
|
|
|
|
case upb_FieldType_Enum:
|
|
|
|
return kUpb_CType_Enum;
|
|
|
|
case upb_FieldType_Bool:
|
|
|
|
return kUpb_CType_Bool;
|
|
|
|
case upb_FieldType_String:
|
|
|
|
return kUpb_CType_String;
|
|
|
|
case upb_FieldType_Bytes:
|
|
|
|
return kUpb_CType_Bytes;
|
|
|
|
case upb_FieldType_Group:
|
|
|
|
case upb_FieldType_Message:
|
|
|
|
return kUpb_CType_Message;
|
|
|
|
}
|
|
|
|
UPB_UNREACHABLE();
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Returns the field's descriptor type. */
upb_FieldType upb_FieldDef_Type(const upb_FieldDef* f) {
  return f->type_;
}
|
|
|
|
|
|
|
|
/* Returns the field's stored index. */
uint32_t upb_FieldDef_Index(const upb_FieldDef* f) {
  return f->index_;
}
|
|
|
|
|
|
|
|
/* Returns the field's label. */
upb_Label upb_FieldDef_Label(const upb_FieldDef* f) {
  return f->label_;
}
|
|
|
|
|
|
|
|
/* Returns the field's number. */
uint32_t upb_FieldDef_Number(const upb_FieldDef* f) {
  return f->number_;
}
|
|
|
|
|
|
|
|
bool upb_FieldDef_IsExtension(const upb_FieldDef* f) {
|
|
|
|
return f->is_extension_;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Returns the field's stored packed flag. */
bool upb_FieldDef_IsPacked(const upb_FieldDef* f) {
  return f->packed_;
}
|
|
|
|
|
|
|
|
const char* upb_FieldDef_Name(const upb_FieldDef* f) {
|
|
|
|
return shortdefname(f->full_name);
|
|
|
|
}
|
|
|
|
|
|
|
|
const char* upb_FieldDef_JsonName(const upb_FieldDef* f) {
|
|
|
|
return f->json_name;
|
|
|
|
}
|
|
|
|
|
|
|
|
bool upb_FieldDef_HasJsonName(const upb_FieldDef* f) {
|
|
|
|
return f->has_json_name_;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Returns the file def stored on the field. */
const upb_FileDef* upb_FieldDef_File(const upb_FieldDef* f) {
  return f->file;
}
|
|
|
|
|
|
|
|
const upb_MessageDef* upb_FieldDef_ContainingType(const upb_FieldDef* f) {
|
|
|
|
return f->msgdef;
|
|
|
|
}
|
|
|
|
|
|
|
|
const upb_MessageDef* upb_FieldDef_ExtensionScope(const upb_FieldDef* f) {
|
|
|
|
return f->is_extension_ ? f->scope.extension_scope : NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
const upb_OneofDef* upb_FieldDef_ContainingOneof(const upb_FieldDef* f) {
|
|
|
|
return f->is_extension_ ? NULL : f->scope.oneof;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Like upb_FieldDef_ContainingOneof(), but also returns NULL when the
 * containing oneof is synthetic. */
const upb_OneofDef* upb_FieldDef_RealContainingOneof(const upb_FieldDef* f) {
  const upb_OneofDef* candidate = upb_FieldDef_ContainingOneof(f);
  return (candidate && !upb_OneofDef_IsSynthetic(candidate)) ? candidate
                                                             : NULL;
}
|
|
|
|
|
|
|
|
upb_MessageValue upb_FieldDef_Default(const upb_FieldDef* f) {
|
|
|
|
UPB_ASSERT(!upb_FieldDef_IsSubMessage(f));
|
|
|
|
upb_MessageValue ret;
|
|
|
|
|
|
|
|
switch (upb_FieldDef_CType(f)) {
|
|
|
|
case kUpb_CType_Bool:
|
|
|
|
return (upb_MessageValue){.bool_val = f->defaultval.boolean};
|
|
|
|
case kUpb_CType_Int64:
|
|
|
|
return (upb_MessageValue){.int64_val = f->defaultval.sint};
|
|
|
|
case kUpb_CType_UInt64:
|
|
|
|
return (upb_MessageValue){.uint64_val = f->defaultval.uint};
|
|
|
|
case kUpb_CType_Enum:
|
|
|
|
case kUpb_CType_Int32:
|
|
|
|
return (upb_MessageValue){.int32_val = (int32_t)f->defaultval.sint};
|
|
|
|
case kUpb_CType_UInt32:
|
|
|
|
return (upb_MessageValue){.uint32_val = (uint32_t)f->defaultval.uint};
|
|
|
|
case kUpb_CType_Float:
|
|
|
|
return (upb_MessageValue){.float_val = f->defaultval.flt};
|
|
|
|
case kUpb_CType_Double:
|
|
|
|
return (upb_MessageValue){.double_val = f->defaultval.dbl};
|
|
|
|
case kUpb_CType_String:
|
|
|
|
case kUpb_CType_Bytes: {
|
|
|
|
str_t* str = f->defaultval.str;
|
|
|
|
if (str) {
|
|
|
|
return (upb_MessageValue){
|
|
|
|
.str_val = (upb_StringView){.data = str->str, .size = str->len}};
|
|
|
|
} else {
|
|
|
|
return (upb_MessageValue){
|
|
|
|
.str_val = (upb_StringView){.data = NULL, .size = 0}};
|
|
|
|
}
|
|
|
|
}
|
|
|
|
default:
|
|
|
|
UPB_UNREACHABLE();
|
|
|
|
}
|
|
|
|
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
const upb_MessageDef* upb_FieldDef_MessageSubDef(const upb_FieldDef* f) {
|
|
|
|
return upb_FieldDef_CType(f) == kUpb_CType_Message ? f->sub.msgdef : NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
const upb_EnumDef* upb_FieldDef_EnumSubDef(const upb_FieldDef* f) {
|
|
|
|
return upb_FieldDef_CType(f) == kUpb_CType_Enum ? f->sub.enumdef : NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
const upb_MiniTable_Field* upb_FieldDef_MiniTable(const upb_FieldDef* f) {
|
|
|
|
UPB_ASSERT(!upb_FieldDef_IsExtension(f));
|
|
|
|
return &f->msgdef->layout->fields[f->layout_index];
|
|
|
|
}
|
|
|
|
|
|
|
|
const upb_MiniTable_Extension* _upb_FieldDef_ExtensionMiniTable(
|
|
|
|
const upb_FieldDef* f) {
|
|
|
|
UPB_ASSERT(upb_FieldDef_IsExtension(f));
|
|
|
|
return f->file->ext_layouts[f->layout_index];
|
|
|
|
}
|
|
|
|
|
|
|
|
bool _upb_FieldDef_IsProto3Optional(const upb_FieldDef* f) {
|
|
|
|
return f->proto3_optional_;
|
|
|
|
}
|
|
|
|
|
|
|
|
bool upb_FieldDef_IsSubMessage(const upb_FieldDef* f) {
|
|
|
|
return upb_FieldDef_CType(f) == kUpb_CType_Message;
|
|
|
|
}
|
|
|
|
|
|
|
|
bool upb_FieldDef_IsString(const upb_FieldDef* f) {
|
|
|
|
return upb_FieldDef_CType(f) == kUpb_CType_String ||
|
|
|
|
upb_FieldDef_CType(f) == kUpb_CType_Bytes;
|
|
|
|
}
|
|
|
|
|
|
|
|
bool upb_FieldDef_IsRepeated(const upb_FieldDef* f) {
|
|
|
|
return upb_FieldDef_Label(f) == kUpb_Label_Repeated;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Returns true when the field is neither string/bytes nor a submessage. */
bool upb_FieldDef_IsPrimitive(const upb_FieldDef* f) {
  return !(upb_FieldDef_IsString(f) || upb_FieldDef_IsSubMessage(f));
}
|
|
|
|
|
|
|
|
/* Returns true when the field is a repeated submessage field whose message
 * type is a map entry. */
bool upb_FieldDef_IsMap(const upb_FieldDef* f) {
  if (!upb_FieldDef_IsRepeated(f)) {
    return false;
  }
  if (!upb_FieldDef_IsSubMessage(f)) {
    return false;
  }
  return upb_MessageDef_IsMapEntry(upb_FieldDef_MessageSubDef(f));
}
|
|
|
|
|
|
|
|
/* Returns the field's stored has_default flag. */
bool upb_FieldDef_HasDefault(const upb_FieldDef* f) {
  return f->has_default;
}
|
|
|
|
|
|
|
|
bool upb_FieldDef_HasSubDef(const upb_FieldDef* f) {
|
|
|
|
return upb_FieldDef_IsSubMessage(f) ||
|
|
|
|
upb_FieldDef_CType(f) == kUpb_CType_Enum;
|
|
|
|
}
|
|
|
|
|
|
|
|
bool upb_FieldDef_HasPresence(const upb_FieldDef* f) {
|
|
|
|
if (upb_FieldDef_IsRepeated(f)) return false;
|
|
|
|
return upb_FieldDef_IsSubMessage(f) || upb_FieldDef_ContainingOneof(f) ||
|
|
|
|
f->file->syntax == kUpb_Syntax_Proto2;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Returns true when `x` lies in the inclusive range [low, high]. */
static bool between(int32_t x, int32_t low, int32_t high) {
  return !(x < low || x > high);
}
|
|
|
|
|
|
|
|
/* Returns true when `label` is in the inclusive range 1..3. */
bool upb_FieldDef_checklabel(int32_t label) {
  return between(label, 1, 3);
}
|
|
|
|
/* Returns true when `type` is in the inclusive range 1..11. */
bool upb_FieldDef_checktype(int32_t type) {
  return between(type, 1, 11);
}
|
|
|
|
/* Returns true when `fmt` is in the inclusive range 1..3. */
bool upb_FieldDef_checkintfmt(int32_t fmt) {
  return between(fmt, 1, 3);
}
|
|
|
|
|
|
|
|
/* Returns true when `type` is in the inclusive range 1..18. */
bool upb_FieldDef_checkdescriptortype(int32_t type) {
  const int32_t lowest = 1;
  const int32_t highest = 18;
  return between(type, lowest, highest);
}
|
|
|
|
|
|
|
|
/* upb_MessageDef
|
|
|
|
* *****************************************************************/
|
|
|
|
|
|
|
|
const google_protobuf_MessageOptions* upb_MessageDef_Options(
|
|
|
|
const upb_MessageDef* m) {
|
|
|
|
return m->opts;
|
|
|
|
}
|
|
|
|
|
|
|
|
bool upb_MessageDef_HasOptions(const upb_MessageDef* m) {
|
|
|
|
return m->opts != (void*)opt_default;
|
|
|
|
}
|
|
|
|
|
|
|
|
const char* upb_MessageDef_FullName(const upb_MessageDef* m) {
|
|
|
|
return m->full_name;
|
|
|
|
}
|
|
|
|
|
|
|
|
const upb_FileDef* upb_MessageDef_File(const upb_MessageDef* m) {
|
|
|
|
return m->file;
|
|
|
|
}
|
|
|
|
|
|
|
|
const upb_MessageDef* upb_MessageDef_ContainingType(const upb_MessageDef* m) {
|
|
|
|
return m->containing_type;
|
|
|
|
}
|
|
|
|
|
|
|
|
const char* upb_MessageDef_Name(const upb_MessageDef* m) {
|
|
|
|
return shortdefname(m->full_name);
|
|
|
|
}
|
|
|
|
|
|
|
|
upb_Syntax upb_MessageDef_Syntax(const upb_MessageDef* m) {
|
|
|
|
return m->file->syntax;
|
|
|
|
}
|
|
|
|
|
|
|
|
const upb_FieldDef* upb_MessageDef_FindFieldByNumberWithSize(
|
|
|
|
const upb_MessageDef* m, uint32_t i) {
|
|
|
|
upb_value val;
|
|
|
|
return upb_inttable_lookup(&m->itof, i, &val) ? upb_value_getconstptr(val)
|
|
|
|
: NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
const upb_FieldDef* upb_MessageDef_FindFieldByNameWithSize(
|
|
|
|
const upb_MessageDef* m, const char* name, size_t len) {
|
|
|
|
upb_value val;
|
|
|
|
|
|
|
|
if (!upb_strtable_lookup2(&m->ntof, name, len, &val)) {
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
return unpack_def(val, UPB_DEFTYPE_FIELD);
|
|
|
|
}
|
|
|
|
|
|
|
|
const upb_OneofDef* upb_MessageDef_FindOneofByNameWithSize(
|
|
|
|
const upb_MessageDef* m, const char* name, size_t len) {
|
|
|
|
upb_value val;
|
|
|
|
|
|
|
|
if (!upb_strtable_lookup2(&m->ntof, name, len, &val)) {
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
return unpack_def(val, UPB_DEFTYPE_ONEOF);
|
|
|
|
}
|
|
|
|
|
|
|
|
bool upb_MessageDef_FindByNameWithSize(const upb_MessageDef* m,
|
|
|
|
const char* name, size_t len,
|
|
|
|
const upb_FieldDef** out_f,
|
|
|
|
const upb_OneofDef** out_o) {
|
|
|
|
upb_value val;
|
|
|
|
|
|
|
|
if (!upb_strtable_lookup2(&m->ntof, name, len, &val)) {
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
const upb_FieldDef* f = unpack_def(val, UPB_DEFTYPE_FIELD);
|
|
|
|
const upb_OneofDef* o = unpack_def(val, UPB_DEFTYPE_ONEOF);
|
|
|
|
if (out_f) *out_f = f;
|
|
|
|
if (out_o) *out_o = o;
|
|
|
|
return f || o; /* False if this was a JSON name. */
|
|
|
|
}
|
|
|
|
|
|
|
|
const upb_FieldDef* upb_MessageDef_FindByJsonNameWithSize(
|
|
|
|
const upb_MessageDef* m, const char* name, size_t len) {
|
|
|
|
upb_value val;
|
|
|
|
const upb_FieldDef* f;
|
|
|
|
|
|
|
|
if (!upb_strtable_lookup2(&m->ntof, name, len, &val)) {
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
f = unpack_def(val, UPB_DEFTYPE_FIELD);
|
|
|
|
if (!f) f = unpack_def(val, UPB_DEFTYPE_FIELD_JSONNAME);
|
|
|
|
|
|
|
|
return f;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Returns the message's field count (same value as
 * upb_MessageDef_FieldCount()). */
int upb_MessageDef_numfields(const upb_MessageDef* m) {
  return m->field_count;
}
|
|
|
|
|
|
|
|
/* Returns the message's oneof count (same value as
 * upb_MessageDef_OneofCount()). */
int upb_MessageDef_numoneofs(const upb_MessageDef* m) {
  return m->oneof_count;
}
|
|
|
|
|
|
|
|
int upb_MessageDef_numrealoneofs(const upb_MessageDef* m) {
|
|
|
|
return m->real_oneof_count;
|
|
|
|
}
|
|
|
|
|
|
|
|
int upb_MessageDef_ExtensionRangeCount(const upb_MessageDef* m) {
|
|
|
|
return m->ext_range_count;
|
|
|
|
}
|
|
|
|
|
|
|
|
int upb_MessageDef_FieldCount(const upb_MessageDef* m) {
|
|
|
|
return m->field_count;
|
|
|
|
}
|
|
|
|
|
|
|
|
int upb_MessageDef_OneofCount(const upb_MessageDef* m) {
|
|
|
|
return m->oneof_count;
|
|
|
|
}
|
|
|
|
|
|
|
|
int upb_MessageDef_NestedMessageCount(const upb_MessageDef* m) {
|
|
|
|
return m->nested_msg_count;
|
|
|
|
}
|
|
|
|
|
|
|
|
int upb_MessageDef_NestedEnumCount(const upb_MessageDef* m) {
|
|
|
|
return m->nested_enum_count;
|
|
|
|
}
|
|
|
|
|
|
|
|
int upb_MessageDef_NestedExtensionCount(const upb_MessageDef* m) {
|
|
|
|
return m->nested_ext_count;
|
|
|
|
}
|
|
|
|
|
|
|
|
int upb_MessageDef_realoneofcount(const upb_MessageDef* m) {
|
|
|
|
return m->real_oneof_count;
|
|
|
|
}
|
|
|
|
|
|
|
|
const upb_MiniTable* upb_MessageDef_MiniTable(const upb_MessageDef* m) {
|
|
|
|
return m->layout;
|
|
|
|
}
|
|
|
|
|
|
|
|
const upb_ExtensionRange* upb_MessageDef_ExtensionRange(const upb_MessageDef* m,
|
|
|
|
int i) {
|
|
|
|
UPB_ASSERT(0 <= i && i < m->ext_range_count);
|
|
|
|
return &m->ext_ranges[i];
|
|
|
|
}
|
|
|
|
|
|
|
|
const upb_FieldDef* upb_MessageDef_Field(const upb_MessageDef* m, int i) {
|
|
|
|
UPB_ASSERT(0 <= i && i < m->field_count);
|
|
|
|
return &m->fields[i];
|
|
|
|
}
|
|
|
|
|
|
|
|
const upb_OneofDef* upb_MessageDef_Oneof(const upb_MessageDef* m, int i) {
|
|
|
|
UPB_ASSERT(0 <= i && i < m->oneof_count);
|
|
|
|
return &m->oneofs[i];
|
|
|
|
}
|
|
|
|
|
|
|
|
const upb_MessageDef* upb_MessageDef_NestedMessage(const upb_MessageDef* m,
|
|
|
|
int i) {
|
|
|
|
UPB_ASSERT(0 <= i && i < m->nested_msg_count);
|
|
|
|
return &m->nested_msgs[i];
|
|
|
|
}
|
|
|
|
|
|
|
|
const upb_EnumDef* upb_MessageDef_NestedEnum(const upb_MessageDef* m, int i) {
|
|
|
|
UPB_ASSERT(0 <= i && i < m->nested_enum_count);
|
|
|
|
return &m->nested_enums[i];
|
|
|
|
}
|
|
|
|
|
|
|
|
const upb_FieldDef* upb_MessageDef_NestedExtension(const upb_MessageDef* m,
|
|
|
|
int i) {
|
|
|
|
UPB_ASSERT(0 <= i && i < m->nested_ext_count);
|
|
|
|
return &m->nested_exts[i];
|
|
|
|
}
|
|
|
|
|
|
|
|
upb_WellKnown upb_MessageDef_WellKnownType(const upb_MessageDef* m) {
|
|
|
|
return m->well_known_type;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* upb_OneofDef ***************************************************************/
|
|
|
|
|
|
|
|
const google_protobuf_OneofOptions* upb_OneofDef_Options(
|
|
|
|
const upb_OneofDef* o) {
|
|
|
|
return o->opts;
|
|
|
|
}
|
|
|
|
|
|
|
|
bool upb_OneofDef_HasOptions(const upb_OneofDef* o) {
|
|
|
|
return o->opts != (void*)opt_default;
|
|
|
|
}
|
|
|
|
|
|
|
|
const char* upb_OneofDef_Name(const upb_OneofDef* o) {
|
|
|
|
return shortdefname(o->full_name);
|
|
|
|
}
|
|
|
|
|
|
|
|
const upb_MessageDef* upb_OneofDef_ContainingType(const upb_OneofDef* o) {
|
|
|
|
return o->parent;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Returns the number of fields in the oneof. */
int upb_OneofDef_FieldCount(const upb_OneofDef* o) {
  return o->field_count;
}
|
|
|
|
|
|
|
|
const upb_FieldDef* upb_OneofDef_Field(const upb_OneofDef* o, int i) {
|
|
|
|
UPB_ASSERT(i < o->field_count);
|
|
|
|
return o->fields[i];
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Returns the oneof's field count (same value as
 * upb_OneofDef_FieldCount()). */
int upb_OneofDef_numfields(const upb_OneofDef* o) {
  return o->field_count;
}
|
|
|
|
|
|
|
|
uint32_t upb_OneofDef_Index(const upb_OneofDef* o) {
|
|
|
|
// Compute index in our parent's array.
|
|
|
|
return o - o->parent->oneofs;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Returns the oneof's stored synthetic flag. */
bool upb_OneofDef_IsSynthetic(const upb_OneofDef* o) {
  return o->synthetic;
}
|
|
|
|
|
|
|
|
const upb_FieldDef* upb_OneofDef_LookupNameWithSize(const upb_OneofDef* o,
|
|
|
|
const char* name,
|
|
|
|
size_t length) {
|
|
|
|
upb_value val;
|
|
|
|
return upb_strtable_lookup2(&o->ntof, name, length, &val)
|
|
|
|
? upb_value_getptr(val)
|
|
|
|
: NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
const upb_FieldDef* upb_OneofDef_LookupNumber(const upb_OneofDef* o,
|
|
|
|
uint32_t num) {
|
|
|
|
upb_value val;
|
|
|
|
return upb_inttable_lookup(&o->itof, num, &val) ? upb_value_getptr(val)
|
|
|
|
: NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* upb_FileDef ****************************************************************/
|
|
|
|
|
|
|
|
const google_protobuf_FileOptions* upb_FileDef_Options(const upb_FileDef* f) {
|
|
|
|
return f->opts;
|
|
|
|
}
|
|
|
|
|
|
|
|
bool upb_FileDef_HasOptions(const upb_FileDef* f) {
|
|
|
|
return f->opts != (void*)opt_default;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Accessor: yields |f|'s name string. */
const char* upb_FileDef_Name(const upb_FileDef* f) {
  const char* file_name = f->name;
  return file_name;
}
|
|
|
|
|
|
|
|
/* Accessor: yields |f|'s package string. */
const char* upb_FileDef_Package(const upb_FileDef* f) {
  const char* package_name = f->package;
  return package_name;
}
|
|
|
|
|
|
|
|
/* Accessor: yields |f|'s syntax value. */
upb_Syntax upb_FileDef_Syntax(const upb_FileDef* f) {
  const upb_Syntax syntax = f->syntax;
  return syntax;
}
|
|
|
|
|
|
|
|
int upb_FileDef_TopLevelMessageCount(const upb_FileDef* f) {
|
|
|
|
return f->top_lvl_msg_count;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Accessor: yields |f|'s dep_count. */
int upb_FileDef_DependencyCount(const upb_FileDef* f) {
  const int count = f->dep_count;
  return count;
}
|
|
|
|
|
|
|
|
int upb_FileDef_PublicDependencyCount(const upb_FileDef* f) {
|
|
|
|
return f->public_dep_count;
|
|
|
|
}
|
|
|
|
|
|
|
|
int upb_FileDef_WeakDependencyCount(const upb_FileDef* f) {
|
|
|
|
return f->weak_dep_count;
|
|
|
|
}
|
|
|
|
|
|
|
|
const int32_t* _upb_FileDef_PublicDependencyIndexes(const upb_FileDef* f) {
|
|
|
|
return f->public_deps;
|
|
|
|
}
|
|
|
|
|
|
|
|
const int32_t* _upb_FileDef_WeakDependencyIndexes(const upb_FileDef* f) {
|
|
|
|
return f->weak_deps;
|
|
|
|
}
|
|
|
|
|
|
|
|
int upb_FileDef_TopLevelEnumCount(const upb_FileDef* f) {
|
|
|
|
return f->top_lvl_enum_count;
|
|
|
|
}
|
|
|
|
|
|
|
|
int upb_FileDef_TopLevelExtensionCount(const upb_FileDef* f) {
|
|
|
|
return f->top_lvl_ext_count;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Accessor: yields |f|'s service_count. */
int upb_FileDef_ServiceCount(const upb_FileDef* f) {
  const int count = f->service_count;
  return count;
}
|
|
|
|
|
|
|
|
const upb_FileDef* upb_FileDef_Dependency(const upb_FileDef* f, int i) {
|
|
|
|
UPB_ASSERT(0 <= i && i < f->dep_count);
|
|
|
|
return f->deps[i];
|
|
|
|
}
|
|
|
|
|
|
|
|
const upb_FileDef* upb_FileDef_PublicDependency(const upb_FileDef* f, int i) {
|
|
|
|
UPB_ASSERT(0 <= i && i < f->public_dep_count);
|
|
|
|
return f->deps[f->public_deps[i]];
|
|
|
|
}
|
|
|
|
|
|
|
|
const upb_FileDef* upb_FileDef_WeakDependency(const upb_FileDef* f, int i) {
|
|
|
|
UPB_ASSERT(0 <= i && i < f->public_dep_count);
|
|
|
|
return f->deps[f->weak_deps[i]];
|
|
|
|
}
|
|
|
|
|
|
|
|
const upb_MessageDef* upb_FileDef_TopLevelMessage(const upb_FileDef* f, int i) {
|
|
|
|
UPB_ASSERT(0 <= i && i < f->top_lvl_msg_count);
|
|
|
|
return &f->top_lvl_msgs[i];
|
|
|
|
}
|
|
|
|
|
|
|
|
const upb_EnumDef* upb_FileDef_TopLevelEnum(const upb_FileDef* f, int i) {
|
|
|
|
UPB_ASSERT(0 <= i && i < f->top_lvl_enum_count);
|
|
|
|
return &f->top_lvl_enums[i];
|
|
|
|
}
|
|
|
|
|
|
|
|
const upb_FieldDef* upb_FileDef_TopLevelExtension(const upb_FileDef* f, int i) {
|
|
|
|
UPB_ASSERT(0 <= i && i < f->top_lvl_ext_count);
|
|
|
|
return &f->top_lvl_exts[i];
|
|
|
|
}
|
|
|
|
|
|
|
|
const upb_ServiceDef* upb_FileDef_Service(const upb_FileDef* f, int i) {
|
|
|
|
UPB_ASSERT(0 <= i && i < f->service_count);
|
|
|
|
return &f->services[i];
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Accessor: yields the def pool stored in |f|'s symtab member. */
const upb_DefPool* upb_FileDef_Pool(const upb_FileDef* f) {
  const upb_DefPool* pool = f->symtab;
  return pool;
}
|
|
|
|
|
|
|
|
/* upb_MethodDef **************************************************************/
|
|
|
|
|
|
|
|
const google_protobuf_MethodOptions* upb_MethodDef_Options(
|
|
|
|
const upb_MethodDef* m) {
|
|
|
|
return m->opts;
|
|
|
|
}
|
|
|
|
|
|
|
|
bool upb_MethodDef_HasOptions(const upb_MethodDef* m) {
|
|
|
|
return m->opts != (void*)opt_default;
|
|
|
|
}
|
|
|
|
|
|
|
|
const char* upb_MethodDef_FullName(const upb_MethodDef* m) {
|
|
|
|
return m->full_name;
|
|
|
|
}
|
|
|
|
|
|
|
|
const char* upb_MethodDef_Name(const upb_MethodDef* m) {
|
|
|
|
return shortdefname(m->full_name);
|
|
|
|
}
|
|
|
|
|
|
|
|
const upb_ServiceDef* upb_MethodDef_Service(const upb_MethodDef* m) {
|
|
|
|
return m->service;
|
|
|
|
}
|
|
|
|
|
|
|
|
const upb_MessageDef* upb_MethodDef_InputType(const upb_MethodDef* m) {
|
|
|
|
return m->input_type;
|
|
|
|
}
|
|
|
|
|
|
|
|
const upb_MessageDef* upb_MethodDef_OutputType(const upb_MethodDef* m) {
|
|
|
|
return m->output_type;
|
|
|
|
}
|
|
|
|
|
|
|
|
bool upb_MethodDef_ClientStreaming(const upb_MethodDef* m) {
|
|
|
|
return m->client_streaming;
|
|
|
|
}
|
|
|
|
|
|
|
|
bool upb_MethodDef_ServerStreaming(const upb_MethodDef* m) {
|
|
|
|
return m->server_streaming;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* upb_ServiceDef *************************************************************/
|
|
|
|
|
|
|
|
const google_protobuf_ServiceOptions* upb_ServiceDef_Options(
|
|
|
|
const upb_ServiceDef* s) {
|
|
|
|
return s->opts;
|
|
|
|
}
|
|
|
|
|
|
|
|
bool upb_ServiceDef_HasOptions(const upb_ServiceDef* s) {
|
|
|
|
return s->opts != (void*)opt_default;
|
|
|
|
}
|
|
|
|
|
|
|
|
const char* upb_ServiceDef_FullName(const upb_ServiceDef* s) {
|
|
|
|
return s->full_name;
|
|
|
|
}
|
|
|
|
|
|
|
|
const char* upb_ServiceDef_Name(const upb_ServiceDef* s) {
|
|
|
|
return shortdefname(s->full_name);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Accessor: yields |s|'s stored index. */
int upb_ServiceDef_Index(const upb_ServiceDef* s) {
  const int index = s->index;
  return index;
}
|
|
|
|
|
|
|
|
const upb_FileDef* upb_ServiceDef_File(const upb_ServiceDef* s) {
|
|
|
|
return s->file;
|
|
|
|
}
|
|
|
|
|
|
|
|
int upb_ServiceDef_MethodCount(const upb_ServiceDef* s) {
|
|
|
|
return s->method_count;
|
|
|
|
}
|
|
|
|
|
|
|
|
const upb_MethodDef* upb_ServiceDef_Method(const upb_ServiceDef* s, int i) {
|
|
|
|
return i < 0 || i >= s->method_count ? NULL : &s->methods[i];
|
|
|
|
}
|
|
|
|
|
|
|
|
const upb_MethodDef* upb_ServiceDef_FindMethodByName(const upb_ServiceDef* s,
|
|
|
|
const char* name) {
|
|
|
|
for (int i = 0; i < s->method_count; i++) {
|
|
|
|
if (strcmp(name, upb_MethodDef_Name(&s->methods[i])) == 0) {
|
|
|
|
return &s->methods[i];
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* upb_DefPool ****************************************************************/
|
|
|
|
|
|
|
|
/* Releases |s|: frees the pool's arena first, then the pool struct itself
 * via upb_gfree(). */
void upb_DefPool_Free(upb_DefPool* s) {
  upb_Arena* arena = s->arena;
  upb_Arena_Free(arena);
  upb_gfree(s);
}
|
|
|
|
|
|
|
|
upb_DefPool* upb_DefPool_New(void) {
|
|
|
|
upb_DefPool* s = upb_gmalloc(sizeof(*s));
|
|
|
|
|
|
|
|
if (!s) {
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
s->arena = upb_Arena_New();
|
|
|
|
s->bytes_loaded = 0;
|
|
|
|
|
|
|
|
if (!upb_strtable_init(&s->syms, 32, s->arena) ||
|
|
|
|
!upb_strtable_init(&s->files, 4, s->arena) ||
|
|
|
|
!upb_inttable_init(&s->exts, s->arena)) {
|
|
|
|
goto err;
|
|
|
|
}
|
|
|
|
|
|
|
|
s->extreg = upb_ExtensionRegistry_New(s->arena);
|
|
|
|
if (!s->extreg) goto err;
|
|
|
|
return s;
|
|
|
|
|
|
|
|
err:
|
|
|
|
upb_Arena_Free(s->arena);
|
|
|
|
upb_gfree(s);
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
static const void* symtab_lookup(const upb_DefPool* s, const char* sym,
|
|
|
|
upb_deftype_t type) {
|
|
|
|
upb_value v;
|
|
|
|
return upb_strtable_lookup(&s->syms, sym, &v) ? unpack_def(v, type) : NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Same as symtab_lookup(), but the key is the |size|-byte string at |sym|
 * rather than a NUL-terminated string. */
static const void* symtab_lookup2(const upb_DefPool* s, const char* sym,
                                  size_t size, upb_deftype_t type) {
  upb_value entry;
  if (!upb_strtable_lookup2(&s->syms, sym, size, &entry)) {
    return NULL;
  }
  return unpack_def(entry, type);
}
|
|
|
|
|
|
|
|
const upb_MessageDef* upb_DefPool_FindMessageByName(const upb_DefPool* s,
|
|
|
|
const char* sym) {
|
|
|
|
return symtab_lookup(s, sym, UPB_DEFTYPE_MSG);
|
|
|
|
}
|
|
|
|
|
|
|
|
const upb_MessageDef* upb_DefPool_FindMessageByNameWithSize(
|
|
|
|
const upb_DefPool* s, const char* sym, size_t len) {
|
|
|
|
return symtab_lookup2(s, sym, len, UPB_DEFTYPE_MSG);
|
|
|
|
}
|
|
|
|
|
|
|
|
const upb_EnumDef* upb_DefPool_FindEnumByName(const upb_DefPool* s,
|
|
|
|
const char* sym) {
|
|
|
|
return symtab_lookup(s, sym, UPB_DEFTYPE_ENUM);
|
|
|
|
}
|
|
|
|
|
|
|
|
const upb_EnumValueDef* upb_DefPool_FindEnumByNameval(const upb_DefPool* s,
|
|
|
|
const char* sym) {
|
|
|
|
return symtab_lookup(s, sym, UPB_DEFTYPE_ENUMVAL);
|
|
|
|
}
|
|
|
|
|
|
|
|
const upb_FileDef* upb_DefPool_FindFileByName(const upb_DefPool* s,
|
|
|
|
const char* name) {
|
|
|
|
upb_value v;
|
|
|
|
return upb_strtable_lookup(&s->files, name, &v)
|
|
|
|
? unpack_def(v, UPB_DEFTYPE_FILE)
|
|
|
|
: NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
const upb_FileDef* upb_DefPool_FindFileByNameWithSize(const upb_DefPool* s,
|
|
|
|
const char* name,
|
|
|
|
size_t len) {
|
|
|
|
upb_value v;
|
|
|
|
return upb_strtable_lookup2(&s->files, name, len, &v)
|
|
|
|
? unpack_def(v, UPB_DEFTYPE_FILE)
|
|
|
|
: NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
const upb_FieldDef* upb_DefPool_FindExtensionByNameWithSize(
|
|
|
|
const upb_DefPool* s, const char* name, size_t size) {
|
|
|
|
upb_value v;
|
|
|
|
if (!upb_strtable_lookup2(&s->syms, name, size, &v)) return NULL;
|
|
|
|
|
|
|
|
switch (deftype(v)) {
|
|
|
|
case UPB_DEFTYPE_FIELD:
|
|
|
|
return unpack_def(v, UPB_DEFTYPE_FIELD);
|
|
|
|
case UPB_DEFTYPE_MSG: {
|
|
|
|
const upb_MessageDef* m = unpack_def(v, UPB_DEFTYPE_MSG);
|
|
|
|
return m->in_message_set ? &m->nested_exts[0] : NULL;
|
|
|
|
}
|
|
|
|
default:
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* NUL-terminated convenience form: measures |sym| with strlen() and forwards
 * to upb_DefPool_FindExtensionByNameWithSize(). */
const upb_FieldDef* upb_DefPool_FindExtensionByName(const upb_DefPool* s,
                                                    const char* sym) {
  const size_t sym_len = strlen(sym);
  return upb_DefPool_FindExtensionByNameWithSize(s, sym, sym_len);
}
|
|
|
|
|
|
|
|
const upb_ServiceDef* upb_DefPool_FindServiceByName(const upb_DefPool* s,
|
|
|
|
const char* name) {
|
|
|
|
return symtab_lookup(s, name, UPB_DEFTYPE_SERVICE);
|
|
|
|
}
|
|
|
|
|
|
|
|
const upb_FileDef* upb_DefPool_FindFileByNameforsym(const upb_DefPool* s,
|
|
|
|
const char* name) {
|
|
|
|
upb_value v;
|
|
|
|
// TODO(haberman): non-extension fields and oneofs.
|
|
|
|
if (upb_strtable_lookup(&s->syms, name, &v)) {
|
|
|
|
switch (deftype(v)) {
|
|
|
|
case UPB_DEFTYPE_EXT: {
|
|
|
|
const upb_FieldDef* f = unpack_def(v, UPB_DEFTYPE_EXT);
|
|
|
|
return upb_FieldDef_File(f);
|
|
|
|
}
|
|
|
|
case UPB_DEFTYPE_MSG: {
|
|
|
|
const upb_MessageDef* m = unpack_def(v, UPB_DEFTYPE_MSG);
|
|
|
|
return upb_MessageDef_File(m);
|
|
|
|
}
|
|
|
|
case UPB_DEFTYPE_ENUM: {
|
|
|
|
const upb_EnumDef* e = unpack_def(v, UPB_DEFTYPE_ENUM);
|
|
|
|
return upb_EnumDef_File(e);
|
|
|
|
}
|
|
|
|
case UPB_DEFTYPE_ENUMVAL: {
|
|
|
|
const upb_EnumValueDef* ev = unpack_def(v, UPB_DEFTYPE_ENUMVAL);
|
|
|
|
return upb_EnumDef_File(upb_EnumValueDef_Enum(ev));
|
|
|
|
}
|
|
|
|
case UPB_DEFTYPE_SERVICE: {
|
|
|
|
const upb_ServiceDef* service = unpack_def(v, UPB_DEFTYPE_SERVICE);
|
|
|
|
return upb_ServiceDef_File(service);
|
|
|
|
}
|
|
|
|
default:
|
|
|
|
UPB_UNREACHABLE();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
const char* last_dot = strrchr(name, '.');
|
|
|
|
if (last_dot) {
|
|
|
|
const upb_MessageDef* parent =
|
|
|
|
upb_DefPool_FindMessageByNameWithSize(s, name, last_dot - name);
|
|
|
|
if (parent) {
|
|
|
|
const char* shortname = last_dot + 1;
|
|
|
|
if (upb_MessageDef_FindByNameWithSize(parent, shortname,
|
|
|
|
strlen(shortname), NULL, NULL)) {
|
|
|
|
return upb_MessageDef_File(parent);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Code to build defs from descriptor protos. *********************************/
|
|
|
|
|
|
|
|
/* There is a question of how much validation to do here. It will be difficult
|
|
|
|
* to perfectly match the amount of validation performed by proto2. But since
|
|
|
|
* this code is used to directly build defs from Ruby (for example) we do need
|
|
|
|
* to validate important constraints like uniqueness of names and numbers. */
|
|
|
|
|
|
|
|
/* Calls symtab_oomerr(ctx) when |x| evaluates to false.  Expects a variable
 * named `ctx` to be in scope at the point of use.
 *
 * The body is wrapped in do { } while (0) so that `CHK_OOM(x);` is exactly one
 * statement and stays correct inside an unbraced if/else. */
#define CHK_OOM(x)         \
  do {                     \
    if (!(x)) {            \
      symtab_oomerr(ctx);  \
    }                      \
  } while (0)
|
|
|
|
|
|
|
|
typedef struct {
|
|
|
|
upb_DefPool* symtab;
|
|
|
|
upb_FileDef* file; /* File we are building. */
|
|
|
|
upb_Arena* arena; /* Allocate defs here. */
|
|
|
|
upb_Arena* tmp_arena; /* For temporary allocations. */
|
|
|
|
const upb_MiniTable_File* layout; /* NULL if we should build layouts. */
|
|
|
|
int enum_count; /* Count of enums built so far. */
|
|
|
|
int msg_count; /* Count of messages built so far. */
|
|
|
|
int ext_count; /* Count of extensions built so far. */
|
|
|
|
upb_Status* status; /* Record errors here. */
|
|
|
|
jmp_buf err; /* longjmp() on error. */
|
|
|
|
} symtab_addctx;
|
|
|
|
|
|
|
|
/* Formats a printf-style message into ctx->status, then jumps to ctx->err via
 * UPB_LONGJMP with value 1.  Does not return to the caller. */
UPB_NORETURN UPB_NOINLINE UPB_PRINTF(2, 3) static void symtab_errf(
    symtab_addctx* ctx, const char* fmt, ...) {
  va_list args;

  va_start(args, fmt);
  upb_Status_VSetErrorFormat(ctx->status, fmt, args);
  va_end(args);

  UPB_LONGJMP(ctx->err, 1);
}
|
|
|
|
|
|
|
|
/* Records an out-of-memory condition on ctx->status through
 * upb_Status_setoom(), then jumps to ctx->err via UPB_LONGJMP with value 1.
 * Does not return to the caller. */
UPB_NORETURN UPB_NOINLINE static void symtab_oomerr(symtab_addctx* ctx) {
  upb_Status* status = ctx->status;
  upb_Status_setoom(status);
  UPB_LONGJMP(ctx->err, 1);
}
|
|
|
|
|
|
|
|
/* Allocates |bytes| bytes from ctx->arena.  A zero-byte request returns NULL
 * without touching the arena; a failed allocation is reported through
 * symtab_oomerr(). */
void* symtab_alloc(symtab_addctx* ctx, size_t bytes) {
  void* mem = NULL;
  if (bytes != 0) {
    mem = upb_Arena_Malloc(ctx->arena, bytes);
    if (mem == NULL) symtab_oomerr(ctx);
  }
  return mem;
}
|
|
|
|
|
|
|
|
// We want to copy the options verbatim into the destination options proto.
// We use serialize+parse as our deep copy.
//
// When |proto| has options they are serialized into ctx->tmp_arena and parsed
// back into ctx->arena, and |target| receives the parsed copy; either step
// failing is reported through CHK_OOM.  Otherwise |target| is pointed at
// opt_default.  Expects `ctx` to be in scope.
//
// The body is wrapped in do { } while (0) so that `SET_OPTIONS(...);` is
// exactly one statement and cannot capture a following `else`.
#define SET_OPTIONS(target, desc_type, options_type, proto)                   \
  do {                                                                        \
    if (google_protobuf_##desc_type##_has_options(proto)) {                   \
      size_t size;                                                            \
      char* pb = google_protobuf_##options_type##_serialize(                  \
          google_protobuf_##desc_type##_options(proto), ctx->tmp_arena,       \
          &size);                                                             \
      CHK_OOM(pb);                                                            \
      target = google_protobuf_##options_type##_parse(pb, size, ctx->arena);  \
      CHK_OOM(target);                                                        \
    } else {                                                                  \
      target = (const google_protobuf_##options_type*)opt_default;            \
    }                                                                         \
  } while (0)
|
|
|
|
|
|
|
|
/* Validates |name| as an identifier and reports the first violation through
 * symtab_errf().
 *
 * The name is treated as '.'-separated components.  Each component must begin
 * with a character accepted by upb_isletter() and continue with characters
 * accepted by upb_isalphanum(), and no component may be empty (so an empty
 * name, a leading/trailing '.', and ".." are all rejected).  A '.' is only
 * permitted at all when |full| is true. */
static void check_ident(symtab_addctx* ctx, upb_StringView name, bool full) {
  const char* const text = name.data;
  const size_t size = name.size;
  bool at_component_start = true;

  for (size_t pos = 0; pos < size; pos++) {
    const char ch = text[pos];

    if (ch != '.') {
      if (at_component_start) {
        if (!upb_isletter(ch)) {
          symtab_errf(
              ctx,
              "invalid name: path components must start with a letter (%.*s)",
              (int)size, text);
        }
        at_component_start = false;
      } else if (!upb_isalphanum(ch)) {
        symtab_errf(ctx, "invalid name: non-alphanumeric character (%.*s)",
                    (int)size, text);
      }
      continue;
    }

    /* A separator: illegal right after another separator or at the very
     * start, and illegal anywhere in a non-qualified name. */
    if (at_component_start || !full) {
      symtab_errf(ctx, "invalid name: unexpected '.' (%.*s)", (int)size, text);
    }
    at_component_start = true;
  }

  /* Still waiting for a component's first character: the last part is
   * empty. */
  if (at_component_start) {
    symtab_errf(ctx, "invalid name: empty part (%.*s)", (int)size, text);
  }
}
|
|
|
|
|
|
|
|
/* Integer division of |n| by |d|, rounded up: computes (n + d - 1) / d in
 * size_t arithmetic. */
static size_t div_round_up(size_t n, size_t d) {
  const size_t biased = n + (d - 1);
  return biased / d;
}
|
|
|
|
|
|
|
|
/* Maps a upb_CType to a byte size: 1 for bool, 4 for the 32-bit numeric types
 * and enums, 8 for the 64-bit numeric types and double, sizeof(void*) for
 * messages, and sizeof(upb_StringView) for strings and bytes. */
static size_t upb_MessageValue_sizeof(upb_CType type) {
  switch (type) {
    case kUpb_CType_Bool:
      return 1;
    case kUpb_CType_Float:
    case kUpb_CType_Int32:
    case kUpb_CType_UInt32:
    case kUpb_CType_Enum:
      return 4;
    case kUpb_CType_Double:
    case kUpb_CType_Int64:
    case kUpb_CType_UInt64:
      return 8;
    case kUpb_CType_String:
    case kUpb_CType_Bytes:
      return sizeof(upb_StringView);
    case kUpb_CType_Message:
      return sizeof(void*);
  }
  UPB_UNREACHABLE();
}
|
|
|
|
|
|
|
|
static uint8_t upb_msg_fielddefsize(const upb_FieldDef* f) {
|
|
|
|
if (upb_MessageDef_IsMapEntry(upb_FieldDef_ContainingType(f))) {
|
|
|
|
upb_MapEntry ent;
|
|
|
|
UPB_ASSERT(sizeof(ent.k) == sizeof(ent.v));
|
|
|
|
return sizeof(ent.k);
|
|
|
|
} else if (upb_FieldDef_IsRepeated(f)) {
|
|
|
|
return sizeof(void*);
|
|
|
|
} else {
|
|
|
|
return upb_MessageValue_sizeof(upb_FieldDef_CType(f));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Reserves |size| bytes at the end of layout |l| and returns their offset.
 *
 * The offset is l->size aligned up to |size| via UPB_ALIGN_UP; l->size is then
 * advanced past the reservation.  If the new end would exceed UINT16_MAX an
 * error naming message |m| is raised through symtab_errf(). */
static uint32_t upb_MiniTable_place(symtab_addctx* ctx, upb_MiniTable* l,
                                    size_t size, const upb_MessageDef* m) {
  const size_t start = UPB_ALIGN_UP(l->size, size);
  const size_t end = start + size;

  if (end > UINT16_MAX) {
    symtab_errf(ctx, "size of message %s exceeded max size of %zu bytes",
                upb_MessageDef_FullName(m), (size_t)UINT16_MAX);
  }

  l->size = end;
  return start;
}
|
|
|
|
|
|
|
|
static int field_number_cmp(const void* p1, const void* p2) {
|
|
|
|
const upb_MiniTable_Field* f1 = p1;
|
|
|
|
const upb_MiniTable_Field* f2 = p2;
|
|
|
|
return f1->number - f2->number;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void assign_layout_indices(const upb_MessageDef* m, upb_MiniTable* l,
|
|
|
|
upb_MiniTable_Field* fields) {
|
|
|
|
int i;
|
|
|
|
int n = upb_MessageDef_numfields(m);
|
Optimized decoder and paved the way for parsing extensions.
The primary motivation for this change is to avoid referring to the
`upb_msglayout` object when we are trying to fetch the `upb_msglayout`
object for a sub-message. This will help pave the way for parsing
extensions. We also implement several optimizations so that we can
make this change without regressing performance.
Normally we compute the layout for a sub-message field like so:
```
const upb_msglayout *get_submsg_layout(
const upb_msglayout *layout,
const upb_msglayout_field *field) {
return layout->submsgs[field->submsg_index]
}
```
The reason for this indirection is to avoid storing a pointer directly
in `upb_msglayout_field`, as this would double its size (from 12 to 24
bytes on 64-bit architectures) which is wasteful as this pointer is
only needed for message typed fields.
However `get_submsg_layout` as written above does not work for
extensions, as they will not have entries in the message's
`layout->submsgs` array by nature, and we want to avoid creating
an entire fake `upb_msglayout` for each such extension since that
would also be wasteful.
This change removes the dependency on `upb_msglayout` by passing down
the `submsgs` array instead:
```
const upb_msglayout *get_submsg_layout(
const upb_msglayout *const *submsgs,
const upb_msglayout_field *field) {
return submsgs[field->submsg_index]
}
```
This will pave the way for parsing extensions, as we can more easily
create an alternative `submsgs` array for extension fields without
extra overhead or waste.
Along the way several optimizations presented themselves that allow
a nice increase in performance:
1. Passing the parsed `wireval` by address instead of by value ended
up avoiding an expensive and useless stack copy (this is on Clang,
which was used for all measurements).
2. When field numbers are densely packed, we can find a field by number
with a single indexed lookup instead of linear search. At codegen
time we can compute the maximum field number that will allow such
an indexed lookup.
3. For fields that do require linear search, we can start the linear
search at the location where we found the previous field, taking
advantage of the fact that field numbers are generally increasing.
4. When the hasbit index is less than 32 (the common case) we can use
a less expensive code sequence to set it.
5. We check for the hasbit case before the oneof case, as optional
fields are more common than oneof fields.
Benchmark results indicate a 20% improvement in parse speed with a
small code size increase:
```
name old time/op new time/op delta
ArenaOneAlloc 21.3ns ± 0% 21.5ns ± 0% +0.96% (p=0.000 n=12+12)
ArenaInitialBlockOneAlloc 6.32ns ± 0% 6.32ns ± 0% +0.03% (p=0.000 n=12+10)
LoadDescriptor_Upb 53.5µs ± 1% 51.5µs ± 2% -3.70% (p=0.000 n=12+12)
LoadAdsDescriptor_Upb 2.78ms ± 2% 2.68ms ± 0% -3.57% (p=0.000 n=12+12)
LoadDescriptor_Proto2 240µs ± 0% 240µs ± 0% +0.12% (p=0.001 n=12+12)
LoadAdsDescriptor_Proto2 12.8ms ± 0% 12.7ms ± 0% -1.15% (p=0.000 n=12+10)
Parse_Upb_FileDesc<UseArena,Copy> 13.2µs ± 2% 10.7µs ± 0% -18.49% (p=0.000 n=10+12)
Parse_Upb_FileDesc<UseArena,Alias> 11.3µs ± 0% 9.6µs ± 0% -15.11% (p=0.000 n=12+11)
Parse_Upb_FileDesc<InitBlock,Copy> 12.7µs ± 0% 10.3µs ± 0% -19.00% (p=0.000 n=10+12)
Parse_Upb_FileDesc<InitBlock,Alias> 10.9µs ± 0% 9.2µs ± 0% -15.82% (p=0.000 n=12+12)
Parse_Proto2<FileDesc,NoArena,Copy> 29.4µs ± 0% 29.5µs ± 0% +0.61% (p=0.000 n=12+12)
Parse_Proto2<FileDesc,UseArena,Copy> 20.7µs ± 2% 20.6µs ± 2% ~ (p=0.260 n=12+11)
Parse_Proto2<FileDesc,InitBlock,Copy> 16.7µs ± 1% 16.7µs ± 0% -0.25% (p=0.036 n=12+10)
Parse_Proto2<FileDescSV,InitBlock,Alias> 16.5µs ± 0% 16.5µs ± 0% +0.20% (p=0.016 n=12+11)
SerializeDescriptor_Proto2 5.30µs ± 1% 5.36µs ± 1% +1.09% (p=0.000 n=12+11)
SerializeDescriptor_Upb 12.9µs ± 0% 13.0µs ± 0% +0.90% (p=0.000 n=12+11)
FILE SIZE VM SIZE
-------------- --------------
+1.5% +176 +1.6% +176 upb/decode.c
+1.8% +176 +1.9% +176 decode_msg
+0.4% +64 +0.4% +64 upb/def.c
+1.4% +64 +1.4% +64 _upb_symtab_addfile
+1.2% +48 +1.4% +48 upb/reflection.c
+15% +32 +18% +32 upb_msg_set
+2.9% +16 +3.1% +16 upb_msg_mutable
-9.3% -288 [ = ] 0 [Unmapped]
[ = ] 0 +0.2% +288 TOTAL
```
4 years ago
|
|
|
int dense_below = 0;
|
|
|
|
for (i = 0; i < n; i++) {
|
|
|
|
upb_FieldDef* f = (upb_FieldDef*)upb_MessageDef_FindFieldByNumberWithSize(
|
|
|
|
m, fields[i].number);
|
|
|
|
UPB_ASSERT(f);
|
|
|
|
f->layout_index = i;
|
|
|
|
if (i < UINT8_MAX && fields[i].number == i + 1 &&
|
|
|
|
(i == 0 || fields[i - 1].number == i)) {
|
|
|
|
dense_below = i + 1;
|
Optimized decoder and paved the way for parsing extensions.
The primary motivation for this change is to avoid referring to the
`upb_msglayout` object when we are trying to fetch the `upb_msglayout`
object for a sub-message. This will help pave the way for parsing
extensions. We also implement several optimizations so that we can
make this change without regressing performance.
Normally we compute the layout for a sub-message field like so:
```
const upb_msglayout *get_submsg_layout(
const upb_msglayout *layout,
const upb_msglayout_field *field) {
return layout->submsgs[field->submsg_index]
}
```
The reason for this indirection is to avoid storing a pointer directly
in `upb_msglayout_field`, as this would double its size (from 12 to 24
bytes on 64-bit architectures) which is wasteful as this pointer is
only needed for message typed fields.
However `get_submsg_layout` as written above does not work for
extensions, as they will not have entries in the message's
`layout->submsgs` array by nature, and we want to avoid creating
an entire fake `upb_msglayout` for each such extension since that
would also be wasteful.
This change removes the dependency on `upb_msglayout` by passing down
the `submsgs` array instead:
```
const upb_msglayout *get_submsg_layout(
const upb_msglayout *const *submsgs,
const upb_msglayout_field *field) {
return submsgs[field->submsg_index]
}
```
This will pave the way for parsing extensions, as we can more easily
create an alternative `submsgs` array for extension fields without
extra overhead or waste.
Along the way several optimizations presented themselves that allow
a nice increase in performance:
1. Passing the parsed `wireval` by address instead of by value ended
up avoiding an expensive and useless stack copy (this is on Clang,
which was used for all measurements).
2. When field numbers are densely packed, we can find a field by number
with a single indexed lookup instead of linear search. At codegen
time we can compute the maximum field number that will allow such
an indexed lookup.
3. For fields that do require linear search, we can start the linear
search at the location where we found the previous field, taking
advantage of the fact that field numbers are generally increasing.
4. When the hasbit index is less than 32 (the common case) we can use
a less expensive code sequence to set it.
5. We check for the hasbit case before the oneof case, as optional
fields are more common than oneof fields.
Benchmark results indicate a 20% improvement in parse speed with a
small code size increase:
```
name old time/op new time/op delta
ArenaOneAlloc 21.3ns ± 0% 21.5ns ± 0% +0.96% (p=0.000 n=12+12)
ArenaInitialBlockOneAlloc 6.32ns ± 0% 6.32ns ± 0% +0.03% (p=0.000 n=12+10)
LoadDescriptor_Upb 53.5µs ± 1% 51.5µs ± 2% -3.70% (p=0.000 n=12+12)
LoadAdsDescriptor_Upb 2.78ms ± 2% 2.68ms ± 0% -3.57% (p=0.000 n=12+12)
LoadDescriptor_Proto2 240µs ± 0% 240µs ± 0% +0.12% (p=0.001 n=12+12)
LoadAdsDescriptor_Proto2 12.8ms ± 0% 12.7ms ± 0% -1.15% (p=0.000 n=12+10)
Parse_Upb_FileDesc<UseArena,Copy> 13.2µs ± 2% 10.7µs ± 0% -18.49% (p=0.000 n=10+12)
Parse_Upb_FileDesc<UseArena,Alias> 11.3µs ± 0% 9.6µs ± 0% -15.11% (p=0.000 n=12+11)
Parse_Upb_FileDesc<InitBlock,Copy> 12.7µs ± 0% 10.3µs ± 0% -19.00% (p=0.000 n=10+12)
Parse_Upb_FileDesc<InitBlock,Alias> 10.9µs ± 0% 9.2µs ± 0% -15.82% (p=0.000 n=12+12)
Parse_Proto2<FileDesc,NoArena,Copy> 29.4µs ± 0% 29.5µs ± 0% +0.61% (p=0.000 n=12+12)
Parse_Proto2<FileDesc,UseArena,Copy> 20.7µs ± 2% 20.6µs ± 2% ~ (p=0.260 n=12+11)
Parse_Proto2<FileDesc,InitBlock,Copy> 16.7µs ± 1% 16.7µs ± 0% -0.25% (p=0.036 n=12+10)
Parse_Proto2<FileDescSV,InitBlock,Alias> 16.5µs ± 0% 16.5µs ± 0% +0.20% (p=0.016 n=12+11)
SerializeDescriptor_Proto2 5.30µs ± 1% 5.36µs ± 1% +1.09% (p=0.000 n=12+11)
SerializeDescriptor_Upb 12.9µs ± 0% 13.0µs ± 0% +0.90% (p=0.000 n=12+11)
FILE SIZE VM SIZE
-------------- --------------
+1.5% +176 +1.6% +176 upb/decode.c
+1.8% +176 +1.9% +176 decode_msg
+0.4% +64 +0.4% +64 upb/def.c
+1.4% +64 +1.4% +64 _upb_symtab_addfile
+1.2% +48 +1.4% +48 upb/reflection.c
+15% +32 +18% +32 upb_msg_set
+2.9% +16 +3.1% +16 upb_msg_mutable
-9.3% -288 [ = ] 0 [Unmapped]
[ = ] 0 +0.2% +288 TOTAL
```
4 years ago
|
|
|
}
|
|
|
|
}
|
Optimized decoder and paved the way for parsing extensions.
The primary motivation for this change is to avoid referring to the
`upb_msglayout` object when we are trying to fetch the `upb_msglayout`
object for a sub-message. This will help pave the way for parsing
extensions. We also implement several optimizations so that we can
make this change without regressing performance.
Normally we compute the layout for a sub-message field like so:
```
const upb_msglayout *get_submsg_layout(
const upb_msglayout *layout,
const upb_msglayout_field *field) {
return layout->submsgs[field->submsg_index]
}
```
The reason for this indirection is to avoid storing a pointer directly
in `upb_msglayout_field`, as this would double its size (from 12 to 24
bytes on 64-bit architectures) which is wasteful as this pointer is
only needed for message typed fields.
However `get_submsg_layout` as written above does not work for
extensions, as they will not have entries in the message's
`layout->submsgs` array by nature, and we want to avoid creating
an entire fake `upb_msglayout` for each such extension since that
would also be wasteful.
This change removes the dependency on `upb_msglayout` by passing down
the `submsgs` array instead:
```
const upb_msglayout *get_submsg_layout(
const upb_msglayout *const *submsgs,
const upb_msglayout_field *field) {
return submsgs[field->submsg_index]
}
```
This will pave the way for parsing extensions, as we can more easily
create an alternative `submsgs` array for extension fields without
extra overhead or waste.
Along the way several optimizations presented themselves that allow
a nice increase in performance:
1. Passing the parsed `wireval` by address instead of by value ended
up avoiding an expensive and useless stack copy (this is on Clang,
which was used for all measurements).
2. When field numbers are densely packed, we can find a field by number
with a single indexed lookup instead of linear search. At codegen
time we can compute the maximum field number that will allow such
an indexed lookup.
3. For fields that do require linear search, we can start the linear
search at the location where we found the previous field, taking
advantage of the fact that field numbers are generally increasing.
4. When the hasbit index is less than 32 (the common case) we can use
a less expensive code sequence to set it.
5. We check for the hasbit case before the oneof case, as optional
fields are more common than oneof fields.
Benchmark results indicate a 20% improvement in parse speed with a
small code size increase:
```
name old time/op new time/op delta
ArenaOneAlloc 21.3ns ± 0% 21.5ns ± 0% +0.96% (p=0.000 n=12+12)
ArenaInitialBlockOneAlloc 6.32ns ± 0% 6.32ns ± 0% +0.03% (p=0.000 n=12+10)
LoadDescriptor_Upb 53.5µs ± 1% 51.5µs ± 2% -3.70% (p=0.000 n=12+12)
LoadAdsDescriptor_Upb 2.78ms ± 2% 2.68ms ± 0% -3.57% (p=0.000 n=12+12)
LoadDescriptor_Proto2 240µs ± 0% 240µs ± 0% +0.12% (p=0.001 n=12+12)
LoadAdsDescriptor_Proto2 12.8ms ± 0% 12.7ms ± 0% -1.15% (p=0.000 n=12+10)
Parse_Upb_FileDesc<UseArena,Copy> 13.2µs ± 2% 10.7µs ± 0% -18.49% (p=0.000 n=10+12)
Parse_Upb_FileDesc<UseArena,Alias> 11.3µs ± 0% 9.6µs ± 0% -15.11% (p=0.000 n=12+11)
Parse_Upb_FileDesc<InitBlock,Copy> 12.7µs ± 0% 10.3µs ± 0% -19.00% (p=0.000 n=10+12)
Parse_Upb_FileDesc<InitBlock,Alias> 10.9µs ± 0% 9.2µs ± 0% -15.82% (p=0.000 n=12+12)
Parse_Proto2<FileDesc,NoArena,Copy> 29.4µs ± 0% 29.5µs ± 0% +0.61% (p=0.000 n=12+12)
Parse_Proto2<FileDesc,UseArena,Copy> 20.7µs ± 2% 20.6µs ± 2% ~ (p=0.260 n=12+11)
Parse_Proto2<FileDesc,InitBlock,Copy> 16.7µs ± 1% 16.7µs ± 0% -0.25% (p=0.036 n=12+10)
Parse_Proto2<FileDescSV,InitBlock,Alias> 16.5µs ± 0% 16.5µs ± 0% +0.20% (p=0.016 n=12+11)
SerializeDescriptor_Proto2 5.30µs ± 1% 5.36µs ± 1% +1.09% (p=0.000 n=12+11)
SerializeDescriptor_Upb 12.9µs ± 0% 13.0µs ± 0% +0.90% (p=0.000 n=12+11)
FILE SIZE VM SIZE
-------------- --------------
+1.5% +176 +1.6% +176 upb/decode.c
+1.8% +176 +1.9% +176 decode_msg
+0.4% +64 +0.4% +64 upb/def.c
+1.4% +64 +1.4% +64 _upb_symtab_addfile
+1.2% +48 +1.4% +48 upb/reflection.c
+15% +32 +18% +32 upb_msg_set
+2.9% +16 +3.1% +16 upb_msg_mutable
-9.3% -288 [ = ] 0 [Unmapped]
[ = ] 0 +0.2% +288 TOTAL
```
4 years ago
|
|
|
l->dense_below = dense_below;
|
|
|
|
}
|
|
|
|
|
|
|
|
static uint8_t map_descriptortype(const upb_FieldDef* f) {
|
|
|
|
uint8_t type = upb_FieldDef_Type(f);
|
|
|
|
/* See TableDescriptorType() in upbc/generator.cc for details and
|
|
|
|
* rationale of these exceptions. */
|
|
|
|
if (type == upb_FieldType_String && f->file->syntax == kUpb_Syntax_Proto2) {
|
|
|
|
return upb_FieldType_Bytes;
|
|
|
|
} else if (type == upb_FieldType_Enum &&
|
|
|
|
f->sub.enumdef->file->syntax == kUpb_Syntax_Proto3) {
|
|
|
|
return upb_FieldType_Int32;
|
|
|
|
}
|
|
|
|
return type;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void fill_fieldlayout(upb_MiniTable_Field* field,
|
|
|
|
const upb_FieldDef* f) {
|
|
|
|
field->number = upb_FieldDef_Number(f);
|
|
|
|
field->descriptortype = map_descriptortype(f);
|
|
|
|
|
|
|
|
if (upb_FieldDef_IsMap(f)) {
|
|
|
|
field->mode =
|
|
|
|
kUpb_FieldMode_Map | (upb_FieldRep_Pointer << upb_FieldRep_Shift);
|
|
|
|
} else if (upb_FieldDef_IsRepeated(f)) {
|
|
|
|
field->mode =
|
|
|
|
kUpb_FieldMode_Array | (upb_FieldRep_Pointer << upb_FieldRep_Shift);
|
|
|
|
} else {
|
|
|
|
/* Maps descriptor type -> elem_size_lg2. */
|
|
|
|
static const uint8_t sizes[] = {
|
|
|
|
-1, /* invalid descriptor type */
|
|
|
|
upb_FieldRep_8Byte, /* DOUBLE */
|
|
|
|
upb_FieldRep_4Byte, /* FLOAT */
|
|
|
|
upb_FieldRep_8Byte, /* INT64 */
|
|
|
|
upb_FieldRep_8Byte, /* UINT64 */
|
|
|
|
upb_FieldRep_4Byte, /* INT32 */
|
|
|
|
upb_FieldRep_8Byte, /* FIXED64 */
|
|
|
|
upb_FieldRep_4Byte, /* FIXED32 */
|
|
|
|
upb_FieldRep_1Byte, /* BOOL */
|
|
|
|
upb_FieldRep_StringView, /* STRING */
|
|
|
|
upb_FieldRep_Pointer, /* GROUP */
|
|
|
|
upb_FieldRep_Pointer, /* MESSAGE */
|
|
|
|
upb_FieldRep_StringView, /* BYTES */
|
|
|
|
upb_FieldRep_4Byte, /* UINT32 */
|
|
|
|
upb_FieldRep_4Byte, /* ENUM */
|
|
|
|
upb_FieldRep_4Byte, /* SFIXED32 */
|
|
|
|
upb_FieldRep_8Byte, /* SFIXED64 */
|
|
|
|
upb_FieldRep_4Byte, /* SINT32 */
|
|
|
|
upb_FieldRep_8Byte, /* SINT64 */
|
|
|
|
};
|
|
|
|
field->mode = kUpb_FieldMode_Scalar |
|
|
|
|
(sizes[field->descriptortype] << upb_FieldRep_Shift);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (upb_FieldDef_IsPacked(f)) {
|
|
|
|
field->mode |= upb_LabelFlags_IsPacked;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (upb_FieldDef_IsExtension(f)) {
|
|
|
|
field->mode |= upb_LabelFlags_IsExtension;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* This function is the dynamic equivalent of message_layout.{cc,h} in upbc.
|
|
|
|
* It computes a dynamic layout for all of the fields in |m|. */
|
|
|
|
static void make_layout(symtab_addctx* ctx, const upb_MessageDef* m) {
|
|
|
|
upb_MiniTable* l = (upb_MiniTable*)m->layout;
|
|
|
|
size_t field_count = upb_MessageDef_numfields(m);
|
|
|
|
size_t sublayout_count = 0;
|
|
|
|
upb_MiniTable_Sub* subs;
|
|
|
|
upb_MiniTable_Field* fields;
|
|
|
|
|
|
|
|
memset(l, 0, sizeof(*l) + sizeof(_upb_FastTable_Entry));
|
|
|
|
|
|
|
|
/* Count sub-messages. */
|
|
|
|
for (size_t i = 0; i < field_count; i++) {
|
|
|
|
const upb_FieldDef* f = &m->fields[i];
|
|
|
|
if (upb_FieldDef_IsSubMessage(f)) {
|
|
|
|
sublayout_count++;
|
|
|
|
}
|
|
|
|
if (upb_FieldDef_CType(f) == kUpb_CType_Enum &&
|
|
|
|
f->sub.enumdef->file->syntax == kUpb_Syntax_Proto2) {
|
|
|
|
sublayout_count++;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
fields = symtab_alloc(ctx, field_count * sizeof(*fields));
|
|
|
|
subs = symtab_alloc(ctx, sublayout_count * sizeof(*subs));
|
|
|
|
|
|
|
|
l->field_count = upb_MessageDef_numfields(m);
|
|
|
|
l->fields = fields;
|
|
|
|
l->subs = subs;
|
|
|
|
l->table_mask = 0;
|
|
|
|
l->required_count = 0;
|
|
|
|
|
|
|
|
if (upb_MessageDef_ExtensionRangeCount(m) > 0) {
|
|
|
|
if (google_protobuf_MessageOptions_message_set_wire_format(m->opts)) {
|
|
|
|
l->ext = upb_ExtMode_IsMessageSet;
|
|
|
|
} else {
|
|
|
|
l->ext = upb_ExtMode_Extendable;
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
l->ext = upb_ExtMode_NonExtendable;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* TODO(haberman): initialize fast tables so that reflection-based parsing
|
|
|
|
* can get the same speeds as linked-in types. */
|
|
|
|
l->fasttable[0].field_parser = &fastdecode_generic;
|
|
|
|
l->fasttable[0].field_data = 0;
|
|
|
|
|
|
|
|
if (upb_MessageDef_IsMapEntry(m)) {
|
|
|
|
/* TODO(haberman): refactor this method so this special case is more
|
|
|
|
* elegant. */
|
|
|
|
const upb_FieldDef* key = upb_MessageDef_FindFieldByNumberWithSize(m, 1);
|
|
|
|
const upb_FieldDef* val = upb_MessageDef_FindFieldByNumberWithSize(m, 2);
|
|
|
|
fields[0].number = 1;
|
|
|
|
fields[1].number = 2;
|
|
|
|
fields[0].mode = kUpb_FieldMode_Scalar;
|
|
|
|
fields[1].mode = kUpb_FieldMode_Scalar;
|
|
|
|
fields[0].presence = 0;
|
|
|
|
fields[1].presence = 0;
|
|
|
|
fields[0].descriptortype = map_descriptortype(key);
|
|
|
|
fields[1].descriptortype = map_descriptortype(val);
|
|
|
|
fields[0].offset = 0;
|
|
|
|
fields[1].offset = sizeof(upb_StringView);
|
|
|
|
fields[1].submsg_index = 0;
|
|
|
|
|
|
|
|
if (upb_FieldDef_CType(val) == kUpb_CType_Message) {
|
|
|
|
subs[0].submsg = upb_FieldDef_MessageSubDef(val)->layout;
|
|
|
|
}
|
|
|
|
|
|
|
|
upb_FieldDef* fielddefs = (upb_FieldDef*)&m->fields[0];
|
|
|
|
UPB_ASSERT(fielddefs[0].number_ == 1);
|
|
|
|
UPB_ASSERT(fielddefs[1].number_ == 2);
|
|
|
|
fielddefs[0].layout_index = 0;
|
|
|
|
fielddefs[1].layout_index = 1;
|
|
|
|
|
|
|
|
l->field_count = 2;
|
|
|
|
l->size = 2 * sizeof(upb_StringView);
|
|
|
|
l->size = UPB_ALIGN_UP(l->size, 8);
|
|
|
|
l->dense_below = 2;
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Allocate data offsets in three stages:
|
|
|
|
*
|
|
|
|
* 1. hasbits.
|
|
|
|
* 2. regular fields.
|
|
|
|
* 3. oneof fields.
|
|
|
|
*
|
|
|
|
* OPT: There is a lot of room for optimization here to minimize the size.
|
|
|
|
*/
|
|
|
|
|
|
|
|
/* Assign hasbits for required fields first. */
|
|
|
|
size_t hasbit = 0;
|
|
|
|
|
|
|
|
for (int i = 0; i < m->field_count; i++) {
|
|
|
|
const upb_FieldDef* f = &m->fields[i];
|
|
|
|
upb_MiniTable_Field* field = &fields[upb_FieldDef_Index(f)];
|
|
|
|
if (upb_FieldDef_Label(f) == kUpb_Label_Required) {
|
|
|
|
field->presence = ++hasbit;
|
|
|
|
if (hasbit >= 63) {
|
|
|
|
symtab_errf(ctx, "Message with >=63 required fields: %s",
|
|
|
|
upb_MessageDef_FullName(m));
|
|
|
|
}
|
|
|
|
l->required_count++;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Allocate hasbits and set basic field attributes. */
|
|
|
|
sublayout_count = 0;
|
|
|
|
for (int i = 0; i < m->field_count; i++) {
|
|
|
|
const upb_FieldDef* f = &m->fields[i];
|
|
|
|
upb_MiniTable_Field* field = &fields[upb_FieldDef_Index(f)];
|
|
|
|
|
|
|
|
fill_fieldlayout(field, f);
|
|
|
|
|
|
|
|
if (upb_FieldDef_IsSubMessage(f)) {
|
|
|
|
field->submsg_index = sublayout_count++;
|
|
|
|
subs[field->submsg_index].submsg = upb_FieldDef_MessageSubDef(f)->layout;
|
|
|
|
} else if (upb_FieldDef_CType(f) == kUpb_CType_Enum &&
|
|
|
|
f->file->syntax == kUpb_Syntax_Proto2) {
|
|
|
|
field->submsg_index = sublayout_count++;
|
|
|
|
subs[field->submsg_index].subenum = upb_FieldDef_EnumSubDef(f)->layout;
|
|
|
|
UPB_ASSERT(subs[field->submsg_index].subenum);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (upb_FieldDef_Label(f) == kUpb_Label_Required) {
|
|
|
|
/* Hasbit was already assigned. */
|
|
|
|
} else if (upb_FieldDef_HasPresence(f) &&
|
|
|
|
!upb_FieldDef_RealContainingOneof(f)) {
|
|
|
|
/* We don't use hasbit 0, so that 0 can indicate "no presence" in the
|
|
|
|
* table. This wastes one hasbit, but we don't worry about it for now. */
|
|
|
|
field->presence = ++hasbit;
|
|
|
|
} else {
|
|
|
|
field->presence = 0;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Account for space used by hasbits. */
|
|
|
|
l->size = div_round_up(hasbit, 8);
|
|
|
|
|
|
|
|
/* Allocate non-oneof fields. */
|
|
|
|
for (int i = 0; i < m->field_count; i++) {
|
|
|
|
const upb_FieldDef* f = &m->fields[i];
|
|
|
|
size_t field_size = upb_msg_fielddefsize(f);
|
|
|
|
size_t index = upb_FieldDef_Index(f);
|
|
|
|
|
|
|
|
if (upb_FieldDef_RealContainingOneof(f)) {
|
|
|
|
/* Oneofs are handled separately below. */
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
fields[index].offset = upb_MiniTable_place(ctx, l, field_size, m);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Allocate oneof fields. Each oneof field consists of a uint32 for the case
|
|
|
|
* and space for the actual data. */
|
|
|
|
for (int i = 0; i < m->oneof_count; i++) {
|
|
|
|
const upb_OneofDef* o = &m->oneofs[i];
|
|
|
|
size_t case_size = sizeof(uint32_t); /* Could potentially optimize this. */
|
|
|
|
size_t field_size = 0;
|
|
|
|
uint32_t case_offset;
|
|
|
|
uint32_t data_offset;
|
|
|
|
|
|
|
|
if (upb_OneofDef_IsSynthetic(o)) continue;
|
|
|
|
|
|
|
|
/* Calculate field size: the max of all field sizes. */
|
|
|
|
for (int j = 0; j < o->field_count; j++) {
|
|
|
|
const upb_FieldDef* f = o->fields[j];
|
|
|
|
field_size = UPB_MAX(field_size, upb_msg_fielddefsize(f));
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Align and allocate case offset. */
|
|
|
|
case_offset = upb_MiniTable_place(ctx, l, case_size, m);
|
|
|
|
data_offset = upb_MiniTable_place(ctx, l, field_size, m);
|
|
|
|
|
|
|
|
for (int i = 0; i < o->field_count; i++) {
|
|
|
|
const upb_FieldDef* f = o->fields[i];
|
|
|
|
fields[upb_FieldDef_Index(f)].offset = data_offset;
|
|
|
|
fields[upb_FieldDef_Index(f)].presence = ~case_offset;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Size of the entire structure should be a multiple of its greatest
|
|
|
|
* alignment. TODO: track overall alignment for real? */
|
|
|
|
l->size = UPB_ALIGN_UP(l->size, 8);
|
|
|
|
|
|
|
|
/* Sort fields by number. */
|
|
|
|
qsort(fields, upb_MessageDef_numfields(m), sizeof(*fields), field_number_cmp);
|
Optimized decoder and paved the way for parsing extensions.
The primary motivation for this change is to avoid referring to the
`upb_msglayout` object when we are trying to fetch the `upb_msglayout`
object for a sub-message. This will help pave the way for parsing
extensions. We also implement several optimizations so that we can
make this change without regressing performance.
Normally we compute the layout for a sub-message field like so:
```
const upb_msglayout *get_submsg_layout(
const upb_msglayout *layout,
const upb_msglayout_field *field) {
return layout->submsgs[field->submsg_index]
}
```
The reason for this indirection is to avoid storing a pointer directly
in `upb_msglayout_field`, as this would double its size (from 12 to 24
bytes on 64-bit architectures) which is wasteful as this pointer is
only needed for message typed fields.
However `get_submsg_layout` as written above does not work for
extensions, as they will not have entries in the message's
`layout->submsgs` array by nature, and we want to avoid creating
an entire fake `upb_msglayout` for each such extension since that
would also be wasteful.
This change removes the dependency on `upb_msglayout` by passing down
the `submsgs` array instead:
```
const upb_msglayout *get_submsg_layout(
const upb_msglayout *const *submsgs,
const upb_msglayout_field *field) {
return submsgs[field->submsg_index]
}
```
This will pave the way for parsing extensions, as we can more easily
create an alternative `submsgs` array for extension fields without
extra overhead or waste.
Along the way several optimizations presented themselves that allow
a nice increase in performance:
1. Passing the parsed `wireval` by address instead of by value ended
up avoiding an expensive and useless stack copy (this is on Clang,
which was used for all measurements).
2. When field numbers are densely packed, we can find a field by number
with a single indexed lookup instead of linear search. At codegen
time we can compute the maximum field number that will allow such
an indexed lookup.
3. For fields that do require linear search, we can start the linear
search at the location where we found the previous field, taking
advantage of the fact that field numbers are generally increasing.
4. When the hasbit index is less than 32 (the common case) we can use
a less expensive code sequence to set it.
5. We check for the hasbit case before the oneof case, as optional
fields are more common than oneof fields.
Benchmark results indicate a 20% improvement in parse speed with a
small code size increase:
```
name old time/op new time/op delta
ArenaOneAlloc 21.3ns ± 0% 21.5ns ± 0% +0.96% (p=0.000 n=12+12)
ArenaInitialBlockOneAlloc 6.32ns ± 0% 6.32ns ± 0% +0.03% (p=0.000 n=12+10)
LoadDescriptor_Upb 53.5µs ± 1% 51.5µs ± 2% -3.70% (p=0.000 n=12+12)
LoadAdsDescriptor_Upb 2.78ms ± 2% 2.68ms ± 0% -3.57% (p=0.000 n=12+12)
LoadDescriptor_Proto2 240µs ± 0% 240µs ± 0% +0.12% (p=0.001 n=12+12)
LoadAdsDescriptor_Proto2 12.8ms ± 0% 12.7ms ± 0% -1.15% (p=0.000 n=12+10)
Parse_Upb_FileDesc<UseArena,Copy> 13.2µs ± 2% 10.7µs ± 0% -18.49% (p=0.000 n=10+12)
Parse_Upb_FileDesc<UseArena,Alias> 11.3µs ± 0% 9.6µs ± 0% -15.11% (p=0.000 n=12+11)
Parse_Upb_FileDesc<InitBlock,Copy> 12.7µs ± 0% 10.3µs ± 0% -19.00% (p=0.000 n=10+12)
Parse_Upb_FileDesc<InitBlock,Alias> 10.9µs ± 0% 9.2µs ± 0% -15.82% (p=0.000 n=12+12)
Parse_Proto2<FileDesc,NoArena,Copy> 29.4µs ± 0% 29.5µs ± 0% +0.61% (p=0.000 n=12+12)
Parse_Proto2<FileDesc,UseArena,Copy> 20.7µs ± 2% 20.6µs ± 2% ~ (p=0.260 n=12+11)
Parse_Proto2<FileDesc,InitBlock,Copy> 16.7µs ± 1% 16.7µs ± 0% -0.25% (p=0.036 n=12+10)
Parse_Proto2<FileDescSV,InitBlock,Alias> 16.5µs ± 0% 16.5µs ± 0% +0.20% (p=0.016 n=12+11)
SerializeDescriptor_Proto2 5.30µs ± 1% 5.36µs ± 1% +1.09% (p=0.000 n=12+11)
SerializeDescriptor_Upb 12.9µs ± 0% 13.0µs ± 0% +0.90% (p=0.000 n=12+11)
FILE SIZE VM SIZE
-------------- --------------
+1.5% +176 +1.6% +176 upb/decode.c
+1.8% +176 +1.9% +176 decode_msg
+0.4% +64 +0.4% +64 upb/def.c
+1.4% +64 +1.4% +64 _upb_symtab_addfile
+1.2% +48 +1.4% +48 upb/reflection.c
+15% +32 +18% +32 upb_msg_set
+2.9% +16 +3.1% +16 upb_msg_mutable
-9.3% -288 [ = ] 0 [Unmapped]
[ = ] 0 +0.2% +288 TOTAL
```
4 years ago
|
|
|
assign_layout_indices(m, l, fields);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Duplicates the bytes of |view| via upb_strdup2() using ctx->arena.  A failed
 * duplication is reported through CHK_OOM(). */
static char* strviewdup(symtab_addctx* ctx, upb_StringView view) {
  char* copy = upb_strdup2(view.data, view.size, ctx->arena);
  CHK_OOM(copy);
  return copy;
}
|
|
|
|
|
|
|
|
/* Returns true iff the |n| bytes at |a| are exactly the NUL-terminated string
 * |b| (same length, same bytes).  |a| need not be NUL-terminated. */
static bool streql2(const char* a, size_t n, const char* b) {
  if (strlen(b) != n) return false;
  return memcmp(a, b, n) == 0;
}
|
|
|
|
|
|
|
|
/* Returns true iff the contents of |view| are exactly the NUL-terminated
 * string |b|. */
static bool streql_view(upb_StringView view, const char* b) {
  /* Same comparison streql2() performs, written out on the view's fields. */
  return view.size == strlen(b) && memcmp(view.data, b, view.size) == 0;
}
|
|
|
|
|
|
|
|
static const char* makefullname(symtab_addctx* ctx, const char* prefix,
|
|
|
|
upb_StringView name) {
|
|
|
|
if (prefix) {
|
|
|
|
/* ret = prefix + '.' + name; */
|
|
|
|
size_t n = strlen(prefix);
|
|
|
|
char* ret = symtab_alloc(ctx, n + name.size + 2);
|
|
|
|
strcpy(ret, prefix);
|
|
|
|
ret[n] = '.';
|
|
|
|
memcpy(&ret[n + 1], name.data, name.size);
|
|
|
|
ret[n + 1 + name.size] = '\0';
|
|
|
|
return ret;
|
|
|
|
} else {
|
|
|
|
return strviewdup(ctx, name);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Second pass over the oneofs of |m|, run once every field has been counted
 * into its oneof.  It validates synthetic oneofs (exactly one field each, and
 * all of them after the real oneofs), allocates each oneof's field array,
 * fills those arrays in field order, and records m->real_oneof_count. */
static void finalize_oneofs(symtab_addctx* ctx, upb_MessageDef* m) {
  upb_OneofDef* oneofs = (upb_OneofDef*)m->oneofs;
  int n_synthetic = 0;

  for (int idx = 0; idx < m->oneof_count; idx++) {
    upb_OneofDef* o = &oneofs[idx];

    if (o->synthetic) {
      if (o->field_count != 1) {
        symtab_errf(ctx, "Synthetic oneofs must have one field, not %d: %s",
                    o->field_count, upb_OneofDef_Name(o));
      }
      n_synthetic++;
    } else if (n_synthetic != 0) {
      symtab_errf(ctx, "Synthetic oneofs must be after all other oneofs: %s",
                  upb_OneofDef_Name(o));
    }

    /* field_count currently holds the total; size the array from it, then
     * reset it so it can serve as the fill cursor below. */
    o->fields = symtab_alloc(ctx, sizeof(upb_FieldDef*) * o->field_count);
    o->field_count = 0;
  }

  for (int idx = 0; idx < m->field_count; idx++) {
    const upb_FieldDef* f = &m->fields[idx];
    upb_OneofDef* o = (upb_OneofDef*)upb_FieldDef_ContainingOneof(f);
    if (!o) continue;
    o->fields[o->field_count++] = f;
  }

  m->real_oneof_count = m->oneof_count - n_synthetic;
}
|
|
|
|
|
|
|
|
/* Writes the JSON (lowerCamelCase) form of |name| into |buf|, which has room
 * for |len| bytes, always NUL-terminating when len > 0 (truncating if needed).
 *
 * Returns the number of bytes the full result needs including the NUL
 * terminator, so a caller can size a buffer by passing (NULL, 0) first.
 * A NULL |name| writes an empty string and returns 0. */
size_t getjsonname(const char* name, char* buf, size_t len) {
  size_t src, dst = 0;
  bool ucase_next = false;

/* Counts one output byte and stores it only while it still fits; the last
 * slot of the buffer is reserved for the terminator.  Wrapped in do/while so
 * it behaves as a single statement wherever it is used. */
#define WRITE(byte)            \
  do {                         \
    ++dst;                     \
    if (dst < len) {           \
      buf[dst - 1] = (byte);   \
    } else if (dst == len) {   \
      buf[dst - 1] = '\0';     \
    }                          \
  } while (0)

  if (!name) {
    WRITE('\0');
    return 0;
  }

  /* Implement the transformation as described in the spec:
   *   1. upper case all letters after an underscore.
   *   2. remove all underscores.
   */
  for (src = 0; name[src]; src++) {
    if (name[src] == '_') {
      ucase_next = true;
      continue;
    }

    if (ucase_next) {
      /* <ctype.h> functions require a value representable as unsigned char;
       * passing a negative plain char is undefined behavior. */
      WRITE((char)toupper((unsigned char)name[src]));
      ucase_next = false;
    } else {
      WRITE(name[src]);
    }
  }

  WRITE('\0');
  return dst;

#undef WRITE
}
|
|
|
|
|
|
|
|
/* Returns the JSON name for |name| in memory obtained from symtab_alloc().
 * getjsonname() is called twice: once to measure, once to write. */
static char* makejsonname(symtab_addctx* ctx, const char* name) {
  const size_t needed = getjsonname(name, NULL, 0);
  char* out = symtab_alloc(ctx, needed);
  getjsonname(name, out, needed);
  return out;
}
|
|
|
|
|
|
|
|
/* Adds a symbol |v| to the symtab, which must be a def pointer previously
|
|
|
|
* packed with pack_def(). The def's pointer to upb_FileDef* must be set before
|
|
|
|
* adding, so we know which entries to remove if building this file fails. */
|
|
|
|
static void symtab_add(symtab_addctx* ctx, const char* name, upb_value v) {
|
|
|
|
// TODO: table should support an operation "tryinsert" to avoid the double
|
|
|
|
// lookup.
|
|
|
|
if (upb_strtable_lookup(&ctx->symtab->syms, name, NULL)) {
|
|
|
|
symtab_errf(ctx, "duplicate symbol '%s'", name);
|
|
|
|
}
|
|
|
|
size_t len = strlen(name);
|
|
|
|
CHK_OOM(upb_strtable_insert(&ctx->symtab->syms, name, len, v,
|
|
|
|
ctx->symtab->arena));
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Shortens the dotted name base[0..*len) by its last component: *len becomes
 * the index of the last '.' found at an index greater than zero, or 0 when
 * there is none.  Returns false (leaving *len untouched) only when the name is
 * already empty. */
static bool remove_component(char* base, size_t* len) {
  size_t pos = *len;
  if (pos == 0) return false;

  /* Visit indices len-1 down to 1; index 0 is deliberately never examined. */
  while (--pos > 0) {
    if (base[pos] == '.') {
      *len = pos;
      return true;
    }
  }

  *len = 0;
  return true;
}
|
|
|
|
|
|
|
|
/* Given a symbol and the base symbol inside which it is defined, find the
|
|
|
|
* symbol's definition in t. */
|
|
|
|
static const void* symtab_resolveany(symtab_addctx* ctx,
|
|
|
|
const char* from_name_dbg,
|
|
|
|
const char* base, upb_StringView sym,
|
|
|
|
upb_deftype_t* type) {
|
|
|
|
const upb_strtable* t = &ctx->symtab->syms;
|
|
|
|
if (sym.size == 0) goto notfound;
|
|
|
|
upb_value v;
|
|
|
|
if (sym.data[0] == '.') {
|
|
|
|
/* Symbols starting with '.' are absolute, so we do a single lookup.
|
|
|
|
* Slice to omit the leading '.' */
|
|
|
|
if (!upb_strtable_lookup2(t, sym.data + 1, sym.size - 1, &v)) {
|
|
|
|
goto notfound;
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
/* Remove components from base until we find an entry or run out. */
|
|
|
|
size_t baselen = strlen(base);
|
|
|
|
char* tmp = malloc(sym.size + strlen(base) + 1);
|
|
|
|
while (1) {
|
|
|
|
char* p = tmp;
|
|
|
|
if (baselen) {
|
|
|
|
memcpy(p, base, baselen);
|
|
|
|
p[baselen] = '.';
|
|
|
|
p += baselen + 1;
|
|
|
|
}
|
|
|
|
memcpy(p, sym.data, sym.size);
|
|
|
|
p += sym.size;
|
|
|
|
if (upb_strtable_lookup2(t, tmp, p - tmp, &v)) {
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
if (!remove_component(tmp, &baselen)) {
|
|
|
|
free(tmp);
|
|
|
|
goto notfound;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
free(tmp);
|
|
|
|
}
|
|
|
|
|
|
|
|
*type = deftype(v);
|
|
|
|
return unpack_def(v, *type);
|
|
|
|
|
|
|
|
notfound:
|
|
|
|
symtab_errf(ctx, "couldn't resolve name '" UPB_STRINGVIEW_FORMAT "'",
|
|
|
|
UPB_STRINGVIEW_ARGS(sym));
|
|
|
|
}
|
|
|
|
|
|
|
|
static const void* symtab_resolve(symtab_addctx* ctx, const char* from_name_dbg,
|
|
|
|
const char* base, upb_StringView sym,
|
|
|
|
upb_deftype_t type) {
|
|
|
|
upb_deftype_t found_type;
|
|
|
|
const void* ret =
|
|
|
|
symtab_resolveany(ctx, from_name_dbg, base, sym, &found_type);
|
|
|
|
if (ret && found_type != type) {
|
|
|
|
symtab_errf(
|
|
|
|
ctx,
|
|
|
|
"type mismatch when resolving %s: couldn't find name %s with type=%d",
|
|
|
|
from_name_dbg, sym.data, (int)type);
|
|
|
|
}
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void create_oneofdef(
|
|
|
|
symtab_addctx* ctx, upb_MessageDef* m,
|
|
|
|
const google_protobuf_OneofDescriptorProto* oneof_proto,
|
|
|
|
const upb_OneofDef* _o) {
|
|
|
|
upb_OneofDef* o = (upb_OneofDef*)_o;
|
|
|
|
upb_StringView name = google_protobuf_OneofDescriptorProto_name(oneof_proto);
|
|
|
|
upb_value v;
|
|
|
|
|
|
|
|
o->parent = m;
|
|
|
|
o->full_name = makefullname(ctx, m->full_name, name);
|
|
|
|
o->field_count = 0;
|
|
|
|
o->synthetic = false;
|
|
|
|
|
|
|
|
SET_OPTIONS(o->opts, OneofDescriptorProto, OneofOptions, oneof_proto);
|
|
|
|
|
|
|
|
v = pack_def(o, UPB_DEFTYPE_ONEOF);
|
|
|
|
CHK_OOM(upb_strtable_insert(&m->ntof, name.data, name.size, v, ctx->arena));
|
|
|
|
|
|
|
|
CHK_OOM(upb_inttable_init(&o->itof, ctx->arena));
|
|
|
|
CHK_OOM(upb_strtable_init(&o->ntof, 4, ctx->arena));
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Creates a str_t holding a copy of the |len| bytes at |data|, followed by a
 * NUL byte.  |data| is not read when |len| is 0, so it may be NULL then. */
static str_t* newstr(symtab_addctx* ctx, const char* data, size_t len) {
  str_t* s = symtab_alloc(ctx, sizeof(*s) + len);
  CHK_OOM(s);

  s->len = len;
  s->str[len] = '\0';
  if (len != 0) {
    memcpy(s->str, data, len);
  }
  return s;
}
|
|
|
|
|
|
|
|
/* Reads one character from the cursor *src, which is bounded by |end|.
 * On success stores it in *ch, advances *src by one and returns true.
 * Returns false, touching neither *ch nor *src, when the input is exhausted. */
static bool upb_DefPool_TryGetChar(const char** src, const char* end,
                                   char* ch) {
  const char* cur = *src;
  if (cur == end) return false;
  *ch = *cur;
  *src = cur + 1;
  return true;
}
|
|
|
|
|
|
|
|
static char upb_DefPool_TryGetHexDigit(symtab_addctx* ctx,
|
|
|
|
const upb_FieldDef* f, const char** src,
|
|
|
|
const char* end) {
|
|
|
|
char ch;
|
|
|
|
if (!upb_DefPool_TryGetChar(src, end, &ch)) return -1;
|
|
|
|
if ('0' <= ch && ch <= '9') {
|
|
|
|
return ch - '0';
|
|
|
|
}
|
|
|
|
ch = upb_ascii_lower(ch);
|
|
|
|
if ('a' <= ch && ch <= 'f') {
|
|
|
|
return ch - 'a' + 0xa;
|
|
|
|
}
|
|
|
|
*src -= 1; // Char wasn't actually a hex digit.
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
static char upb_DefPool_ParseHexEscape(symtab_addctx* ctx,
|
|
|
|
const upb_FieldDef* f, const char** src,
|
|
|
|
const char* end) {
|
|
|
|
char hex_digit = upb_DefPool_TryGetHexDigit(ctx, f, src, end);
|
|
|
|
if (hex_digit < 0) {
|
|
|
|
symtab_errf(ctx,
|
|
|
|
"\\x cannot be followed by non-hex digit in field '%s' default",
|
|
|
|
upb_FieldDef_FullName(f));
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
unsigned int ret = hex_digit;
|
|
|
|
while ((hex_digit = upb_DefPool_TryGetHexDigit(ctx, f, src, end)) >= 0) {
|
|
|
|
ret = (ret << 4) | hex_digit;
|
|
|
|
}
|
|
|
|
if (ret > 0xff) {
|
|
|
|
symtab_errf(ctx, "Value of hex escape in field %s exceeds 8 bits",
|
|
|
|
upb_FieldDef_FullName(f));
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Tries to consume one octal digit from *src.  Returns its value 0..7 and
 * advances *src on success.  Returns -1 and leaves *src where it was if the
 * input is exhausted or the next character is not an octal digit.
 * NOTE(review): the -1 sentinel is returned through plain char, so callers
 * should not rely on it comparing less than zero. */
char upb_DefPool_TryGetOctalDigit(const char** src, const char* end) {
  char ch;
  if (!upb_DefPool_TryGetChar(src, end, &ch)) return -1;

  const bool is_octal = (ch >= '0' && ch <= '7');
  if (!is_octal) {
    *src -= 1;  // Char wasn't actually an octal digit.
    return -1;
  }
  return ch - '0';
}
|
|
|
|
|
|
|
|
static char upb_DefPool_ParseOctalEscape(symtab_addctx* ctx,
|
|
|
|
const upb_FieldDef* f,
|
|
|
|
const char** src, const char* end) {
|
|
|
|
char ch = 0;
|
|
|
|
for (int i = 0; i < 3; i++) {
|
|
|
|
char digit;
|
|
|
|
if ((digit = upb_DefPool_TryGetOctalDigit(src, end)) >= 0) {
|
|
|
|
ch = (ch << 3) | digit;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return ch;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Decodes one escape sequence whose leading backslash has already been
 * consumed, and returns the byte it denotes.  Handles the single-character
 * escapes (\a \b \f \n \r \t \v \\ \' \" \?), hex escapes (\x.. / \X..) and
 * octal escapes (\0.. through \7..).  An exhausted input or an unknown escape
 * character is reported through symtab_errf(). */
static char upb_DefPool_ParseEscape(symtab_addctx* ctx, const upb_FieldDef* f,
                                    const char** src, const char* end) {
  /* Parallel tables: escape letter -> the byte it stands for. */
  static const char kEscapeLetters[] = "abfnrtv\\'\"?";
  static const char kEscapeValues[] = "\a\b\f\n\r\t\v\\'\"\?";

  char ch;
  if (!upb_DefPool_TryGetChar(src, end, &ch)) {
    symtab_errf(ctx, "unterminated escape sequence in field %s",
                upb_FieldDef_FullName(f));
    return 0;
  }

  /* Search only the letters themselves, not the table's trailing NUL, so a
   * '\0' input character is not mistaken for a match. */
  const char* hit = memchr(kEscapeLetters, ch, sizeof(kEscapeLetters) - 1);
  if (hit) {
    return kEscapeValues[hit - kEscapeLetters];
  }

  if (ch == 'x' || ch == 'X') {
    return upb_DefPool_ParseHexEscape(ctx, f, src, end);
  }

  if (ch >= '0' && ch <= '7') {
    /* Put the first octal digit back so the octal parser sees all of them. */
    *src -= 1;
    return upb_DefPool_ParseOctalEscape(ctx, f, src, end);
  }

  symtab_errf(ctx, "Unknown escape sequence: \\%c", ch);
}
|
|
|
|
|
|
|
|
/* Creates a str_t holding the |len| bytes at |data| with every backslash
 * escape sequence replaced by the byte it denotes.  ret->len is the decoded
 * length.  |f| is passed on to the escape parser for its error messages. */
static str_t* unescape(symtab_addctx* ctx, const upb_FieldDef* f,
                       const char* data, size_t len) {
  // Size here is an upper bound; escape sequences could ultimately shrink it.
  str_t* ret = symtab_alloc(ctx, sizeof(*ret) + len);
  char* const out_begin = &ret->str[0];
  char* out = out_begin;
  const char* in = data;
  const char* const end = data + len;

  while (in < end) {
    char c = *in++;
    if (c == '\\') {
      /* The backslash is consumed; decode whatever follows it. */
      c = upb_DefPool_ParseEscape(ctx, f, &in, end);
    }
    *out++ = c;
  }

  ret->len = out - out_begin;
  return ret;
}
|
|
|
|
|
|
|
|
static void parse_default(symtab_addctx* ctx, const char* str, size_t len,
|
|
|
|
upb_FieldDef* f) {
|
|
|
|
char* end;
|
|
|
|
char nullz[64];
|
|
|
|
errno = 0;
|
|
|
|
|
|
|
|
switch (upb_FieldDef_CType(f)) {
|
|
|
|
case kUpb_CType_Int32:
|
|
|
|
case kUpb_CType_Int64:
|
|
|
|
case kUpb_CType_UInt32:
|
|
|
|
case kUpb_CType_UInt64:
|
|
|
|
case kUpb_CType_Double:
|
|
|
|
case kUpb_CType_Float:
|
|
|
|
/* Standard C number parsing functions expect null-terminated strings. */
|
|
|
|
if (len >= sizeof(nullz) - 1) {
|
|
|
|
symtab_errf(ctx, "Default too long: %.*s", (int)len, str);
|
|
|
|
}
|
|
|
|
memcpy(nullz, str, len);
|
|
|
|
nullz[len] = '\0';
|
|
|
|
str = nullz;
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
switch (upb_FieldDef_CType(f)) {
|
|
|
|
case kUpb_CType_Int32: {
|
|
|
|
long val = strtol(str, &end, 0);
|
|
|
|
if (val > INT32_MAX || val < INT32_MIN || errno == ERANGE || *end) {
|
|
|
|
goto invalid;
|
|
|
|
}
|
|
|
|
f->defaultval.sint = val;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
case kUpb_CType_Enum: {
|
|
|
|
const upb_EnumDef* e = f->sub.enumdef;
|
|
|
|
const upb_EnumValueDef* ev =
|
|
|
|
upb_EnumDef_FindValueByNameWithSize(e, str, len);
|
|
|
|
if (!ev) {
|
|
|
|
goto invalid;
|
|
|
|
}
|
|
|
|
f->defaultval.sint = ev->number;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
case kUpb_CType_Int64: {
|
|
|
|
long long val = strtoll(str, &end, 0);
|
|
|
|
if (val > INT64_MAX || val < INT64_MIN || errno == ERANGE || *end) {
|
|
|
|
goto invalid;
|
|
|
|
}
|
|
|
|
f->defaultval.sint = val;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
case kUpb_CType_UInt32: {
|
|
|
|
unsigned long val = strtoul(str, &end, 0);
|
|
|
|
if (val > UINT32_MAX || errno == ERANGE || *end) {
|
|
|
|
goto invalid;
|
|
|
|
}
|
|
|
|
f->defaultval.uint = val;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
case kUpb_CType_UInt64: {
|
|
|
|
unsigned long long val = strtoull(str, &end, 0);
|
|
|
|
if (val > UINT64_MAX || errno == ERANGE || *end) {
|
|
|
|
goto invalid;
|
|
|
|
}
|
|
|
|
f->defaultval.uint = val;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
case kUpb_CType_Double: {
|
|
|
|
double val = strtod(str, &end);
|
|
|
|
if (errno == ERANGE || *end) {
|
|
|
|
goto invalid;
|
|
|
|
}
|
|
|
|
f->defaultval.dbl = val;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
case kUpb_CType_Float: {
|
|
|
|
float val = strtof(str, &end);
|
|
|
|
if (errno == ERANGE || *end) {
|
|
|
|
goto invalid;
|
|
|
|
}
|
|
|
|
f->defaultval.flt = val;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
case kUpb_CType_Bool: {
|
|
|
|
if (streql2(str, len, "false")) {
|
|
|
|
f->defaultval.boolean = false;
|
|
|
|
} else if (streql2(str, len, "true")) {
|
|
|
|
f->defaultval.boolean = true;
|
|
|
|
} else {
|
|
|
|
goto invalid;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
case kUpb_CType_String:
|
|
|
|
f->defaultval.str = newstr(ctx, str, len);
|
|
|
|
break;
|
|
|
|
case kUpb_CType_Bytes:
|
|
|
|
f->defaultval.str = unescape(ctx, f, str, len);
|
|
|
|
break;
|
|
|
|
case kUpb_CType_Message:
|
|
|
|
/* Should not have a default value. */
|
|
|
|
symtab_errf(ctx, "Message should not have a default (%s)",
|
|
|
|
upb_FieldDef_FullName(f));
|
|
|
|
}
|
|
|
|
|
|
|
|
return;
|
|
|
|
|
|
|
|
invalid:
|
|
|
|
symtab_errf(ctx, "Invalid default '%.*s' for field %s of type %d", (int)len,
|
|
|
|
str, upb_FieldDef_FullName(f), (int)upb_FieldDef_Type(f));
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Gives field |f| the implicit default for its C type: zero for numbers,
 * false for bools, an empty string for string/bytes, and the number of the
 * enum's first value for enums.  Message fields are left untouched. */
static void set_default_default(symtab_addctx* ctx, upb_FieldDef* f) {
  switch (upb_FieldDef_CType(f)) {
    case kUpb_CType_Message:
      break;
    case kUpb_CType_Enum:
      f->defaultval.sint = f->sub.enumdef->values[0].number;
      break;
    case kUpb_CType_Bool:
      f->defaultval.boolean = false;
      break;
    case kUpb_CType_String:
    case kUpb_CType_Bytes:
      f->defaultval.str = newstr(ctx, NULL, 0);
      break;
    case kUpb_CType_Float:
    case kUpb_CType_Double:
      /* Both floating-point kinds are zeroed through the dbl member. */
      f->defaultval.dbl = 0;
      break;
    case kUpb_CType_UInt32:
    case kUpb_CType_UInt64:
      f->defaultval.uint = 0;
      break;
    case kUpb_CType_Int32:
    case kUpb_CType_Int64:
      f->defaultval.sint = 0;
      break;
  }
}
|
|
|
|
|
|
|
|
static void create_fielddef(
|
|
|
|
symtab_addctx* ctx, const char* prefix, upb_MessageDef* m,
|
|
|
|
const google_protobuf_FieldDescriptorProto* field_proto,
|
|
|
|
const upb_FieldDef* _f, bool is_extension) {
|
|
|
|
upb_FieldDef* f = (upb_FieldDef*)_f;
|
|
|
|
upb_StringView name;
|
|
|
|
const char* full_name;
|
|
|
|
const char* json_name;
|
|
|
|
const char* shortname;
|
|
|
|
int32_t field_number;
|
|
|
|
|
|
|
|
f->file = ctx->file; /* Must happen prior to symtab_add(). */
|
|
|
|
|
|
|
|
if (!google_protobuf_FieldDescriptorProto_has_name(field_proto)) {
|
|
|
|
symtab_errf(ctx, "field has no name (%s)", upb_MessageDef_FullName(m));
|
|
|
|
}
|
|
|
|
|
|
|
|
name = google_protobuf_FieldDescriptorProto_name(field_proto);
|
|
|
|
check_ident(ctx, name, false);
|
|
|
|
full_name = makefullname(ctx, prefix, name);
|
|
|
|
shortname = shortdefname(full_name);
|
|
|
|
|
|
|
|
if (google_protobuf_FieldDescriptorProto_has_json_name(field_proto)) {
|
|
|
|
json_name = strviewdup(
|
|
|
|
ctx, google_protobuf_FieldDescriptorProto_json_name(field_proto));
|
|
|
|
f->has_json_name_ = true;
|
|
|
|
} else {
|
|
|
|
json_name = makejsonname(ctx, shortname);
|
|
|
|
f->has_json_name_ = false;
|
|
|
|
}
|
|
|
|
|
|
|
|
field_number = google_protobuf_FieldDescriptorProto_number(field_proto);
|
|
|
|
|
|
|
|
f->full_name = full_name;
|
|
|
|
f->json_name = json_name;
|
|
|
|
f->label_ = (int)google_protobuf_FieldDescriptorProto_label(field_proto);
|
|
|
|
f->number_ = field_number;
|
|
|
|
f->scope.oneof = NULL;
|
|
|
|
f->proto3_optional_ =
|
|
|
|
google_protobuf_FieldDescriptorProto_proto3_optional(field_proto);
|
|
|
|
|
|
|
|
bool has_type = google_protobuf_FieldDescriptorProto_has_type(field_proto);
|
|
|
|
bool has_type_name =
|
|
|
|
google_protobuf_FieldDescriptorProto_has_type_name(field_proto);
|
|
|
|
|
|
|
|
f->type_ = (int)google_protobuf_FieldDescriptorProto_type(field_proto);
|
|
|
|
|
|
|
|
if (has_type) {
|
|
|
|
switch (f->type_) {
|
|
|
|
case upb_FieldType_Message:
|
|
|
|
case upb_FieldType_Group:
|
|
|
|
case upb_FieldType_Enum:
|
|
|
|
if (!has_type_name) {
|
|
|
|
symtab_errf(ctx, "field of type %d requires type name (%s)",
|
|
|
|
(int)f->type_, full_name);
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
if (has_type_name) {
|
|
|
|
symtab_errf(ctx, "invalid type for field with type_name set (%s, %d)",
|
|
|
|
full_name, (int)f->type_);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
} else if (has_type_name) {
|
|
|
|
f->type_ =
|
|
|
|
FIELD_TYPE_UNSPECIFIED; // We'll fill this in in resolve_fielddef().
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!is_extension) {
|
|
|
|
/* direct message field. */
|
|
|
|
upb_value v, field_v, json_v, existing_v;
|
|
|
|
size_t json_size;
|
|
|
|
|
|
|
|
if (field_number <= 0 || field_number > kUpb_MaxFieldNumber) {
|
|
|
|
symtab_errf(ctx, "invalid field number (%u)", field_number);
|
|
|
|
}
|
|
|
|
|
|
|
|
f->index_ = f - m->fields;
|
|
|
|
f->msgdef = m;
|
|
|
|
f->is_extension_ = false;
|
|
|
|
|
|
|
|
field_v = pack_def(f, UPB_DEFTYPE_FIELD);
|
|
|
|
json_v = pack_def(f, UPB_DEFTYPE_FIELD_JSONNAME);
|
|
|
|
v = upb_value_constptr(f);
|
|
|
|
json_size = strlen(json_name);
|
|
|
|
|
|
|
|
if (upb_strtable_lookup(&m->ntof, shortname, &existing_v)) {
|
|
|
|
symtab_errf(ctx, "duplicate field name (%s)", shortname);
|
|
|
|
}
|
|
|
|
|
|
|
|
CHK_OOM(upb_strtable_insert(&m->ntof, name.data, name.size, field_v,
|
|
|
|
ctx->arena));
|
|
|
|
|
|
|
|
if (strcmp(shortname, json_name) != 0) {
|
|
|
|
if (upb_strtable_lookup(&m->ntof, json_name, &v)) {
|
|
|
|
symtab_errf(ctx, "duplicate json_name (%s)", json_name);
|
|
|
|
} else {
|
|
|
|
CHK_OOM(upb_strtable_insert(&m->ntof, json_name, json_size, json_v,
|
|
|
|
ctx->arena));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (upb_inttable_lookup(&m->itof, field_number, NULL)) {
|
|
|
|
symtab_errf(ctx, "duplicate field number (%u)", field_number);
|
|
|
|
}
|
|
|
|
|
|
|
|
CHK_OOM(upb_inttable_insert(&m->itof, field_number, v, ctx->arena));
|
|
|
|
|
|
|
|
if (ctx->layout) {
|
|
|
|
const upb_MiniTable_Field* fields = m->layout->fields;
|
|
|
|
int count = m->layout->field_count;
|
|
|
|
bool found = false;
|
|
|
|
for (int i = 0; i < count; i++) {
|
|
|
|
if (fields[i].number == field_number) {
|
|
|
|
f->layout_index = i;
|
|
|
|
found = true;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
UPB_ASSERT(found);
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
/* extension field. */
|
|
|
|
f->is_extension_ = true;
|
|
|
|
f->scope.extension_scope = m;
|
|
|
|
symtab_add(ctx, full_name, pack_def(f, UPB_DEFTYPE_EXT));
|
|
|
|
f->layout_index = ctx->ext_count++;
|
|
|
|
if (ctx->layout) {
|
|
|
|
UPB_ASSERT(ctx->file->ext_layouts[f->layout_index]->field.number ==
|
|
|
|
field_number);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (f->type_ < upb_FieldType_Double || f->type_ > upb_FieldType_SInt64) {
|
|
|
|
symtab_errf(ctx, "invalid type for field %s (%d)", f->full_name, f->type_);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (f->label_ < kUpb_Label_Optional || f->label_ > kUpb_Label_Repeated) {
|
|
|
|
symtab_errf(ctx, "invalid label for field %s (%d)", f->full_name,
|
|
|
|
f->label_);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* We can't resolve the subdef or (in the case of extensions) the containing
|
|
|
|
* message yet, because it may not have been defined yet. We stash a pointer
|
|
|
|
* to the field_proto until later when we can properly resolve it. */
|
|
|
|
f->sub.unresolved = field_proto;
|
|
|
|
|
|
|
|
if (f->label_ == kUpb_Label_Required &&
|
|
|
|
f->file->syntax == kUpb_Syntax_Proto3) {
|
|
|
|
symtab_errf(ctx, "proto3 fields cannot be required (%s)", f->full_name);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (google_protobuf_FieldDescriptorProto_has_oneof_index(field_proto)) {
|
|
|
|
int oneof_index =
|
|
|
|
google_protobuf_FieldDescriptorProto_oneof_index(field_proto);
|
|
|
|
upb_OneofDef* oneof;
|
|
|
|
upb_value v = upb_value_constptr(f);
|
|
|
|
|
|
|
|
if (upb_FieldDef_Label(f) != kUpb_Label_Optional) {
|
|
|
|
symtab_errf(ctx, "fields in oneof must have OPTIONAL label (%s)",
|
|
|
|
f->full_name);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!m) {
|
|
|
|
symtab_errf(ctx, "oneof_index provided for extension field (%s)",
|
|
|
|
f->full_name);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (oneof_index >= m->oneof_count) {
|
|
|
|
symtab_errf(ctx, "oneof_index out of range (%s)", f->full_name);
|
|
|
|
}
|
|
|
|
|
|
|
|
oneof = (upb_OneofDef*)&m->oneofs[oneof_index];
|
|
|
|
f->scope.oneof = oneof;
|
|
|
|
|
|
|
|
oneof->field_count++;
|
|
|
|
if (f->proto3_optional_) {
|
|
|
|
oneof->synthetic = true;
|
|
|
|
}
|
|
|
|
CHK_OOM(upb_inttable_insert(&oneof->itof, f->number_, v, ctx->arena));
|
|
|
|
CHK_OOM(
|
|
|
|
upb_strtable_insert(&oneof->ntof, name.data, name.size, v, ctx->arena));
|
|
|
|
} else {
|
|
|
|
if (f->proto3_optional_) {
|
|
|
|
symtab_errf(ctx, "field with proto3_optional was not in a oneof (%s)",
|
|
|
|
f->full_name);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
SET_OPTIONS(f->opts, FieldDescriptorProto, FieldOptions, field_proto);
|
|
|
|
|
|
|
|
if (google_protobuf_FieldOptions_has_packed(f->opts)) {
|
|
|
|
f->packed_ = google_protobuf_FieldOptions_packed(f->opts);
|
|
|
|
} else {
|
|
|
|
/* Repeated fields default to packed for proto3 only. */
|
|
|
|
f->packed_ = upb_FieldDef_IsPrimitive(f) &&
|
|
|
|
f->label_ == kUpb_Label_Repeated &&
|
|
|
|
f->file->syntax == kUpb_Syntax_Proto3;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static void create_service(
|
|
|
|
symtab_addctx* ctx, const google_protobuf_ServiceDescriptorProto* svc_proto,
|
|
|
|
const upb_ServiceDef* _s) {
|
|
|
|
upb_ServiceDef* s = (upb_ServiceDef*)_s;
|
|
|
|
upb_StringView name;
|
|
|
|
const google_protobuf_MethodDescriptorProto* const* methods;
|
|
|
|
size_t i, n;
|
|
|
|
|
|
|
|
s->file = ctx->file; /* Must happen prior to symtab_add. */
|
|
|
|
|
|
|
|
name = google_protobuf_ServiceDescriptorProto_name(svc_proto);
|
|
|
|
check_ident(ctx, name, false);
|
|
|
|
s->full_name = makefullname(ctx, ctx->file->package, name);
|
|
|
|
symtab_add(ctx, s->full_name, pack_def(s, UPB_DEFTYPE_SERVICE));
|
|
|
|
|
|
|
|
methods = google_protobuf_ServiceDescriptorProto_method(svc_proto, &n);
|
|
|
|
|
|
|
|
s->method_count = n;
|
|
|
|
s->methods = symtab_alloc(ctx, sizeof(*s->methods) * n);
|
|
|
|
|
|
|
|
SET_OPTIONS(s->opts, ServiceDescriptorProto, ServiceOptions, svc_proto);
|
|
|
|
|
|
|
|
for (i = 0; i < n; i++) {
|
|
|
|
const google_protobuf_MethodDescriptorProto* method_proto = methods[i];
|
|
|
|
upb_MethodDef* m = (upb_MethodDef*)&s->methods[i];
|
|
|
|
upb_StringView name =
|
|
|
|
google_protobuf_MethodDescriptorProto_name(method_proto);
|
|
|
|
|
|
|
|
m->service = s;
|
|
|
|
m->full_name = makefullname(ctx, s->full_name, name);
|
|
|
|
m->client_streaming =
|
|
|
|
google_protobuf_MethodDescriptorProto_client_streaming(method_proto);
|
|
|
|
m->server_streaming =
|
|
|
|
google_protobuf_MethodDescriptorProto_server_streaming(method_proto);
|
|
|
|
m->input_type = symtab_resolve(
|
|
|
|
ctx, m->full_name, m->full_name,
|
|
|
|
google_protobuf_MethodDescriptorProto_input_type(method_proto),
|
|
|
|
UPB_DEFTYPE_MSG);
|
|
|
|
m->output_type = symtab_resolve(
|
|
|
|
ctx, m->full_name, m->full_name,
|
|
|
|
google_protobuf_MethodDescriptorProto_output_type(method_proto),
|
|
|
|
UPB_DEFTYPE_MSG);
|
|
|
|
|
|
|
|
SET_OPTIONS(m->opts, MethodDescriptorProto, MethodOptions, method_proto);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Returns the number of set bits in `x`. */
static int count_bits_debug(uint64_t x) {
  // For assertions only, speed does not matter.
  int bits = 0;
  for (; x != 0; x >>= 1) {
    bits += (int)(x & 1);
  }
  return bits;
}
|
|
|
|
|
|
|
|
upb_MiniTable_Enum* create_enumlayout(symtab_addctx* ctx,
|
|
|
|
const upb_EnumDef* e) {
|
|
|
|
int n = 0;
|
|
|
|
uint64_t mask = 0;
|
|
|
|
|
|
|
|
for (int i = 0; i < e->value_count; i++) {
|
|
|
|
uint32_t val = (uint32_t)e->values[i].number;
|
|
|
|
if (val < 64) {
|
|
|
|
mask |= 1 << val;
|
|
|
|
} else {
|
|
|
|
n++;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
int32_t* values = symtab_alloc(ctx, sizeof(*values) * n);
|
|
|
|
|
|
|
|
if (n) {
|
|
|
|
int32_t* p = values;
|
|
|
|
|
|
|
|
// Add values outside the bitmask range to the list, as described in the
|
|
|
|
// comments for upb_MiniTable_Enum.
|
|
|
|
for (int i = 0; i < e->value_count; i++) {
|
|
|
|
int32_t val = e->values[i].number;
|
|
|
|
if ((uint32_t)val >= 64) {
|
|
|
|
*p++ = val;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
UPB_ASSERT(p == values + n);
|
|
|
|
}
|
|
|
|
|
|
|
|
UPB_ASSERT(upb_inttable_count(&e->iton) == n + count_bits_debug(mask));
|
|
|
|
|
|
|
|
upb_MiniTable_Enum* layout = symtab_alloc(ctx, sizeof(*layout));
|
|
|
|
layout->value_count = n;
|
|
|
|
layout->mask = mask;
|
|
|
|
layout->values = values;
|
|
|
|
|
|
|
|
return layout;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void create_enumvaldef(
|
|
|
|
symtab_addctx* ctx, const char* prefix,
|
|
|
|
const google_protobuf_EnumValueDescriptorProto* val_proto, upb_EnumDef* e,
|
|
|
|
int i) {
|
|
|
|
upb_EnumValueDef* val = (upb_EnumValueDef*)&e->values[i];
|
|
|
|
upb_StringView name =
|
|
|
|
google_protobuf_EnumValueDescriptorProto_name(val_proto);
|
|
|
|
upb_value v = upb_value_constptr(val);
|
|
|
|
|
|
|
|
val->parent = e; /* Must happen prior to symtab_add(). */
|
|
|
|
val->full_name = makefullname(ctx, prefix, name);
|
|
|
|
val->number = google_protobuf_EnumValueDescriptorProto_number(val_proto);
|
|
|
|
symtab_add(ctx, val->full_name, pack_def(val, UPB_DEFTYPE_ENUMVAL));
|
|
|
|
|
|
|
|
SET_OPTIONS(val->opts, EnumValueDescriptorProto, EnumValueOptions, val_proto);
|
|
|
|
|
|
|
|
if (i == 0 && e->file->syntax == kUpb_Syntax_Proto3 && val->number != 0) {
|
|
|
|
symtab_errf(ctx, "for proto3, the first enum value must be zero (%s)",
|
|
|
|
e->full_name);
|
|
|
|
}
|
|
|
|
|
|
|
|
CHK_OOM(upb_strtable_insert(&e->ntoi, name.data, name.size, v, ctx->arena));
|
|
|
|
|
|
|
|
// Multiple enumerators can have the same number, first one wins.
|
|
|
|
if (!upb_inttable_lookup(&e->iton, val->number, NULL)) {
|
|
|
|
CHK_OOM(upb_inttable_insert(&e->iton, val->number, v, ctx->arena));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static void create_enumdef(
|
|
|
|
symtab_addctx* ctx, const char* prefix,
|
|
|
|
const google_protobuf_EnumDescriptorProto* enum_proto,
|
|
|
|
const upb_MessageDef* containing_type, const upb_EnumDef* _e) {
|
|
|
|
upb_EnumDef* e = (upb_EnumDef*)_e;
|
|
|
|
;
|
|
|
|
const google_protobuf_EnumValueDescriptorProto* const* values;
|
|
|
|
upb_StringView name;
|
|
|
|
size_t i, n;
|
|
|
|
|
|
|
|
e->file = ctx->file; /* Must happen prior to symtab_add() */
|
|
|
|
e->containing_type = containing_type;
|
|
|
|
|
|
|
|
name = google_protobuf_EnumDescriptorProto_name(enum_proto);
|
|
|
|
check_ident(ctx, name, false);
|
|
|
|
|
|
|
|
e->full_name = makefullname(ctx, prefix, name);
|
|
|
|
symtab_add(ctx, e->full_name, pack_def(e, UPB_DEFTYPE_ENUM));
|
|
|
|
|
|
|
|
values = google_protobuf_EnumDescriptorProto_value(enum_proto, &n);
|
|
|
|
CHK_OOM(upb_strtable_init(&e->ntoi, n, ctx->arena));
|
|
|
|
CHK_OOM(upb_inttable_init(&e->iton, ctx->arena));
|
|
|
|
|
|
|
|
e->defaultval = 0;
|
|
|
|
e->value_count = n;
|
|
|
|
e->values = symtab_alloc(ctx, sizeof(*e->values) * n);
|
|
|
|
|
|
|
|
if (n == 0) {
|
|
|
|
symtab_errf(ctx, "enums must contain at least one value (%s)",
|
|
|
|
e->full_name);
|
|
|
|
}
|
|
|
|
|
|
|
|
SET_OPTIONS(e->opts, EnumDescriptorProto, EnumOptions, enum_proto);
|
|
|
|
|
|
|
|
for (i = 0; i < n; i++) {
|
|
|
|
create_enumvaldef(ctx, prefix, values[i], e, i);
|
|
|
|
}
|
|
|
|
|
|
|
|
upb_inttable_compact(&e->iton, ctx->arena);
|
|
|
|
|
|
|
|
if (e->file->syntax == kUpb_Syntax_Proto2) {
|
|
|
|
if (ctx->layout) {
|
|
|
|
UPB_ASSERT(ctx->enum_count < ctx->layout->enum_count);
|
|
|
|
e->layout = ctx->layout->enums[ctx->enum_count++];
|
|
|
|
UPB_ASSERT(n ==
|
|
|
|
e->layout->value_count + count_bits_debug(e->layout->mask));
|
|
|
|
} else {
|
|
|
|
e->layout = create_enumlayout(ctx, e);
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
e->layout = NULL;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static void msgdef_create_nested(
|
|
|
|
symtab_addctx* ctx, const google_protobuf_DescriptorProto* msg_proto,
|
|
|
|
upb_MessageDef* m);
|
|
|
|
|
|
|
|
static void create_msgdef(symtab_addctx* ctx, const char* prefix,
|
|
|
|
const google_protobuf_DescriptorProto* msg_proto,
|
|
|
|
const upb_MessageDef* containing_type,
|
|
|
|
const upb_MessageDef* _m) {
|
|
|
|
upb_MessageDef* m = (upb_MessageDef*)_m;
|
|
|
|
const google_protobuf_OneofDescriptorProto* const* oneofs;
|
|
|
|
const google_protobuf_FieldDescriptorProto* const* fields;
|
|
|
|
const google_protobuf_DescriptorProto_ExtensionRange* const* ext_ranges;
|
|
|
|
size_t i, n_oneof, n_field, n_ext_range;
|
|
|
|
upb_StringView name;
|
|
|
|
|
|
|
|
m->file = ctx->file; /* Must happen prior to symtab_add(). */
|
|
|
|
m->containing_type = containing_type;
|
|
|
|
|
|
|
|
name = google_protobuf_DescriptorProto_name(msg_proto);
|
|
|
|
check_ident(ctx, name, false);
|
|
|
|
|
|
|
|
m->full_name = makefullname(ctx, prefix, name);
|
|
|
|
symtab_add(ctx, m->full_name, pack_def(m, UPB_DEFTYPE_MSG));
|
|
|
|
|
|
|
|
oneofs = google_protobuf_DescriptorProto_oneof_decl(msg_proto, &n_oneof);
|
|
|
|
fields = google_protobuf_DescriptorProto_field(msg_proto, &n_field);
|
|
|
|
ext_ranges =
|
|
|
|
google_protobuf_DescriptorProto_extension_range(msg_proto, &n_ext_range);
|
|
|
|
|
|
|
|
CHK_OOM(upb_inttable_init(&m->itof, ctx->arena));
|
|
|
|
CHK_OOM(upb_strtable_init(&m->ntof, n_oneof + n_field, ctx->arena));
|
|
|
|
|
|
|
|
if (ctx->layout) {
|
|
|
|
/* create_fielddef() below depends on this being set. */
|
|
|
|
UPB_ASSERT(ctx->msg_count < ctx->layout->msg_count);
|
|
|
|
m->layout = ctx->layout->msgs[ctx->msg_count++];
|
|
|
|
UPB_ASSERT(n_field == m->layout->field_count);
|
|
|
|
} else {
|
|
|
|
/* Allocate now (to allow cross-linking), populate later. */
|
|
|
|
m->layout =
|
|
|
|
symtab_alloc(ctx, sizeof(*m->layout) + sizeof(_upb_FastTable_Entry));
|
|
|
|
}
|
|
|
|
|
|
|
|
SET_OPTIONS(m->opts, DescriptorProto, MessageOptions, msg_proto);
|
|
|
|
|
|
|
|
m->oneof_count = n_oneof;
|
|
|
|
m->oneofs = symtab_alloc(ctx, sizeof(*m->oneofs) * n_oneof);
|
|
|
|
for (i = 0; i < n_oneof; i++) {
|
|
|
|
create_oneofdef(ctx, m, oneofs[i], &m->oneofs[i]);
|
|
|
|
}
|
|
|
|
|
|
|
|
m->field_count = n_field;
|
|
|
|
m->fields = symtab_alloc(ctx, sizeof(*m->fields) * n_field);
|
|
|
|
for (i = 0; i < n_field; i++) {
|
|
|
|
create_fielddef(ctx, m->full_name, m, fields[i], &m->fields[i],
|
|
|
|
/* is_extension= */ false);
|
|
|
|
}
|
|
|
|
|
|
|
|
m->ext_range_count = n_ext_range;
|
|
|
|
m->ext_ranges = symtab_alloc(ctx, sizeof(*m->ext_ranges) * n_ext_range);
|
|
|
|
for (i = 0; i < n_ext_range; i++) {
|
|
|
|
const google_protobuf_DescriptorProto_ExtensionRange* r = ext_ranges[i];
|
|
|
|
upb_ExtensionRange* r_def = (upb_ExtensionRange*)&m->ext_ranges[i];
|
|
|
|
int32_t start = google_protobuf_DescriptorProto_ExtensionRange_start(r);
|
|
|
|
int32_t end = google_protobuf_DescriptorProto_ExtensionRange_end(r);
|
|
|
|
int32_t max =
|
|
|
|
google_protobuf_MessageOptions_message_set_wire_format(m->opts)
|
|
|
|
? INT32_MAX
|
|
|
|
: kUpb_MaxFieldNumber + 1;
|
|
|
|
|
|
|
|
// A full validation would also check that each range is disjoint, and that
|
|
|
|
// none of the fields overlap with the extension ranges, but we are just
|
|
|
|
// sanity checking here.
|
|
|
|
if (start < 1 || end <= start || end > max) {
|
|
|
|
symtab_errf(ctx, "Extension range (%d, %d) is invalid, message=%s\n",
|
|
|
|
(int)start, (int)end, m->full_name);
|
|
|
|
}
|
|
|
|
|
|
|
|
r_def->start = start;
|
|
|
|
r_def->end = end;
|
|
|
|
SET_OPTIONS(r_def->opts, DescriptorProto_ExtensionRange,
|
|
|
|
ExtensionRangeOptions, r);
|
|
|
|
}
|
|
|
|
|
|
|
|
finalize_oneofs(ctx, m);
|
|
|
|
assign_msg_wellknowntype(m);
|
|
|
|
upb_inttable_compact(&m->itof, ctx->arena);
|
|
|
|
msgdef_create_nested(ctx, msg_proto, m);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void msgdef_create_nested(
|
|
|
|
symtab_addctx* ctx, const google_protobuf_DescriptorProto* msg_proto,
|
|
|
|
upb_MessageDef* m) {
|
|
|
|
size_t n;
|
|
|
|
|
|
|
|
const google_protobuf_EnumDescriptorProto* const* enums =
|
|
|
|
google_protobuf_DescriptorProto_enum_type(msg_proto, &n);
|
|
|
|
m->nested_enum_count = n;
|
|
|
|
m->nested_enums = symtab_alloc(ctx, sizeof(*m->nested_enums) * n);
|
|
|
|
for (size_t i = 0; i < n; i++) {
|
|
|
|
m->nested_enum_count = i + 1;
|
|
|
|
create_enumdef(ctx, m->full_name, enums[i], m, &m->nested_enums[i]);
|
|
|
|
}
|
|
|
|
|
|
|
|
const google_protobuf_FieldDescriptorProto* const* exts =
|
|
|
|
google_protobuf_DescriptorProto_extension(msg_proto, &n);
|
|
|
|
m->nested_ext_count = n;
|
|
|
|
m->nested_exts = symtab_alloc(ctx, sizeof(*m->nested_exts) * n);
|
|
|
|
for (size_t i = 0; i < n; i++) {
|
|
|
|
create_fielddef(ctx, m->full_name, m, exts[i], &m->nested_exts[i],
|
|
|
|
/* is_extension= */ true);
|
|
|
|
((upb_FieldDef*)&m->nested_exts[i])->index_ = i;
|
|
|
|
}
|
|
|
|
|
|
|
|
const google_protobuf_DescriptorProto* const* msgs =
|
|
|
|
google_protobuf_DescriptorProto_nested_type(msg_proto, &n);
|
|
|
|
m->nested_msg_count = n;
|
|
|
|
m->nested_msgs = symtab_alloc(ctx, sizeof(*m->nested_msgs) * n);
|
|
|
|
for (size_t i = 0; i < n; i++) {
|
|
|
|
create_msgdef(ctx, m->full_name, msgs[i], m, &m->nested_msgs[i]);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static void resolve_subdef(symtab_addctx* ctx, const char* prefix,
|
|
|
|
upb_FieldDef* f) {
|
|
|
|
const google_protobuf_FieldDescriptorProto* field_proto = f->sub.unresolved;
|
|
|
|
upb_StringView name =
|
|
|
|
google_protobuf_FieldDescriptorProto_type_name(field_proto);
|
|
|
|
bool has_name =
|
|
|
|
google_protobuf_FieldDescriptorProto_has_type_name(field_proto);
|
|
|
|
switch ((int)f->type_) {
|
|
|
|
case FIELD_TYPE_UNSPECIFIED: {
|
|
|
|
// Type was not specified and must be inferred.
|
|
|
|
UPB_ASSERT(has_name);
|
|
|
|
upb_deftype_t type;
|
|
|
|
const void* def =
|
|
|
|
symtab_resolveany(ctx, f->full_name, prefix, name, &type);
|
|
|
|
switch (type) {
|
|
|
|
case UPB_DEFTYPE_ENUM:
|
|
|
|
f->sub.enumdef = def;
|
|
|
|
f->type_ = upb_FieldType_Enum;
|
|
|
|
break;
|
|
|
|
case UPB_DEFTYPE_MSG:
|
|
|
|
f->sub.msgdef = def;
|
|
|
|
f->type_ = upb_FieldType_Message; // It appears there is no way of
|
|
|
|
// this being a group.
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
symtab_errf(ctx, "Couldn't resolve type name for field %s",
|
|
|
|
f->full_name);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
case upb_FieldType_Message:
|
|
|
|
case upb_FieldType_Group:
|
|
|
|
UPB_ASSERT(has_name);
|
|
|
|
f->sub.msgdef =
|
|
|
|
symtab_resolve(ctx, f->full_name, prefix, name, UPB_DEFTYPE_MSG);
|
|
|
|
break;
|
|
|
|
case upb_FieldType_Enum:
|
|
|
|
UPB_ASSERT(has_name);
|
|
|
|
f->sub.enumdef =
|
|
|
|
symtab_resolve(ctx, f->full_name, prefix, name, UPB_DEFTYPE_ENUM);
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
// No resolution necessary.
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static void resolve_extension(
|
|
|
|
symtab_addctx* ctx, const char* prefix, upb_FieldDef* f,
|
|
|
|
const google_protobuf_FieldDescriptorProto* field_proto) {
|
|
|
|
if (!google_protobuf_FieldDescriptorProto_has_extendee(field_proto)) {
|
|
|
|
symtab_errf(ctx, "extension for field '%s' had no extendee", f->full_name);
|
|
|
|
}
|
|
|
|
|
|
|
|
upb_StringView name =
|
|
|
|
google_protobuf_FieldDescriptorProto_extendee(field_proto);
|
|
|
|
const upb_MessageDef* m =
|
|
|
|
symtab_resolve(ctx, f->full_name, prefix, name, UPB_DEFTYPE_MSG);
|
|
|
|
f->msgdef = m;
|
|
|
|
|
|
|
|
bool found = false;
|
|
|
|
|
|
|
|
for (int i = 0, n = m->ext_range_count; i < n; i++) {
|
|
|
|
const upb_ExtensionRange* r = &m->ext_ranges[i];
|
|
|
|
if (r->start <= f->number_ && f->number_ < r->end) {
|
|
|
|
found = true;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!found) {
|
|
|
|
symtab_errf(ctx,
|
|
|
|
"field number %u in extension %s has no extension range in "
|
|
|
|
"message %s",
|
|
|
|
(unsigned)f->number_, f->full_name, f->msgdef->full_name);
|
|
|
|
}
|
|
|
|
|
|
|
|
const upb_MiniTable_Extension* ext = ctx->file->ext_layouts[f->layout_index];
|
|
|
|
if (ctx->layout) {
|
|
|
|
UPB_ASSERT(upb_FieldDef_Number(f) == ext->field.number);
|
|
|
|
} else {
|
|
|
|
upb_MiniTable_Extension* mut_ext = (upb_MiniTable_Extension*)ext;
|
|
|
|
fill_fieldlayout(&mut_ext->field, f);
|
|
|
|
mut_ext->field.presence = 0;
|
|
|
|
mut_ext->field.offset = 0;
|
|
|
|
mut_ext->field.submsg_index = 0;
|
|
|
|
mut_ext->extendee = f->msgdef->layout;
|
|
|
|
mut_ext->sub.submsg = f->sub.msgdef->layout;
|
|
|
|
}
|
|
|
|
|
|
|
|
CHK_OOM(upb_inttable_insert(&ctx->symtab->exts, (uintptr_t)ext,
|
|
|
|
upb_value_constptr(f), ctx->arena));
|
|
|
|
}
|
|
|
|
|
|
|
|
static void resolve_default(
|
|
|
|
symtab_addctx* ctx, upb_FieldDef* f,
|
|
|
|
const google_protobuf_FieldDescriptorProto* field_proto) {
|
|
|
|
// Have to delay resolving of the default value until now because of the enum
|
|
|
|
// case, since enum defaults are specified with a label.
|
|
|
|
if (google_protobuf_FieldDescriptorProto_has_default_value(field_proto)) {
|
|
|
|
upb_StringView defaultval =
|
|
|
|
google_protobuf_FieldDescriptorProto_default_value(field_proto);
|
|
|
|
|
|
|
|
if (f->file->syntax == kUpb_Syntax_Proto3) {
|
|
|
|
symtab_errf(ctx, "proto3 fields cannot have explicit defaults (%s)",
|
|
|
|
f->full_name);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (upb_FieldDef_IsSubMessage(f)) {
|
|
|
|
symtab_errf(ctx, "message fields cannot have explicit defaults (%s)",
|
|
|
|
f->full_name);
|
|
|
|
}
|
|
|
|
|
|
|
|
parse_default(ctx, defaultval.data, defaultval.size, f);
|
|
|
|
f->has_default = true;
|
|
|
|
} else {
|
|
|
|
set_default_default(ctx, f);
|
|
|
|
f->has_default = false;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static void resolve_fielddef(symtab_addctx* ctx, const char* prefix,
|
|
|
|
upb_FieldDef* f) {
|
|
|
|
// We have to stash this away since resolve_subdef() may overwrite it.
|
|
|
|
const google_protobuf_FieldDescriptorProto* field_proto = f->sub.unresolved;
|
|
|
|
|
|
|
|
resolve_subdef(ctx, prefix, f);
|
|
|
|
resolve_default(ctx, f, field_proto);
|
|
|
|
|
|
|
|
if (f->is_extension_) {
|
|
|
|
resolve_extension(ctx, prefix, f, field_proto);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Second-pass resolution for message `m`: resolves all of its fields and
 * nested extensions, builds its layout when none was supplied, computes
 * m->in_message_set, and then recurses into nested messages. */
static void resolve_msgdef(symtab_addctx* ctx, upb_MessageDef* m) {
  int k;

  for (k = 0; k < m->field_count; k++) {
    resolve_fielddef(ctx, m->full_name, (upb_FieldDef*)&m->fields[k]);
  }

  for (k = 0; k < m->nested_ext_count; k++) {
    resolve_fielddef(ctx, m->full_name, (upb_FieldDef*)&m->nested_exts[k]);
  }

  if (!ctx->layout) make_layout(ctx, m);

  /* in_message_set is true only when this message declares exactly one nested
   * extension, and that extension is an optional message field whose type is
   * this very message and whose extendee uses message_set_wire_format. */
  m->in_message_set = false;
  if (m->nested_ext_count == 1) {
    const upb_FieldDef* only_ext = &m->nested_exts[0];
    bool optional_self_msg = only_ext->type_ == upb_FieldType_Message &&
                             only_ext->label_ == kUpb_Label_Optional &&
                             only_ext->sub.msgdef == m;
    if (optional_self_msg &&
        google_protobuf_MessageOptions_message_set_wire_format(
            only_ext->msgdef->opts)) {
      m->in_message_set = true;
    }
  }

  for (k = 0; k < m->nested_msg_count; k++) {
    resolve_msgdef(ctx, (upb_MessageDef*)&m->nested_msgs[k]);
  }
}
|
|
|
|
|
|
|
|
/* Returns the number of extensions declared in `msg_proto`, including those
 * declared in any messages nested inside it (recursively). */
static int count_exts_in_msg(const google_protobuf_DescriptorProto* msg_proto) {
  size_t own_ext_count;
  google_protobuf_DescriptorProto_extension(msg_proto, &own_ext_count);

  size_t nested_count;
  const google_protobuf_DescriptorProto* const* nested =
      google_protobuf_DescriptorProto_nested_type(msg_proto, &nested_count);

  int total = (int)own_ext_count;
  for (size_t k = 0; k < nested_count; k++) {
    total += count_exts_in_msg(nested[k]);
  }
  return total;
}
|
|
|
|
|
|
|
|
static void build_filedef(
|
|
|
|
symtab_addctx* ctx, upb_FileDef* file,
|
|
|
|
const google_protobuf_FileDescriptorProto* file_proto) {
|
|
|
|
const google_protobuf_DescriptorProto* const* msgs;
|
|
|
|
const google_protobuf_EnumDescriptorProto* const* enums;
|
|
|
|
const google_protobuf_FieldDescriptorProto* const* exts;
|
|
|
|
const google_protobuf_ServiceDescriptorProto* const* services;
|
|
|
|
const upb_StringView* strs;
|
|
|
|
const int32_t* public_deps;
|
|
|
|
const int32_t* weak_deps;
|
|
|
|
size_t i, n;
|
|
|
|
|
|
|
|
file->symtab = ctx->symtab;
|
|
|
|
|
|
|
|
/* Count all extensions in the file, to build a flat array of layouts. */
|
|
|
|
google_protobuf_FileDescriptorProto_extension(file_proto, &n);
|
|
|
|
int ext_count = n;
|
|
|
|
msgs = google_protobuf_FileDescriptorProto_message_type(file_proto, &n);
|
|
|
|
for (int i = 0; i < n; i++) {
|
|
|
|
ext_count += count_exts_in_msg(msgs[i]);
|
|
|
|
}
|
|
|
|
file->ext_count = ext_count;
|
|
|
|
|
|
|
|
if (ctx->layout) {
|
|
|
|
/* We are using the ext layouts that were passed in. */
|
|
|
|
file->ext_layouts = ctx->layout->exts;
|
|
|
|
if (ctx->layout->ext_count != file->ext_count) {
|
|
|
|
symtab_errf(ctx, "Extension count did not match layout (%d vs %d)",
|
|
|
|
ctx->layout->ext_count, file->ext_count);
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
/* We are building ext layouts from scratch. */
|
|
|
|
file->ext_layouts =
|
|
|
|
symtab_alloc(ctx, sizeof(*file->ext_layouts) * file->ext_count);
|
|
|
|
upb_MiniTable_Extension* ext =
|
|
|
|
symtab_alloc(ctx, sizeof(*ext) * file->ext_count);
|
|
|
|
for (int i = 0; i < file->ext_count; i++) {
|
|
|
|
file->ext_layouts[i] = &ext[i];
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!google_protobuf_FileDescriptorProto_has_name(file_proto)) {
|
|
|
|
symtab_errf(ctx, "File has no name");
|
|
|
|
}
|
|
|
|
|
|
|
|
file->name =
|
|
|
|
strviewdup(ctx, google_protobuf_FileDescriptorProto_name(file_proto));
|
|
|
|
|
|
|
|
if (google_protobuf_FileDescriptorProto_has_package(file_proto)) {
|
|
|
|
upb_StringView package =
|
|
|
|
google_protobuf_FileDescriptorProto_package(file_proto);
|
|
|
|
check_ident(ctx, package, true);
|
|
|
|
file->package = strviewdup(ctx, package);
|
|
|
|
} else {
|
|
|
|
file->package = NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (google_protobuf_FileDescriptorProto_has_syntax(file_proto)) {
|
|
|
|
upb_StringView syntax =
|
|
|
|
google_protobuf_FileDescriptorProto_syntax(file_proto);
|
|
|
|
|
|
|
|
if (streql_view(syntax, "proto2")) {
|
|
|
|
file->syntax = kUpb_Syntax_Proto2;
|
|
|
|
} else if (streql_view(syntax, "proto3")) {
|
|
|
|
file->syntax = kUpb_Syntax_Proto3;
|
|
|
|
} else {
|
|
|
|
symtab_errf(ctx, "Invalid syntax '" UPB_STRINGVIEW_FORMAT "'",
|
|
|
|
UPB_STRINGVIEW_ARGS(syntax));
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
file->syntax = kUpb_Syntax_Proto2;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Read options. */
|
|
|
|
SET_OPTIONS(file->opts, FileDescriptorProto, FileOptions, file_proto);
|
|
|
|
|
|
|
|
/* Verify dependencies. */
|
|
|
|
strs = google_protobuf_FileDescriptorProto_dependency(file_proto, &n);
|
|
|
|
file->dep_count = n;
|
|
|
|
file->deps = symtab_alloc(ctx, sizeof(*file->deps) * n);
|
|
|
|
|
|
|
|
for (i = 0; i < n; i++) {
|
|
|
|
upb_StringView str = strs[i];
|
|
|
|
file->deps[i] =
|
|
|
|
upb_DefPool_FindFileByNameWithSize(ctx->symtab, str.data, str.size);
|
|
|
|
if (!file->deps[i]) {
|
|
|
|
symtab_errf(ctx,
|
|
|
|
"Depends on file '" UPB_STRINGVIEW_FORMAT
|
|
|
|
"', but it has not been loaded",
|
|
|
|
UPB_STRINGVIEW_ARGS(str));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
public_deps =
|
|
|
|
google_protobuf_FileDescriptorProto_public_dependency(file_proto, &n);
|
|
|
|
file->public_dep_count = n;
|
|
|
|
file->public_deps = symtab_alloc(ctx, sizeof(*file->public_deps) * n);
|
|
|
|
int32_t* mutable_public_deps = (int32_t*)file->public_deps;
|
|
|
|
for (i = 0; i < n; i++) {
|
|
|
|
if (public_deps[i] >= file->dep_count) {
|
|
|
|
symtab_errf(ctx, "public_dep %d is out of range", (int)public_deps[i]);
|
|
|
|
}
|
|
|
|
mutable_public_deps[i] = public_deps[i];
|
|
|
|
}
|
|
|
|
|
|
|
|
weak_deps =
|
|
|
|
google_protobuf_FileDescriptorProto_weak_dependency(file_proto, &n);
|
|
|
|
file->weak_dep_count = n;
|
|
|
|
file->weak_deps = symtab_alloc(ctx, sizeof(*file->weak_deps) * n);
|
|
|
|
int32_t* mutable_weak_deps = (int32_t*)file->weak_deps;
|
|
|
|
for (i = 0; i < n; i++) {
|
|
|
|
if (weak_deps[i] >= file->dep_count) {
|
|
|
|
symtab_errf(ctx, "public_dep %d is out of range", (int)public_deps[i]);
|
|
|
|
}
|
|
|
|
mutable_weak_deps[i] = weak_deps[i];
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Create enums. */
|
|
|
|
enums = google_protobuf_FileDescriptorProto_enum_type(file_proto, &n);
|
|
|
|
file->top_lvl_enum_count = n;
|
|
|
|
file->top_lvl_enums = symtab_alloc(ctx, sizeof(*file->top_lvl_enums) * n);
|
|
|
|
for (i = 0; i < n; i++) {
|
|
|
|
create_enumdef(ctx, file->package, enums[i], NULL, &file->top_lvl_enums[i]);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Create extensions. */
|
|
|
|
exts = google_protobuf_FileDescriptorProto_extension(file_proto, &n);
|
|
|
|
file->top_lvl_ext_count = n;
|
|
|
|
file->top_lvl_exts = symtab_alloc(ctx, sizeof(*file->top_lvl_exts) * n);
|
|
|
|
for (i = 0; i < n; i++) {
|
|
|
|
create_fielddef(ctx, file->package, NULL, exts[i], &file->top_lvl_exts[i],
|
|
|
|
/* is_extension= */ true);
|
|
|
|
((upb_FieldDef*)&file->top_lvl_exts[i])->index_ = i;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Create messages. */
|
|
|
|
msgs = google_protobuf_FileDescriptorProto_message_type(file_proto, &n);
|
|
|
|
file->top_lvl_msg_count = n;
|
|
|
|
file->top_lvl_msgs = symtab_alloc(ctx, sizeof(*file->top_lvl_msgs) * n);
|
|
|
|
for (i = 0; i < n; i++) {
|
|
|
|
create_msgdef(ctx, file->package, msgs[i], NULL, &file->top_lvl_msgs[i]);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Create services. */
|
|
|
|
services = google_protobuf_FileDescriptorProto_service(file_proto, &n);
|
|
|
|
file->service_count = n;
|
|
|
|
file->services = symtab_alloc(ctx, sizeof(*file->services) * n);
|
|
|
|
for (i = 0; i < n; i++) {
|
|
|
|
create_service(ctx, services[i], &file->services[i]);
|
|
|
|
((upb_ServiceDef*)&file->services[i])->index = i;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Now that all names are in the table, build layouts and resolve refs. */
|
|
|
|
for (i = 0; i < (size_t)file->top_lvl_ext_count; i++) {
|
|
|
|
resolve_fielddef(ctx, file->package, (upb_FieldDef*)&file->top_lvl_exts[i]);
|
|
|
|
}
|
|
|
|
|
|
|
|
for (i = 0; i < (size_t)file->top_lvl_msg_count; i++) {
|
|
|
|
resolve_msgdef(ctx, (upb_MessageDef*)&file->top_lvl_msgs[i]);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (file->ext_count) {
|
|
|
|
CHK_OOM(_upb_extreg_add(ctx->symtab->extreg, file->ext_layouts,
|
|
|
|
file->ext_count));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Removes from the pool's symbol table every symbol that belongs to `file`.
 * Each entry's owning file is determined from its def type (extension,
 * message, enum, enum value or service); any other def type is treated as
 * unreachable. */
static void remove_filedef(upb_DefPool* s, upb_FileDef* file) {
  intptr_t it = UPB_INTTABLE_BEGIN;
  upb_StringView sym_name;
  upb_value packed;

  while (upb_strtable_next2(&s->syms, &sym_name, &packed, &it)) {
    const upb_FileDef* owner;

    switch (deftype(packed)) {
      case UPB_DEFTYPE_EXT:
        owner = upb_FieldDef_File(unpack_def(packed, UPB_DEFTYPE_EXT));
        break;
      case UPB_DEFTYPE_MSG:
        owner = upb_MessageDef_File(unpack_def(packed, UPB_DEFTYPE_MSG));
        break;
      case UPB_DEFTYPE_ENUM:
        owner = upb_EnumDef_File(unpack_def(packed, UPB_DEFTYPE_ENUM));
        break;
      case UPB_DEFTYPE_ENUMVAL:
        /* An enum value belongs to the file of its enclosing enum. */
        owner = upb_EnumDef_File(
            upb_EnumValueDef_Enum(unpack_def(packed, UPB_DEFTYPE_ENUMVAL)));
        break;
      case UPB_DEFTYPE_SERVICE:
        owner = upb_ServiceDef_File(unpack_def(packed, UPB_DEFTYPE_SERVICE));
        break;
      default:
        UPB_UNREACHABLE();
    }

    if (owner != file) continue;
    upb_strtable_removeiter(&s->syms, &it);
  }
}
|
|
|
|
|
|
|
|
static const upb_FileDef* _upb_DefPool_AddFile(
|
|
|
|
upb_DefPool* s, const google_protobuf_FileDescriptorProto* file_proto,
|
|
|
|
const upb_MiniTable_File* layout, upb_Status* status) {
|
|
|
|
symtab_addctx ctx;
|
|
|
|
upb_StringView name = google_protobuf_FileDescriptorProto_name(file_proto);
|
|
|
|
upb_value v;
|
|
|
|
|
|
|
|
if (upb_strtable_lookup2(&s->files, name.data, name.size, &v)) {
|
|
|
|
if (unpack_def(v, UPB_DEFTYPE_FILE)) {
|
|
|
|
upb_Status_SetErrorFormat(status, "duplicate file name (%.*s)",
|
|
|
|
UPB_STRINGVIEW_ARGS(name));
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
const upb_MiniTable_File* registered = unpack_def(v, UPB_DEFTYPE_LAYOUT);
|
|
|
|
UPB_ASSERT(registered);
|
|
|
|
if (layout && layout != registered) {
|
|
|
|
upb_Status_SetErrorFormat(
|
|
|
|
status, "tried to build with a different layout (filename=%.*s)",
|
|
|
|
UPB_STRINGVIEW_ARGS(name));
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
layout = registered;
|
|
|
|
}
|
|
|
|
|
|
|
|
ctx.symtab = s;
|
|
|
|
ctx.layout = layout;
|
|
|
|
ctx.msg_count = 0;
|
|
|
|
ctx.enum_count = 0;
|
|
|
|
ctx.ext_count = 0;
|
|
|
|
ctx.status = status;
|
|
|
|
ctx.file = NULL;
|
|
|
|
ctx.arena = upb_Arena_New();
|
|
|
|
ctx.tmp_arena = upb_Arena_New();
|
|
|
|
|
|
|
|
if (!ctx.arena || !ctx.tmp_arena) {
|
|
|
|
if (ctx.arena) upb_Arena_Free(ctx.arena);
|
|
|
|
if (ctx.tmp_arena) upb_Arena_Free(ctx.tmp_arena);
|
|
|
|
upb_Status_setoom(status);
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (UPB_UNLIKELY(UPB_SETJMP(ctx.err))) {
|
|
|
|
UPB_ASSERT(!upb_Status_IsOk(status));
|
|
|
|
if (ctx.file) {
|
|
|
|
remove_filedef(s, ctx.file);
|
|
|
|
ctx.file = NULL;
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
ctx.file = symtab_alloc(&ctx, sizeof(*ctx.file));
|
|
|
|
build_filedef(&ctx, ctx.file, file_proto);
|
|
|
|
upb_strtable_insert(&s->files, name.data, name.size,
|
|
|
|
pack_def(ctx.file, UPB_DEFTYPE_FILE), ctx.arena);
|
|
|
|
UPB_ASSERT(upb_Status_IsOk(status));
|
|
|
|
upb_Arena_Fuse(s->arena, ctx.arena);
|
|
|
|
}
|
|
|
|
|
|
|
|
upb_Arena_Free(ctx.arena);
|
|
|
|
upb_Arena_Free(ctx.tmp_arena);
|
|
|
|
return ctx.file;
|
|
|
|
}
|
|
|
|
|
|
|
|
const upb_FileDef* upb_DefPool_AddFile(
|
|
|
|
upb_DefPool* s, const google_protobuf_FileDescriptorProto* file_proto,
|
|
|
|
upb_Status* status) {
|
|
|
|
return _upb_DefPool_AddFile(s, file_proto, NULL, status);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Include here since we want most of this file to be stdio-free. */
|
|
|
|
#include <stdio.h>
|
|
|
|
|
|
|
|
/* Loads the compiled-in descriptor described by `init` into the pool `s`.
 *
 * Every entry of the NULL-terminated `init->deps` array is loaded first
 * (recursively), then `init->descriptor` is parsed and added to the pool
 * together with `init->layout`.
 *
 * Returns true if a file named `init->filename` is already present in `s` or
 * was added successfully, and false on any failure.
 *
 * Since this function should never fail (it would indicate a bug in upb) we
 * print errors to stderr instead of returning error status to the user. */
bool _upb_DefPool_LoadDefInit(upb_DefPool* s, const _upb_DefPool_Init* init) {
  _upb_DefPool_Init** deps = init->deps;
  google_protobuf_FileDescriptorProto* file;
  upb_Arena* arena;
  upb_Status status;

  upb_Status_Clear(&status);

  /* Already loaded: nothing to do. */
  if (upb_DefPool_FindFileByName(s, init->filename)) {
    return true;
  }

  /* Scratch arena that holds the parsed descriptor proto; it is released
   * before returning on every path. */
  arena = upb_Arena_New();
  if (!arena) {
    upb_Status_setoom(&status);
    goto err;
  }

  for (; *deps; deps++) {
    if (!_upb_DefPool_LoadDefInit(s, *deps)) {
      /* The recursive call has already reported its own error; record which
       * dependency failed so the message printed below is not empty. */
      upb_Status_SetErrorFormat(&status, "Failed to load dependency '%s'.",
                                (*deps)->filename);
      goto err;
    }
  }

  /* NOTE(review): kUpb_DecodeOption_AliasString presumably makes parsed
   * strings point into init->descriptor instead of copying them -- confirm
   * against the decoder documentation. */
  file = google_protobuf_FileDescriptorProto_parse_ex(
      init->descriptor.data, init->descriptor.size, NULL,
      kUpb_DecodeOption_AliasString, arena);
  s->bytes_loaded += init->descriptor.size;

  if (!file) {
    upb_Status_SetErrorFormat(
        &status,
        "Failed to parse compiled-in descriptor for file '%s'. This should "
        "never happen.",
        init->filename);
    goto err;
  }

  if (!_upb_DefPool_AddFile(s, file, init->layout, &status)) {
    goto err;
  }

  upb_Arena_Free(arena);
  return true;

err:
  fprintf(stderr,
          "Error loading compiled-in descriptor for file '%s' (this should "
          "never happen): %s\n",
          init->filename, upb_Status_ErrorMessage(&status));
  /* `arena` is NULL when its own allocation was what failed. */
  if (arena) upb_Arena_Free(arena);
  return false;
}
|
|
|
|
|
|
|
|
size_t _upb_DefPool_BytesLoaded(const upb_DefPool* s) {
|
|
|
|
return s->bytes_loaded;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Returns the arena stored in the pool's `arena` field. */
upb_Arena* _upb_DefPool_Arena(const upb_DefPool* s) {
  upb_Arena* pool_arena = s->arena;
  return pool_arena;
}
|
|
|
|
|
|
|
|
const upb_FieldDef* _upb_DefPool_FindExtensionByMiniTable(
|
|
|
|
const upb_DefPool* s, const upb_MiniTable_Extension* ext) {
|
|
|
|
upb_value v;
|
|
|
|
bool ok = upb_inttable_lookup(&s->exts, (uintptr_t)ext, &v);
|
|
|
|
UPB_ASSERT(ok);
|
|
|
|
return upb_value_getconstptr(v);
|
|
|
|
}
|
|
|
|
|
|
|
|
const upb_FieldDef* upb_DefPool_FindExtensionByNumber(const upb_DefPool* s,
|
|
|
|
const upb_MessageDef* m,
|
|
|
|
int32_t fieldnum) {
|
|
|
|
const upb_MiniTable* l = upb_MessageDef_MiniTable(m);
|
|
|
|
const upb_MiniTable_Extension* ext = _upb_extreg_get(s->extreg, l, fieldnum);
|
|
|
|
return ext ? _upb_DefPool_FindExtensionByMiniTable(s, ext) : NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
bool _upb_DefPool_registerlayout(upb_DefPool* s, const char* filename,
|
|
|
|
const upb_MiniTable_File* file) {
|
|
|
|
if (upb_DefPool_FindFileByName(s, filename)) return false;
|
|
|
|
upb_value v = pack_def(file, UPB_DEFTYPE_LAYOUT);
|
|
|
|
return upb_strtable_insert(&s->files, filename, strlen(filename), v,
|
|
|
|
s->arena);
|
|
|
|
}
|
|
|
|
|
|
|
|
const upb_ExtensionRegistry* upb_DefPool_ExtensionRegistry(
|
|
|
|
const upb_DefPool* s) {
|
|
|
|
return s->extreg;
|
|
|
|
}
|
|
|
|
|
|
|
|
const upb_FieldDef** upb_DefPool_GetAllExtensions(const upb_DefPool* s,
|
|
|
|
const upb_MessageDef* m,
|
|
|
|
size_t* count) {
|
|
|
|
size_t n = 0;
|
|
|
|
intptr_t iter = UPB_INTTABLE_BEGIN;
|
|
|
|
uintptr_t key;
|
|
|
|
upb_value val;
|
|
|
|
// This is O(all exts) instead of O(exts for m). If we need this to be
|
|
|
|
// efficient we may need to make extreg into a two-level table, or have a
|
|
|
|
// second per-message index.
|
|
|
|
while (upb_inttable_next2(&s->exts, &key, &val, &iter)) {
|
|
|
|
const upb_FieldDef* f = upb_value_getconstptr(val);
|
|
|
|
if (upb_FieldDef_ContainingType(f) == m) n++;
|
|
|
|
}
|
|
|
|
const upb_FieldDef** exts = malloc(n * sizeof(*exts));
|
|
|
|
iter = UPB_INTTABLE_BEGIN;
|
|
|
|
size_t i = 0;
|
|
|
|
while (upb_inttable_next2(&s->exts, &key, &val, &iter)) {
|
|
|
|
const upb_FieldDef* f = upb_value_getconstptr(val);
|
|
|
|
if (upb_FieldDef_ContainingType(f) == m) exts[i++] = f;
|
|
|
|
}
|
|
|
|
*count = n;
|
|
|
|
return exts;
|
|
|
|
}
|
|
|
|
|
|
|
|
#undef CHK_OOM
|