upb_msg constructor now more or less works.

pull/13171/head
Joshua Haberman 16 years ago
parent c7f2a271ae
commit 3a67a1e9f9
  1. 50
      upb_msg.c
  2. 69
      upb_msg.h

@ -17,17 +17,18 @@ static int div_round_up(int numerator, int denominator) {
}
/* qsort() comparator for message fields. Required fields sort ahead of
 * non-required ones; within each group, fields sort by ascending field
 * number.
 * NOTE(review): this span appears to interleave two revisions of the body
 * (f1/f2 are declared twice and the if/else chain is duplicated) -- confirm
 * which lines are current before compiling. */
static int compare_fields(const void *e1, const void *e2) {
const google_protobuf_FieldDescriptorProto *f1 = e1, *f2 = e2;
const struct upb_msg_field *f1 = e1, *f2 = e2;
const google_protobuf_FieldDescriptorProto *fd1 = f1->descriptor;
const google_protobuf_FieldDescriptorProto *fd2 = f2->descriptor;
/* Required fields go before non-required. */
if(f1->label == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_REQUIRED &&
f2->label != GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_REQUIRED) {
return -1;
} else if(f1->label != GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_REQUIRED &&
f2->label == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_REQUIRED) {
return 1;
bool req1 = fd1->label == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_REQUIRED;
bool req2 = fd2->label == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_REQUIRED;
if(req1 != req2) {
/* Exactly one side is required: req1 set gives 0 - 1 = -1 (first element
 * sorts earlier), req2 set gives 1 - 0 = 1. */
return req2 - req1;
} else {
/* Within required and non-required field lists, list in number order. */
return f1->number - f2->number;
/* Within required and non-required field lists, list in number order.
 * TODO: consider ordering by data size to reduce padding. */
return fd1->number - fd2->number;
}
}
@ -41,35 +42,44 @@ bool upb_msg_init(struct upb_msg *m, struct google_protobuf_DescriptorProto *d)
upb_strtable_init(&m->fields_by_name, d->field->len,
sizeof(struct upb_fieldsbyname_entry));
m->fields = malloc(sizeof(struct upb_msg_field) * d->field->len);
m->num_fields = d->field->len;
m->set_flags_bytes = div_round_up(m->num_fields, 8);
/* These are incremented in the loop. */
m->num_required_fields = 0;
m->size = m->set_flags_bytes;
qsort(m->fields, d->field->len, sizeof(struct upb_msg_field), compare_fields);
m->fields = malloc(sizeof(struct upb_msg_field) * m->num_fields);
for(unsigned int i = 0; i < m->num_fields; i++) {
/* We count on the caller to keep this pointer alive. */
m->fields[i].descriptor = d->field->elements[i];
}
qsort(m->fields, m->num_fields, sizeof(struct upb_msg_field), compare_fields);
size_t max_align = 0;
for(unsigned int i = 0; i < d->field->len; i++) {
for(unsigned int i = 0; i < m->num_fields; i++) {
struct upb_msg_field *f = &m->fields[i];
google_protobuf_FieldDescriptorProto *fd; /* TODO */
struct upb_type_info *type_info = &upb_type_info[f->type];
google_protobuf_FieldDescriptorProto *fd = f->descriptor;
struct upb_type_info *type_info = &upb_type_info[fd->type];
/* General alignment rules are: each member must be at an address that is a
* multiple of that type's alignment. Also, the size of the structure as
* a whole must be a multiple of the greatest alignment of any member. */
f->field_index = i;
f->type = fd->type;
f->byte_offset = ALIGN_UP(m->size, type_info->align);
m->size = f->byte_offset + type_info->size;
max_align = max(max_align, type_info->align);
if(fd->label == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_REQUIRED)
m->num_required_fields++;
/* Insert into the tables. Note that f->ref will be uninitialized, even in
* the tables' copies of *f, which is why we must update them separately
/* Insert into the tables. Note that af->ref will be uninitialized, even in
* the tables' copies of *af, which is why we must update them separately
* when the references are resolved. */
struct upb_fieldsbynum_entry nument = {.e = {.key = fd->number}, .f = *f};
struct upb_fieldsbyname_entry strent = {.e = {.key = *fd->name}, .f = *f};
struct upb_abbrev_msg_field af = {.byte_offset = f->byte_offset,
.field_index = f->field_index,
.type = fd->type};
struct upb_fieldsbynum_entry nument = {.e = {.key = fd->number}, .f = af};
struct upb_fieldsbyname_entry strent = {.e = {.key = *fd->name}, .f = af};
upb_inttable_insert(&m->fields_by_num, &nument.e);
upb_strtable_insert(&m->fields_by_name, &strent.e);
}

@ -45,38 +45,67 @@
extern "C" {
#endif
/* Forward declarations from descriptor.h. */
struct google_protobuf_DescriptorProto;
struct google_protobuf_FieldDescriptorProto;
/* Structure definition. ******************************************************/
/* Fields that reference other types have pointers to the other type. Only
 * one member is meaningful for a given field; which one is selected by the
 * field's type, as noted on each member. */
union upb_msg_field_ref {
struct upb_msg *msg; /* Set if type == MESSAGE */
struct upb_enum *_enum; /* Set if type == ENUM */
};
/* Structure that describes a single field in a message. This is the full
 * record kept in upb_msg.fields; the by-number and by-name lookup tables
 * store the smaller upb_abbrev_msg_field instead. */
struct upb_msg_field {
/* Descriptor this field was built from. Not owned: the caller is expected
 * to keep it alive. */
struct google_protobuf_FieldDescriptorProto *descriptor;
uint32_t byte_offset; /* Where to find the data. */
uint32_t field_index; /* Indexes upb_msg.fields. Also indicates set bit */
/* Referenced message/enum type. Not filled in while the field is laid out;
 * it is updated separately when references are resolved. */
union upb_msg_field_ref ref;
};
/* Structure that describes a single .proto message type. */
struct upb_msg {
struct google_protobuf_DescriptorProto *descriptor;
/* Byte size of the message's data: starts at set_flags_bytes and grows as
 * each field is placed at its aligned byte_offset. */
size_t size;
uint32_t num_fields; /* Number of entries in fields. */
uint32_t set_flags_bytes; /* num_fields / 8, rounded up (one set bit per field). */
/* Count of fields labeled REQUIRED. Fields are sorted with required ones
 * first, so required fields have the lowest field_index values and hence
 * the lowest set bits. */
uint32_t num_required_fields;
struct upb_inttable fields_by_num; /* Keyed by field number. */
struct upb_strtable fields_by_name; /* Keyed by field name. */
/* Heap-allocated array of num_fields entries, sorted by compare_fields(). */
struct upb_msg_field *fields;
};
/* The num->field and name->field maps in upb_msg allow fast lookup of fields
 * by number or name. These lookups are in the critical path of parsing and
 * field lookup, so they must be as fast as possible. To make these more
 * cache-friendly, we put the data in the table by value, but use only an
 * abbreviated set of data (i.e. not all the data in upb_msg_field). Notably,
 * we don't include the pointer to the field descriptor. But the upb_msg_field
 * can be retrieved in its entirety using upb_get_msg_field(). */
/* Abbreviated per-field data stored by value in the lookup table entries.
 * NOTE(review): `ref` is declared twice below (once as an anonymous union,
 * once as union upb_msg_field_ref) -- this looks like two revisions
 * interleaved; confirm which declaration is current. */
struct upb_abbrev_msg_field {
uint32_t byte_offset; /* Where to find the data. */
uint32_t field_index:24; /* Indexes upb_msg.fields. Also indicates set bit */
upb_field_type_t type; /* Copied from descriptor for cache-friendliness. */
union {
struct upb_msg *msg; /* Set if type == MESSAGE */
struct upb_enum *_enum; /* Set if type == ENUM */
} ref;
union upb_msg_field_ref ref;
};
/* Entry type of upb_msg.fields_by_num: the integer-table entry header
 * (whose key is the field number) followed by the field data, stored by
 * value.
 * NOTE(review): `f` is declared twice below with different types -- looks
 * like two revisions interleaved; confirm which one is current. */
struct upb_fieldsbynum_entry {
struct upb_inttable_entry e;
struct upb_msg_field f;
struct upb_abbrev_msg_field f;
};
/* Entry type of upb_msg.fields_by_name: the string-table entry header
 * (whose key is the field name) followed by the field data, stored by
 * value.
 * NOTE(review): `f` is declared twice below with different types -- looks
 * like two revisions interleaved; confirm which one is current. */
struct upb_fieldsbyname_entry {
struct upb_strtable_entry e;
struct upb_msg_field f;
struct upb_abbrev_msg_field f;
};
/* NOTE(review): struct upb_msg is also defined earlier in this span with
 * uint32_t counters; this copy uses int and presumably is the superseded
 * revision -- confirm before relying on it. */
struct upb_msg {
struct google_protobuf_DescriptorProto *descriptor;
size_t size;
int num_fields;
int set_flags_bytes;
int num_required_fields; /* Required fields have the lowest set bytemasks. */
struct upb_inttable fields_by_num; /* Keyed by field number. */
struct upb_strtable fields_by_name; /* Keyed by field name. */
struct upb_msg_field *fields;
};
/* Returns the complete upb_msg_field record that corresponds to an
 * abbreviated lookup-table entry.
 *
 * f: abbreviated field data; its field_index is used to index m->fields, so
 *    it must describe a field of m.
 * m: the message type that owns the field.
 *
 * Returns a pointer into m->fields (not a copy).
 *
 * Declared static inline because the definition lives in a header: with
 * external linkage, every translation unit that includes the header would
 * emit its own definition and the link would fail with duplicate symbols. */
static inline struct upb_msg_field *upb_get_msg_field(
    struct upb_abbrev_msg_field *f, struct upb_msg *m) {
  return &m->fields[f->field_index];
}
/* Initialize and free a upb_msg. Caller retains ownership of d, but the msg
* will contain references to it, so it must outlive the msg. Note that init
@ -87,14 +116,14 @@ void upb_msg_free(struct upb_msg *m);
/* While these are written to be as fast as possible, it will still be faster
* to cache the results of this lookup if possible. These return NULL if no
* such field is found. */
/* Looks up the field with the given number in m->fields_by_num. Returns a
 * pointer to the field data stored by value inside the table entry, or NULL
 * when the lookup finds no entry.
 * NOTE(review): two signatures appear back to back below (returning
 * upb_msg_field and upb_abbrev_msg_field) -- looks like two revisions
 * interleaved; confirm which one is current. */
INLINE struct upb_msg_field *upb_msg_fieldbynum(struct upb_msg *m,
uint32_t number) {
INLINE struct upb_abbrev_msg_field *upb_msg_fieldbynum(struct upb_msg *m,
uint32_t number) {
struct upb_fieldsbynum_entry *e = upb_inttable_lookup(
&m->fields_by_num, number, sizeof(struct upb_fieldsbynum_entry));
return e ? &e->f : NULL;
}
INLINE struct upb_msg_field *upb_msg_fieldbyname(struct upb_msg *m,
struct upb_string *name) {
INLINE struct upb_abbrev_msg_field *upb_msg_fieldbyname(struct upb_msg *m,
struct upb_string *name) {
struct upb_fieldsbyname_entry *e =
upb_strtable_lookup(&m->fields_by_name, name);
return e ? &e->f : NULL;

Loading…
Cancel
Save