upb_msg constructor now more or less works.

16 years ago · 3a67a1e9f9
parent c7f2a271ae
commit 3a67a1e9f9
2 changed files with 79 additions and 40 deletions
--- a/upb_msg.c
+++ b/upb_msg.c
@ -17,17 +17,18 @@ static int div_round_up(int numerator, int denominator) {
 }

 static int compare_fields(const void *e1, const void *e2) {
-  const google_protobuf_FieldDescriptorProto *f1  = e1, *f2 = e2;
+  const struct upb_msg_field *f1 = e1, *f2 = e2;
+  const google_protobuf_FieldDescriptorProto *fd1  = f1->descriptor;
+  const google_protobuf_FieldDescriptorProto *fd2  = f2->descriptor;
  /* Required fields go before non-required. */
-  if(f1->label == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_REQUIRED &&
-     f2->label != GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_REQUIRED) {
-    return -1;
-  } else if(f1->label != GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_REQUIRED &&
-            f2->label == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_REQUIRED) {
-    return 1;
+  bool req1 = fd1->label == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_REQUIRED;
+  bool req2 = fd2->label == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_REQUIRED;
+  if(req1 != req2) {
+    return req2 - req1;
  } else {
-    /* Within required and non-required field lists, list in number order. */
-    return f1->number - f2->number;
+    /* Within required and non-required field lists, list in number order.
+     * TODO: consider ordering by data size to reduce padding. */
+    return fd1->number - fd2->number;
  }
 }

@ -41,35 +42,44 @@ bool upb_msg_init(struct upb_msg *m, struct google_protobuf_DescriptorProto *d)
  upb_strtable_init(&m->fields_by_name, d->field->len,
                    sizeof(struct upb_fieldsbyname_entry));

-  m->fields = malloc(sizeof(struct upb_msg_field) * d->field->len);
  m->num_fields = d->field->len;
  m->set_flags_bytes = div_round_up(m->num_fields, 8);
-
  /* These are incremented in the loop. */
  m->num_required_fields = 0;
  m->size = m->set_flags_bytes;

-  qsort(m->fields, d->field->len, sizeof(struct upb_msg_field), compare_fields);
+  m->fields = malloc(sizeof(struct upb_msg_field) * m->num_fields);
+  for(unsigned int i = 0; i < m->num_fields; i++) {
+    /* We count on the caller to keep this pointer alive. */
+    m->fields[i].descriptor = d->field->elements[i];
+  }
+  qsort(m->fields, m->num_fields, sizeof(struct upb_msg_field), compare_fields);

  size_t max_align = 0;

-  for(unsigned int i = 0; i < d->field->len; i++) {
+  for(unsigned int i = 0; i < m->num_fields; i++) {
    struct upb_msg_field *f = &m->fields[i];
-    google_protobuf_FieldDescriptorProto *fd;  /* TODO */
-    struct upb_type_info *type_info = &upb_type_info[f->type];
+    google_protobuf_FieldDescriptorProto *fd = f->descriptor;
+    struct upb_type_info *type_info = &upb_type_info[fd->type];
+
+    /* General alignment rules are: each member must be at an address that is a
+     * multiple of that type's alignment.  Also, the size of the structure as
+     * a whole must be a multiple of the greatest alignment of any member. */
    f->field_index = i;
-    f->type = fd->type;
    f->byte_offset = ALIGN_UP(m->size, type_info->align);
    m->size = f->byte_offset + type_info->size;
    max_align = max(max_align, type_info->align);
    if(fd->label == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_REQUIRED)
      m->num_required_fields++;

-    /* Insert into the tables.  Note that f->ref will be uninitialized, even in
-     * the tables' copies of *f, which is why we must update them separately
+    /* Insert into the tables.  Note that af.ref is not resolved yet, even in
+     * the tables' copies of af, which is why we must update them separately
     * when the references are resolved. */
-    struct upb_fieldsbynum_entry nument = {.e = {.key = fd->number}, .f = *f};
-    struct upb_fieldsbyname_entry strent = {.e = {.key = *fd->name}, .f = *f};
+    struct upb_abbrev_msg_field af = {.byte_offset = f->byte_offset,
+                                      .field_index = f->field_index,
+                                      .type = fd->type};
+    struct upb_fieldsbynum_entry nument = {.e = {.key = fd->number}, .f = af};
+    struct upb_fieldsbyname_entry strent = {.e = {.key = *fd->name}, .f = af};
    upb_inttable_insert(&m->fields_by_num, &nument.e);
    upb_strtable_insert(&m->fields_by_name, &strent.e);
  }
--- a/upb_msg.h
+++ b/upb_msg.h
@ -45,38 +45,67 @@
 extern "C" {
 #endif

+/* Forward declarations from descriptor.h. */
+struct google_protobuf_DescriptorProto;
+struct google_protobuf_FieldDescriptorProto;
+
 /* Structure definition. ******************************************************/

+/* Fields that reference other types have pointers to the other type. */
+union upb_msg_field_ref {
+  struct upb_msg *msg;    /* Set if type == MESSAGE */
+  struct upb_enum *_enum; /* Set if type == ENUM */
+};
+
+/* Structure that describes a single field in a message. */
 struct upb_msg_field {
+  struct google_protobuf_FieldDescriptorProto *descriptor;  /* Not owned; the caller keeps it alive. */
+  uint32_t byte_offset;  /* Byte offset of the field's data within the message. */
+  uint32_t field_index;  /* Indexes upb_msg.fields.  Also indicates the set bit. */
+  union upb_msg_field_ref ref;
+};
+
+/* Structure that describes a single .proto message type. */
+struct upb_msg {
+  struct google_protobuf_DescriptorProto *descriptor;
+  size_t size;  /* Starts at set_flags_bytes; grows by each field's aligned storage. */
+  uint32_t num_fields;  /* Number of fields in the message descriptor. */
+  uint32_t set_flags_bytes;  /* num_fields / 8, rounded up. */
+  uint32_t num_required_fields;  /* Required fields sort first, so they get the lowest field_index values. */
+  struct upb_inttable fields_by_num;
+  struct upb_strtable fields_by_name;
+  struct upb_msg_field *fields;  /* num_fields entries: required first, then by field number. */
+};
+
+/* The num->field and name->field maps in upb_msg allow fast lookup of fields
+ * by number or name.  These lookups are in the critical path of parsing and
+ * field lookup, so they must be as fast as possible.  To make them more
+ * cache-friendly, we store the data in the tables by value, but only an
+ * abbreviated set of it (i.e. not everything in upb_msg_field).  Notably, we
+ * don't include the pointer to the field descriptor.  The full upb_msg_field
+ * can still be retrieved with upb_get_msg_field() below. */
+
+struct upb_abbrev_msg_field {
  uint32_t byte_offset;     /* Where to find the data. */
  uint32_t field_index:24;  /* Indexes upb_msg.fields. Also indicates set bit */
  upb_field_type_t type;    /* Copied from descriptor for cache-friendliness. */
-  union {
-    struct upb_msg *msg;    /* Set if type == MESSAGE */
-    struct upb_enum *_enum; /* Set if type == ENUM */
-  } ref;
+  union upb_msg_field_ref ref;
 };

 struct upb_fieldsbynum_entry {
  struct upb_inttable_entry e;
-  struct upb_msg_field f;
+  struct upb_abbrev_msg_field f;
 };

 struct upb_fieldsbyname_entry {
  struct upb_strtable_entry e;
-  struct upb_msg_field f;
+  struct upb_abbrev_msg_field f;
 };

-struct upb_msg {
-  struct google_protobuf_DescriptorProto *descriptor;
-  size_t size;
-  int num_fields;
-  int set_flags_bytes;
-  int num_required_fields;  /* Required fields have the lowest set bytemasks. */
-  struct upb_inttable fields_by_num;
-  struct upb_strtable fields_by_name;
-  struct upb_msg_field *fields;
-};
+/* Returns the full upb_msg_field in m that the abbreviated entry f refers to,
+ * using f->field_index as an index into m->fields.  Marked INLINE like the
+ * other function definitions in this header: a plain external definition here
+ * would be emitted by every translation unit that includes the header and
+ * clash at link time. */
+INLINE struct upb_msg_field *upb_get_msg_field(
+    struct upb_abbrev_msg_field *f, struct upb_msg *m) {
+  return &m->fields[f->field_index];
+}

 /* Initialize and free a upb_msg.  Caller retains ownership of d, but the msg
 * will contain references to it, so it must outlive the msg.  Note that init
@ -87,14 +116,14 @@ void upb_msg_free(struct upb_msg *m);
 /* While these are written to be as fast as possible, it will still be faster
 * to cache the results of this lookup if possible.  These return NULL if no
 * such field is found. */
-INLINE struct upb_msg_field *upb_msg_fieldbynum(struct upb_msg *m,
-                                                uint32_t number) {
+INLINE struct upb_abbrev_msg_field *upb_msg_fieldbynum(struct upb_msg *m,
+                                                       uint32_t number) {
  struct upb_fieldsbynum_entry *e = upb_inttable_lookup(
      &m->fields_by_num, number, sizeof(struct upb_fieldsbynum_entry));
  return e ? &e->f : NULL;
 }
-INLINE struct upb_msg_field *upb_msg_fieldbyname(struct upb_msg *m,
-                                                 struct upb_string *name) {
+INLINE struct upb_abbrev_msg_field *upb_msg_fieldbyname(struct upb_msg *m,
+                                                        struct upb_string *name) {
  struct upb_fieldsbyname_entry *e =
      upb_strtable_lookup(&m->fields_by_name, name);
  return e ? &e->f : NULL;