Refactoring: split defs into their own file, move private parsing funcs out of .h file.

pull/13171/head
Joshua Haberman 15 years ago
parent 33a68acb14
commit e252432a41
  1. 10
      Makefile
  2. 3
      src/upb_context.c
  3. 145
      src/upb_def.c
  4. 202
      src/upb_def.h
  5. 34
      src/upb_enum.c
  6. 43
      src/upb_enum.h
  7. 110
      src/upb_msg.c
  8. 165
      src/upb_msg.h
  9. 192
      src/upb_parse.c
  10. 190
      src/upb_parse.h
  11. 2
      tests/tests.c
  12. 1
      tools/upbc.c

@ -46,7 +46,7 @@ clean:
# The core library (src/libupb.a)
SRC=src/upb.c src/upb_parse.c src/upb_table.c src/upb_msg.c src/upb_mm.c \
src/upb_enum.c src/upb_context.c src/upb_string.c src/upb_text.c \
src/upb_def.c src/upb_context.c src/upb_string.c src/upb_text.c \
descriptor/descriptor.c
#src/upb_serialize.c descriptor/descriptor.c
STATICOBJ=$(patsubst %.c,%.o,$(SRC))
@ -78,14 +78,16 @@ tests: tests/tests \
tests/t.test_vs_proto2.googlemessage1 \
tests/t.test_vs_proto2.googlemessage2
#VALGRIND=valgrind --leak-check=full --error-exitcode=1
VALGRIND=
test: tests
@echo Running all tests under valgrind.
valgrind --leak-check=full --error-exitcode=1 ./tests/tests
$(VALGRIND) ./tests/tests
# Needs to be rewritten to separate the benchmark.
# valgrind --error-exitcode=1 ./tests/test_table
@for test in tests/t.* ; do \
echo valgrind --leak-check=full --error-exitcode=1 ./$$test; \
valgrind --leak-check=full --error-exitcode=1 ./$$test; \
echo $(VALGRIND) ./$$test; \
$(VALGRIND) ./$$test; \
done;
tests/t.test_vs_proto2.googlemessage1 \

@ -8,8 +8,7 @@
#include <string.h>
#include "descriptor.h"
#include "upb_context.h"
#include "upb_enum.h"
#include "upb_msg.h"
#include "upb_def.h"
#include "upb_mm.h"
/* Search for a character in a string, in reverse. */

@ -0,0 +1,145 @@
/*
* upb - a minimalist implementation of protocol buffers.
*
* Copyright (c) 2008-2009 Joshua Haberman. See LICENSE for details.
*/
#include "upb_def.h"
#include "descriptor.h"
/* Rounds p up to the next multiple of t (p is returned unchanged when it is
 * already a multiple). Both arguments are evaluated more than once, so pass
 * only side-effect-free expressions. t must be nonzero because it is used as
 * the right-hand operand of %. */
#define ALIGN_UP(p, t) ((p) % (t) == 0 ? (p) : (p) + ((t) - ((p) % (t))))
/* Divides numerator by denominator, rounding the quotient up. Any numerator
 * that is zero or negative yields 0.
 * cf. http://stackoverflow.com/questions/17944/how-to-round-up-the-result-of-integer-division */
static int div_round_up(int numerator, int denominator) {
  if(numerator <= 0) {
    return 0;
  }
  const int full_groups_below = (numerator - 1) / denominator;
  return full_groups_below + 1;
}
/* qsort() callback for sorting an array of field-descriptor pointers.
 *
 * e1 and e2 each point at an array element, i.e. at a pointer to a
 * google_protobuf_FieldDescriptorProto. Returns a negative value if *e1 sorts
 * first, a positive value if *e2 sorts first, and 0 if they are equivalent.
 *
 * Ordering: required fields go before non-required ones; within each group
 * fields are listed in ascending field-number order.
 * TODO: consider ordering by data size to reduce padding. */
static int compare_fields(const void *e1, const void *e2) {
  const google_protobuf_FieldDescriptorProto *fd1 =
      *(const google_protobuf_FieldDescriptorProto *const *)e1;
  const google_protobuf_FieldDescriptorProto *fd2 =
      *(const google_protobuf_FieldDescriptorProto *const *)e2;
  const bool req1 =
      fd1->label == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_REQUIRED;
  const bool req2 =
      fd2->label == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_REQUIRED;
  if(req1 != req2) {
    /* Exactly one is required: -1 when it is fd1, +1 when it is fd2. */
    return req2 - req1;
  }
  /* Three-way compare instead of subtraction: the difference of two field
   * numbers can overflow, which would be undefined behavior and could also
   * yield the wrong sign. */
  return (fd1->number > fd2->number) - (fd1->number < fd2->number);
}
/* Sorts the num descriptor pointers in fds in place, using compare_fields()
 * to decide the order. */
void upb_msgdef_sortfds(google_protobuf_FieldDescriptorProto **fds, size_t num)
{
  const size_t elem_size = sizeof(*fds);  /* Each element is one pointer. */
  qsort(fds, num, elem_size, compare_fields);
}
/* Initializes *m from the descriptor d.
 *
 * Builds the by-number and by-name field tables, copies the descriptor's field
 * pointers into m->field_descriptors (optionally sorting them), and assigns
 * each field a byte offset. Offsets start after the set-flags bytes; each
 * field is aligned to its type's alignment and the final m->size is rounded up
 * to the largest alignment seen.
 *
 * m->default_msg and every upb_msg_fielddef.ref are NOT set here; refs are
 * filled in afterwards through upb_msgdef_setref().
 *
 * status is currently unused.
 * NOTE(review): the two malloc() results below are used without a NULL check,
 * and with num_fields == 0 they are zero-byte requests. */
void upb_msgdef_init(struct upb_msgdef *m, google_protobuf_DescriptorProto *d,
struct upb_string fqname, bool sort, struct upb_context *c,
struct upb_status *status)
{
(void)status; // Nothing that can fail at the moment.
/* A descriptor without the "field" member set is treated as having no fields. */
int num_fields = d->set_flags.has.field ? d->field->len : 0;
upb_inttable_init(&m->fields_by_num, num_fields,
sizeof(struct upb_fieldsbynum_entry));
upb_strtable_init(&m->fields_by_name, num_fields,
sizeof(struct upb_fieldsbyname_entry));
m->descriptor = d;
m->fqname = fqname;
m->context = c;
m->num_fields = num_fields;
/* One set-flag bit per field, rounded up to whole bytes. */
m->set_flags_bytes = div_round_up(m->num_fields, 8);
/* These are incremented in the loop. */
m->num_required_fields = 0;
m->size = m->set_flags_bytes;
m->fields = malloc(sizeof(*m->fields) * m->num_fields);
m->field_descriptors = malloc(sizeof(*m->field_descriptors) * m->num_fields);
for(unsigned int i = 0; i < m->num_fields; i++) {
/* We count on the caller to keep this pointer alive. */
m->field_descriptors[i] = d->field->elements[i];
}
/* Sorting reorders only our private pointer array, not the descriptor. */
if(sort) upb_msgdef_sortfds(m->field_descriptors, m->num_fields);
size_t max_align = 0;
for(unsigned int i = 0; i < m->num_fields; i++) {
struct upb_msg_fielddef *f = &m->fields[i];
google_protobuf_FieldDescriptorProto *fd = m->field_descriptors[i];
struct upb_type_info *type_info = &upb_type_info[fd->type];
/* General alignment rules are: each member must be at an address that is a
* multiple of that type's alignment. Also, the size of the structure as
* a whole must be a multiple of the greatest alignment of any member. */
f->field_index = i;
f->byte_offset = ALIGN_UP(m->size, type_info->align);
f->type = fd->type;
f->label = fd->label;
m->size = f->byte_offset + type_info->size;
max_align = UPB_MAX(max_align, type_info->align);
if(fd->label == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_REQUIRED)
m->num_required_fields++;
/* Insert into the tables. Note that f->ref will be uninitialized, even in
* the tables' copies of *f, which is why we must update them separately
* in upb_msgdef_setref() below. */
struct upb_fieldsbynum_entry nument = {.e = {.key = fd->number}, .f = *f};
struct upb_fieldsbyname_entry strent = {.e = {.key = *fd->name}, .f = *f};
upb_inttable_insert(&m->fields_by_num, &nument.e);
upb_strtable_insert(&m->fields_by_name, &strent.e);
}
/* max_align stays 0 for a message with no fields; skip the final padding
* step then, since ALIGN_UP would divide by zero. */
if(max_align > 0)
m->size = ALIGN_UP(m->size, max_align);
}
/* Releases everything upb_msgdef_init() allocated inside *m: the two field
 * arrays and both lookup tables. The upb_msgdef struct itself, its descriptor
 * and its fqname are not freed here. */
void upb_msgdef_free(struct upb_msgdef *m)
{
  /* Heap arrays. */
  free(m->field_descriptors);
  free(m->fields);
  /* Lookup tables. */
  upb_strtable_free(&m->fields_by_name);
  upb_inttable_free(&m->fields_by_num);
}
/* Stores ref into field f of msgdef m.
 *
 * The lookup tables hold their own by-value copies of the fielddef, so the
 * same ref is written to three places: *f, the by-number table entry, and the
 * by-name table entry. Asserts that both table entries exist. */
void upb_msgdef_setref(struct upb_msgdef *m, struct upb_msg_fielddef *f,
                       union upb_symbol_ref ref) {
  struct google_protobuf_FieldDescriptorProto *fd =
      upb_msg_field_descriptor(f, m);
  struct upb_fieldsbynum_entry *by_num = upb_inttable_fast_lookup(
      &m->fields_by_num, fd->number, sizeof(struct upb_fieldsbynum_entry));
  struct upb_fieldsbyname_entry *by_name =
      upb_strtable_lookup(&m->fields_by_name, fd->name);
  assert(by_num && by_name);
  by_num->f.ref = ref;
  by_name->f.ref = ref;
  f->ref = ref;
}
void upb_enum_init(struct upb_enum *e,
struct google_protobuf_EnumDescriptorProto *ed,
struct upb_context *c) {
int num_values = ed->set_flags.has.value ? ed->value->len : 0;
e->descriptor = ed;
e->context = c;
upb_atomic_refcount_init(&e->refcount, 0);
upb_strtable_init(&e->nametoint, num_values, sizeof(struct upb_enum_ntoi_entry));
upb_inttable_init(&e->inttoname, num_values, sizeof(struct upb_enum_iton_entry));
for(int i = 0; i < num_values; i++) {
google_protobuf_EnumValueDescriptorProto *value = ed->value->elements[i];
struct upb_enum_ntoi_entry ntoi_entry = {.e = {.key = *value->name},
.value = value->number};
struct upb_enum_iton_entry iton_entry = {.e = {.key = value->number},
.string = value->name};
upb_strtable_insert(&e->nametoint, &ntoi_entry.e);
upb_inttable_insert(&e->inttoname, &iton_entry.e);
}
}
/* Frees the two lookup tables owned by *e. The upb_enum struct itself and its
 * descriptor are not freed here. */
void upb_enum_free(struct upb_enum *e) {
  upb_inttable_free(&e->inttoname);
  upb_strtable_free(&e->nametoint);
}

@ -0,0 +1,202 @@
/*
* upb - a minimalist implementation of protocol buffers.
*
* Copyright (c) 2009 Joshua Haberman. See LICENSE for details.
*
* Provides definitions of .proto constructs:
* - upb_msgdef: describes a "message" construct.
* - upb_msg_fielddef: describes a message field.
* - upb_enum: describes an enum.
* (TODO: descriptions of extensions and services).
*
* This file contains routines for creating and manipulating the definitions
* themselves. To create and manipulate actual messages, see upb_msg.h.
*/
#ifndef UPB_DEF_H_
#define UPB_DEF_H_
#include "upb_atomic.h"
#include "upb_table.h"
#ifdef __cplusplus
extern "C" {
#endif
/* Message definition. ********************************************************/
struct upb_msg_fielddef;
struct upb_context;
/* Structure that describes a single .proto message type. Populated by
 * upb_msgdef_init() and torn down by upb_msgdef_free(). */
struct upb_msgdef {
struct upb_context *context; /* Context passed to upb_msgdef_init(). */
struct upb_msg *default_msg; /* Message with all default values set. */
struct google_protobuf_DescriptorProto *descriptor; /* Not owned; must outlive this msgdef. */
struct upb_string fqname; /* Fully qualified. */
size_t size; /* Set-flags bytes plus all fields, padded to the largest field alignment. */
uint32_t num_fields;
uint32_t set_flags_bytes; /* One bit per field, rounded up to whole bytes. */
uint32_t num_required_fields; /* Count of required fields; when fields are sorted these come first, so they have the lowest field indexes (set bits). */
struct upb_inttable fields_by_num; /* Field number -> struct upb_fieldsbynum_entry. */
struct upb_strtable fields_by_name; /* Field name -> struct upb_fieldsbyname_entry. */
struct upb_msg_fielddef *fields; /* Array of num_fields entries, indexed by field_index. */
struct google_protobuf_FieldDescriptorProto **field_descriptors; /* Parallel to fields; the pointed-to descriptors are not owned. */
};
/* Structure that describes a single field in a message. This structure is very
 * consciously designed to fit into 12/16 bytes (32/64 bit, respectively),
 * because copies of this struct are in the hash table that is read in the
 * critical path of parsing. Minimizing the size of this struct increases
 * cache-friendliness.
 *
 * Because the tables hold copies, a change to one instance is not seen by the
 * others; upb_msgdef_setref() exists to update ref in all of them. */
struct upb_msg_fielddef {
union upb_symbol_ref ref; /* Left unset by upb_msgdef_init(); set via upb_msgdef_setref(). */
uint32_t byte_offset; /* Where to find the data. */
uint16_t field_index; /* Indexes upb_msgdef.fields and indicates set bit */
upb_field_type_t type; /* Copied from descriptor for cache-friendliness. */
upb_label_t label; /* Copied from the descriptor's label. */
};
/* Reports whether f's type is a submessage type, per upb_issubmsgtype(). */
INLINE bool upb_issubmsg(struct upb_msg_fielddef *f) {
  const bool is_submsg = upb_issubmsgtype(f->type);
  return is_submsg;
}
/* Reports whether f's type is a string type, per upb_isstringtype(). */
INLINE bool upb_isstring(struct upb_msg_fielddef *f) {
  const bool is_string = upb_isstringtype(f->type);
  return is_string;
}
/* Reports whether f is a repeated field (its label is LABEL_REPEATED). */
INLINE bool upb_isarray(struct upb_msg_fielddef *f) {
  const bool repeated =
      (f->label == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_REPEATED);
  return repeated;
}
/* Reports whether the field as a whole is an array, a string or a
 * submessage -- the cases for which upb_field_ptrtype() is defined. */
INLINE bool upb_field_ismm(struct upb_msg_fielddef *f) {
  if(upb_isarray(f)) return true;
  if(upb_isstring(f)) return true;
  return upb_issubmsg(f);
}
/* Reports whether an individual element of the field is a string or a
 * submessage (the repeated label is not considered) -- the cases for which
 * upb_elem_ptrtype() is defined. */
INLINE bool upb_elem_ismm(struct upb_msg_fielddef *f) {
  if(upb_isstring(f)) return true;
  return upb_issubmsg(f);
}
/* Maps a field to the kind of reference it stores. Defined iff
 * upb_field_ismm(f); the array check takes precedence over the element type.
 * Returns -1 for any other field. */
INLINE upb_mm_ptrtype upb_field_ptrtype(struct upb_msg_fielddef *f) {
  if(upb_isarray(f)) return UPB_MM_ARR_REF;
  if(upb_isstring(f)) return UPB_MM_STR_REF;
  if(upb_issubmsg(f)) return UPB_MM_MSG_REF;
  return -1;
}
/* Maps a field's element type to the kind of reference it stores, ignoring
 * whether the field is repeated. Defined iff upb_elem_ismm(f); returns -1 for
 * any other field. */
INLINE upb_mm_ptrtype upb_elem_ptrtype(struct upb_msg_fielddef *f) {
  if(upb_isstring(f)) return UPB_MM_STR_REF;
  if(upb_issubmsg(f)) return UPB_MM_MSG_REF;
  return -1;
}
/* Returns the descriptor that field f of msgdef m was built from, found by
 * indexing m->field_descriptors with f->field_index. */
INLINE struct google_protobuf_FieldDescriptorProto *upb_msg_field_descriptor(
    struct upb_msg_fielddef *f, struct upb_msgdef *m) {
  struct google_protobuf_FieldDescriptorProto **descriptors =
      m->field_descriptors;
  return descriptors[f->field_index];
}
/* Number->field and name->field lookup. *************************************/
/* The num->field and name->field maps in upb_msgdef allow fast lookup of fields
* by number or name. These lookups are in the critical path of parsing and
* field lookup, so they must be as fast as possible. To make these more
* cache-friendly, we put the data in the table by value. */
/* Entry type of upb_msgdef.fields_by_num: the table's own entry header
 * followed by a by-value copy of the field definition. */
struct upb_fieldsbynum_entry {
struct upb_inttable_entry e; /* Table header; its key is the field number. */
struct upb_msg_fielddef f; /* Copy of the fielddef, not a pointer. */
};
/* Entry type of upb_msgdef.fields_by_name: the table's own entry header
 * followed by a by-value copy of the field definition. */
struct upb_fieldsbyname_entry {
struct upb_strtable_entry e; /* Table header; its key is the field name. */
struct upb_msg_fielddef f; /* Copy of the fielddef, not a pointer. */
};
/* Looks up a field by name or number. While these are written to be as fast
* as possible, it will still be faster to cache the results of this lookup if
* possible. These return NULL if no such field is found. */
/* Finds the field of m with the given field number. Returns a pointer to the
 * fielddef copy stored inside the table entry, or NULL if there is none. */
INLINE struct upb_msg_fielddef *upb_msg_fieldbynum(struct upb_msgdef *m,
                                                   uint32_t number) {
  struct upb_fieldsbynum_entry *entry =
      (struct upb_fieldsbynum_entry*)upb_inttable_fast_lookup(
          &m->fields_by_num, number, sizeof(struct upb_fieldsbynum_entry));
  if(!entry) return NULL;
  return &entry->f;
}
/* Finds the field of m with the given name. Returns a pointer to the fielddef
 * copy stored inside the table entry, or NULL if there is none. */
INLINE struct upb_msg_fielddef *upb_msg_fieldbyname(struct upb_msgdef *m,
                                                    struct upb_string *name) {
  struct upb_fieldsbyname_entry *entry =
      (struct upb_fieldsbyname_entry*)upb_strtable_lookup(
          &m->fields_by_name, name);
  if(!entry) return NULL;
  return &entry->f;
}
/* Enums. *********************************************************************/
/* Run-time description of a .proto enum, with lookup tables in both
 * directions between value names and value numbers. */
struct upb_enum {
upb_atomic_refcount_t refcount; /* Initialized to 0 by upb_enum_init(). */
struct upb_context *context; /* Context passed to upb_enum_init(). */
struct google_protobuf_EnumDescriptorProto *descriptor; /* Not owned; must outlive this enum. */
struct upb_strtable nametoint; /* Entries are struct upb_enum_ntoi_entry. */
struct upb_inttable inttoname; /* Entries are struct upb_enum_iton_entry. */
};
/* Entry type of upb_enum.nametoint (value name -> number). */
struct upb_enum_ntoi_entry {
struct upb_strtable_entry e; /* Table header; its key is the value's name. */
uint32_t value; /* The enum value's number. */
};
/* Entry type of upb_enum.inttoname (number -> value name). */
struct upb_enum_iton_entry {
struct upb_inttable_entry e; /* Table header; its key is the value's number. */
struct upb_string *string; /* Points at the name held by the descriptor; not a copy. */
};
/* Initializes and frees an enum, respectively. Caller retains ownership of
* ed, but it must outlive e. */
void upb_enum_init(struct upb_enum *e,
struct google_protobuf_EnumDescriptorProto *ed,
struct upb_context *c);
void upb_enum_free(struct upb_enum *e);
/* Internal functions. ********************************************************/
/* Initializes/frees a upb_msgdef. Usually this will be called by upb_context,
 * and clients will not have to construct one directly.
 *
 * Caller retains ownership of d, but the msgdef will contain references to it,
 * so it must outlive the msgdef. Note that init does not resolve
 * upb_msg_fielddef.ref; the caller should do that post-initialization by
 * calling upb_msgdef_setref() below.
 *
 * fqname indicates the fully-qualified name of this message. The upb_string
 * struct is stored by value in the msgdef, and upb_msgdef_free() does not
 * release it, so whatever it refers to must outlive the msgdef.
 * NOTE(review): the earlier wording said ownership of fqname "passes to the
 * msg" -- confirm who is expected to free the string data.
 *
 * sort indicates whether or not it is safe to reorder the fields from the order
 * they appear in d. This should be false if code has been compiled against a
 * header for this type that expects the given order.
 *
 * status is accepted for future use; nothing is reported through it yet. */
void upb_msgdef_init(struct upb_msgdef *m,
struct google_protobuf_DescriptorProto *d,
struct upb_string fqname, bool sort,
struct upb_context *c, struct upb_status *status);
void upb_msgdef_free(struct upb_msgdef *m);
/* Sort the given field descriptors in-place, according to what we think is an
* optimal ordering of fields. This can change from upb release to upb
* release. */
void upb_msgdef_sortfds(struct google_protobuf_FieldDescriptorProto **fds,
size_t num);
/* Clients use this function on a previously initialized upb_msgdef to resolve
* the "ref" field in the upb_msg_fielddef. Since messages can refer to each
* other in mutually-recursive ways, this step must be separated from
* initialization. */
void upb_msgdef_setref(struct upb_msgdef *m, struct upb_msg_fielddef *f,
union upb_symbol_ref ref);
#ifdef __cplusplus
} /* extern "C" */
#endif
#endif /* UPB_DEF_H_ */

@ -1,34 +0,0 @@
/*
* upb - a minimalist implementation of protocol buffers.
*
* Copyright (c) 2009 Joshua Haberman. See LICENSE for details.
*/
#include "descriptor.h"
#include "upb_enum.h"
void upb_enum_init(struct upb_enum *e,
struct google_protobuf_EnumDescriptorProto *ed,
struct upb_context *c) {
int num_values = ed->set_flags.has.value ? ed->value->len : 0;
e->descriptor = ed;
e->context = c;
upb_atomic_refcount_init(&e->refcount, 0);
upb_strtable_init(&e->nametoint, num_values, sizeof(struct upb_enum_ntoi_entry));
upb_inttable_init(&e->inttoname, num_values, sizeof(struct upb_enum_iton_entry));
for(int i = 0; i < num_values; i++) {
google_protobuf_EnumValueDescriptorProto *value = ed->value->elements[i];
struct upb_enum_ntoi_entry ntoi_entry = {.e = {.key = *value->name},
.value = value->number};
struct upb_enum_iton_entry iton_entry = {.e = {.key = value->number},
.string = value->name};
upb_strtable_insert(&e->nametoint, &ntoi_entry.e);
upb_inttable_insert(&e->inttoname, &iton_entry.e);
}
}
void upb_enum_free(struct upb_enum *e) {
upb_strtable_free(&e->nametoint);
upb_inttable_free(&e->inttoname);
}

@ -1,43 +0,0 @@
/*
* upb - a minimalist implementation of protocol buffers.
*
* Copyright (c) 2009 Joshua Haberman. See LICENSE for details.
*
* upb_enum is a simple object that allows run-time reflection over the values
* defined within an enum. */
#ifndef UPB_ENUM_H_
#define UPB_ENUM_H_
#include <stdint.h>
#include "upb_atomic.h"
#include "upb_context.h"
#include "upb_table.h"
#include "descriptor.h"
struct upb_enum {
upb_atomic_refcount_t refcount;
struct upb_context *context;
struct google_protobuf_EnumDescriptorProto *descriptor;
struct upb_strtable nametoint;
struct upb_inttable inttoname;
};
struct upb_enum_ntoi_entry {
struct upb_strtable_entry e;
uint32_t value;
};
struct upb_enum_iton_entry {
struct upb_inttable_entry e;
struct upb_string *string;
};
/* Initializes and frees an enum, respectively. Caller retains ownership of
* ed, but it must outlive e. */
void upb_enum_init(struct upb_enum *e,
struct google_protobuf_EnumDescriptorProto *ed,
struct upb_context *c);
void upb_enum_free(struct upb_enum *e);
#endif /* UPB_ENUM_H_ */

@ -13,116 +13,6 @@
#include "upb_serialize.h"
#include "upb_text.h"
/* Rounds p up to the next multiple of t. */
#define ALIGN_UP(p, t) ((p) % (t) == 0 ? (p) : (p) + ((t) - ((p) % (t))))
static int div_round_up(int numerator, int denominator) {
/* cf. http://stackoverflow.com/questions/17944/how-to-round-up-the-result-of-integer-division */
return numerator > 0 ? (numerator - 1) / denominator + 1 : 0;
}
/* Callback for sorting fields. */
static int compare_fields(const void *e1, const void *e2) {
const google_protobuf_FieldDescriptorProto *fd1 = *(void**)e1;
const google_protobuf_FieldDescriptorProto *fd2 = *(void**)e2;
/* Required fields go before non-required. */
bool req1 = fd1->label == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_REQUIRED;
bool req2 = fd2->label == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_REQUIRED;
if(req1 != req2) {
return req2 - req1;
} else {
/* Within required and non-required field lists, list in number order.
* TODO: consider ordering by data size to reduce padding. */
return fd1->number - fd2->number;
}
}
void upb_msgdef_sortfds(google_protobuf_FieldDescriptorProto **fds, size_t num)
{
qsort(fds, num, sizeof(void*), compare_fields);
}
void upb_msgdef_init(struct upb_msgdef *m, google_protobuf_DescriptorProto *d,
struct upb_string fqname, bool sort, struct upb_context *c,
struct upb_status *status)
{
(void)status; // Nothing that can fail at the moment.
int num_fields = d->set_flags.has.field ? d->field->len : 0;
upb_inttable_init(&m->fields_by_num, num_fields,
sizeof(struct upb_fieldsbynum_entry));
upb_strtable_init(&m->fields_by_name, num_fields,
sizeof(struct upb_fieldsbyname_entry));
m->descriptor = d;
m->fqname = fqname;
m->context = c;
m->num_fields = num_fields;
m->set_flags_bytes = div_round_up(m->num_fields, 8);
/* These are incremented in the loop. */
m->num_required_fields = 0;
m->size = m->set_flags_bytes;
m->fields = malloc(sizeof(*m->fields) * m->num_fields);
m->field_descriptors = malloc(sizeof(*m->field_descriptors) * m->num_fields);
for(unsigned int i = 0; i < m->num_fields; i++) {
/* We count on the caller to keep this pointer alive. */
m->field_descriptors[i] = d->field->elements[i];
}
if(sort) upb_msgdef_sortfds(m->field_descriptors, m->num_fields);
size_t max_align = 0;
for(unsigned int i = 0; i < m->num_fields; i++) {
struct upb_msg_fielddef *f = &m->fields[i];
google_protobuf_FieldDescriptorProto *fd = m->field_descriptors[i];
struct upb_type_info *type_info = &upb_type_info[fd->type];
/* General alignment rules are: each member must be at an address that is a
* multiple of that type's alignment. Also, the size of the structure as
* a whole must be a multiple of the greatest alignment of any member. */
f->field_index = i;
f->byte_offset = ALIGN_UP(m->size, type_info->align);
f->type = fd->type;
f->label = fd->label;
m->size = f->byte_offset + type_info->size;
max_align = UPB_MAX(max_align, type_info->align);
if(fd->label == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_REQUIRED)
m->num_required_fields++;
/* Insert into the tables. Note that f->ref will be uninitialized, even in
* the tables' copies of *f, which is why we must update them separately
* in upb_msg_setref() below. */
struct upb_fieldsbynum_entry nument = {.e = {.key = fd->number}, .f = *f};
struct upb_fieldsbyname_entry strent = {.e = {.key = *fd->name}, .f = *f};
upb_inttable_insert(&m->fields_by_num, &nument.e);
upb_strtable_insert(&m->fields_by_name, &strent.e);
}
if(max_align > 0)
m->size = ALIGN_UP(m->size, max_align);
}
void upb_msgdef_free(struct upb_msgdef *m)
{
upb_inttable_free(&m->fields_by_num);
upb_strtable_free(&m->fields_by_name);
free(m->fields);
free(m->field_descriptors);
}
void upb_msgdef_setref(struct upb_msgdef *m, struct upb_msg_fielddef *f,
union upb_symbol_ref ref) {
struct google_protobuf_FieldDescriptorProto *d =
upb_msg_field_descriptor(f, m);
struct upb_fieldsbynum_entry *int_e = upb_inttable_fast_lookup(
&m->fields_by_num, d->number, sizeof(struct upb_fieldsbynum_entry));
struct upb_fieldsbyname_entry *str_e =
upb_strtable_lookup(&m->fields_by_name, d->name);
assert(int_e && str_e);
f->ref = ref;
int_e->f.ref = ref;
str_e->f.ref = ref;
}
/* Parsing. ******************************************************************/
struct upb_msgparser_frame {

@ -3,29 +3,14 @@
*
* Copyright (c) 2009 Joshua Haberman. See LICENSE for details.
*
* A upb_msgdef provides a full description of a message type as defined in a
* .proto file. Using a upb_msgdef, it is possible to treat an arbitrary hunk
* of memory (a void*) as a protobuf of the given type. We will call this
* void* a upb_msg in the context of this interface.
*
* Clients generally do not construct or destruct upb_msgdef objects directly.
* They are managed by upb_contexts, and clients can obtain upb_msgdef pointers
* directly from a upb_context.
* The upb_msg routines provide facilities for creating and manipulating
* messages according to a upb_msgdef definition.
*
* A upb_msg is READ-ONLY, and the upb_msgdef functions in this file provide
* read-only access. For a mutable message, or for a message that you can take
* a reference to in order to prevent its destruction, see upb_mm_msg.h, which
* is a layer on top of upb_msg that adds memory management semantics.
*
* upb_msgdef supports many features and operations for dealing with proto
* messages:
* - reflection over .proto types at runtime (list fields, get names, etc).
* - an in-memory byte-level format for efficiently storing and accessing msgs.
* - serializing from the in-memory format to a protobuf.
* - parsing from a protobuf to an in-memory data structure (you either
* supply callbacks for allocating/repurposing memory or use a simplified
* version that parses into newly-allocated memory).
*
* The in-memory format is very much like a C struct that you can define at
* run-time, but also supports reflection. Like C structs it supports
* offset-based access, as opposed to the much slower name-based lookup. The
@ -56,6 +41,7 @@
#include "descriptor.h"
#include "upb.h"
#include "upb_def.h"
#include "upb_parse.h"
#include "upb_table.h"
@ -63,78 +49,6 @@
extern "C" {
#endif
/* Message definition. ********************************************************/
struct upb_msg_fielddef;
struct upb_context;
/* Structure that describes a single .proto message type. */
struct upb_msgdef {
struct upb_context *context;
struct upb_msg *default_msg; /* Message with all default values set. */
struct google_protobuf_DescriptorProto *descriptor;
struct upb_string fqname; /* Fully qualified. */
size_t size;
uint32_t num_fields;
uint32_t set_flags_bytes;
uint32_t num_required_fields; /* Required fields have the lowest set bytemasks. */
struct upb_inttable fields_by_num;
struct upb_strtable fields_by_name;
struct upb_msg_fielddef *fields;
struct google_protobuf_FieldDescriptorProto **field_descriptors;
};
/* Structure that describes a single field in a message. This structure is very
* consciously designed to fit into 12/16 bytes (32/64 bit, respectively),
* because copies of this struct are in the hash table that is read in the
* critical path of parsing. Minimizing the size of this struct increases
* cache-friendliness. */
struct upb_msg_fielddef {
union upb_symbol_ref ref;
uint32_t byte_offset; /* Where to find the data. */
uint16_t field_index; /* Indexes upb_msgdef.fields and indicates set bit */
upb_field_type_t type; /* Copied from descriptor for cache-friendliness. */
upb_label_t label;
};
INLINE bool upb_issubmsg(struct upb_msg_fielddef *f) {
return upb_issubmsgtype(f->type);
}
INLINE bool upb_isstring(struct upb_msg_fielddef *f) {
return upb_isstringtype(f->type);
}
INLINE bool upb_isarray(struct upb_msg_fielddef *f) {
return f->label == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_REPEATED;
}
INLINE bool upb_field_ismm(struct upb_msg_fielddef *f) {
return upb_isarray(f) || upb_isstring(f) || upb_issubmsg(f);
}
INLINE bool upb_elem_ismm(struct upb_msg_fielddef *f) {
return upb_isstring(f) || upb_issubmsg(f);
}
/* Defined iff upb_field_ismm(f). */
INLINE upb_mm_ptrtype upb_field_ptrtype(struct upb_msg_fielddef *f) {
if(upb_isarray(f)) return UPB_MM_ARR_REF;
else if(upb_isstring(f)) return UPB_MM_STR_REF;
else if(upb_issubmsg(f)) return UPB_MM_MSG_REF;
else return -1;
}
/* Defined iff upb_elem_ismm(f). */
INLINE upb_mm_ptrtype upb_elem_ptrtype(struct upb_msg_fielddef *f) {
if(upb_isstring(f)) return UPB_MM_STR_REF;
else if(upb_issubmsg(f)) return UPB_MM_MSG_REF;
else return -1;
}
/* Can be used to retrieve a field descriptor given the upb_msg_fielddef. */
INLINE struct google_protobuf_FieldDescriptorProto *upb_msg_field_descriptor(
struct upb_msg_fielddef *f, struct upb_msgdef *m) {
return m->field_descriptors[f->field_index];
}
/* Message structure. *********************************************************/
/* Constructs a new msg corresponding to the given msgdef, and having one
@ -217,43 +131,6 @@ INLINE void upb_msg_clear(struct upb_msg *msg)
memset(msg->data, 0, msg->def->set_flags_bytes);
}
/* Number->field and name->field lookup. *************************************/
/* The num->field and name->field maps in upb_msgdef allow fast lookup of fields
* by number or name. These lookups are in the critical path of parsing and
* field lookup, so they must be as fast as possible. To make these more
* cache-friendly, we put the data in the table by value. */
struct upb_fieldsbynum_entry {
struct upb_inttable_entry e;
struct upb_msg_fielddef f;
};
struct upb_fieldsbyname_entry {
struct upb_strtable_entry e;
struct upb_msg_fielddef f;
};
/* Looks up a field by name or number. While these are written to be as fast
* as possible, it will still be faster to cache the results of this lookup if
* possible. These return NULL if no such field is found. */
INLINE struct upb_msg_fielddef *upb_msg_fieldbynum(struct upb_msgdef *m,
uint32_t number) {
struct upb_fieldsbynum_entry *e =
(struct upb_fieldsbynum_entry*)upb_inttable_fast_lookup(
&m->fields_by_num, number, sizeof(struct upb_fieldsbynum_entry));
return e ? &e->f : NULL;
}
INLINE struct upb_msg_fielddef *upb_msg_fieldbyname(struct upb_msgdef *m,
struct upb_string *name) {
struct upb_fieldsbyname_entry *e =
(struct upb_fieldsbyname_entry*)upb_strtable_lookup(
&m->fields_by_name, name);
return e ? &e->f : NULL;
}
/* Parsing ********************************************************************/
/* TODO: a stream parser. */
@ -319,42 +196,6 @@ void upb_msg_serialize_all(struct upb_msg *msg, struct upb_msgsizes *sizes,
bool upb_msg_eql(struct upb_msg *msg1, struct upb_msg *msg2, bool recursive);
void upb_msg_print(struct upb_msg *data, bool single_line, FILE *stream);
/* Internal functions. ********************************************************/
/* Initializes/frees a upb_msgdef. Usually this will be called by upb_context,
* and clients will not have to construct one directly.
*
* Caller retains ownership of d, but the msg will contain references to it, so
* it must outlive the msg. Note that init does not resolve
* upb_msg_fielddef.ref; the caller should do that post-initialization by
* calling upb_msgdef_setref() below.
*
* fqname indicates the fully-qualified name of this message. Ownership of
* fqname passes to the msg, but the msg will contain references to it, so it
* must outlive the msg.
*
* sort indicates whether or not it is safe to reorder the fields from the order
* they appear in d. This should be false if code has been compiled against a
* header for this type that expects the given order. */
void upb_msgdef_init(struct upb_msgdef *m,
struct google_protobuf_DescriptorProto *d,
struct upb_string fqname, bool sort,
struct upb_context *c, struct upb_status *status);
void upb_msgdef_free(struct upb_msgdef *m);
/* Sort the given field descriptors in-place, according to what we think is an
* optimal ordering of fields. This can change from upb release to upb
* release. */
void upb_msgdef_sortfds(struct google_protobuf_FieldDescriptorProto **fds,
size_t num);
/* Clients use this function on a previously initialized upb_msgdef to resolve
* the "ref" field in the upb_msg_fielddef. Since messages can refer to each
* other in mutually-recursive ways, this step must be separated from
* initialization. */
void upb_msgdef_setref(struct upb_msgdef *m, struct upb_msg_fielddef *f,
union upb_symbol_ref ref);
#ifdef __cplusplus
} /* extern "C" */
#endif

@ -9,6 +9,198 @@
#include <stddef.h>
#include <stdlib.h>
/* Functions to read wire values. *********************************************/
// These functions are internal to the parser, but might be moved into an
// internal header file if we at some point in the future opt to do code
// generation, because the generated code would want to inline these functions.
// The same applies to the functions to read .proto values below.
// Out-of-line varint reader. The inlined upb_get_v_uint64_t() below handles
// 1-byte varints itself and forwards every other case here, passing its
// arguments through unchanged and returning this function's result.
uint8_t *upb_get_v_uint64_t_full(uint8_t *buf, uint8_t *end, uint64_t *val,
struct upb_status *status);
// Gets a varint (wire type: UPB_WIRE_TYPE_VARINT).
//
// buf points at the first byte of the varint and end at one past the last
// readable byte. On the fast path the decoded value is stored in *val and a
// pointer to the byte after the varint is returned. Every other case is
// delegated to upb_get_v_uint64_t_full(), whose result is returned as-is.
INLINE uint8_t *upb_get_v_uint64_t(uint8_t *buf, uint8_t *end, uint64_t *val,
                                   struct upb_status *status)
{
  // We inline the common case (1-byte varints); if that fails we dispatch to
  // the full (non-inlined) version. The buf < end test keeps the fast path
  // from reading a byte beyond the buffer when no input is left.
  // NOTE(review): this relies on upb_get_v_uint64_t_full() reporting an
  // exhausted buffer through status, as upb_skip_v_uint64_t() does -- confirm.
  if(buf < end && (*buf & 0x80) == 0) {
    *val = *buf & 0x7f;
    return buf + 1;
  }
  return upb_get_v_uint64_t_full(buf, end, val, status);
}
// Gets a varint when the caller only needs its low 32 bits.
//
// Decodes a full 64-bit varint with upb_get_v_uint64_t(), stores the value
// truncated to 32 bits in *val, and returns that function's result unchanged.
INLINE uint8_t *upb_get_v_uint32_t(uint8_t *buf, uint8_t *end,
                                   uint32_t *val, struct upb_status *status)
{
  uint64_t wide;
  uint8_t *next = upb_get_v_uint64_t(buf, end, &wide, status);
  *val = (uint32_t)wide;  // Discard the high bits.
  return next;
}
// Gets a fixed-length 32-bit integer (wire type: UPB_WIRE_TYPE_32BIT).
//
// On success stores the value in *val and returns a pointer to the byte after
// it. If fewer than 4 bytes remain before end, sets status->code to
// UPB_STATUS_NEED_MORE_DATA, leaves *val untouched and returns end.
INLINE uint8_t *upb_get_f_uint32_t(uint8_t *buf, uint8_t *end,
                                   uint32_t *val, struct upb_status *status)
{
  // Compare the remaining length rather than computing buf + 4 first: forming
  // a pointer beyond one-past-the-end of the buffer is undefined behavior.
  if(end - buf < (ptrdiff_t)sizeof(uint32_t)) {
    status->code = UPB_STATUS_NEED_MORE_DATA;
    return end;
  }
#if UPB_UNALIGNED_READS_OK
  // Direct load in host byte order.
  *val = *(uint32_t*)buf;
#else
  // Byte-wise little-endian assembly: buf[0] is the least significant byte.
#define SHL(val, bits) ((uint32_t)(val) << (bits))
  *val = SHL(buf[0], 0) | SHL(buf[1], 8) | SHL(buf[2], 16) | SHL(buf[3], 24);
#undef SHL
#endif
  return buf + sizeof(uint32_t);
}
// Gets a fixed-length 64-bit integer (wire type: UPB_WIRE_TYPE_64BIT).
//
// On success stores the value in *val and returns a pointer to the byte after
// it. If fewer than 8 bytes remain before end, sets status->code to
// UPB_STATUS_NEED_MORE_DATA, leaves *val untouched and returns end.
INLINE uint8_t *upb_get_f_uint64_t(uint8_t *buf, uint8_t *end,
                                   uint64_t *val, struct upb_status *status)
{
  // Compare the remaining length rather than computing buf + 8 first: forming
  // a pointer beyond one-past-the-end of the buffer is undefined behavior.
  if(end - buf < (ptrdiff_t)sizeof(uint64_t)) {
    status->code = UPB_STATUS_NEED_MORE_DATA;
    return end;
  }
#if UPB_UNALIGNED_READS_OK
  // Direct load in host byte order.
  *val = *(uint64_t*)buf;
#else
  // Byte-wise little-endian assembly: buf[0] is the least significant byte.
#define SHL(val, bits) ((uint64_t)(val) << (bits))
  *val = SHL(buf[0], 0) | SHL(buf[1], 8) | SHL(buf[2], 16) | SHL(buf[3], 24) |
         SHL(buf[4], 32) | SHL(buf[5], 40) | SHL(buf[6], 48) | SHL(buf[7], 56);
#undef SHL
#endif
  return buf + sizeof(uint64_t);
}
// Skips over a varint without decoding it.
//
// Scans forward from buf until a byte with the high (continuation) bit clear
// has been consumed or end is reached, and returns the position just after the
// last byte consumed. The scan is bounded only by end, not by the 10-byte
// limit; the limit is checked afterwards:
//  - input ran out within 10 bytes of the start while the continuation bit was
//    still set: status->code = UPB_STATUS_NEED_MORE_DATA, returns end;
//  - more than 10 bytes were consumed: status->code =
//    UPB_ERROR_UNTERMINATED_VARINT, returns end.
INLINE uint8_t *upb_skip_v_uint64_t(uint8_t *buf, uint8_t *end,
struct upb_status *status)
{
uint8_t *const maxend = buf + 10;
// Starts with the continuation bit set so that an empty buffer (buf == end)
// falls into the NEED_MORE_DATA branch below.
uint8_t last = 0x80;
for(; buf < (uint8_t*)end && (last & 0x80); buf++)
last = *buf;
if(buf >= end && buf <= maxend && (last & 0x80)) {
status->code = UPB_STATUS_NEED_MORE_DATA;
buf = end;
} else if(buf > maxend) {
status->code = UPB_ERROR_UNTERMINATED_VARINT;
buf = end;
}
return buf;
}
// Skips a fixed-length 32-bit value.
//
// Returns a pointer to the byte after the value. If fewer than 4 bytes remain
// before end, sets status->code to UPB_STATUS_NEED_MORE_DATA and returns end.
INLINE uint8_t *upb_skip_f_uint32_t(uint8_t *buf, uint8_t *end,
                                    struct upb_status *status)
{
  // Compare the remaining length rather than computing buf + 4 first: forming
  // a pointer beyond one-past-the-end of the buffer is undefined behavior.
  if(end - buf < (ptrdiff_t)sizeof(uint32_t)) {
    status->code = UPB_STATUS_NEED_MORE_DATA;
    return end;
  }
  return buf + sizeof(uint32_t);
}
// Skips a fixed-length 64-bit value.
//
// Returns a pointer to the byte after the value. If fewer than 8 bytes remain
// before end, sets status->code to UPB_STATUS_NEED_MORE_DATA and returns end.
INLINE uint8_t *upb_skip_f_uint64_t(uint8_t *buf, uint8_t *end,
                                    struct upb_status *status)
{
  // Compare the remaining length rather than computing buf + 8 first: forming
  // a pointer beyond one-past-the-end of the buffer is undefined behavior.
  if(end - buf < (ptrdiff_t)sizeof(uint64_t)) {
    status->code = UPB_STATUS_NEED_MORE_DATA;
    return end;
  }
  return buf + sizeof(uint64_t);
}
/* Functions to read .proto values. *******************************************/
// Performs zig-zag decoding, which is used by sint32 and sint64.
// 32-bit zig-zag decode: the low bit of n selects the sign and the remaining
// bits carry the magnitude, so 0, 1, 2, 3, ... map to 0, -1, 1, -2, ...
INLINE int32_t upb_zzdec_32(uint32_t n) {
  const uint32_t magnitude = n >> 1;
  const int32_t sign_mask = -(int32_t)(n & 1);  // 0 or all ones.
  return magnitude ^ sign_mask;
}
// 64-bit zig-zag decode: the low bit of n selects the sign and the remaining
// bits carry the magnitude, so 0, 1, 2, 3, ... map to 0, -1, 1, -2, ...
INLINE int64_t upb_zzdec_64(uint64_t n) {
  const uint64_t magnitude = n >> 1;
  const int64_t sign_mask = -(int64_t)(n & 1);  // 0 or all ones.
  return magnitude ^ sign_mask;
}
// Use macros to define a set of two functions for each .proto type:
//
// // Reads and converts a .proto value from buf, placing it in d.
// // "end" indicates the end of the current buffer.  The return value is
// // whatever the underlying wire reader returns; problems such as
// // UPB_STATUS_NEED_MORE_DATA are reported through "status", not through
// // the return value.
// // On success, a pointer will be returned to the first byte that was
// // not consumed.
// uint8_t *upb_get_INT32(uint8_t *buf, uint8_t *end, int32_t *d,
// struct upb_status *status);
//
// // Given an already read wire value s (source), convert it to a .proto
// // value and return it.
// int32_t upb_wvtov_INT32(uint32_t s);
//
// These are the most efficient functions to call if you want to decode a value
// for a known type.

// WVTOV expands to the signature of upb_wvtov_<type> only.  Whatever follows
// the expansion decides its role: a ';' makes it a prototype, a '{ ... }'
// block makes it the definition.
#define WVTOV(type, wire_t, val_t) \
INLINE val_t upb_wvtov_ ## type(wire_t s)
// GET defines upb_get_<type>.  It reads a wire value of type wire_t with
// upb_get_<v_or_f>_<wire_t>, converts it with upb_wvtov_<type> and stores the
// result in *d.  tmp starts at 0 and *d is written unconditionally, so *d is
// assigned even when the reader reports a problem through status.
// member_name is accepted but not used by this macro.
#define GET(type, v_or_f, wire_t, val_t, member_name) \
INLINE uint8_t *upb_get_ ## type(uint8_t *buf, uint8_t *end, val_t *d, \
struct upb_status *status) { \
wire_t tmp = 0; \
uint8_t *ret = upb_get_ ## v_or_f ## _ ## wire_t(buf, end, &tmp, status); \
*d = upb_wvtov_ ## type(tmp); \
return ret; \
}
// T emits, in order: a prototype for upb_wvtov_<type> (so GET can call it),
// the upb_get_<type> definition, and finally the opening signature of
// upb_wvtov_<type>, whose body is the braces written after each T(...) use.
#define T(type, v_or_f, wire_t, val_t, member_name) \
WVTOV(type, wire_t, val_t); /* prototype for GET below */ \
GET(type, v_or_f, wire_t, val_t, member_name) \
WVTOV(type, wire_t, val_t)
// Columns: .proto type, wire reader family (v = varint, f = fixed-length),
// wire value type, decoded C type, member name (presumably the matching
// member of union upb_value; unused by the macros above -- TODO confirm).
T(INT32, v, uint32_t, int32_t, int32) { return (int32_t)s; }
T(INT64, v, uint64_t, int64_t, int64) { return (int64_t)s; }
T(UINT32, v, uint32_t, uint32_t, uint32) { return s; }
T(UINT64, v, uint64_t, uint64_t, uint64) { return s; }
T(SINT32, v, uint32_t, int32_t, int32) { return upb_zzdec_32(s); }
T(SINT64, v, uint64_t, int64_t, int64) { return upb_zzdec_64(s); }
T(FIXED32, f, uint32_t, uint32_t, uint32) { return s; }
T(FIXED64, f, uint64_t, uint64_t, uint64) { return s; }
T(SFIXED32, f, uint32_t, int32_t, int32) { return (int32_t)s; }
T(SFIXED64, f, uint64_t, int64_t, int64) { return (int64_t)s; }
T(BOOL, v, uint32_t, bool, _bool) { return (bool)s; }
T(ENUM, v, uint32_t, int32_t, int32) { return (int32_t)s; }
// DOUBLE and FLOAT reinterpret the wire bits through union upb_value instead
// of performing a numeric integer-to-floating-point conversion.
T(DOUBLE, f, uint64_t, double, _double) {
union upb_value v;
v.uint64 = s;
return v._double;
}
T(FLOAT, f, uint32_t, float, _float) {
union upb_value v;
v.uint32 = s;
return v._float;
}
// The helper macros are private to this table; remove them again.
#undef WVTOV
#undef GET
#undef T
// Parses a field tag from buf and stores its two components in *tag: the low
// three bits of the decoded varint become the wire type and the remaining
// high bits become the field number.  Returns whatever upb_get_v_uint32_t
// returns; *tag is filled in regardless of what that call reports in status.
INLINE uint8_t *parse_tag(uint8_t *buf, uint8_t *end, struct upb_tag *tag,
                          struct upb_status *status)
{
  uint32_t raw;
  uint8_t *next = upb_get_v_uint32_t(buf, end, &raw, status);
  tag->field_number = raw >> 3;
  tag->wire_type = (upb_wire_type_t)(raw & 0x07);
  return next;
}
/**
* Parses a 64-bit varint that is known to be >= 2 bytes (the inline version
* handles 1 and 2 byte varints).

@ -122,196 +122,6 @@ uint8_t *upb_parse_wire_value(uint8_t *buf, uint8_t *end, upb_wire_type_t wt,
union upb_wire_value *wv,
struct upb_status *status);
/* Functions to read wire values. *********************************************/
// Most clients will not want to use these directly.

// Out-of-line varint decoder.  The inline upb_get_v_uint64_t dispatches here
// whenever the first byte at buf has its continuation bit (0x80) set, passing
// its arguments through unchanged and returning this function's result.
uint8_t *upb_get_v_uint64_t_full(uint8_t *buf, uint8_t *end, uint64_t *val,
struct upb_status *status);
// Gets a varint (wire type: UPB_WIRE_TYPE_VARINT).
//
// Stores the decoded value in *val and returns a pointer to the first byte
// after the varint.  Single-byte varints are decoded inline; everything else
// is delegated to upb_get_v_uint64_t_full, whose result is returned as-is.
INLINE uint8_t *upb_get_v_uint64_t(uint8_t *buf, uint8_t *end, uint64_t *val,
                                   struct upb_status *status)
{
  // Fast path for the common 1-byte case.  The "buf < end" guard keeps an
  // exhausted buffer from being dereferenced here; such input is handed to
  // the full decoder instead.
  // NOTE(review): the full decoder is presumed to report
  // UPB_STATUS_NEED_MORE_DATA when buf == end -- confirm.
  if(buf < end && (*buf & 0x80) == 0) {
    *val = *buf & 0x7f;
    return buf + 1;
  }
  return upb_get_v_uint64_t_full(buf, end, val, status);
}
// Gets a varint -- called when we only need 32 bits of it.
//
// Decodes a full 64-bit varint with upb_get_v_uint64_t and stores its low 32
// bits in *val.  Returns whatever upb_get_v_uint64_t returns.
INLINE uint8_t *upb_get_v_uint32_t(uint8_t *buf, uint8_t *end,
                                   uint32_t *val, struct upb_status *status)
{
  // Start from zero, like the GET() wrappers do with their temporary: *val is
  // written unconditionally below, and must not be derived from an
  // indeterminate value should the decoder return without storing a result.
  uint64_t val64 = 0;
  uint8_t *ret = upb_get_v_uint64_t(buf, end, &val64, status);
  *val = (uint32_t)val64;  // Discard the high bits.
  return ret;
}
// Gets a fixed-length 32-bit integer (wire type: UPB_WIRE_TYPE_32BIT).
//
// Stores the four bytes at buf, interpreted least-significant byte first, in
// *val and returns a pointer to the first byte after them.  If fewer than
// four bytes remain before end, *val is left untouched, status->code is set
// to UPB_STATUS_NEED_MORE_DATA and end is returned.
INLINE uint8_t *upb_get_f_uint32_t(uint8_t *buf, uint8_t *end,
                                   uint32_t *val, struct upb_status *status)
{
  // Compare the remaining length instead of forming "buf + 4" up front: a
  // pointer more than one element past the end of the buffer is undefined
  // behavior, even if it is only compared and never dereferenced.
  if(end - buf < (int)sizeof(uint32_t)) {
    status->code = UPB_STATUS_NEED_MORE_DATA;
    return end;
  }
#if UPB_UNALIGNED_READS_OK
  // NOTE(review): this direct load presumably assumes the host byte order
  // matches the byte-by-byte assembly below -- confirm where the flag is set.
  *val = *(uint32_t*)buf;
#else
#define SHL(val, bits) ((uint32_t)(val) << (bits))
  *val = SHL(buf[0], 0) | SHL(buf[1], 8) | SHL(buf[2], 16) | SHL(buf[3], 24);
#undef SHL
#endif
  return buf + sizeof(uint32_t);
}
// Gets a fixed-length 64-bit integer (wire type: UPB_WIRE_TYPE_64BIT).
//
// Stores the eight bytes at buf, interpreted least-significant byte first, in
// *val and returns a pointer to the first byte after them.  If fewer than
// eight bytes remain before end, *val is left untouched, status->code is set
// to UPB_STATUS_NEED_MORE_DATA and end is returned.
INLINE uint8_t *upb_get_f_uint64_t(uint8_t *buf, uint8_t *end,
                                   uint64_t *val, struct upb_status *status)
{
  // Compare the remaining length instead of forming "buf + 8" up front: a
  // pointer more than one element past the end of the buffer is undefined
  // behavior, even if it is only compared and never dereferenced.
  if(end - buf < (int)sizeof(uint64_t)) {
    status->code = UPB_STATUS_NEED_MORE_DATA;
    return end;
  }
#if UPB_UNALIGNED_READS_OK
  // NOTE(review): this direct load presumably assumes the host byte order
  // matches the byte-by-byte assembly below -- confirm where the flag is set.
  *val = *(uint64_t*)buf;
#else
#define SHL(val, bits) ((uint64_t)(val) << (bits))
  *val = SHL(buf[0],  0) | SHL(buf[1],  8) | SHL(buf[2], 16) | SHL(buf[3], 24) |
         SHL(buf[4], 32) | SHL(buf[5], 40) | SHL(buf[6], 48) | SHL(buf[7], 56);
#undef SHL
#endif
  return buf + sizeof(uint64_t);
}
// Skips over a varint without decoding it.
//
// On success returns a pointer to the first byte after the varint (the byte
// following the first one whose 0x80 continuation bit is clear).  On failure
// returns end and sets status->code:
//   - UPB_STATUS_NEED_MORE_DATA if the buffer ran out after at most ten
//     bytes, all of which had the continuation bit set;
//   - UPB_ERROR_UNTERMINATED_VARINT if ten continuation bytes were seen and
//     the buffer still holds more data.
INLINE uint8_t *upb_skip_v_uint64_t(uint8_t *buf, uint8_t *end,
                                    struct upb_status *status)
{
  enum { max_varint_bytes = 10 };
  int consumed = 0;
  uint8_t last = 0x80;

  // Count bytes rather than comparing against a precomputed "buf + 10"
  // pointer: that pointer may lie beyond the end of the buffer, which is
  // undefined behavior.  Bounding the loop also stops us from walking the
  // rest of the buffer once the varint is already known to be too long.
  while(buf < end && (last & 0x80) && consumed < max_varint_bytes) {
    last = *buf++;
    consumed++;
  }

  if(last & 0x80) {
    // No terminating byte was found.  If data remains we stopped because of
    // the length limit, so the varint is malformed; otherwise we simply ran
    // out of input.
    if(buf < end)
      status->code = UPB_ERROR_UNTERMINATED_VARINT;
    else
      status->code = UPB_STATUS_NEED_MORE_DATA;
    buf = end;
  }
  return buf;
}
// Skips over a fixed-length 32-bit value without decoding it.
//
// Returns a pointer to the first byte after the value.  If fewer than four
// bytes remain before end, status->code is set to UPB_STATUS_NEED_MORE_DATA
// and end is returned.
INLINE uint8_t *upb_skip_f_uint32_t(uint8_t *buf, uint8_t *end,
                                    struct upb_status *status)
{
  // Check the remaining length first; computing "buf + 4" on a shorter
  // buffer would form an out-of-range pointer (undefined behavior).
  if(end - buf < (int)sizeof(uint32_t)) {
    status->code = UPB_STATUS_NEED_MORE_DATA;
    return end;
  }
  return buf + sizeof(uint32_t);
}
// Skips over a fixed-length 64-bit value without decoding it.
//
// Returns a pointer to the first byte after the value.  If fewer than eight
// bytes remain before end, status->code is set to UPB_STATUS_NEED_MORE_DATA
// and end is returned.
INLINE uint8_t *upb_skip_f_uint64_t(uint8_t *buf, uint8_t *end,
                                    struct upb_status *status)
{
  // Check the remaining length first; computing "buf + 8" on a shorter
  // buffer would form an out-of-range pointer (undefined behavior).
  if(end - buf < (int)sizeof(uint64_t)) {
    status->code = UPB_STATUS_NEED_MORE_DATA;
    return end;
  }
  return buf + sizeof(uint64_t);
}
/* Functions to read .proto values. *******************************************/
// Zig-zag decoding, as used by the sint32 and sint64 .proto types.  The low
// bit of n selects the sign and the remaining bits carry the magnitude, so
// the inputs 0, 1, 2, 3, ... decode to 0, -1, 1, -2, ...
INLINE int32_t upb_zzdec_32(uint32_t n)
{
  const uint32_t flip = 0 - (n & 1);  // All ones when the low bit is set.
  return (int32_t)((n >> 1) ^ flip);
}
INLINE int64_t upb_zzdec_64(uint64_t n)
{
  const uint64_t flip = 0 - (n & 1);  // All ones when the low bit is set.
  return (int64_t)((n >> 1) ^ flip);
}
// Use macros to define a set of two functions for each .proto type:
//
// // Reads and converts a .proto value from buf, placing it in d.
// // "end" indicates the end of the current buffer.  The return value is
// // whatever the underlying wire reader returns; problems such as
// // UPB_STATUS_NEED_MORE_DATA are reported through "status", not through
// // the return value.
// // On success, a pointer will be returned to the first byte that was
// // not consumed.
// uint8_t *upb_get_INT32(uint8_t *buf, uint8_t *end, int32_t *d,
// struct upb_status *status);
//
// // Given an already read wire value s (source), convert it to a .proto
// // value and return it.
// int32_t upb_wvtov_INT32(uint32_t s);
//
// These are the most efficient functions to call if you want to decode a value
// for a known type.

// WVTOV expands to the signature of upb_wvtov_<type> only.  Whatever follows
// the expansion decides its role: a ';' makes it a prototype, a '{ ... }'
// block makes it the definition.
#define WVTOV(type, wire_t, val_t) \
INLINE val_t upb_wvtov_ ## type(wire_t s)
// GET defines upb_get_<type>.  It reads a wire value of type wire_t with
// upb_get_<v_or_f>_<wire_t>, converts it with upb_wvtov_<type> and stores the
// result in *d.  tmp starts at 0 and *d is written unconditionally, so *d is
// assigned even when the reader reports a problem through status.
// member_name is accepted but not used by this macro.
#define GET(type, v_or_f, wire_t, val_t, member_name) \
INLINE uint8_t *upb_get_ ## type(uint8_t *buf, uint8_t *end, val_t *d, \
struct upb_status *status) { \
wire_t tmp = 0; \
uint8_t *ret = upb_get_ ## v_or_f ## _ ## wire_t(buf, end, &tmp, status); \
*d = upb_wvtov_ ## type(tmp); \
return ret; \
}
// T emits, in order: a prototype for upb_wvtov_<type> (so GET can call it),
// the upb_get_<type> definition, and finally the opening signature of
// upb_wvtov_<type>, whose body is the braces written after each T(...) use.
#define T(type, v_or_f, wire_t, val_t, member_name) \
WVTOV(type, wire_t, val_t); /* prototype for GET below */ \
GET(type, v_or_f, wire_t, val_t, member_name) \
WVTOV(type, wire_t, val_t)
// Columns: .proto type, wire reader family (v = varint, f = fixed-length),
// wire value type, decoded C type, member name (presumably the matching
// member of union upb_value; unused by the macros above -- TODO confirm).
T(INT32, v, uint32_t, int32_t, int32) { return (int32_t)s; }
T(INT64, v, uint64_t, int64_t, int64) { return (int64_t)s; }
T(UINT32, v, uint32_t, uint32_t, uint32) { return s; }
T(UINT64, v, uint64_t, uint64_t, uint64) { return s; }
T(SINT32, v, uint32_t, int32_t, int32) { return upb_zzdec_32(s); }
T(SINT64, v, uint64_t, int64_t, int64) { return upb_zzdec_64(s); }
T(FIXED32, f, uint32_t, uint32_t, uint32) { return s; }
T(FIXED64, f, uint64_t, uint64_t, uint64) { return s; }
T(SFIXED32, f, uint32_t, int32_t, int32) { return (int32_t)s; }
T(SFIXED64, f, uint64_t, int64_t, int64) { return (int64_t)s; }
T(BOOL, v, uint32_t, bool, _bool) { return (bool)s; }
T(ENUM, v, uint32_t, int32_t, int32) { return (int32_t)s; }
// DOUBLE and FLOAT reinterpret the wire bits through union upb_value instead
// of performing a numeric integer-to-floating-point conversion.
T(DOUBLE, f, uint64_t, double, _double) {
union upb_value v;
v.uint64 = s;
return v._double;
}
T(FLOAT, f, uint32_t, float, _float) {
union upb_value v;
v.uint32 = s;
return v._float;
}
// The helper macros are private to this table; remove them again.
#undef WVTOV
#undef GET
#undef T
// Parses a field tag from buf and stores its two components in *tag: the low
// three bits of the decoded varint become the wire type and the remaining
// high bits become the field number.  Returns whatever upb_get_v_uint32_t
// returns; *tag is filled in regardless of what that call reports in status.
INLINE uint8_t *parse_tag(uint8_t *buf, uint8_t *end, struct upb_tag *tag,
                          struct upb_status *status)
{
  uint32_t raw;
  uint8_t *next = upb_get_v_uint32_t(buf, end, &raw, status);
  tag->field_number = raw >> 3;
  tag->wire_type = (upb_wire_type_t)(raw & 0x07);
  return next;
}
#ifdef __cplusplus
} /* extern "C" */
#endif

@ -3,7 +3,7 @@
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include "upb_parse.h"
#include "upb_parse.c"
#include "upb_context.h"
int num_assertions = 0;

@ -13,7 +13,6 @@
#include <stdarg.h>
#include "descriptor.h"
#include "upb_context.h"
#include "upb_enum.h"
#include "upb_msg.h"
#include "upb_text.h"
#include "upb_array.h"

Loading…
Cancel
Save