More work on upb_context.

pull/13171/head
Joshua Haberman 16 years ago
parent 3a67a1e9f9
commit 2da3b081c4
  1. 16
      upb.h
  2. 180
      upb_context.c
  3. 27
      upb_context.h
  4. 60
      upb_enum.h
  5. 20
      upb_msg.h
  6. 9
      upb_table.h

16
upb.h

@ -29,6 +29,9 @@ extern "C" {
/* The maximum depth to which submessages can be nested. Matches proto2's limit. */
#define UPB_MAX_NESTING 64
/* The maximum number of fields that any one .proto type can have. */
#define UPB_MAX_FIELDS (1<<16)
/* Represents a string or bytes. */
struct upb_string {
/* We expect the data to be 8-bit clean (uint8_t), but char* is such an
@ -97,6 +100,19 @@ struct upb_tag {
upb_wire_type_t wire_type;
};
/* Identifies which kind of definition a symbol refers to.  In the code visible
 * here only UPB_SYM_MESSAGE and UPB_SYM_ENUM are ever assigned to an entry. */
enum upb_symbol_type {
UPB_SYM_MESSAGE,
UPB_SYM_ENUM,
UPB_SYM_SERVICE,
UPB_SYM_EXTENSION
};
/* Pointer to the definition behind a symbol.  Every member is a pointer, so
 * only one is meaningful at a time; presumably an accompanying
 * enum upb_symbol_type says which -- confirm at each use.  There is no member
 * corresponding to UPB_SYM_EXTENSION. */
union upb_symbol_ref {
struct upb_msg *msg;
struct upb_enum *_enum;
struct upb_svc *svc;
};
/* Status codes used as a return value. */
typedef enum upb_status {
UPB_STATUS_OK = 0,

@ -8,8 +8,10 @@
#include <string.h>
#include "descriptor.h"
#include "upb_context.h"
#include "upb_enum.h"
#include "upb_msg.h"
int memrchr(char *data, char c, size_t len)
static int memrchr(char *data, char c, size_t len)
{
int off = len-1;
while(off > 0 && data[off] != c) --off;
@ -20,7 +22,7 @@ bool upb_context_init(struct upb_context *c)
{
upb_strtable_init(&c->symtab, 16, sizeof(struct upb_symtab_entry));
/* Add all the types in descriptor.proto so we can parse descriptors. */
if(!upb_context_addfd(c, &google_protobuf_filedescriptor, UPB_ONREDEF_ERROR))
if(!upb_context_addfd(c, &google_protobuf_filedescriptor))
return false; /* Indicates that upb is buggy or corrupt. */
return true;
}
@ -36,9 +38,9 @@ struct upb_symtab_entry *upb_context_lookup(struct upb_context *c,
return upb_strtable_lookup(&c->symtab, symbol);
}
struct upb_symtab_entry *upb_context_resolve(struct upb_context *c,
struct upb_string *base,
struct upb_string *symbol)
static struct upb_symtab_entry *resolve(struct upb_strtable *t,
struct upb_string *base,
struct upb_string *symbol)
{
if(base->byte_len + symbol->byte_len + 1 >= UPB_SYM_MAX_LENGTH ||
symbol->byte_len == 0) return NULL;
@ -47,7 +49,7 @@ struct upb_symtab_entry *upb_context_resolve(struct upb_context *c,
/* Symbols starting with '.' are absolute, so we do a single lookup. */
struct upb_string sym_str = {.data = symbol->data+1,
.byte_len = symbol->byte_len-1};
return upb_context_lookup(c, &sym_str);
return upb_strtable_lookup(t, &sym_str);
} else {
/* Remove components from base until we find an entry or run out. */
char sym[UPB_SYM_MAX_LENGTH+1];
@ -60,7 +62,7 @@ struct upb_symtab_entry *upb_context_resolve(struct upb_context *c,
memcpy(sym + baselen + 1, symbol->data, symbol->byte_len);
sym_str.byte_len = baselen + symbol->byte_len + 1;
struct upb_symtab_entry *e = upb_context_lookup(c, &sym_str);
struct upb_symtab_entry *e = upb_strtable_lookup(t, &sym_str);
if (e) return e;
else if(baselen == 0) return NULL; /* No more scopes to try. */
@ -69,23 +71,161 @@ struct upb_symtab_entry *upb_context_resolve(struct upb_context *c,
}
}
union upb_symbol_ref resolve2(struct upb_strtable *t1, struct upb_strtable *t2,
struct upb_string *base, struct upb_string *sym,
enum upb_symbol_type expected_type) {
union upb_symbol_ref nullref = {.msg = NULL};
struct upb_symtab_entry *e = resolve(t1, base, sym);
if(e == NULL) e = resolve(t2, base, sym);
if(e && e->type == expected_type) return e->ref;
else return nullref;
}
/* Resolves symbol relative to base using the context's own symbol table.
 * Returns whatever the file-local resolve() helper returns for that table. */
struct upb_symtab_entry *upb_context_resolve(struct upb_context *c,
                                             struct upb_string *base,
                                             struct upb_string *symbol)
{
  struct upb_strtable *symbols = &c->symtab;
  return resolve(symbols, base, symbol);
}
/* Builds a qualified name from a scope and a member name:
 *   join("Foo.Bar", "Baz") -> "Foo.Bar.Baz"
 *   join("", "Baz") -> "Baz"
 * The result is freshly allocated (and not NUL-terminated; its length is in
 * byte_len).  Caller owns the returned string and must free it. */
static struct upb_string join(struct upb_string *base, struct upb_string *name) {
  /* Offset at which name starts in the result: just past "base." when there
   * is a base, otherwise at the very beginning. */
  size_t name_off = (base->byte_len > 0) ? base->byte_len + 1 : 0;
  size_t total = name_off + name->byte_len;
  struct upb_string joined = {.byte_len=total, .data=malloc(total)};

  if(name_off > 0) {
    memcpy(joined.data, base->data, base->byte_len);
    joined.data[base->byte_len] = UPB_CONTEXT_SEPARATOR;
  }
  memcpy(joined.data + name_off, name->data, name->byte_len);
  return joined;
}
/* Adds an entry for the enum described by ed to t, keyed by its
 * fully-qualified name (base joined with ed->name).
 *
 * Returns false, leaving t unchanged, if ed has no name, if the
 * fully-qualified name is already present in t, or if the upb_enum cannot be
 * allocated.  Returns true once the entry has been inserted. */
static bool insert_enum(struct upb_strtable *t,
                        google_protobuf_EnumDescriptorProto *ed,
                        struct upb_string *base)
{
  if(!ed->set_flags.has.name) return false;

  /* We own this and must free it on destruct. */
  struct upb_string fqname = join(base, ed->name);

  /* Redefinition within a FileDescriptorProto is not allowed. */
  if(upb_strtable_lookup(t, &fqname)) {
    free(fqname.data);
    return false;
  }

  struct upb_symtab_entry e;
  e.e.key = fqname;
  e.type = UPB_SYM_ENUM;
  e.ref._enum = malloc(sizeof(*e.ref._enum));
  if(!e.ref._enum) {
    /* Out of memory: do not hand a NULL enum to upb_enum_init, and do not
     * leak the key we just built. */
    free(fqname.data);
    return false;
  }
  upb_enum_init(e.ref._enum, ed);
  upb_strtable_insert(t, &e.e);
  return true;
}
/* Adds the message described by d to t, then recursively adds its nested
 * messages and enums.  Every entry is keyed by its fully-qualified name: base
 * joined with d->name for the message itself, and that fully-qualified name
 * serves as the base for everything nested inside it.
 *
 * Returns false if d has no name, if the fully-qualified name is already
 * present in t, if the upb_msg cannot be allocated, or if any nested
 * definition fails to insert.  Entries inserted before a nested failure are
 * left in t. */
static bool insert_message(struct upb_strtable *t,
                           google_protobuf_DescriptorProto *d,
                           struct upb_string *base)
{
  if(!d->set_flags.has.name) return false;

  /* We own this and must free it on destruct. */
  struct upb_string fqname = join(base, d->name);

  /* Redefinition within a FileDescriptorProto is not allowed.  The table is
   * keyed by fully-qualified name, so that is what must be looked up: a
   * lookup of the bare d->name would miss duplicates that live inside a
   * package or parent message, and could falsely collide with an unrelated
   * symbol whose full name happens to equal this message's short name. */
  if(upb_strtable_lookup(t, &fqname)) {
    free(fqname.data);
    return false;
  }

  struct upb_symtab_entry e;
  e.e.key = fqname;
  e.type = UPB_SYM_MESSAGE;
  e.ref.msg = malloc(sizeof(*e.ref.msg));
  if(!e.ref.msg) {
    /* Out of memory: do not hand a NULL message to upb_msg_init, and do not
     * leak the key we just built. */
    free(fqname.data);
    return false;
  }
  upb_msg_init(e.ref.msg, d);
  upb_strtable_insert(t, &e.e);

  /* Add nested messages and enums. */
  if(d->set_flags.has.nested_type) {
    for(unsigned int i = 0; i < d->nested_type->len; i++) {
      if(!insert_message(t, d->nested_type->elements[i], &fqname))
        return false;
    }
  }

  if(d->set_flags.has.enum_type) {
    for(unsigned int i = 0; i < d->enum_type->len; i++) {
      if(!insert_enum(t, d->enum_type->elements[i], &fqname))
        return false;
    }
  }

  return true;
}
/* Adds the message and enum definitions in fd to the context c.  Definitions
 * are first inserted into a temporary table; they are copied into c->symtab
 * only after every staged name has been checked against c->symtab and every
 * message/enum-typed field has been resolved.  Returns true on success and
 * false on any error.
 *
 * NOTE(review): this span appears to contain lines from two versions of the
 * function (two endings of the parameter list, and a loop that inserts
 * directly into c->symtab next to loops that insert into tmp) -- presumably
 * the before/after lines of a diff; confirm which are current. */
bool upb_context_addfd(struct upb_context *c,
google_protobuf_FileDescriptorProto *fd,
int onredef)
google_protobuf_FileDescriptorProto *fd)
{
/* Top-level names are qualified with the file's package when it is set;
 * otherwise the base is an empty string. */
struct upb_string package = {.byte_len=0};
if(fd->set_flags.has.package) package = *fd->package;
/* We want the entire add operation to be atomic, so we initially insert into
 * this temporary map of symbols. Once we have verified that there are no
 * errors (all symbols can be resolved and no illegal redefinitions occurred)
 * only then do we insert into the context's table. */
struct upb_strtable tmp;
/* Size hint for tmp: counts only the top-level messages, enums and services
 * of fd (nested definitions are not included in the count). */
int symcount = (fd->set_flags.has.message_type ? fd->message_type->len : 0) +
(fd->set_flags.has.enum_type ? fd->enum_type->len : 0) +
(fd->set_flags.has.service ? fd->service->len : 0);
upb_strtable_init(&tmp, symcount, sizeof(struct upb_symtab_entry));
/* TODO: properly handle redefinitions and unresolvable symbols. */
if(fd->set_flags.has.message_type) {
for(unsigned int i = 0; i < fd->message_type->len; i++) {
struct google_protobuf_DescriptorProto *d = fd->message_type->elements[i];
if(!d->set_flags.has.name) return false;
struct upb_symtab_entry e;
e.e.key = *d->name;
e.type = UPB_SYM_MESSAGE;
e.p.msg = malloc(sizeof(*e.p.msg));
upb_msg_init(e.p.msg, d);
upb_strtable_insert(&c->symtab, &e.e);
/* Stage every top-level message (and, through insert_message, everything
 * nested inside it) and every top-level enum in tmp. */
if(fd->set_flags.has.message_type)
for(unsigned int i = 0; i < fd->message_type->len; i++)
if(!insert_message(&tmp, fd->message_type->elements[i], &package))
goto error;
if(fd->set_flags.has.enum_type)
for(unsigned int i = 0; i < fd->enum_type->len; i++)
if(!insert_enum(&tmp, fd->enum_type->elements[i], &package))
goto error;
/* TODO: handle extensions and services. */
/* Attempt to resolve all references. */
struct upb_symtab_entry *e;
for(e = upb_strtable_begin(&tmp); e; e = upb_strtable_next(&tmp, &e->e)) {
if(upb_strtable_lookup(&c->symtab, &e->e.key))
goto error; /* Redefinition prohibited. */
if(e->type == UPB_SYM_MESSAGE) {
struct upb_msg *m = e->ref.msg;
for(unsigned int i = 0; i < m->num_fields; i++) {
struct upb_msg_field *f = &m->fields[i];
/* NOTE(review): this local fd is the field's descriptor and shadows the
 * function parameter fd for the rest of the loop body. */
google_protobuf_FieldDescriptorProto *fd = f->descriptor;
union upb_symbol_ref ref;
/* Lookups use the message's own fully-qualified name as the base scope and
 * search c->symtab before tmp.
 * NOTE(review): the symbol passed is fd->name; presumably the name of the
 * referenced type is intended rather than the field's own name -- confirm
 * against the FieldDescriptorProto definition in descriptor.h. */
if(fd->type == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_MESSAGE)
ref = resolve2(&c->symtab, &tmp, &e->e.key, fd->name, UPB_SYM_MESSAGE);
else if(fd->type == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_ENUM)
ref = resolve2(&c->symtab, &tmp, &e->e.key, fd->name, UPB_SYM_ENUM);
else
continue; /* No resolving necessary. */
/* resolve2 reports failure through a NULL msg member, and ref is a union of
 * pointers, so this test also covers a failed enum lookup. */
if(!ref.msg) goto error;
upb_msg_ref(m, f, &ref);
}
}
}
/* TODO: handle enums, extensions, and services. */
/* All references were successfully resolved -- add to the symbol table. */
for(e = upb_strtable_begin(&tmp); e; e = upb_strtable_next(&tmp, &e->e))
upb_strtable_insert(&c->symtab, &e->e);
return true;
/* NOTE(review): no cleanup happens on this path; nothing visible in this
 * function releases tmp or the entries staged in it (on success or failure)
 * -- confirm who is meant to free them. */
error:
return false;
}

@ -19,21 +19,10 @@
extern "C" {
#endif
/* Identifies which kind of definition a symbol table entry refers to.
 * NOTE(review): an identical enum also appears in the upb.h hunk of this
 * change -- presumably the definition was moved there; confirm only one
 * copy remains. */
enum upb_symbol_type {
UPB_SYM_MESSAGE,
UPB_SYM_ENUM,
UPB_SYM_SERVICE,
UPB_SYM_EXTENSION
};
/* One entry of the context's symbol table: the string-table entry header
 * (whose key is the symbol's name), the kind of symbol, and a pointer to the
 * symbol's definition.
 * NOTE(review): both an anonymous union `p` and `union upb_symbol_ref ref`
 * appear below with the same pointer members -- presumably the before/after
 * lines of a diff, with `ref` being the current member; confirm. */
struct upb_symtab_entry {
struct upb_strtable_entry e;
enum upb_symbol_type type;
union {
struct upb_msg *msg;
struct upb_enum *_enum;
struct upb_svc *svc;
} p;
union upb_symbol_ref ref;
};
struct upb_context {
@ -73,23 +62,15 @@ struct upb_symtab_entry *upb_context_lookup(struct upb_context *c,
/* Adding symbols. ************************************************************/
/* Enum controlling what happens if a symbol is redefined.
 * NOTE(review): one visible upb_context_addfd prototype takes `int onredef`
 * and another takes no such parameter -- presumably this enum belongs to the
 * older interface; confirm whether it is still used. */
enum upb_onredef {
UPB_ONREDEF_REPLACE, /* Replace existing definition (must be same type). */
UPB_ONREDEF_KEEP, /* Keep existing definition, ignore new one. */
UPB_ONREDEF_ERROR /* Error on redefinition. */
};
/* Adds the definitions in the given file descriptor to this context. All
* types that are referenced from fd must have previously been defined (or be
* defined in fd). onredef controls the behavior in the case that fd attempts
* to define a type that is already defined.
* defined in fd). fd may not attempt to define any names that are already
* defined in this context.
*
* Caller retains ownership of fd, but the context will contain references to
* it, so it must outlive the context. */
bool upb_context_addfd(struct upb_context *c,
google_protobuf_FileDescriptorProto *fd,
int onredef);
google_protobuf_FileDescriptorProto *fd);
/* Adds the serialized FileDescriptorSet proto contained in fdss to the context,
* and adds symbol table entries for all the objects defined therein. onredef

@ -0,0 +1,60 @@
/*
* upb - a minimalist implementation of protocol buffers.
*
* Copyright (c) 2009 Joshua Haberman. See LICENSE for details.
*
* upb_enum is a simple object that allows run-time reflection over the values
* defined within an enum. */
#ifndef UPB_ENUM_H_
#define UPB_ENUM_H_
#include <stdint.h>
#include "upb_table.h"
/* Forward declaration from descriptor.h. */
struct google_protobuf_EnumDescriptorProto;
struct google_protobuf_EnumValueDescriptorProto;
/* Run-time description of one enum type: the descriptor it was built from
 * plus two lookup tables over its values. */
struct upb_enum {
/* Descriptor this enum was initialized from; not owned by the upb_enum. */
struct google_protobuf_EnumDescriptorProto *descriptor;
/* Value name -> number; entries are struct upb_enum_ntoi_entry. */
struct upb_strtable nametoint;
/* Value number -> name; entries are struct upb_enum_iton_entry. */
struct upb_inttable inttoname;
};
/* Entry of upb_enum.nametoint: keyed by the value's name (through the
 * string-table entry header), carrying the value's number. */
struct upb_enum_ntoi_entry {
struct upb_strtable_entry e;
uint32_t value;
};
/* Entry of upb_enum.inttoname: keyed by the value's number (through the
 * int-table entry header), carrying a pointer to the value's name.  The
 * string is the one stored in the descriptor; it is not copied. */
struct upb_enum_iton_entry {
struct upb_inttable_entry e;
struct upb_string *string;
};
/* Initializes and frees an enum, respectively. Caller retains ownership of
* ed, but it must outlive e. */
INLINE void upb_enum_init(struct upb_enum *e,
struct google_protobuf_EnumDescriptorProto *ed) {
int num_values = ed->set_flags.has.value ? ed->value->len : 0;
e->descriptor = ed;
upb_strtable_init(&e->nametoint, num_values, sizeof(struct upb_enum_ntoi_entry));
upb_inttable_init(&e->inttoname, num_values, sizeof(struct upb_enum_iton_entry));
for(int i = 0; i < num_values; i++) {
google_protobuf_EnumValueDescriptorProto *value = ed->value->elements[i];
struct upb_enum_ntoi_entry ntoi_entry = {.e = {.key = *value->name},
.value = value->number};
struct upb_enum_iton_entry iton_entry = {.e = {.key = value->number},
.string = value->name};
upb_strtable_insert(&e->nametoint, &ntoi_entry.e);
upb_inttable_insert(&e->inttoname, &iton_entry.e);
}
}
/* Releases both lookup tables held by e.  Neither e itself nor the
 * descriptor it points at is freed here. */
INLINE void upb_enum_free(struct upb_enum *e) {
  upb_inttable_free(&e->inttoname);
  upb_strtable_free(&e->nametoint);
}
#endif /* UPB_ENUM_H_ */

@ -30,8 +30,8 @@
* management semantics can be used with the format as defined here.
*/
#ifndef PBSTRUCT_H_
#define PBSTRUCT_H_
#ifndef UPB_MSG_H_
#define UPB_MSG_H_
#include <stdbool.h>
#include <stddef.h>
@ -51,18 +51,12 @@ struct google_protobuf_FieldDescriptorProto;
/* Structure definition. ******************************************************/
/* Fields that reference other types have pointers to the other type.
 * NOTE(review): the field structs in this hunk show both this union and
 * union upb_symbol_ref for their `ref` member -- presumably this union is the
 * older form being replaced; confirm. */
union upb_msg_field_ref {
struct upb_msg *msg; /* Set if type == MESSAGE */
struct upb_enum *_enum; /* Set if type == ENUM */
};
/* Structure that describes a single field in a message.
 * NOTE(review): field_index and ref each appear twice below with different
 * types -- presumably the before/after lines of a diff (uint32_t ->
 * uint16_t, upb_msg_field_ref -> upb_symbol_ref); confirm which is current. */
struct upb_msg_field {
struct google_protobuf_FieldDescriptorProto *descriptor;
uint32_t byte_offset; /* Where to find the data. */
uint32_t field_index; /* Indexes upb_msg.fields. Also indicates set bit */
union upb_msg_field_ref ref;
uint16_t field_index; /* Indexes upb_msg.fields. Also indicates set bit */
union upb_symbol_ref ref;
};
/* Structure that describes a single .proto message type. */
@ -87,9 +81,9 @@ struct upb_msg {
/* Compact per-field record: offset, index, a copy of the field's type, and a
 * reference to the field's message/enum definition.
 * NOTE(review): field_index and ref each appear twice below with different
 * types -- presumably the before/after lines of a diff (24-bit bitfield ->
 * uint16_t, upb_msg_field_ref -> upb_symbol_ref); confirm which is current. */
struct upb_abbrev_msg_field {
uint32_t byte_offset; /* Where to find the data. */
uint32_t field_index:24; /* Indexes upb_msg.fields. Also indicates set bit */
uint16_t field_index; /* Indexes upb_msg.fields. Also indicates set bit */
upb_field_type_t type; /* Copied from descriptor for cache-friendliness. */
union upb_msg_field_ref ref;
union upb_symbol_ref ref;
};
struct upb_fieldsbynum_entry {
@ -282,4 +276,4 @@ INLINE void upb_msg_clear(void *s, struct upb_msg *m)
} /* extern "C" */
#endif
#endif /* PBSTRUCT_H_ */
#endif /* UPB_MSG_H_ */

@ -102,6 +102,15 @@ INLINE void *upb_inttable_lookup(struct upb_inttable *t,
void *upb_strtable_lookup(struct upb_strtable *t, struct upb_string *key);
/* Provides iteration over the table. The order in which the entries are
* returned is undefined. Insertions invalidate iterators. The _next
* functions return NULL when the end has been reached. */
void *upb_inttable_begin(struct upb_inttable *t);
void *upb_inttable_next(struct upb_inttable *t, struct upb_inttable_entry *cur);
void *upb_strtable_begin(struct upb_strtable *t);
void *upb_strtable_next(struct upb_strtable *t, struct upb_strtable_entry *cur);
#ifdef __cplusplus
} /* extern "C" */
#endif

Loading…
Cancel
Save