Scheme for collecting circular refs.

"make descriptorgen" is now valgrind-clean again.
pull/13171/head
Joshua Haberman 16 years ago
parent 18291eedc3
commit 651c92ab33
  1. 7
      src/upb.h
  2. 20
      src/upb_atomic.h
  3. 167
      src/upb_def.c
  4. 27
      src/upb_def.h
  5. 5
      src/upb_mm.c
  6. 2
      src/upb_msg.c
  7. 2
      src/upb_text.c
  8. 13
      tools/upbc.c

@ -29,8 +29,11 @@ extern "C" {
// The maximum that any submessages can be nested. Matches proto2's limit.
#define UPB_MAX_NESTING 64
// The maximum number of fields that any one .proto type can have. Note that
// this is very different from the max field number. It is hard to imagine a
// scenario where more than 32k fields makes sense.
// NOTE(review): (1<<15) == 32768 does not itself fit in int16_t, so a
// upb_field_count_t cannot represent a count of exactly UPB_MAX_FIELDS --
// confirm the limit check elsewhere is exclusive, not inclusive.
#define UPB_MAX_FIELDS (1<<15)
typedef int16_t upb_field_count_t;
// Nested type names are separated by periods.
#define UPB_SYMBOL_SEPARATOR '.'

@ -50,6 +50,14 @@ INLINE bool upb_atomic_unref(upb_atomic_refcount_t *a) {
return --a->val == 0;
}
// Non-atomic implementations for single-threaded builds.

// Returns the current refcount value.
INLINE int upb_atomic_read(upb_atomic_refcount_t *a) {
  return a->val;
}

// Adds "val" to the refcount.  Returns true if the refcount was zero before
// the addition, matching the semantics of the __sync-based implementation
// (the previous version returned the truthiness of the *new* value, which
// disagreed with the atomic variant; the only visible caller ignores the
// return value, so this is a consistency fix).
INLINE bool upb_atomic_add(upb_atomic_refcount_t *a, int val) {
  bool was_zero = (a->val == 0);
  a->val += val;
  return was_zero;
}
typedef struct {
} upb_rwlock_t;
@ -84,10 +92,22 @@ INLINE bool upb_atomic_ref(upb_atomic_refcount_t *a) {
return __sync_fetch_and_add(&a->val, 1) == 0;
}
INLINE bool upb_atomic_add(upb_atomic_refcount_t *a, int n) {
return __sync_fetch_and_add(&a->val, n) == 0;
}
INLINE bool upb_atomic_unref(upb_atomic_refcount_t *a) {
return __sync_sub_and_fetch(&a->val, 1) == 0;
}
INLINE bool upb_atomic_read(upb_atomic_refcount_t *a) {
return __sync_fetch_and_add(&a->val, 0);
}
INLINE bool upb_atomic_write(upb_atomic_refcount_t *a, int val) {
a->val = val;
}
#elif defined(WIN32)
/* Windows defines atomic increment/decrement. */

@ -19,11 +19,38 @@ static int div_round_up(int numerator, int denominator) {
/* upb_def ********************************************************************/
// Defs are reference counted, but can have cycles, so we need to be capable of
// collecting the cycles. In our situation defs are immutable (so cycles
// cannot be created or destroyed post-initialization). We need to be
// thread-safe but want to avoid locks if at all possible and rely only on
// atomic operations.
//
// Our scheme is as follows. First we give each def a flag indicating whether
// it is part of a cycle or not. Because defs are immutable, this flag will
// never change. For acyclic defs, we can use a naive algorithm and avoid
// the overhead of dealing with cycles. Most defs will be acyclic.
//
// For defs that participate in cycles we keep two reference counts. One
// tracks references that come from outside the cycle (we call these external
// references), and is incremented and decremented like a regular refcount.
// The other is a count of how many defs within any of our cycles have external
// references. When a def that is part of a cycle has an external reference,
// we say that it has a "cycle reference" on every def that it is part of a
// cycle with. We free the defs with cycles when the cycle refcount drops to
// zero (which cannot happen unless the external refcount drops to zero first).
//
// Note that all refs in a cycle will have the same cycle refcount, except for
// ones that belong to more than one cycle which will have the sum of the
// cycle refcounts for all their cycles.
//
// This algorithm is relatively cheap, since it only requires extra work on
// initialization and when the regular refcount drops to zero.
static void msgdef_free(struct upb_msgdef *m);
static void enumdef_free(struct upb_enumdef *e);
static void unresolveddef_free(struct upb_unresolveddef *u);
void _upb_def_free(struct upb_def *def)
static void def_free(struct upb_def *def)
{
switch(def->type) {
case UPB_DEF_MSG:
@ -46,12 +73,35 @@ void _upb_def_free(struct upb_def *def)
}
}
// Drops one cycle ref on "def" and, recursively, on every submessage def
// reachable from it, freeing any def whose cycle refcount reaches zero.
// Only msgdefs can participate in cycles, hence the checked downcast.
// NOTE(review): there is no visited-marking here, so the recursion looks
// unbounded when the submessage graph really contains a cycle
// (A -> B -> A would recurse forever) -- confirm the caller's invariants
// guarantee termination.
static void cycle_unref(struct upb_def *def) {
struct upb_msgdef *m = upb_downcast_msgdef(def);
assert(m);
for(int i = 0; i < m->num_fields; i++) {
struct upb_fielddef *f = &m->fields[i];
// Recurse into every submessage field first, then drop our own count.
if(upb_issubmsg(f)) cycle_unref(f->def);
}
if(upb_atomic_unref(&def->cycle_refcount)) def_free(def);
}
// Called when a def's external refcount drops to zero.  Acyclic defs are
// freed immediately; cyclic defs hand off to the cycle collector.
void _upb_def_reftozero(struct upb_def *def) {
  if(!def->is_cyclic) {
    def_free(def);
    return;
  }
  // Walk the graph, dropping each cyclic def's cycle refcount and freeing
  // any member whose count falls to zero.
  cycle_unref(def);
}
// Initializes the common def header: type tag, owned name ref, and both
// refcounts.  The creator starts with the single external ref.
void upb_def_init(struct upb_def *def, enum upb_def_type type,
                  struct upb_string *fqname) {
  upb_string_ref(fqname);
  def->fqname = fqname;
  def->type = type;
  // Cycle detection happens later, once references have been resolved.
  def->is_cyclic = false;
  def->visiting_submsg = 0;
  upb_atomic_refcount_init(&def->refcount, 1);
  // Cycle refs, if any, are added by the cycle-detection pass.
  upb_atomic_refcount_init(&def->cycle_refcount, 0);
}
void upb_def_uninit(struct upb_def *def) {
@ -74,8 +124,8 @@ static struct upb_unresolveddef *upb_unresolveddef_new(struct upb_string *str) {
}
// Frees an unresolveddef and the name string it owns.
// (The visible span called upb_def_uninit() twice -- once before and once
// after the string unref -- which would double-unref the base's fqname;
// keep only the single call, after the members are released.)
static void unresolveddef_free(struct upb_unresolveddef *def) {
  upb_string_unref(def->name);
  upb_def_uninit(&def->base);
  free(def);
}
@ -161,8 +211,15 @@ static void fielddef_sort(struct upb_fielddef **defs, size_t num)
static struct upb_msgdef *msgdef_new(struct upb_fielddef **fields,
int num_fields,
struct upb_string *fqname)
struct upb_string *fqname,
struct upb_status *status)
{
if(num_fields > UPB_MAX_FIELDS) {
upb_seterr(status, UPB_STATUS_ERROR,
"Tried to create a msgdef with more than %d fields", num_fields);
free(fields);
return NULL;
}
struct upb_msgdef *m = malloc(sizeof(*m));
upb_def_init(&m->base, UPB_DEF_MSG, fqname);
upb_inttable_init(&m->itof, num_fields, sizeof(struct upb_itof_ent));
@ -207,14 +264,20 @@ static struct upb_msgdef *msgdef_new(struct upb_fielddef **fields,
// Frees a msgdef and everything it owns: the fielddefs, the string refs held
// by the name table, both lookup tables, and the base def.
// (The visible span interleaved the old and new bodies, freeing itof/ntof
// and m->fields twice and declaring the loop index twice; this is the
// single-free version.)
static void msgdef_free(struct upb_msgdef *m)
{
  for (upb_field_count_t i = 0; i < m->num_fields; i++)
    fielddef_uninit(&m->fields[i]);
  free(m->fields);
  // Free refs from the strtable.
  // TODO: once memory management for data is more developed, let the table
  // handle freeing the refs itself.
  struct upb_strtable_entry *e = upb_strtable_begin(&m->ntof);
  for(; e; e = upb_strtable_next(&m->ntof, e)) {
    upb_string_unref(e->key);
  }
  upb_strtable_free(&m->ntof);
  upb_inttable_free(&m->itof);
  upb_def_uninit(&m->base);
  free(m);
}
@ -223,6 +286,7 @@ static void upb_msgdef_resolve(struct upb_msgdef *m, struct upb_fielddef *f,
(void)m;
upb_def_unref(f->def);
f->def = def;
// We will later remove this ref if it is a part of a cycle.
upb_def_ref(def);
}
@ -258,9 +322,17 @@ static struct upb_enumdef *enumdef_new(google_protobuf_EnumDescriptorProto *ed,
}
// Frees an enumdef: the string refs held by the name table, both lookup
// tables, and the base def.
// (The visible span called upb_def_uninit() both before and after the table
// teardown, which would double-unref the base's fqname; keep only the final
// call.)
static void enumdef_free(struct upb_enumdef *e) {
  // Free refs from the strtable.
  // TODO: once memory management for data is more developed, let the table
  // handle freeing the refs itself.
  struct upb_strtable_entry *ent = upb_strtable_begin(&e->ntoi);
  for(; ent; ent = upb_strtable_next(&e->ntoi, ent)) {
    upb_string_unref(ent->key);
  }
  upb_strtable_free(&e->ntoi);
  upb_inttable_free(&e->iton);
  upb_def_uninit(&e->base);
  free(e);
}
@ -414,12 +486,16 @@ static void insert_message(struct upb_strtable *t,
fielddefs[i] = fielddef_new(fd);
}
if(sort) fielddef_sort(fielddefs, d->field->len);
e.def = UPB_UPCAST(msgdef_new(fielddefs, d->field->len, fqname));
upb_strtable_insert(t, &e.e);
e.def = UPB_UPCAST(msgdef_new(fielddefs, d->field->len, fqname, status));
for (int i = 0; i < num_fields; i++) {
fielddef_uninit(fielddefs[i]);
free(fielddefs[i]);
}
free(fielddefs);
if(!upb_ok(status)) return;
upb_strtable_insert(t, &e.e);
/* Add nested messages and enums. */
if(d->set_flags.has.nested_type)
@ -431,6 +507,55 @@ static void insert_message(struct upb_strtable *t,
insert_enum(t, d->enum_type->elements[i], fqname, status);
}
// Traverses the cycle again to count its length and adjust reference counts.
// Returns the total length of the cycle.
//
// "m" is the current node, "cycle_base" is the def at which the cycle was
// detected, and "partial_cycle_len" counts the steps taken so far.  The
// path is retraced via visiting_submsg, which find_cycles() left set to
// (field index + 1) along the current DFS path.
static int process_cycle(struct upb_msgdef *m, struct upb_msgdef *cycle_base,
int partial_cycle_len) {
if(m == cycle_base && partial_cycle_len > 0) {
// We have walked all the way around the cycle; start unwinding.
return partial_cycle_len;
} else {
partial_cycle_len++;
// visiting_submsg is 1-based; 0 means "not on the DFS path".
struct upb_fielddef *f = &m->fields[UPB_UPCAST(m)->visiting_submsg - 1];
struct upb_msgdef *sub_m = upb_downcast_msgdef(f->def);
assert(sub_m);
int cycle_len = process_cycle(sub_m, cycle_base, partial_cycle_len);
// Since we know that each def has exactly one external ref (the symtab's
// ref), the number of defs in the cycle is the number of cycle refs we
// should add to each def in the cycle.
upb_atomic_add(&UPB_UPCAST(m)->cycle_refcount, cycle_len);
// Remove the regular ref that comes from the cycle.
bool zero = upb_atomic_unref(&UPB_UPCAST(m)->refcount);
assert(!zero); // The symtab should hold an external ref.
(void)zero; // Silence "zero not used" warnings on non-debug builds.
return cycle_len;
}
}
// Depth-first search, detecting and marking cycles.
// NOTE(review): process_cycle() adjusts cycle_refcount, but nothing visible
// in this hunk ever sets def->is_cyclic = true; confirm that happens
// elsewhere, otherwise _upb_def_reftozero() will never take the cycle path.
static void find_cycles(struct upb_def *def) {
// Only message defs can participate in cycles.
struct upb_msgdef *m = upb_downcast_msgdef(def);
if(!m) return;
if(def->visiting_submsg) {
// Cycle detected!
process_cycle(m, m, 0);
} else if(upb_atomic_read(&def->cycle_refcount)) {
// This def is part of at least one cycle, but we have already discovered
// this node (and consequently all of its cycles) by some other path.
} else {
for(int i = 0; i < m->num_fields; i++) {
struct upb_fielddef *f = &m->fields[i];
// Mark the path we are currently on so we can easily retrace it in
// process_cycle().  Stored 1-based so that 0 means "not on the path".
UPB_UPCAST(m)->visiting_submsg = i + 1;
if(upb_issubmsg(f)) find_cycles(f->def);
}
UPB_UPCAST(m)->visiting_submsg = 0;
}
}
void addfd(struct upb_strtable *addto, struct upb_strtable *existingdefs,
google_protobuf_FileDescriptorProto *fd, bool sort,
struct upb_status *status)
@ -457,17 +582,20 @@ void addfd(struct upb_strtable *addto, struct upb_strtable *existingdefs,
if(should_unref) upb_string_unref(pkg);
if(!upb_ok(status)) return;
if(!upb_ok(status)) {
// TODO: make sure we don't leak any memory in this case.
return;
}
/* TODO: handle extensions and services. */
// Attempt to resolve all references.
struct symtab_ent *e;
for(e = upb_strtable_begin(addto); e; e = upb_strtable_next(addto, &e->e)) {
if(e->def->type != UPB_DEF_MSG) continue;
struct upb_msgdef *m = upb_downcast_msgdef(e->def);
if(!m) continue;
struct upb_string *base = e->e.key;
for(unsigned int i = 0; i < m->num_fields; i++) {
for(upb_field_count_t i = 0; i < m->num_fields; i++) {
struct upb_fielddef *f = &m->fields[i];
if(!upb_hasdef(f)) continue; // No resolving necessary.
struct upb_string *name = upb_downcast_unresolveddef(f->def)->name;
@ -487,6 +615,11 @@ void addfd(struct upb_strtable *addto, struct upb_strtable *existingdefs,
upb_msgdef_resolve(m, f, found->def);
}
}
// Find cycles and update refcounts appropriately.
for(e = upb_strtable_begin(addto); e; e = upb_strtable_next(addto, &e->e)) {
find_cycles(e->def);
}
}
/* upb_symtab *****************************************************************/
@ -575,8 +708,8 @@ void upb_symtab_addfds(struct upb_symtab *s,
struct upb_status *status)
{
if(fds->set_flags.has.file) {
/* Insert new symbols into a temporary table until we have verified that
* the descriptor is valid. */
// Insert new symbols into a temporary table until we have verified that
// the descriptor is valid.
struct upb_strtable tmp;
upb_strtable_init(&tmp, 0, sizeof(struct symtab_ent));
upb_rwlock_rdlock(&s->lock);
@ -590,7 +723,7 @@ void upb_symtab_addfds(struct upb_symtab *s,
}
upb_rwlock_unlock(&s->lock);
/* Everything was successfully added, copy from the tmp symtable. */
// Everything was successfully added, copy from the tmp symtable.
struct symtab_ent *e;
{
upb_rwlock_wrlock(&s->lock);

@ -46,21 +46,36 @@ enum upb_def_type {
UPB_DEF_UNRESOLVED
};
// This typedef is more space-efficient than declaring an enum var directly.
typedef uint8_t upb_def_type_t;

// Common members shared by every kind of def.
// (The visible span declared the "type" member twice -- once as the old
// "enum upb_def_type" and once as the new "upb_def_type_t" -- which cannot
// compile; keep only the compact typedef'd member.)
struct upb_def {
  struct upb_string *fqname;  // Fully qualified.  We own a ref.
  upb_def_type_t type;
  // These members that pertain to cyclic collection could technically go in
  // upb_msgdef instead of here, because only messages can be involved in
  // cycles.  However, putting them here is free from a space perspective
  // because structure alignment will otherwise leave three bytes empty between
  // type and refcount.  It also makes ref and unref more efficient, because
  // we don't have to downcast to msgdef before checking the is_cyclic flag.
  bool is_cyclic;  // Is this def part of a cycle?
  upb_field_count_t visiting_submsg;  // Helper for depth-first search.
  // See .c file for description of refcounting scheme.
  upb_atomic_refcount_t refcount;
  upb_atomic_refcount_t cycle_refcount;
};

// Called when the external refcount drops to zero.
void _upb_def_reftozero(struct upb_def *def); // Must not be called directly!
// Call to ref/deref a def.
INLINE void upb_def_ref(struct upb_def *def) {
  upb_atomic_ref(&def->refcount);
}
// (The visible span contained both the old "_upb_def_free" line and the new
// "_upb_def_reftozero" line, which would decrement the refcount twice per
// call; keep only the reftozero dispatch, which handles both the acyclic
// and cyclic cases.)
INLINE void upb_def_unref(struct upb_def *def) {
  if(upb_atomic_unref(&def->refcount)) _upb_def_reftozero(def);
}
// Downcasts. They are checked only if asserts are enabled.
@ -93,8 +108,8 @@ struct upb_fielddef {
struct upb_string *name;
// These are set only when this fielddef is part of a msgdef.
uint32_t byte_offset; // Where in a upb_msg to find the data.
uint16_t field_index; // Indicates set bit.
uint32_t byte_offset; // Where in a upb_msg to find the data.
upb_field_count_t field_index; // Indicates set bit.
// For the case of an enum or a submessage, points to the def for that type.
// We own a ref on this def.
@ -156,7 +171,7 @@ struct upb_msgdef {
struct upb_def base;
struct upb_msg *default_msg; // Message with all default values set.
size_t size;
uint32_t num_fields;
upb_field_count_t num_fields;
uint32_t set_flags_bytes;
uint32_t num_required_fields; // Required fields have the lowest set bytemasks.
struct upb_fielddef *fields; // We have exclusive ownership of these.

@ -18,8 +18,7 @@ static void upb_mm_destroy(union upb_value_ptr p, upb_mm_ptrtype type)
}
void upb_msg_destroy(struct upb_msg *msg) {
uint32_t i;
for(i = 0; i < msg->def->num_fields; i++) {
for(upb_field_count_t i = 0; i < msg->def->num_fields; i++) {
struct upb_fielddef *f = &msg->def->fields[i];
if(!upb_msg_isset(msg, f) || !upb_field_ismm(f)) continue;
upb_mm_destroy(upb_msg_getptr(msg, f), upb_field_ptrtype(f));
@ -196,7 +195,7 @@ void upb_mm_msgclear(struct upb_mm_ref *from_msg_ref, struct upb_fielddef *f)
void upb_mm_msgclear_all(struct upb_mm_ref *from)
{
struct upb_msgdef *def = from->p.msg->def;
for(uint32_t i = 0; i < def->num_fields; i++) {
for(upb_field_count_t i = 0; i < def->num_fields; i++) {
struct upb_fielddef *f = &def->fields[i];
if(!upb_field_ismm(f)) continue;
upb_mm_msgclear(from, f);

@ -401,7 +401,7 @@ bool upb_msg_eql(struct upb_msg *msg1, struct upb_msg *msg2, bool recursive)
* contain only primitive values (not strings, arrays, submessages, or
* padding) and memcmp the masked messages. */
for(uint32_t i = 0; i < m->num_fields; i++) {
for(upb_field_count_t i = 0; i < m->num_fields; i++) {
struct upb_fielddef *f = &m->fields[i];
bool msg1set = upb_msg_isset(msg1, f);
bool msg2set = upb_msg_isset(msg2, f);

@ -90,7 +90,7 @@ static void printmsg(struct upb_text_printer *printer, struct upb_msg *msg,
FILE *stream)
{
struct upb_msgdef *m = msg->def;
for(uint32_t i = 0; i < m->num_fields; i++) {
for(upb_field_count_t i = 0; i < m->num_fields; i++) {
struct upb_fielddef *f = &m->fields[i];
if(!upb_msg_isset(msg, f)) continue;
union upb_value_ptr p = upb_msg_getptr(msg, f);

@ -170,7 +170,7 @@ static void write_h(struct upb_def *defs[], int num_defs, char *outfile_name,
fputs(" union {\n", stream);
fprintf(stream, " uint8_t bytes[%" PRIu32 "];\n", m->set_flags_bytes);
fputs(" struct {\n", stream);
for(uint32_t j = 0; j < m->num_fields; j++) {
for(upb_field_count_t j = 0; j < m->num_fields; j++) {
static char* labels[] = {"", "optional", "required", "repeated"};
struct upb_fielddef *f = &m->fields[j];
fprintf(stream, " bool " UPB_STRFMT ":1; /* = %" PRIu32 ", %s. */\n",
@ -178,7 +178,7 @@ static void write_h(struct upb_def *defs[], int num_defs, char *outfile_name,
}
fputs(" } has;\n", stream);
fputs(" } set_flags;\n", stream);
for(uint32_t j = 0; j < m->num_fields; j++) {
for(upb_field_count_t j = 0; j < m->num_fields; j++) {
struct upb_fielddef *f = &m->fields[j];
if(upb_issubmsg(f)) {
struct upb_string *type_name = upb_strdup(f->def->fqname);
@ -288,7 +288,7 @@ static void add_strings_from_value(union upb_value_ptr p,
static void add_strings_from_msg(struct upb_msg *msg, struct upb_strtable *t)
{
struct upb_msgdef *m = msg->def;
for(uint32_t i = 0; i < m->num_fields; i++) {
for(upb_field_count_t i = 0; i < m->num_fields; i++) {
struct upb_fielddef *f = &m->fields[i];
if(!upb_msg_isset(msg, f)) continue;
union upb_value_ptr p = upb_msg_getptr(msg, f);
@ -345,7 +345,7 @@ static void add_value(union upb_value_ptr p, struct upb_fielddef *f,
static void add_submsgs(struct upb_msg *msg, struct upb_strtable *t)
{
struct upb_msgdef *m = msg->def;
for(uint32_t i = 0; i < m->num_fields; i++) {
for(upb_field_count_t i = 0; i < m->num_fields; i++) {
struct upb_fielddef *f = &m->fields[i];
if(!upb_msg_isset(msg, f)) continue;
union upb_value_ptr p = upb_msg_getptr(msg, f);
@ -502,7 +502,7 @@ static void write_message_c(struct upb_msg *msg, char *cident, char *hfile_name,
void *msgdata = val.msg;
/* Print set flags. */
fputs(" {.set_flags = {.has = {\n", stream);
for(unsigned int j = 0; j < m->num_fields; j++) {
for(upb_field_count_t j = 0; j < m->num_fields; j++) {
struct upb_fielddef *f = &m->fields[j];
fprintf(stream, " ." UPB_STRFMT " = ", UPB_STRARG(f->name));
if(upb_msg_isset(msgdata, f))
@ -513,7 +513,7 @@ static void write_message_c(struct upb_msg *msg, char *cident, char *hfile_name,
}
fputs(" }},\n", stream);
/* Print msg data. */
for(unsigned int j = 0; j < m->num_fields; j++) {
for(upb_field_count_t j = 0; j < m->num_fields; j++) {
struct upb_fielddef *f = &m->fields[j];
union upb_value val = upb_value_read(upb_msg_getptr(msgdata, f), f->type);
fprintf(stream, " ." UPB_STRFMT " = ", UPB_STRARG(f->name));
@ -583,6 +583,7 @@ static void write_message_c(struct upb_msg *msg, char *cident, char *hfile_name,
/* Free tables. */
for(e = upb_strtable_begin(&types); e; e = upb_strtable_next(&types, &e->e)) {
upb_string_unref(e->e.key);
upb_string_unref(e->cident);
free(e->values);
free(e->arrays);

Loading…
Cancel
Save