Lots of documentation, cleanup, and fixed memory leaks.

pull/13171/head
Joshua Haberman 16 years ago
parent dd2094537a
commit 5235966ed5
  1. 2
      Makefile
  2. 81
      upb.h
  3. 17
      upb_context.c
  4. 20
      upb_context.h
  5. 102
      upb_msg.c
  6. 339
      upb_msg.h
  7. 27
      upb_parse.h
  8. 5
      upbc.c

@ -3,7 +3,7 @@
CC=gcc
CXX=g++
CFLAGS=-std=c99
CPPFLAGS=-O0 -Wall -Wextra -pedantic -g -DUPB_UNALIGNED_READS_OK -fomit-frame-pointer
CPPFLAGS=-Wall -Wextra -pedantic -g -DUPB_UNALIGNED_READS_OK -fomit-frame-pointer
OBJ=upb_parse.o upb_table.o upb_msg.o upb_enum.o upb_context.o descriptor.o
all: $(OBJ) test_table tests upbc
clean:

81
upb.h

@ -1,7 +1,9 @@
/*
* upb - a minimalist implementation of protocol buffers.
*
* Copyright (c) 2009 Joshua Haberman. See LICENSE for details.
*
* This file contains shared definitions that are widely used across upb.
*/
#ifndef UPB_H_
@ -36,29 +38,14 @@ extern "C" {
/* The maximum number of fields that any one .proto type can have. */
#define UPB_MAX_FIELDS (1<<16)
/* Nested type names are separated by periods. */
#define UPB_SYMBOL_SEPARATOR '.'
#define UPB_SYMBOL_MAX_LENGTH 256
/* Yields a void* to element i of the array starting at base, where every
 * element is m bytes wide. The expansion is wrapped in parentheses so that it
 * always behaves as one expression (e.g. under sizeof or a postfix operator).
 * i and m are multiplied in their own promoted type before the pointer add. */
#define UPB_INDEX(base, i, m) ((void*)((char*)(base) + ((i)*(m))))
INLINE uint32_t max(uint32_t a, uint32_t b) { return a > b ? a : b; }
/* A list of types as they are encoded on-the-wire. */
enum upb_wire_type {
UPB_WIRE_TYPE_VARINT = 0,
UPB_WIRE_TYPE_64BIT = 1,
UPB_WIRE_TYPE_DELIMITED = 2,
UPB_WIRE_TYPE_START_GROUP = 3,
UPB_WIRE_TYPE_END_GROUP = 4,
UPB_WIRE_TYPE_32BIT = 5
};
typedef uint8_t upb_wire_type_t;
/* A value as it is encoded on-the-wire, except delimited, which is handled
* separately. */
union upb_wire_value {
uint64_t varint;
uint64_t _64bit;
uint32_t _32bit;
};
/* Value type as defined in a .proto file. The values of this are defined by
* google_protobuf_FieldDescriptorProto_Type (from descriptor.proto).
* Note that descriptor.proto reserves "0" for errors, and we use it to
@ -76,48 +63,36 @@ struct upb_type_info {
uint8_t expected_wire_type;
};
/* This array is indexed by upb_field_type_t. */
/* Contains information for all .proto types. Indexed by upb_field_type_t. */
extern struct upb_type_info upb_type_info[];
/* A scalar value as described in a .proto file */
/* A pointer to a .proto value. The owner must have an out-of-band way of
* knowing the type, so it knows which union member to use. */
union upb_value {
double _double;
float _float;
int32_t int32;
int64_t int64;
double _double;
float _float;
int32_t int32;
int64_t int64;
uint32_t uint32;
uint64_t uint64;
bool _bool;
bool _bool;
struct upb_string **string;
struct upb_array **array;
void *message;
};
union upb_value_ptr {
double *_double;
float *_float;
int32_t *int32;
int64_t *int64;
double *_double;
float *_float;
int32_t *int32;
int64_t *int64;
uint32_t *uint32;
uint64_t *uint64;
bool *_bool;
bool *_bool;
struct upb_string **string;
struct upb_array **array;
void **message;
void *_void;
};
/* The number of a field, eg. "optional string foo = 3". */
typedef int32_t upb_field_number_t;
/* A tag occurs before each value on-the-wire. */
struct upb_tag {
upb_field_number_t field_number;
upb_wire_type_t wire_type;
};
enum upb_symbol_type {
UPB_SYM_MESSAGE,
UPB_SYM_ENUM,
UPB_SYM_SERVICE,
UPB_SYM_EXTENSION
void **message;
void *_void;
};
union upb_symbol_ref {
@ -126,7 +101,11 @@ union upb_symbol_ref {
struct upb_svc *svc;
};
/* Status codes used as a return value. */
/* The number of a field, eg. "optional string foo = 3". */
typedef int32_t upb_field_number_t;
/* Status codes used as a return value. Codes >0 are not fatal and can be
* resumed. */
typedef enum upb_status {
UPB_STATUS_OK = 0,

@ -58,8 +58,9 @@ static void free_symtab(struct upb_strtable *t)
void upb_context_free(struct upb_context *c)
{
free_symtab(&c->symtab);
for(size_t i = 0; i < c->fds_len; i++)
upb_msgdata_free(c->fds[i], c->fds_msg, true);
free_symtab(&c->psymtab);
for(size_t i = 0; i < c->fds_len; i++) free(c->fds[i]);
free(c->fds);
}
@ -73,23 +74,23 @@ static struct upb_symtab_entry *resolve(struct upb_strtable *t,
struct upb_string *base,
struct upb_string *symbol)
{
if(base->byte_len + symbol->byte_len + 1 >= UPB_SYM_MAX_LENGTH ||
if(base->byte_len + symbol->byte_len + 1 >= UPB_SYMBOL_MAX_LENGTH ||
symbol->byte_len == 0) return NULL;
if(symbol->ptr[0] == UPB_CONTEXT_SEPARATOR) {
if(symbol->ptr[0] == UPB_SYMBOL_SEPARATOR) {
/* Symbols starting with '.' are absolute, so we do a single lookup. */
struct upb_string sym_str = {.ptr = symbol->ptr+1,
.byte_len = symbol->byte_len-1};
return upb_strtable_lookup(t, &sym_str);
} else {
/* Remove components from base until we find an entry or run out. */
char sym[UPB_SYM_MAX_LENGTH+1];
char sym[UPB_SYMBOL_MAX_LENGTH+1];
struct upb_string sym_str = {.ptr = sym};
int baselen = base->byte_len;
while(1) {
/* sym_str = base[0...base_len] + UPB_CONTEXT_SEPARATOR + symbol */
/* sym_str = base[0...base_len] + UPB_SYMBOL_SEPARATOR + symbol */
memcpy(sym, base->ptr, baselen);
sym[baselen] = UPB_CONTEXT_SEPARATOR;
sym[baselen] = UPB_SYMBOL_SEPARATOR;
memcpy(sym + baselen + 1, symbol->ptr, symbol->byte_len);
sym_str.byte_len = baselen + symbol->byte_len + 1;
@ -97,7 +98,7 @@ static struct upb_symtab_entry *resolve(struct upb_strtable *t,
if (e) return e;
else if(baselen == 0) return NULL; /* No more scopes to try. */
baselen = memrchr(base->ptr, UPB_CONTEXT_SEPARATOR, baselen);
baselen = memrchr(base->ptr, UPB_SYMBOL_SEPARATOR, baselen);
}
}
}
@ -130,7 +131,7 @@ static struct upb_string join(struct upb_string *base, struct upb_string *name)
if(base->byte_len > 0) {
/* nested_base = base + '.' + d->name */
memcpy(joined.ptr, base->ptr, base->byte_len);
joined.ptr[base->byte_len] = UPB_CONTEXT_SEPARATOR;
joined.ptr[base->byte_len] = UPB_SYMBOL_SEPARATOR;
memcpy(&joined.ptr[base->byte_len+1], name->ptr, name->byte_len);
} else {
memcpy(joined.ptr, name->ptr, name->byte_len);

@ -2,8 +2,7 @@
* upb - a minimalist implementation of protocol buffers.
*
* A context represents a namespace of proto definitions, sort of like an
* interpreter's symbol table. It is empty when first constructed, with the
* exception of built-in types (those defined in descriptor.proto). Clients
* interpreter's symbol table. It is empty when first constructed. Clients
* add definitions to the context by supplying unserialized or serialized
* descriptors (as defined in descriptor.proto).
*
@ -22,6 +21,16 @@ struct google_protobuf_FileDescriptorProto;
extern "C" {
#endif
/* Definitions. ***************************************************************/
/* The symbol table maps names to various kinds of symbols. This enum tags
 * which kind of symbol a given table entry refers to. */
enum upb_symbol_type {
UPB_SYM_MESSAGE,    /* The symbol names a message type. */
UPB_SYM_ENUM,       /* The symbol names an enum type. */
UPB_SYM_SERVICE,    /* The symbol names a service. */
UPB_SYM_EXTENSION   /* The symbol names an extension. */
};
struct upb_symtab_entry {
struct upb_strtable_entry e;
enum upb_symbol_type type;
@ -39,17 +48,12 @@ struct upb_context {
struct google_protobuf_FileDescriptorSet **fds;
};
/* Initializes and frees a upb_context, respectively. Newly initialized
* contexts will always have the types in descriptor.proto defined. */
/* Initializes and frees a upb_context, respectively. */
bool upb_context_init(struct upb_context *c);
void upb_context_free(struct upb_context *c);
/* Looking up symbols. ********************************************************/
/* Nested type names are separated by periods. */
#define UPB_CONTEXT_SEPARATOR '.'
#define UPB_SYM_MAX_LENGTH 256
/* Resolves the given symbol using the rules described in descriptor.proto,
* namely:
*

@ -9,6 +9,7 @@
#include "upb_msg.h"
#include "upb_parse.h"
/* Rounds p up to the next multiple of t (p is returned unchanged when it is
 * already a multiple). Every use of an argument is parenthesized so that
 * compound expressions such as ALIGN_UP(a + b, n) expand correctly. Each
 * argument is evaluated more than once, so do not pass expressions with side
 * effects. t must be non-zero. */
#define ALIGN_UP(p, t) ((p) % (t) == 0 ? (p) : (p) + ((t) - ((p) % (t))))
static int div_round_up(int numerator, int denominator) {
@ -55,6 +56,7 @@ bool upb_msg_init(struct upb_msg *m, struct google_protobuf_DescriptorProto *d)
/* We count on the caller to keep this pointer alive. */
m->field_descriptors[i] = d->field->elements[i];
}
/* TODO: re-enable proper sorting once the compiler is sorted out. */
//qsort(m->field_descriptors, m->num_fields, sizeof(void*), compare_fields);
size_t max_align = 0;
@ -77,7 +79,7 @@ bool upb_msg_init(struct upb_msg *m, struct google_protobuf_DescriptorProto *d)
/* Insert into the tables. Note that f->ref will be uninitialized, even in
* the tables' copies of *f, which is why we must update them separately
* when the references are resolved. */
* in upb_msg_ref() below. */
struct upb_fieldsbynum_entry nument = {.e = {.key = fd->number}, .f = *f};
struct upb_fieldsbyname_entry strent = {.e = {.key = *fd->name}, .f = *f};
upb_inttable_insert(&m->fields_by_num, &nument.e);
@ -96,15 +98,6 @@ void upb_msg_free(struct upb_msg *m)
free(m->field_descriptors);
}
void *upb_msg_new(struct upb_msg *m)
{
void *msg = malloc(m->size);
memset(msg, 0, m->size); /* Clear all pointers, values, and set bits. */
return msg;
}
//void upb_msg_free(void *msg, struct upb_msg *m, bool free_submsgs);
void upb_msg_ref(struct upb_msg *m, struct upb_msg_field *f,
union upb_symbol_ref ref) {
struct google_protobuf_FieldDescriptorProto *d =
@ -119,23 +112,35 @@ void upb_msg_ref(struct upb_msg *m, struct upb_msg_field *f,
str_e->f.ref = ref;
}
/* Memory management *********************************************************/
/* Our memory management scheme is as follows:
*
* All pointers to dynamic memory (strings, arrays, and submessages) are
* expected to be good pointers if they are non-zero, *regardless* of whether
* that field's bit is set! That way we can reuse the memory even if the field
* is unset and then set later. */
/* For our memory-managed strings and arrays we store extra information
* (compared to a plain upb_string or upb_array). But the data starts with
* a upb_string and upb_array, so we can overlay onto the regular types. */
/* A memory-managed string. The embedded upb_string must stay the first member:
 * the message stores a struct upb_string*, and free_value() casts that pointer
 * back to a struct mm_upb_string* to reach the bookkeeping fields below. */
struct mm_upb_string {
struct upb_string s;
/* Track the allocated size, so we know when we need to reallocate. */
uint32_t size;
/* Our allocated data. Stored separately so that clients can point s.ptr to
* a referenced string, but we can reuse this data later. */
char *data;
};
/* A memory-managed array. Like mm_upb_string, it begins with the plain type
 * (struct upb_array) so that it can be overlaid onto a regular upb_array. */
struct mm_upb_array {
struct upb_array a;
/* Track the allocated size, so we know when we need to reallocate. */
uint32_t size;
/* NOTE(review): presumably the allocated element storage, by analogy with
 * mm_upb_string.data -- confirm against upb_msg_reuse_array(). */
char *data;
};
static uint32_t round_up_to_pow2(uint32_t v)
{
#if 0 // __GNUC__
return (1U<<31) >> (__builtin_clz(v-1)+1);
#else
/* cf. http://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2 */
v--;
v |= v >> 1;
@ -145,7 +150,54 @@ static uint32_t round_up_to_pow2(uint32_t v)
v |= v >> 16;
v++;
return v;
#endif
}
/* Allocates message data for the message type m, with every byte zeroed so
 * that all pointers, values, and set bits start out clear. The block is
 * m->size bytes long.
 *
 * Returns the new message data, or NULL if the allocation fails. */
void *upb_msgdata_new(struct upb_msg *m)
{
  /* calloc zero-fills the block and, unlike malloc followed by memset, never
   * writes through a NULL pointer when the allocation fails. */
  return calloc(1, m->size);
}
/* Releases whatever dynamic memory a single value owns.
 *
 * p points at the value's slot (inside message data or inside an array) and f
 * describes the field it belongs to. String and bytes values free both their
 * data buffer and their mm_upb_string header. Submessage values are freed
 * recursively, but only when free_submsgs is true. Every other type owns no
 * dynamic memory, so nothing happens. */
static void free_value(union upb_value_ptr p, struct upb_msg_field *f,
                       bool free_submsgs)
{
  const bool is_string_like =
      f->type == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_STRING ||
      f->type == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_BYTES;

  if(is_string_like) {
    /* The stored upb_string* is really the head of an mm_upb_string. */
    struct mm_upb_string *owned = (void*)*p.string;
    if(!owned) return;
    free(owned->data);
    free(owned);
    return;
  }

  if(f->type == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_MESSAGE &&
     free_submsgs) {
    upb_msgdata_free(*p.message, f->ref.msg, free_submsgs);
  }
}
void upb_msgdata_free(void *data, struct upb_msg *m, bool free_submsgs)
{
if(!data) return; /* A very free-like thing to do. */
for(unsigned int i = 0; i < m->num_fields; i++) {
struct upb_msg_field *f = &m->fields[i];
union upb_value_ptr p = upb_msg_getptr(data, f);
if(f->label == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_REPEATED) {
if(*p.array) {
for(uint32_t j = 0; j < (*p.array)->len; j++)
free_value(upb_array_getelementptr(*p.array, j, f->type),
f, free_submsgs);
free((*p.array)->elements._void);
free(*p.array);
}
} else {
free_value(p, f, free_submsgs);
}
}
free(data);
}
void upb_msg_reuse_str(struct upb_string **str, uint32_t size)
@ -185,11 +237,11 @@ void upb_msg_reuse_strref(struct upb_string **str) { upb_msg_reuse_str(str, 0);
void upb_msg_reuse_submsg(void **msg, struct upb_msg *m)
{
if(!*msg) *msg = upb_msg_new(m);
if(!*msg) *msg = upb_msgdata_new(m);
else upb_msg_clear(*msg, m); /* Clears set bits, leaves pointers. */
}
/* Parser. */
/* Serialization/Deserialization. ********************************************/
struct parse_frame_data {
struct upb_msg *m;
@ -217,7 +269,7 @@ static upb_field_type_t tag_cb(struct upb_parse_state *s, struct upb_tag *tag,
static union upb_value_ptr get_value_ptr(void *data, struct upb_msg_field *f)
{
union upb_value_ptr p = upb_msg_get_ptr(data, f);
union upb_value_ptr p = upb_msg_getptr(data, f);
if(f->label == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_REPEATED) {
size_t len = upb_msg_is_set(data, f) ? (*p.array)->len : 0;
upb_msg_reuse_array(p.array, len+1, f->type);
@ -270,25 +322,18 @@ static void submsg_start_cb(struct upb_parse_state *_s, void *user_field_desc)
if(!s->merge) upb_msg_clear(frame->data, f->ref.msg);
}
static void submsg_end_cb(struct upb_parse_state *s)
{
struct parse_frame_data *frame = (void*)&s->top->user_data;
}
void upb_msg_parse_init(struct upb_msg_parse_state *s, void *msg,
struct upb_msg *m, bool merge, bool byref)
{
upb_parse_init(&s->s, sizeof(struct parse_frame_data));
s->merge = merge;
s->byref = byref;
if(!merge && msg == NULL) msg = upb_msg_new(m);
if(!merge && msg == NULL) msg = upb_msgdata_new(m);
set_frame_data(&s->s, m, msg);
s->s.tag_cb = tag_cb;
s->s.value_cb = value_cb;
s->s.str_cb = str_cb;
s->s.submsg_start_cb = submsg_start_cb;
s->s.submsg_end_cb = submsg_end_cb;
}
void upb_msg_parse_free(struct upb_msg_parse_state *s)
@ -305,10 +350,11 @@ upb_status_t upb_msg_parse(struct upb_msg_parse_state *s,
void *upb_alloc_and_parse(struct upb_msg *m, struct upb_string *str, bool byref)
{
struct upb_msg_parse_state s;
void *msg = upb_msg_new(m);
void *msg = upb_msgdata_new(m);
upb_msg_parse_init(&s, msg, m, false, byref);
size_t read;
upb_status_t status = upb_msg_parse(&s, str->ptr, str->byte_len, &read);
upb_msg_parse_free(&s);
if(status == UPB_STATUS_OK && read == str->byte_len) {
return msg;
} else {
@ -370,7 +416,7 @@ void upb_msg_print(void *data, struct upb_msg *m, FILE *stream)
if(upb_msg_is_set(data, f)) fputs(" (set): ", stream);
else fputs(" (NOT set): ", stream);
union upb_value_ptr p = upb_msg_get_ptr(data, f);
union upb_value_ptr p = upb_msg_getptr(data, f);
if(f->label == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_REPEATED) {
if(*p.array) {
fputc('[', stream);

@ -3,46 +3,100 @@
*
* Copyright (c) 2009 Joshua Haberman. See LICENSE for details.
*
* upb_msg contains a full description of a message as defined in a .proto file.
* It supports many features and operations for dealing with proto messages:
* A upb_msg provides a full description of a message as defined in a .proto
* file. It supports many features and operations for dealing with proto
* messages:
* - reflection over .proto types at runtime (list fields, get names, etc).
* - an in-memory byte-level format for efficiently storing and accessing msgs.
* - serializing and deserializing from the in-memory format to a protobuf.
* - optional memory management for handling strings, arrays, and submessages.
*
* Throughout this file, the following convention is used:
* - "struct upb_msg *m" describes a message type (name, list of fields, etc).
* - "void *data" is an actual message stored using the in-memory format.
*
* The in-memory format is very much like a C struct that you can define at
* run-time, but also supports reflection. Like C structs it supports
* offset-based access, as opposed to the much slower name-based lookup. The
* format represents both the values themselves and bits describing whether each
* field is set or not.
* format stores both the values themselves and bits describing whether each
* field is set or not. For example:
*
* parsed message Foo {
* optional bool a = 1;
* repeated uint32 b = 2;
* optional Bar c = 3;
* }
*
* The in-memory layout for this message on a 32-bit machine will be something
* like:
*
* Foo
* +------------------------+
* | set_flags a:1, b:1, c:1|
* +------------------------+
* | bool a (1 byte) |
* +------------------------+
* | padding (3 bytes) |
* +------------------------+ upb_array
* | upb_array* b (4 bytes) | ----> +----------------------------+
* +------------------------+ | uint32* elements (4 bytes) | ---+
* | Bar* c (4 bytes) | +----------------------------+ |
* +------------------------+ | uint32 size (4 bytes) | |
* +----------------------------+ |
* |
* -----------------------------------------------------------------+
* |
* V
* uint32 array
* +----+----+----+----+----+----+
* | e1 | e2 | e3 | e4 | e5 | e6 |
* +----+----+----+----+----+----+
*
* And the corresponding C structure (as emitted by the proto compiler) would be:
*
* The upb compiler emits C structs that mimic this definition exactly, so that
* you can access the same hunk of memory using either this run-time
* reflection-supporting interface or a C struct that was generated by the upb
* compiler.
* struct Foo {
* union {
* uint8_t bytes[1];
* struct {
* bool a:1;
* bool b:1;
* bool c:1;
* } has;
* } set_flags;
* bool a;
* upb_uint32_array *b;
* Bar *c;
* }
*
* Like C structs the format depends on the endianness of the host machine, so
* it is not suitable for exchanging across machines of differing endianness.
* But there is no reason to do that -- the protobuf serialization format is
* designed already for serialization/deserialization, and is more compact than
* this format. This format is designed to allow the fastest possible random
* access of individual fields.
* Because the C struct emitted by the upb compiler uses exactly the same
* byte-level format as the reflection interface, you can access the same hunk
* of memory either way. The C struct provides maximum performance and static
* type safety; upb_msg provides flexibility.
*
* Note that clients need not use the memory management facilities defined here.
* They are for convenience only -- clients wishing to do their own memory
* management may do so (allowing clients to perform advanced techniques like
* reference-counting, garbage collection, and string references). Different
* The in-memory format has no interoperability guarantees whatsoever, except
* that a single version of upb will interoperate with itself. Don't even
* think about persisting the in-memory format or sending it anywhere. That's
* what serialized protobufs are for! The in-memory format is just that -- an
* in-memory representation that allows for fast access.
*
* The in-memory format is carefully designed to *not* mandate any particular
* memory management scheme. This should make it easier to integrate with
* existing memory management schemes, or to perform advanced techniques like
* reference counting, garbage collection, and string references. Different
* clients can read each other's messages regardless of what memory management
* scheme each is using.
*
* A memory management scheme is provided for convenience, and it is used by
* default by the stock message parser. Clients can substitute their own
* memory management scheme into this parser without any loss of generality
* or performance.
*/
#ifndef UPB_MSG_H_
#define UPB_MSG_H_
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <string.h>
#include "upb.h"
#include "upb_table.h"
@ -59,7 +113,10 @@ struct google_protobuf_FieldDescriptorProto;
/* Message definition. ********************************************************/
/* Structure that describes a single field in a message. This structure is very
* consciously designed to fit into 12/16 bytes (32/64 bit, respectively). */
* consciously designed to fit into 12/16 bytes (32/64 bit, respectively),
* because copies of this struct are in the hash table that is read in the
* critical path of parsing. Minimizing the size of this struct increases
* cache-friendliness. */
struct upb_msg_field {
union upb_symbol_ref ref;
uint32_t byte_offset; /* Where to find the data. */
@ -102,7 +159,7 @@ INLINE struct google_protobuf_FieldDescriptorProto *upb_msg_field_descriptor(
return m->field_descriptors[f->field_index];
}
/* Initialize and free a upb_msg. Caller retains ownership of d, but the msg
/* Initializes/frees a upb_msg. Caller retains ownership of d, but the msg
* will contain references to it, so it must outlive the msg. Note that init
* does not resolve upb_msg_field.ref -- the caller should do that
* post-initialization by calling upb_msg_ref() below. */
@ -114,9 +171,9 @@ void upb_msg_free(struct upb_msg *m);
* mutually-recursive ways, this step must be separated from initialization. */
void upb_msg_ref(struct upb_msg *m, struct upb_msg_field *f, union upb_symbol_ref ref);
/* While these are written to be as fast as possible, it will still be faster
* to cache the results of this lookup if possible. These return NULL if no
* such field is found. */
/* Looks up a field by name or number. While these are written to be as fast
* as possible, it will still be faster to cache the results of this lookup if
* possible. These return NULL if no such field is found. */
INLINE struct upb_msg_field *upb_msg_fieldbynum(struct upb_msg *m,
uint32_t number) {
struct upb_fieldsbynum_entry *e = upb_inttable_lookup(
@ -130,33 +187,69 @@ INLINE struct upb_msg_field *upb_msg_fieldbyname(struct upb_msg *m,
return e ? &e->f : NULL;
}
/* "Set" flag reading and writing. *******************************************/
/* Returns the index of the byte (from the start of the message data) that
 * holds the "set" flag for the field with the given index: eight flags are
 * packed into each byte. */
INLINE size_t upb_isset_offset(uint32_t field_index) {
  const uint32_t byte_index = field_index >> 3;
  return byte_index;
}
/* Returns a one-bit mask selecting the "set" flag of the given field within
 * its flag byte (the byte located by upb_isset_offset). */
INLINE uint8_t upb_isset_mask(uint32_t field_index) {
  const uint32_t bit = field_index & 7u;
  return (uint8_t)(1u << bit);
}
/* Functions for reading and writing the "set" flags in the msg. Note that
* these do not perform memory management associated with any dynamic memory
* these fields may be referencing. These *only* set and test the flags. */
/* Turns on the "set" flag of field f in the message data s. */
INLINE void upb_msg_set(void *s, struct upb_msg_field *f)
{
  char *flag_byte = (char*)s + upb_isset_offset(f->field_index);
  *flag_byte |= upb_isset_mask(f->field_index);
}
/* Turns off the "set" flag of field f in the message data s. */
INLINE void upb_msg_unset(void *s, struct upb_msg_field *f)
{
  char *flag_byte = (char*)s + upb_isset_offset(f->field_index);
  *flag_byte &= ~upb_isset_mask(f->field_index);
}
/* Reports whether the "set" flag of field f is on in the message data s. */
INLINE bool upb_msg_is_set(void *s, struct upb_msg_field *f)
{
  const char *flag_byte = (const char*)s + upb_isset_offset(f->field_index);
  return *flag_byte & upb_isset_mask(f->field_index);
}
/* Returns true if every required field of the message data s is set.
 *
 * This relies on the flags of the m->num_required_fields required fields
 * occupying the lowest-numbered "set" bits, i.e. the leading bits of s.
 *
 * Bytes that are entirely covered by required fields must equal 0xFF. In the
 * final, partially covered byte only the required bits are examined, so that
 * optional fields sharing that byte cannot change the answer. When there are
 * no required bits left over (including when the message has no required
 * fields at all) no further byte is read. */
INLINE bool upb_msg_all_required_fields_set(void *s, struct upb_msg *m)
{
  const uint8_t *flags = (const uint8_t*)s;
  int remaining = m->num_required_fields;

  /* Whole bytes of required flags. */
  for(; remaining >= 8; remaining -= 8) {
    if(*flags++ != 0xFF) return false;
  }
  if(remaining <= 0) return true;

  /* Trailing partial byte: test only the low `remaining` bits. */
  const uint8_t required_mask = (uint8_t)((1u << remaining) - 1u);
  return (*flags & required_mask) == required_mask;
}
/* Marks every field of the message data s as unset by zeroing its "set" flag
 * bytes, which are the first m->set_flags_bytes bytes of s. Nothing past
 * those bytes is written, so stored values and pointers are left as they
 * were. */
INLINE void upb_msg_clear(void *s, struct upb_msg *m)
{
memset(s, 0, m->set_flags_bytes);
}
/* Scalar (non-array) data access. ********************************************/
/* Returns a pointer to a specific field in a message. */
INLINE union upb_value_ptr upb_msg_getptr(void *data, struct upb_msg_field *f) {
union upb_value_ptr p = {._void = ((char*)data + f->byte_offset)};
return p;
}
/* Arrays. ********************************************************************/
/* Represents an array (a repeated field) of any type. The interpretation of
* the data in the array depends on the type. */
struct upb_array {
union {
double *_double;
float *_float;
int32_t *int32;
int64_t *int64;
uint32_t *uint32;
uint64_t *uint64;
bool *_bool;
struct upb_string **string;
void **submsg;
void *_void;
} elements;
union upb_value_ptr elements;
uint32_t len; /* Measured in elements. */
};
/* These are all overlays on upb_array, pointers between them can be cast. */
#define UPB_DEFINE_ARRAY_TYPE(name, type) \
struct name ## _array { \
type *elements; \
uint32_t len; \
};
/* Returns a pointer to an array element. */
INLINE union upb_value_ptr upb_array_getelementptr(
struct upb_array *arr, uint32_t n, upb_field_type_t type)
{
@ -166,6 +259,13 @@ INLINE union upb_value_ptr upb_array_getelementptr(
return ptr;
}
/* Defines "struct <name>_array", a typed array whose elements have the given
 * C type. These are all overlays on upb_array, pointers between them can be
 * cast: the members (an elements pointer followed by a uint32_t len) appear
 * in the same order as in struct upb_array. The expansion already ends in a
 * semicolon, so uses of the macro are not followed by one. */
#define UPB_DEFINE_ARRAY_TYPE(name, type) \
struct name ## _array { \
type *elements; \
uint32_t len; \
};
UPB_DEFINE_ARRAY_TYPE(upb_double, double)
UPB_DEFINE_ARRAY_TYPE(upb_float, float)
UPB_DEFINE_ARRAY_TYPE(upb_int32, int32_t)
@ -175,6 +275,7 @@ UPB_DEFINE_ARRAY_TYPE(upb_uint64, uint64_t)
UPB_DEFINE_ARRAY_TYPE(upb_bool, bool)
UPB_DEFINE_ARRAY_TYPE(upb_string, struct upb_string*)
/* Defines an array of a specific message type. */
#define UPB_MSG_ARRAY(msg_type) struct msg_type ## _array
#define UPB_DEFINE_MSG_ARRAY(msg_type) \
UPB_MSG_ARRAY(msg_type) { \
@ -182,52 +283,42 @@ UPB_DEFINE_ARRAY_TYPE(upb_string, struct upb_string*)
uint32_t len; \
};
/* Accessors for primitive types. ********************************************/
/* Memory management *********************************************************/
/* For each primitive type we define a set of three functions:
*
* // For fetching out of a msg (s points to the raw msg data).
* int32_t *upb_msg_get_int32_ptr(void *s, struct upb_msg_field *f);
* int32_t upb_msg_get_int32(void *s, struct upb_msg_field *f);
* void upb_msg_set_int32(void *s, struct upb_msg_field *f, int32_t val);
*
* These do no existence checks, bounds checks, or type checks. */
#define UPB_DEFINE_ACCESSORS(INLINE, name, ctype) \
INLINE ctype *upb_msg_get_ ## name ## _ptr( \
void *s, struct upb_msg_field *f) { \
return (ctype*)((char*)s + f->byte_offset); \
} \
INLINE ctype upb_msg_get_ ## name( \
void *s, struct upb_msg_field *f) { \
return *upb_msg_get_ ## name ## _ptr(s, f); \
} \
INLINE void upb_msg_set_ ## name( \
void *s, struct upb_msg_field *f, ctype val) { \
*upb_msg_get_ ## name ## _ptr(s, f) = val; \
}
/* One important note about these memory management routines: they must be used
* completely or not at all (for each message). In other words, you can't
* allocate your own message and then free it with upb_msgdata_free. As
* another example, you can't point a field to your own string and then call
* upb_msg_reuse_str. */
/* Allocates and frees message data, respectively. Newly allocated data is
* initialized to empty. Freeing a message always frees string data, but
* the client can decide whether or not submessages should be deleted. */
void *upb_msgdata_new(struct upb_msg *m);
void upb_msgdata_free(void *data, struct upb_msg *m, bool free_submsgs);
/* Given a pointer to the appropriate field of the message or array, these
* functions will lazily allocate memory for a string, array, or submessage.
* If the previously allocated memory is big enough, it will reuse it without
* re-allocating. See upb_msg.c for example usage. */
/* Reuse a string of at least the given size. */
void upb_msg_reuse_str(struct upb_string **str, uint32_t size);
/* Like the previous, but assumes that the string will be by reference, so
* doesn't allocate memory for the string itself. */
void upb_msg_reuse_strref(struct upb_string **str);
UPB_DEFINE_ACCESSORS(INLINE, double, double)
UPB_DEFINE_ACCESSORS(INLINE, float, float)
UPB_DEFINE_ACCESSORS(INLINE, int32, int32_t)
UPB_DEFINE_ACCESSORS(INLINE, int64, int64_t)
UPB_DEFINE_ACCESSORS(INLINE, uint32, uint32_t)
UPB_DEFINE_ACCESSORS(INLINE, uint64, uint64_t)
UPB_DEFINE_ACCESSORS(INLINE, bool, bool)
UPB_DEFINE_ACCESSORS(INLINE, bytes, struct upb_string*)
UPB_DEFINE_ACCESSORS(INLINE, string, struct upb_string*)
UPB_DEFINE_ACCESSORS(INLINE, submsg, void*)
UPB_DEFINE_ACCESSORS(INLINE, array, struct upb_array*)
INLINE union upb_value_ptr upb_msg_get_ptr(
void *data, struct upb_msg_field *f) {
union upb_value_ptr p = {._void = ((char*)data + f->byte_offset)};
return p;
}
/* Reuse an array of at least the given size, with the given type. */
void upb_msg_reuse_array(struct upb_array **arr, uint32_t size,
upb_field_type_t t);
/* Memory management *********************************************************/
/* Reuse a submessage of the given type. */
void upb_msg_reuse_submsg(void **msg, struct upb_msg *m);
void *upb_msg_new(struct upb_msg *m);
/* Serialization/Deserialization. ********************************************/
/* This is all just a layer on top of the stream-oriented facility in
* upb_parse.h. */
struct upb_msg_parse_state {
struct upb_parse_state s;
@ -236,70 +327,32 @@ struct upb_msg_parse_state {
struct upb_msg *m;
};
void upb_msg_parse_init(struct upb_msg_parse_state *s, void *msg,
/* Initializes/frees a message parser. The parser will write the data to the
* message data "data", which the caller must have previously allocated (the
* parser will allocate submsgs, strings, and arrays as needed, however).
*
* "Merge" controls whether the parser will append to data instead of
* overwriting. Merging concatenates arrays and merges submessages instead
* of clearing both.
*
* "Byref" controls whether the new message data copies or references strings
* it encounters. If byref == true, then all strings supplied to upb_msg_parse
* must remain unchanged and must outlive data. */
void upb_msg_parse_init(struct upb_msg_parse_state *s, void *data,
struct upb_msg *m, bool merge, bool byref);
void upb_msg_parse_free(struct upb_msg_parse_state *s);
/* Parses a protobuf fragment, writing the data to the message that was passed
* to upb_msg_parse_init. This function can be called multiple times as more
* data becomes available. */
upb_status_t upb_msg_parse(struct upb_msg_parse_state *s,
void *data, size_t len, size_t *read);
/* Parses the protobuf in s (which is expected to be complete) and allocates
* new message data to hold it. This is an alternative to the streaming API
* above. "byref" works as in upb_msg_parse_init(). */
void *upb_alloc_and_parse(struct upb_msg *m, struct upb_string *s, bool byref);
/* Note! These two may not be use on a upb_string* that was initialized by
* means other than these functions. */
void upb_msg_reuse_str(struct upb_string **str, uint32_t len);
void upb_msg_reuse_array(struct upb_array **arr, uint32_t n, upb_field_type_t t);
void upb_msg_reuse_strref(struct upb_string **str);
void upb_msg_reuse_submsg(void **msg, struct upb_msg *m);
/* "Set" flag reading and writing. *******************************************/
INLINE size_t upb_isset_offset(uint32_t field_index) {
return field_index / 8;
}
INLINE uint8_t upb_isset_mask(uint32_t field_index) {
return 1 << (field_index % 8);
}
/* Functions for reading and writing the "set" flags in the msg. Note that
* these do not perform memory management associated with any dynamic memory
* these fields may be referencing. These *only* set and test the flags. */
INLINE void upb_msg_set(void *s, struct upb_msg_field *f)
{
((char*)s)[upb_isset_offset(f->field_index)] |= upb_isset_mask(f->field_index);
}
INLINE void upb_msg_unset(void *s, struct upb_msg_field *f)
{
((char*)s)[upb_isset_offset(f->field_index)] &= ~upb_isset_mask(f->field_index);
}
INLINE bool upb_msg_is_set(void *s, struct upb_msg_field *f)
{
return ((char*)s)[upb_isset_offset(f->field_index)] & upb_isset_mask(f->field_index);
}
INLINE bool upb_msg_all_required_fields_set(void *s, struct upb_msg *m)
{
int num_fields = m->num_required_fields;
int i = 0;
while(num_fields > 8) {
if(((uint8_t*)s)[i++] != 0xFF) return false;
num_fields -= 8;
}
if(((uint8_t*)s)[i] != (1 << num_fields) - 1) return false;
return true;
}
INLINE void upb_msg_clear(void *s, struct upb_msg *m)
{
memset(s, 0, m->set_flags_bytes);
}
/* Serialization/Deserialization. ********************************************/
/* Parses the string data in s according to the message description in m. */
upb_status_t upb_msg_merge(void *data, struct upb_msg *m, struct upb_string *s);
/* Text dump *****************************************************************/

@ -18,6 +18,33 @@
extern "C" {
#endif
/* Definitions. ***************************************************************/
/* A list of types as they are encoded on-the-wire. The numeric values are the
 * wire-type numbers themselves, so they must not be changed. */
enum upb_wire_type {
UPB_WIRE_TYPE_VARINT = 0,       /* Variable-length integer. */
UPB_WIRE_TYPE_64BIT = 1,        /* Fixed 64-bit value. */
UPB_WIRE_TYPE_DELIMITED = 2,    /* Length-delimited data. */
UPB_WIRE_TYPE_START_GROUP = 3,  /* Start of a group. */
UPB_WIRE_TYPE_END_GROUP = 4,    /* End of a group. */
UPB_WIRE_TYPE_32BIT = 5         /* Fixed 32-bit value. */
};
typedef uint8_t upb_wire_type_t;
/* A value as it is encoded on-the-wire, except delimited, which is handled
* separately. Which member is meaningful depends on the wire type, which must
* be tracked alongside the value. */
union upb_wire_value {
uint64_t varint;   /* Value of a UPB_WIRE_TYPE_VARINT field. */
uint64_t _64bit;   /* Value of a UPB_WIRE_TYPE_64BIT field. */
uint32_t _32bit;   /* Value of a UPB_WIRE_TYPE_32BIT field. */
};
/* A tag occurs before each value on-the-wire. It identifies which field the
 * value belongs to and how the value is encoded. */
struct upb_tag {
upb_field_number_t field_number;  /* The field's number from the .proto. */
upb_wire_type_t wire_type;        /* One of enum upb_wire_type. */
};
/* High-level parsing interface. **********************************************/
struct upb_parse_state;

@ -65,7 +65,7 @@ static void write_header(struct upb_symtab_entry entries[], int num_entries,
struct upb_string enum_val_prefix = upb_strdup(entry->e.key);
enum_val_prefix.byte_len = memrchr(enum_val_prefix.ptr,
UPB_CONTEXT_SEPARATOR,
UPB_SYMBOL_SEPARATOR,
enum_val_prefix.byte_len);
enum_val_prefix.byte_len++;
to_preproc(enum_val_prefix);
@ -135,7 +135,7 @@ static void write_header(struct upb_symtab_entry entries[], int num_entries,
/* Submessages get special treatment, since we have to use the message
* name directly. */
struct upb_string type_name_ref = *fd->type_name;
if(type_name_ref.ptr[0] == UPB_CONTEXT_SEPARATOR) {
if(type_name_ref.ptr[0] == UPB_SYMBOL_SEPARATOR) {
/* Omit leading '.'. */
type_name_ref.ptr++;
type_name_ref.byte_len--;
@ -207,5 +207,6 @@ int main()
struct upb_string name = UPB_STRLIT("descriptor.proto");
write_header(entries, symcount, name, stdout);
upb_context_free(&c);
upb_strfree(fds);
}

Loading…
Cancel
Save