Added calculation of sizes for serialization (untested).

pull/13171/head
Joshua Haberman 16 years ago
parent 4bcdea25f0
commit 85f6cecb80
  1. 117
      src/upb_msg.c
  2. 58
      src/upb_msg.h
  3. 3
      src/upb_parse.h
  4. 18
      src/upb_serialize.h

@ -9,6 +9,7 @@
#include "descriptor.h" #include "descriptor.h"
#include "upb_msg.h" #include "upb_msg.h"
#include "upb_parse.h" #include "upb_parse.h"
#include "upb_serialize.h"
/* Rounds p up to the next multiple of t. */ /* Rounds p up to the next multiple of t. */
#define ALIGN_UP(p, t) ((p) % (t) == 0 ? (p) : (p) + ((t) - ((p) % (t)))) #define ALIGN_UP(p, t) ((p) % (t) == 0 ? (p) : (p) + ((t) - ((p) % (t))))
@ -248,7 +249,7 @@ void upb_msg_reuse_submsg(void **msg, struct upb_msg *m)
if(!*msg) *msg = upb_msgdata_new(m); if(!*msg) *msg = upb_msgdata_new(m);
} }
/* Serialization/Deserialization. ********************************************/ /* Parsing. ******************************************************************/
static upb_field_type_t tag_cb(void *udata, struct upb_tag *tag, static upb_field_type_t tag_cb(void *udata, struct upb_tag *tag,
void **user_field_desc) void **user_field_desc)
@ -390,6 +391,120 @@ void *upb_alloc_and_parse(struct upb_msg *m, struct upb_string *str, bool byref)
} }
} }
/* Serialization. ************************************************************/
/* We store the message sizes linearly in post-order (size of parent after sizes
 * of children) for a right-to-left traversal of the message tree. Iterating
 * over this in reverse gives us a pre-order (size of parent before sizes of
 * children) left-to-right traversal, which is what we want for serializing,
 * where each submessage's length has to be emitted before its contents. */
struct upb_msgsizes {
/* Number of entries of 'sizes' currently in use. */
int len;
/* Number of entries 'sizes' has room for (allocated capacity, in elements). */
int size;
/* Heap-allocated array of message sizes, in the order described above. */
size_t *sizes;
};
/* Forward declaration; defined below. get_msgsize and get_valuesize are
 * mutually recursive, so one of them has to be declared before the other. */
static size_t get_msgsize(struct upb_msgsizes *sizes, void *data,
struct upb_msg *m);
/* Computes how many bytes the value pointed to by 'p' (a value of field 'f')
 * takes up once serialized. The field's tag is *not* counted here; callers
 * add it themselves. Submessages and groups recurse into get_msgsize(), which
 * appends to 'sizes' as a side effect. 'fd' is only consulted for groups, to
 * size the end-group tag. Field types without a case below hit the asserting
 * default and yield 0. */
static size_t get_valuesize(struct upb_msgsizes *sizes, union upb_value_ptr p,
                            struct upb_msg_field *f,
                            google_protobuf_FieldDescriptorProto *fd)
{
/* Expands to a case that defers to the size helper for one scalar type. */
#define SCALAR_SIZE(wire_type, member) \
  case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_ ## wire_type: \
    return upb_get_ ## wire_type ## _size(*p.member);

  switch(f->type) {
    SCALAR_SIZE(DOUBLE,   _double)
    SCALAR_SIZE(FLOAT,    _float)
    SCALAR_SIZE(INT32,    int32)
    SCALAR_SIZE(INT64,    int64)
    SCALAR_SIZE(UINT32,   uint32)
    SCALAR_SIZE(UINT64,   uint64)
    SCALAR_SIZE(SINT32,   int32)
    SCALAR_SIZE(SINT64,   int64)
    SCALAR_SIZE(FIXED32,  uint32)
    SCALAR_SIZE(FIXED64,  uint64)
    SCALAR_SIZE(SFIXED32, int32)
    SCALAR_SIZE(SFIXED64, int64)
    SCALAR_SIZE(BOOL,     _bool)
    SCALAR_SIZE(ENUM,     int32)

    case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_MESSAGE: {
      /* Length-prefixed: the length itself, followed by the message body. */
      size_t body_len = get_msgsize(sizes, p.msg, f->ref.msg);
      return upb_get_INT32_size(body_len) + body_len;
    }

    case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_GROUP: {
      /* Groups carry no length; they are closed by an end-group tag. */
      size_t closing_tag_len = upb_get_tag_size(fd->number);
      return closing_tag_len + get_msgsize(sizes, p.msg, f->ref.msg);
    }

    default:
      /* Internal corruption. */
      assert(false);
      return 0;
  }

#undef SCALAR_SIZE
}
/* Recursively calculates the serialized size of the message stored at 'data'
 * and described by 'm', and returns it.
 *
 * Besides returning the total, every level of the recursion appends its own
 * result to 'sizes', because all of this intermediate information is needed
 * later. A message's entry is appended after the entries of its submessages
 * (post-order), and fields and array elements are visited last-to-first.
 *
 * The 'sizes' array is grown on demand. There is no way to report an error
 * from here, so running out of memory aborts the process instead of writing
 * through a NULL or undersized array. */
static size_t get_msgsize(struct upb_msgsizes *sizes, void *data,
                          struct upb_msg *m)
{
  /* Capacity used for the first allocation, when 'sizes' is still empty. */
  enum { INITIAL_CAPACITY = 16 };

  size_t size = 0;
  /* We iterate over fields and arrays in reverse order. */
  for(int32_t i = m->num_fields - 1; i >= 0; i--) {
    struct upb_msg_field *f = &m->fields[i];
    google_protobuf_FieldDescriptorProto *fd = upb_msg_field_descriptor(f, m);
    if(!upb_msg_isset(data, f)) continue;
    union upb_value_ptr p = upb_msg_getptr(data, f);
    if(upb_isarray(f)) {
      for(int32_t j = (*p.arr)->len - 1; j >= 0; j--) {
        union upb_value_ptr elem = upb_array_getelementptr((*p.arr), j, f->type);
        /* TODO: for packed arrays tag size goes outside the loop. */
        size += upb_get_tag_size(fd->number);
        size += get_valuesize(sizes, elem, f, fd);
      }
    } else {
      size += upb_get_tag_size(fd->number);
      size += get_valuesize(sizes, p, f, fd);
    }
  }

  /* Grow the 'sizes' array if it is full. A freshly initialized upb_msgsizes
   * has a capacity of zero, so plain doubling would never make room; start
   * from a fixed capacity in that case. */
  assert(sizes->len <= sizes->size);
  if(sizes->len == sizes->size) {
    int new_capacity = sizes->size > 0 ? sizes->size * 2 : INITIAL_CAPACITY;
    /* Go through a temporary so the old array is not lost if realloc fails. */
    size_t *grown = realloc(sizes->sizes, (size_t)new_capacity * sizeof(*grown));
    if(!grown) abort();  /* Out of memory and no error channel to report it. */
    sizes->sizes = grown;
    sizes->size = new_capacity;
  }

  /* Add our size (already added our children, so post-order). */
  sizes->sizes[sizes->len++] = size;
  return size;
}
/* Walks the message stored at 'data' (described by 'm') and appends the size
 * of the message and of each of its submessages to 'sizes'. */
void upb_msgsizes_read(struct upb_msgsizes *sizes, void *data, struct upb_msg *m)
{
  /* The returned total is deliberately dropped: it is also the last entry
   * that get_msgsize() appends to 'sizes'. */
  (void)get_msgsize(sizes, data, m);
}
/* Initialize/free a upb_msg_sizes for the given message. */
void upb_msgsizes_init(struct upb_msgsizes *sizes)
{
sizes->len = 0;
sizes->size = 0;
sizes->sizes = NULL;
}
/* Releases the array owned by 'sizes'. The struct itself is not freed.
 *
 * The fields are reset to the empty state afterwards, so that freeing twice
 * is harmless and no dangling pointer or stale length is left behind for a
 * later call to trip over. */
void upb_msgsizes_free(struct upb_msgsizes *sizes)
{
  free(sizes->sizes);
  sizes->sizes = NULL;
  sizes->len = 0;
  sizes->size = 0;
}
/* Returns the size of the top-level message, which is the last entry stored
 * in 'sizes' (the parent's size is appended after those of its children).
 *
 * 'sizes' must already hold at least one entry, i.e. upb_msgsizes_read() must
 * have been called on it; otherwise there is no entry to return. */
size_t upb_msgsizes_totalsize(struct upb_msgsizes *sizes)
{
  /* Catch calls on an empty 'sizes' instead of indexing before the array. */
  assert(sizes->len > 0);
  return sizes->sizes[sizes->len - 1];
}
/* Comparison. ***************************************************************/
bool upb_value_eql(union upb_value_ptr p1, union upb_value_ptr p2, bool upb_value_eql(union upb_value_ptr p1, union upb_value_ptr p2,
upb_field_type_t type) upb_field_type_t type)
{ {

@ -215,32 +215,39 @@ INLINE bool upb_isarray(struct upb_msg_field *f) {
/* "Set" flag reading and writing. *******************************************/ /* "Set" flag reading and writing. *******************************************/
/* Please note that these functions do not perform any memory management or in
* any way ensure that the fields are valid. They *only* test/set/clear a bit
* that indicates whether the field is set or not. */
/* Returns the byte offset where we store whether this field is set. */
INLINE size_t upb_isset_offset(uint32_t field_index) { INLINE size_t upb_isset_offset(uint32_t field_index) {
return field_index / 8; return field_index / 8;
} }
/* Returns the mask within the appropriate byte that selects the set bit. */
INLINE uint8_t upb_isset_mask(uint32_t field_index) { INLINE uint8_t upb_isset_mask(uint32_t field_index) {
return 1 << (field_index % 8); return 1 << (field_index % 8);
} }
/* Functions for reading and writing the "set" flags in the msg. Note that /* Sets the set bit for this field in the given message. */
* these do not perform memory management associated with any dynamic memory
* these fields may be referencing. These *only* set and test the flags. */
INLINE void upb_msg_set(void *s, struct upb_msg_field *f) INLINE void upb_msg_set(void *s, struct upb_msg_field *f)
{ {
((char*)s)[upb_isset_offset(f->field_index)] |= upb_isset_mask(f->field_index); ((char*)s)[upb_isset_offset(f->field_index)] |= upb_isset_mask(f->field_index);
} }
/* Clears the set bit for this field in the given message. */
INLINE void upb_msg_unset(void *s, struct upb_msg_field *f) INLINE void upb_msg_unset(void *s, struct upb_msg_field *f)
{ {
((char*)s)[upb_isset_offset(f->field_index)] &= ~upb_isset_mask(f->field_index); ((char*)s)[upb_isset_offset(f->field_index)] &= ~upb_isset_mask(f->field_index);
} }
/* Tests whether the given field is set. */
INLINE bool upb_msg_isset(void *s, struct upb_msg_field *f) INLINE bool upb_msg_isset(void *s, struct upb_msg_field *f)
{ {
return ((char*)s)[upb_isset_offset(f->field_index)] & upb_isset_mask(f->field_index); return ((char*)s)[upb_isset_offset(f->field_index)] & upb_isset_mask(f->field_index);
} }
/* Returns true if *all* required fields are set, false otherwise. */
INLINE bool upb_msg_all_required_fields_set(void *s, struct upb_msg *m) INLINE bool upb_msg_all_required_fields_set(void *s, struct upb_msg *m)
{ {
int num_fields = m->num_required_fields; int num_fields = m->num_required_fields;
@ -253,6 +260,7 @@ INLINE bool upb_msg_all_required_fields_set(void *s, struct upb_msg *m)
return true; return true;
} }
/* Clears the set bit for all fields. */
INLINE void upb_msg_clear(void *s, struct upb_msg *m) INLINE void upb_msg_clear(void *s, struct upb_msg *m)
{ {
memset(s, 0, m->set_flags_bytes); memset(s, 0, m->set_flags_bytes);
@ -304,7 +312,7 @@ void upb_msg_reuse_array(struct upb_array **arr, uint32_t size,
/* Reuse a submessage of the given type. */ /* Reuse a submessage of the given type. */
void upb_msg_reuse_submsg(void **msg, struct upb_msg *m); void upb_msg_reuse_submsg(void **msg, struct upb_msg *m);
/* Serialization/Deserialization. ********************************************/ /* Parsing. ******************************************************************/
/* This is all just a layer on top of the stream-oriented facility in /* This is all just a layer on top of the stream-oriented facility in
* upb_parse.h. */ * upb_parse.h. */
@ -352,6 +360,48 @@ upb_status_t upb_msg_parse(struct upb_msg_parse_state *s,
* above. "byref" works as in upb_msg_parse_init(). */ * above. "byref" works as in upb_msg_parse_init(). */
void *upb_alloc_and_parse(struct upb_msg *m, struct upb_string *s, bool byref); void *upb_alloc_and_parse(struct upb_msg *m, struct upb_string *s, bool byref);
/* Serialization *************************************************************/
/* For messages that contain any submessages, we must do a pre-pass on the
* message tree to discover the size of all submessages. This is necessary
* because when serializing, the message length has to precede the message data
* itself.
*
* We can calculate these sizes once and reuse them as long as the message is
* known not to have changed. */
struct upb_msgsizes;
/* Initialize/free a upb_msgsizes for the given message. */
void upb_msgsizes_init(struct upb_msgsizes *sizes);
void upb_msgsizes_free(struct upb_msgsizes *sizes);
/* Given a previously initialized sizes, recurse over the message and store its
* sizes in 'sizes'. */
void upb_msgsizes_read(struct upb_msgsizes *sizes, void *data,
struct upb_msg *m);
/* Returns the total size of the serialized message given in sizes. Must be
* preceded by a call to upb_msgsizes_read. */
size_t upb_msgsizes_totalsize(struct upb_msgsizes *sizes);
struct upb_msg_serialize_state;
/* Initializes the state of serialization. The provided message must not
* change between the upb_msgsizes_read() call that was used to construct
* "sizes" and the serialization being fully completed. */
void upb_msg_serialize_alloc(struct upb_msg_serialize_state *s);
void upb_msg_serialize_free(struct upb_msg_serialize_state *s);
void upb_msg_serialize_init(struct upb_msg_serialize_state *s, void *data,
struct upb_msg *m, struct upb_msgsizes *sizes);
/* Serializes the next set of bytes into buf (which has size len). Returns
* UPB_STATUS_OK if serialization is complete, or UPB_STATUS_NEED_MORE_DATA
* if there is more data from the message left to be serialized.
*
* The number of bytes written to buf is returned in *read. This will be
* equal to len unless we finished serializing. */
upb_status_t upb_msg_serialize(struct upb_msg_serialize_state *s,
void *buf, size_t len, size_t *read);
/* Text dump *****************************************************************/ /* Text dump *****************************************************************/

@ -164,8 +164,7 @@ INLINE upb_status_t upb_get_v_uint32_t(uint8_t *buf, uint8_t *end,
{ {
uint64_t val64; uint64_t val64;
UPB_CHECK(upb_get_v_uint64_t(buf, end, &val64, outbuf)); UPB_CHECK(upb_get_v_uint64_t(buf, end, &val64, outbuf));
/* TODO: should we throw an error if any of the high bits in val64 are set? */ *val = (uint32_t)val64; /* Discard the high bits. */
*val = (uint32_t)val64;
return UPB_STATUS_OK; return UPB_STATUS_OK;
} }

@ -44,11 +44,19 @@ INLINE upb_status_t upb_put_v_uint64_t(uint8_t *buf, uint8_t *end, uint64_t val,
return UPB_STATUS_OK; return UPB_STATUS_OK;
} }
/* Puts a varint -- called when we only have 32 bits of data. */ /* Puts an unsigned 32-bit varint, verbatim. Never uses the high 64 bits. */
INLINE upb_status_t upb_put_v_uint32_t(uint8_t *buf, uint8_t *end, INLINE upb_status_t upb_put_v_uint32_t(uint8_t *buf, uint8_t *end,
uint32_t val, uint8_t **outbuf) uint32_t val, uint8_t **outbuf)
{ {
return UPB_STATUS_OK; return upb_put_v_uint64_t(buf, end, val, outbuf);
}
/* Puts a signed 32-bit varint, first sign-extending to 64-bits. We do this to
* maintain wire-compatibility with 64-bit signed integers. */
INLINE upb_status_t upb_put_v_int32_t(uint8_t *buf, uint8_t *end,
int32_t val, uint8_t **outbuf)
{
return upb_put_v_uint64_t(buf, end, (int64_t)val, outbuf);
} }
INLINE void upb_put32(uint8_t *buf, uint32_t val) { INLINE void upb_put32(uint8_t *buf, uint32_t val) {
@ -183,9 +191,9 @@ T(FLOAT, f, uint32_t, float, _float) {
#undef PUT #undef PUT
#undef T #undef T
/* Functions to get sizes of serialized values without serializing. ***********/ size_t upb_get_tag_size(uint32_t fieldnum) {
return upb_v_uint64_t_size((uint64_t)fieldnum << 3);
}
#ifdef __cplusplus #ifdef __cplusplus
} /* extern "C" */ } /* extern "C" */

Loading…
Cancel
Save