A bit more work on generalizing parsing.

pull/13171/head
Joshua Haberman 16 years ago
parent 6fdf83f59f
commit b8481e0e55
  1. 1
      upb.h
  2. 2
      upb_context.c
  3. 4
      upb_context.h
  4. 104
      upb_msg.c
  5. 26
      upb_msg.h
  6. 6
      upb_parse.c
  7. 6
      upb_parse.h

@ -112,6 +112,7 @@ union upb_value_ptr {
bool *_bool;
struct upb_string **string;
struct upb_array **array;
void **message;
void *_void;
};

@ -232,7 +232,7 @@ error:
}
bool upb_context_parsefd(struct upb_context *c, struct upb_string *fd_str) {
google_protobuf_FileDescriptorProto *fd = upb_msg_parse(c->fd_msg, fd_str);
google_protobuf_FileDescriptorProto *fd = upb_alloc_and_parse(c->fd_msg, fd_str);
if(!fd) return false;
if(!upb_context_addfd(c, fd)) return false;
c->fd[c->fd_len++] = fd; /* Need to keep a ref since we own it. */

@ -70,8 +70,8 @@ INLINE struct upb_symtab_entry *upb_context_symbegin(struct upb_context *c) {
}
INLINE struct upb_symtab_entry *upb_context_symnext(
struct upb_context *c, struct upb_inttable_entry *cur) {
return upb_strtable_next(&c->symtab, cur);
struct upb_context *c, struct upb_symtab_entry *cur) {
return upb_strtable_next(&c->symtab, &cur->e);
}
/* Adding symbols. ************************************************************/

@ -113,7 +113,7 @@ struct mm_upb_array {
char *data;
};
uint32_t round_up_to_pow2(uint32_t v)
static uint32_t round_up_to_pow2(uint32_t v)
{
#ifdef __GNUC__
return (1U<<31) >> (__builtin_clz(v-1)+1);
@ -168,6 +168,14 @@ struct parse_frame_data {
void *data;
};
/* Stores the message definition "m" and the message data pointer "data" in
 * the user-data area of the parser's current top stack frame. */
static void set_frame_data(struct upb_parse_state *s, struct upb_msg *m,
                           void *data)
{
  struct parse_frame_data *top_frame = (void*)s->top->user_data;
  top_frame->data = data;
  top_frame->m = m;
}
static upb_field_type_t tag_cb(struct upb_parse_state *s, struct upb_tag *tag,
void **user_field_desc)
{
@ -179,32 +187,36 @@ static upb_field_type_t tag_cb(struct upb_parse_state *s, struct upb_tag *tag,
return f->type;
}
static union upb_value_ptr get_value_ptr(void *data, struct upb_msg_field *f)
{
union upb_value_ptr p = upb_msg_get_ptr(data, f);
if(f->label == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_REPEATED) {
size_t len = upb_msg_is_set(data, f) ? (*p.array)->len : 0;
upb_msg_reuse_array(p.array, len, f->type);
(*p.array)->len = len + 1;
p = upb_array_getelementptr(*p.array, len, f->type);
}
return p;
}
static upb_status_t value_cb(struct upb_parse_state *s, void **buf, void *end,
upb_field_type_t type, void *user_field_desc)
void *user_field_desc)
{
struct parse_frame_data *frame = (void*)s->top->user_data;
struct upb_msg_field *f = user_field_desc;
union upb_value_ptr p = upb_msg_get_ptr(frame->data, f);
if(f->label == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_REPEATED) {
upb_msg_reuse_array(p.array, (*p.array)->len, type);
p = upb_array_getelementptr(*p.array, (*p.array)->len++, type);
}
UPB_CHECK(upb_parse_value(buf, end, type, p));
union upb_value_ptr p = get_value_ptr(frame->data, f);
UPB_CHECK(upb_parse_value(buf, end, f->type, p));
return UPB_STATUS_OK;
}
static upb_status_t str_cb(struct upb_parse_state *s, struct upb_string *str,
upb_field_type_t type, void *user_field_desc)
static upb_status_t str_cb(struct upb_parse_state *_s, struct upb_string *str,
void *user_field_desc)
{
struct parse_frame_data *frame = (void*)s->top->user_data;
struct upb_msg_parse_state *s = (void*)_s;
struct parse_frame_data *frame = (void*)s->s.top->user_data;
struct upb_msg_field *f = user_field_desc;
union upb_value_ptr p = upb_msg_get_ptr(frame->data, f);
if(f->label == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_REPEATED) {
upb_msg_reuse_array(p.array, (*p.array)->len, type);
p = upb_array_getelementptr(*p.array, (*p.array)->len++, type);
}
bool byref = false;
if(byref) {
union upb_value_ptr p = get_value_ptr(frame->data, f);
if(s->byref) {
upb_msg_reuse_strref(p.string);
**p.string = *str;
} else {
@ -214,33 +226,39 @@ static upb_status_t str_cb(struct upb_parse_state *s, struct upb_string *str,
return UPB_STATUS_OK;
}
static void set_frame_data(struct upb_parse_state *s, struct upb_msg *m,
void *data)
static void submsg_start_cb(struct upb_parse_state *_s, void *user_field_desc)
{
struct parse_frame_data *frame = (void*)s->top->user_data;
frame->m = m;
frame->data = data;
struct upb_msg_parse_state *s = (void*)_s;
struct upb_msg_field *f = user_field_desc;
struct parse_frame_data *frame = (void*)s->s.top->user_data;
union upb_value_ptr p = upb_msg_get_ptr(frame->data, f);
upb_msg_reuse_submsg(*p.message, f->ref.msg);
if(!s->merge) upb_msg_clear(frame->data, f->ref.msg);
set_frame_data(&s->s, f->ref.msg, *p.message);
}
/* Initializes "s" for parsing serialized data into "msg", a message whose
 * definition is "m".
 *
 * s:     parse state to initialize; release it with upb_msg_parse_free().
 * msg:   destination message data.  If NULL and "merge" is false, a new
 *        message is allocated with upb_msg_new(m).
 * m:     definition of the top-level message.
 * merge: stored in the state; when false, submsg_start_cb calls
 *        upb_msg_clear() as each submessage starts.
 * byref: stored in the state; consulted by str_cb when storing strings. */
void upb_msg_parse_init(struct upb_msg_parse_state *s, void *msg,
                        struct upb_msg *m, bool merge, bool byref)
{
  upb_parse_init(&s->s, sizeof(struct parse_frame_data));
  s->merge = merge;
  s->byref = byref;
  s->m = m;  /* Was previously left uninitialized. */
  if(!merge && msg == NULL) msg = upb_msg_new(m);
  /* Must follow upb_parse_init(): set_frame_data() writes through s->s.top. */
  set_frame_data(&s->s, m, msg);
  s->s.tag_cb = tag_cb;
  s->s.value_cb = value_cb;
  s->s.str_cb = str_cb;
  s->s.submsg_start_cb = submsg_start_cb;
}
static void submsg_start_cb(struct upb_parse_state *s, void *user_field_desc)
void upb_msg_parse_free(struct upb_msg_parse_state *s)
{
struct upb_msg_field *f = user_field_desc;
struct parse_frame_data *frame = (void*)s->top->user_data;
void **submsg = upb_msg_get_submsg_ptr(frame->data, f);
upb_msg_reuse_submsg(submsg, f->ref.msg);
set_frame_data(s, f->ref.msg, *submsg);
}
upb_status_t upb_msg_merge(void *data, struct upb_msg *m, struct upb_string *str)
{
struct upb_parse_state s;
upb_parse_state_init(&s, sizeof(struct parse_frame_data));
set_frame_data(&s, m, data);
s.tag_cb = tag_cb;
s.value_cb = value_cb;
s.str_cb = str_cb;
s.submsg_start_cb = submsg_start_cb;
size_t read;
UPB_CHECK(upb_parse(&s, str->ptr, str->byte_len, &read));
return UPB_STATUS_OK;
upb_parse_free(&s->s);
}
/* Parses "len" bytes starting at "data" using the generic parse state embedded
 * in "s".  "read" and the returned status are passed straight through from
 * upb_parse(). */
upb_status_t upb_msg_parse(struct upb_msg_parse_state *s,
                           void *data, size_t len, size_t *read)
{
  upb_status_t status = upb_parse(&s->s, data, len, read);
  return status;
}

@ -46,6 +46,7 @@
#include "upb.h"
#include "upb_table.h"
#include "upb_parse.h"
#ifdef __cplusplus
extern "C" {
@ -57,13 +58,14 @@ struct google_protobuf_FieldDescriptorProto;
/* Message definition. ********************************************************/
/* Structure that describes a single field in a message. */
/* Structure that describes a single field in a message. This structure is very
* consciously designed to fit into 12/16 bytes (32/64 bit, respectively). */
struct upb_msg_field {
union upb_symbol_ref ref;
uint32_t byte_offset; /* Where to find the data. */
uint16_t field_index; /* Indexes upb_msg.fields. Also indicates set bit */
upb_field_type_t type; /* Copied from descriptor for cache-friendliness. */
upb_label_t label;
union upb_symbol_ref ref;
};
/* Structure that describes a single .proto message type. */
@ -155,8 +157,8 @@ struct upb_array {
uint32_t len; \
};
union upb_value_ptr upb_array_getelementptr(struct upb_array *arr, uint32_t n,
upb_field_type_t type)
INLINE union upb_value_ptr upb_array_getelementptr(
struct upb_array *arr, uint32_t n, upb_field_type_t type)
{
union upb_value_ptr ptr = {
._void = ((char*)arr->elements._void + n*upb_type_info[type].size)
@ -226,7 +228,21 @@ INLINE union upb_value_ptr upb_msg_get_ptr(
/* Memory management *********************************************************/
void *upb_msg_new(struct upb_msg *m);
//void upb_msg_free(void *msg, struct upb_msg *m, bool free_submsgs);
/* State for parsing serialized data into an in-memory message.  The generic
 * parse state must remain the first member: the str and submsg-start callbacks
 * receive a struct upb_parse_state* and cast it back to this type. */
struct upb_msg_parse_state {
struct upb_parse_state s;  /* Generic parser state; set up by upb_parse_init(). */
bool merge;  /* If false, upb_msg_parse_init() allocates a message when none is
              * given, and upb_msg_clear() is called as each submessage starts. */
bool byref;  /* If true, str_cb stores parsed strings by assigning the
              * upb_string struct (which points into the input buffer). */
struct upb_msg *m;  /* Presumably the top-level message definition.
                     * NOTE(review): confirm upb_msg_parse_init() sets this. */
};
/* Initializes "s" to parse into "msg", a message described by "m".  If "merge"
 * is false and "msg" is NULL, a new message is allocated with upb_msg_new(m).
 * "merge" and "byref" are stored in "s" for use by the parse callbacks. */
void upb_msg_parse_init(struct upb_msg_parse_state *s, void *msg,
struct upb_msg *m, bool merge, bool byref);
/* Frees the generic parse state embedded in "s" via upb_parse_free(). */
void upb_msg_parse_free(struct upb_msg_parse_state *s);
/* Parses "len" bytes at "data" by forwarding to upb_parse() with the embedded
 * parse state; "read" and the returned status come from upb_parse(). */
upb_status_t upb_msg_parse(struct upb_msg_parse_state *s,
void *data, size_t len, size_t *read);
/* NOTE(review): definition not visible here.  The caller in
 * upb_context_parsefd() treats a NULL return as failure and keeps the returned
 * message as something it owns -- confirm the ownership contract. */
void *upb_alloc_and_parse(struct upb_msg *m, struct upb_string *s);
/* Note! These two may not be used on a upb_string* that was initialized by
* means other than these functions. */

@ -308,12 +308,12 @@ static upb_status_t parse_delimited(struct upb_parse_state *s,
if(ft == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_STRING ||
ft == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_BYTES) {
struct upb_string str = {.ptr = *buf, .byte_len = delim_len};
s->str_cb(s, &str, ft, user_field_desc);
s->str_cb(s, &str, user_field_desc);
*buf = delim_end;
} else {
/* Packed Array. */
while(*buf < delim_end)
UPB_CHECK(s->value_cb(s, buf, end, ft, user_field_desc));
UPB_CHECK(s->value_cb(s, buf, end, user_field_desc));
}
}
return UPB_STATUS_OK;
@ -332,7 +332,7 @@ static upb_status_t parse_nondelimited(struct upb_parse_state *s,
/* No length specified, an "end group" tag will mark the end. */
UPB_CHECK(push_stack_frame(s, 0, user_field_desc));
} else {
UPB_CHECK(s->value_cb(s, buf, end, ft, user_field_desc));
UPB_CHECK(s->value_cb(s, buf, end, user_field_desc));
}
return UPB_STATUS_OK;
}

@ -25,8 +25,8 @@ struct upb_parse_state;
/* Initialize and free (respectively) the given parse state, which must have
* been previously allocated. udata_size specifies how much space will be
* available at parse_stack_frame.user_data in each frame for user data. */
void upb_parse_state_init(struct upb_parse_state *state, size_t udata_size);
void upb_parse_state_free(struct upb_parse_state *state);
void upb_parse_init(struct upb_parse_state *state, size_t udata_size);
void upb_parse_free(struct upb_parse_state *state);
/* The callback that is called immediately after a tag has been parsed. The
* client should determine whether it wants to parse or skip the corresponding
@ -48,13 +48,11 @@ typedef upb_field_type_t (*upb_tag_cb)(struct upb_parse_state *s,
* call to tag_cb in the case of packed arrays. */
typedef upb_status_t (*upb_value_cb)(struct upb_parse_state *s,
void **buf, void *end,
upb_field_type_t type,
void *user_field_desc);
/* The callback that is called when a string is parsed. */
typedef upb_status_t (*upb_str_cb)(struct upb_parse_state *s,
struct upb_string *str,
upb_field_type_t type,
void *user_field_desc);
/* Callbacks that are called when a submessage begins and ends, respectively.

Loading…
Cancel
Save