From 8c1e7170b74e1a6a29736f63507f83ddeb51f560 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Mon, 24 May 2010 11:15:08 -0700 Subject: [PATCH] Defined the upb_src and upb_bytesrc interfaces. --- src/upb.h | 13 ++--- src/upb_decoder.c | 27 +++++----- src/{upb_sink.h => upb_srcsink.h} | 85 ++++++++++++++++--------------- 3 files changed, 63 insertions(+), 62 deletions(-) rename src/{upb_sink.h => upb_srcsink.h} (65%) diff --git a/src/upb.h b/src/upb.h index 4fb5773e18..c65a686e8f 100644 --- a/src/upb.h +++ b/src/upb.h @@ -116,12 +116,13 @@ typedef union { uint32_t _32bit; } upb_wire_value; -// A tag occurs before each value on-the-wire. -typedef struct { - upb_field_number_t field_number; - upb_wire_type_t wire_type; -} upb_tag; - +// A key occurs before each value on-the-wire. +typedef uint32_t upb_key; +INLINE upb_key upb_make_key(upb_fieldnum_t fieldnum, upb_wiretype_t wiretype) { + return (fieldnum << 3) | wiretype; +} +INLINE upb_fieldnum_t upb_key_fieldnum(upb_key key) { return key >> 3; } +INLINE upb_wiretype_t upb_key_wiretype(upb_key key) { return key & 0x07; } /* Polymorphic values of .proto types *****************************************/ diff --git a/src/upb_decoder.c b/src/upb_decoder.c index 209db566cf..32b8f160dc 100644 --- a/src/upb_decoder.c +++ b/src/upb_decoder.c @@ -195,18 +195,6 @@ T(FLOAT, f, uint32_t, float, _float) { #undef GET #undef T -// Parses a tag, places the result in *tag. -INLINE const uint8_t *decode_tag(const uint8_t *buf, const uint8_t *end, - upb_tag *tag, upb_status *status) -{ - uint32_t tag_int; - const uint8_t *ret = upb_get_v_uint32_t(buf, end, &tag_int, status); - tag->wire_type = (upb_wire_type_t)(tag_int & 0x07); - tag->field_number = tag_int >> 3; - return ret; -} - - // Parses a 64-bit varint that is known to be >= 2 bytes (the inline version // handles 1 and 2 byte varints). 
const uint8_t *upb_get_v_uint64_t_full(const uint8_t *buf, const uint8_t *end, @@ -311,13 +299,12 @@ typedef struct { struct upb_decoder { // Immutable state of the decoder. upb_msgdef *toplevel_msgdef; - upb_sink *sink; + upb_bytesrc *bytesrc; // State pertaining to a particular decode (resettable). // Stack entries store the offset where the submsg ends (for groups, 0). upb_decoder_frame stack[UPB_MAX_NESTING], *top, *limit; size_t completed_offset; - void *udata; }; upb_decoder *upb_decoder_new(upb_msgdef *msgdef) @@ -344,6 +331,18 @@ void upb_decoder_reset(upb_decoder *d, upb_sink *sink) d->top->end_offset = 0; } +// Parses the next key (field number and wire type) and returns it. +upb_key upb_decoder_src_getkey(upb_decoder *d) +{ + upb_key key; + upb_fill_buffer(d); + d-> + const uint8_t *ret = upb_get_v_uint32_t(buf, end, &tag_int, status); + return ret; +} + + + static const void *get_msgend(upb_decoder *d, const uint8_t *start) { if(d->top->end_offset > 0) diff --git a/src/upb_sink.h b/src/upb_srcsink.h similarity index 65% rename from src/upb_sink.h rename to src/upb_srcsink.h index 5dc5b5203d..7c95059ac6 100644 --- a/src/upb_sink.h +++ b/src/upb_srcsink.h @@ -3,36 +3,20 @@ * * Copyright (c) 2010 Joshua Haberman. See LICENSE for details. * - * upb_sink is a general purpose interface for pushing the contents of a - * protobuf from one component to another in a streaming fashion. We call the - * component that calls a upb_sink a "source". By "pushing" we mean that the - * source calls into the sink; the opposite (where a sink calls into the - * source) is known as "pull". In the push model the source gets the main - * loop; in a pull model the sink does. + * This file defines four general-purpose interfaces for pulling/pushing either + * protobuf data or bytes: * - * This interface is used as general-purpose glue in upb. For example, the - * parser interface works by implementing a source. Likewise the serialization - * simply implements a sink. 
Copying one protobuf to another is just a matter - * of using one message as a source and another as a sink. + * - upb_src: pull interface for protobuf key/value pairs. + * - upb_sink: push interface for protobuf key/value pairs. + * - upb_bytesrc: pull interface for bytes. + * - upb_bytesink: push interface for bytes. * - * In terms of efficiency, we would generally expect "push" to be faster if the - * source had more state to track, and "pull" to be faster if the sink had more - * state. The reason is that whoever has the main loop can keep state on the - * stack (and possibly even in callee-save registers), whereas the the - * component that is "called into" always needs to reload its state from - * memory. - * - * In terms of programming complexity, it is easier and simpler to have the - * main loop, because you can store state in local variables. - * - * So the assumption inherent in using the push model is that sources are - * generally more complicated and stateful than consumers. For example, in the - * parser case, it has to deal with malformed input and associated errors; in - * comparison, the serializer deals with known-good input. + * These interfaces are used as general-purpose glue in upb. For example, the + * decoder interface works by implementing a upb_src and calling a upb_bytesrc. */ -#ifndef UPB_SINK_H -#define UPB_SINK_H +#ifndef UPB_SRCSINK_H +#define UPB_SRCSINK_H #include "upb_def.h" @@ -40,26 +24,43 @@ extern "C" { #endif -// Each of the upb_sink callbacks returns a status of this type. -typedef enum { - // The normal case, where the consumer wants to continue consuming. - UPB_SINK_CONTINUE, +/* upb_src ********************************************************************/ + +// Retrieves the fielddef for the next field in the stream. Returns NULL on +// error or end-of-stream. +upb_fielddef *upb_src_getdef(upb_src *src); + +// Retrieves and stores the next value in "val". 
For string types the caller +// does not own a ref to the returned string; you must ref it yourself if you +// want one. Returns false on error. +bool upb_src_getval(upb_src *src, upb_valueptr val); + +// Like upb_src_getval() but skips the value. +bool upb_src_skipval(upb_src *src); + +// Descends into a submessage. +bool upb_src_startmsg(upb_src *src); + +// Stops reading a submessage. May be called before the stream is EOF, in +// which case the rest of the submessage is skipped. +bool upb_src_endmsg(upb_src *src); + +// Returns the current error status for the stream. +upb_status *upb_src_status(upb_src *src); - // The sink did not consume this value, and wants to halt further processing. - // If the source is resumable, it should save the current state so that when - // resumed, the value that was just provided will be replayed. - UPB_SINK_STOP, +/* upb_bytesrc ****************************************************************/ - // The consumer wants to skip to the end of the current submessage and - // continue consuming. If we are at the top-level, the rest of the - // data is discarded. - UPB_SINK_SKIP -} upb_sink_status; +// Returns the next string in the stream. The caller does not own a ref on the +// returned string; you must ref it yourself if you want one. +upb_string *upb_bytesrc_get(upb_bytesrc *src); +// Appends the next "len" bytes in the stream in-place to "str". This should +// be used when the caller needs to build a contiguous string of the existing +// data in "str" with more data. +bool upb_bytesrc_append(upb_bytesrc *src, upb_string *str, upb_strlen_t len); -typedef struct { - struct upb_sink_callbacks *vtbl; -} upb_sink; +// Returns the current error status for the stream. +upb_status *upb_bytesrc_status(upb_src *src); /* upb_sink callbacks *********************************************************/