Major refactoring: upb_string is gone in favor of upb_strref.

pull/13171/head
Joshua Haberman 14 years ago
parent 559e23c796
commit 6a1f3a6693
  1. 3
      Makefile
  2. 35
      benchmarks/parsestream.upb_table.c
  3. 44
      benchmarks/parsetostruct.upb_table.c
  4. 80
      src/upb.c
  5. 110
      src/upb.h
  6. 213
      src/upb_bytestream.h
  7. 276
      src/upb_decoder.c
  8. 75
      src/upb_decoder.h
  9. 28
      src/upb_decoder_x86.dasc
  10. 205
      src/upb_def.c
  11. 49
      src/upb_def.h
  12. 188
      src/upb_descriptor.c
  13. 6
      src/upb_descriptor.h
  14. 49
      src/upb_glue.c
  15. 20
      src/upb_glue.h
  16. 25
      src/upb_handlers.c
  17. 9
      src/upb_handlers.h
  18. 53
      src/upb_msg.c
  19. 6
      src/upb_msg.h
  20. 168
      src/upb_stdio.c
  21. 54
      src/upb_stdio.h
  22. 164
      src/upb_string.c
  23. 394
      src/upb_string.h
  24. 105
      src/upb_strstream.c
  25. 24
      src/upb_strstream.h
  26. 127
      src/upb_table.c
  27. 69
      src/upb_table.h
  28. 44
      src/upb_textprinter.c
  29. 17
      src/upb_varint.h
  30. 25
      tests/test_decoder.c
  31. 126
      tests/test_string.c
  32. 23
      tests/test_table.cc
  33. 58
      tests/test_vs_proto2.cc
  34. 16
      tests/tests.c

@ -75,7 +75,6 @@ CORE= \
src/upb_handlers.c \
src/upb_descriptor.c \
src/upb_table.c \
src/upb_string.c \
src/upb_def.c \
src/upb_msg.c \
src/upb_varint.c \
@ -100,7 +99,6 @@ BENCHMARKS_SRC= \
TESTS_SRC= \
tests/test_decoder.c \
tests/test_def.c \
tests/test_string.c \
tests/tests.c \
tests/tests_varint.c \
@ -212,7 +210,6 @@ tests/test.proto.pb: tests/test.proto
protoc tests/test.proto -otests/test.proto.pb
SIMPLE_TESTS= \
tests/test_string \
tests/test_def \
tests/test_varint \
tests/tests \

@ -1,12 +1,14 @@
#include "main.c"
#include <stdlib.h>
#include "upb_def.h"
#include "upb_decoder.h"
#include "upb_strstream.h"
#include "upb_glue.h"
static upb_string *input_str;
static char *input_str;
static size_t input_len;
static upb_msgdef *def;
static upb_decoder decoder;
static upb_stringsrc stringsrc;
@ -29,32 +31,21 @@ static bool initialize()
// Initialize upb state, decode descriptor.
upb_status status = UPB_STATUS_INIT;
upb_symtab *s = upb_symtab_new();
upb_string *fds_str = upb_strreadfile(MESSAGE_DESCRIPTOR_FILE);
if(fds_str == NULL) {
fprintf(stderr, "Couldn't read " MESSAGE_DESCRIPTOR_FILE ":"),
upb_printerr(&status);
return false;
}
upb_read_descriptor(s, fds_str, &status);
upb_string_unref(fds_str);
upb_read_descriptorfile(s, MESSAGE_DESCRIPTOR_FILE, &status);
if(!upb_ok(&status)) {
fprintf(stderr, "Error importing " MESSAGE_DESCRIPTOR_FILE ":");
upb_printerr(&status);
upb_status_print(&status, stderr);
return false;
}
def = upb_dyncast_msgdef(upb_symtab_lookup(s, UPB_STRLIT(MESSAGE_NAME)));
def = upb_dyncast_msgdef(upb_symtab_lookup(s, MESSAGE_NAME));
if(!def) {
fprintf(stderr, "Error finding symbol '" UPB_STRFMT "'.\n",
UPB_STRARG(UPB_STRLIT(MESSAGE_NAME)));
fprintf(stderr, "Error finding symbol '%s'.\n", MESSAGE_NAME);
return false;
}
upb_symtab_unref(s);
// Read the message data itself.
input_str = upb_strreadfile(MESSAGE_FILE);
input_str = upb_readfile(MESSAGE_FILE, &input_len);
if(input_str == NULL) {
fprintf(stderr, "Error reading " MESSAGE_FILE "\n");
return false;
@ -72,7 +63,7 @@ static bool initialize()
static void cleanup()
{
upb_string_unref(input_str);
free(input_str);
upb_def_unref(UPB_UPCAST(def));
upb_decoder_uninit(&decoder);
upb_stringsrc_uninit(&stringsrc);
@ -82,14 +73,14 @@ static size_t run(int i)
{
(void)i;
upb_status status = UPB_STATUS_INIT;
upb_stringsrc_reset(&stringsrc, input_str);
upb_decoder_reset(&decoder, upb_stringsrc_bytesrc(&stringsrc), NULL);
upb_stringsrc_reset(&stringsrc, input_str, input_len);
upb_decoder_reset(&decoder, upb_stringsrc_bytesrc(&stringsrc), 0, UINT64_MAX, NULL);
upb_decoder_decode(&decoder, &status);
if(!upb_ok(&status)) goto err;
return upb_string_len(input_str);
return input_len;
err:
fprintf(stderr, "Decode error: ");
upb_printerr(&status);
upb_status_print(&status, stderr);
return 0;
}

@ -7,8 +7,8 @@
#include "upb_glue.h"
#include "upb_msg.h"
static upb_string *input_str;
static upb_msgdef *def;
static size_t len;
static void *msg;
static upb_stringsrc strsrc;
static upb_decoder d;
@ -18,33 +18,22 @@ static bool initialize()
// Initialize upb state, decode descriptor.
upb_status status = UPB_STATUS_INIT;
upb_symtab *s = upb_symtab_new();
upb_string *fds_str = upb_strreadfile(MESSAGE_DESCRIPTOR_FILE);
if(fds_str == NULL) {
fprintf(stderr, "Couldn't read " MESSAGE_DESCRIPTOR_FILE ":"),
upb_printerr(&status);
return false;
}
upb_read_descriptor(s, fds_str, &status);
upb_string_unref(fds_str);
upb_read_descriptorfile(s, MESSAGE_DESCRIPTOR_FILE, &status);
if(!upb_ok(&status)) {
fprintf(stderr, "Error importing " MESSAGE_DESCRIPTOR_FILE ":");
upb_printerr(&status);
upb_status_print(&status, stderr);
return false;
}
def = upb_dyncast_msgdef(upb_symtab_lookup(s, UPB_STRLIT(MESSAGE_NAME)));
def = upb_dyncast_msgdef(upb_symtab_lookup(s, MESSAGE_NAME));
if(!def) {
fprintf(stderr, "Error finding symbol '" UPB_STRFMT "'.\n",
UPB_STRARG(UPB_STRLIT(MESSAGE_NAME)));
fprintf(stderr, "Error finding symbol '%s'.\n", MESSAGE_NAME);
return false;
}
upb_symtab_unref(s);
// Read the message data itself.
input_str = upb_strreadfile(MESSAGE_FILE);
if(input_str == NULL) {
char *str = upb_readfile(MESSAGE_FILE, &len);
if(str == NULL) {
fprintf(stderr, "Error reading " MESSAGE_FILE "\n");
return false;
}
@ -52,25 +41,17 @@ static bool initialize()
msg = upb_stdmsg_new(def);
upb_stringsrc_init(&strsrc);
upb_stringsrc_reset(&strsrc, str, len);
upb_decoder_initformsgdef(&d, def);
if (!BYREF) {
// Pretend the input string is stack-allocated, which will force its data
// to be copied instead of referenced. There is no good reason to do this,
// except to benchmark against proto2 more fairly, which in its open-source
// release does not support referencing the input string.
input_str->refcount.v = _UPB_STRING_REFCOUNT_STACK;
// TODO: use byref/byval accessors.
}
return true;
}
static void cleanup()
{
if (!BYREF) {
// Undo our fabrication from before.
input_str->refcount.v = 1;
}
upb_string_unref(input_str);
upb_stdmsg_free(msg, def);
upb_def_unref(UPB_UPCAST(def));
upb_stringsrc_uninit(&strsrc);
@ -82,14 +63,13 @@ static size_t run(int i)
(void)i;
upb_status status = UPB_STATUS_INIT;
upb_msg_clear(msg, def);
upb_stringsrc_reset(&strsrc, input_str);
upb_decoder_reset(&d, upb_stringsrc_bytesrc(&strsrc), msg);
upb_decoder_reset(&d, upb_stringsrc_bytesrc(&strsrc), 0, UINT64_MAX, msg);
upb_decoder_decode(&d, &status);
if(!upb_ok(&status)) goto err;
return upb_string_len(input_str);
return len;
err:
fprintf(stderr, "Decode error: ");
upb_printerr(&status);
upb_status_print(&status, stderr);
return 0;
}

@ -5,19 +5,21 @@
* Author: Josh Haberman <jhaberman@gmail.com>
*/
#include <errno.h>
#include <stdarg.h>
#include <stddef.h>
#include <stdlib.h>
#include <string.h>
#include "descriptor_const.h"
#include "upb.h"
#include "upb_string.h"
#include "upb_bytestream.h"
#define alignof(t) offsetof(struct { char c; t x; }, x)
#define TYPE_INFO(wire_type, ctype, inmemory_type) \
{alignof(ctype), sizeof(ctype), wire_type, UPB_TYPE(inmemory_type), #ctype},
const upb_type_info upb_types[] = {
{0, 0, 0, 0, ""}, // There is no type 0.
TYPE_INFO(UPB_WIRE_TYPE_END_GROUP, void*, MESSAGE) // ENDGROUP (fake)
TYPE_INFO(UPB_WIRE_TYPE_64BIT, double, DOUBLE) // DOUBLE
TYPE_INFO(UPB_WIRE_TYPE_32BIT, float, FLOAT) // FLOAT
TYPE_INFO(UPB_WIRE_TYPE_VARINT, int64_t, INT64) // INT64
@ -42,39 +44,79 @@ const upb_type_info upb_types[] = {
#ifdef NDEBUG
upb_value UPB_NO_VALUE = {{0}};
#else
upb_value UPB_NO_VALUE = {{0}, UPB_VALUETYPE_RAW};
upb_value UPB_NO_VALUE = {{0}, -1};
#endif
void upb_seterr(upb_status *status, enum upb_status_code code,
const char *msg, ...) {
status->code = code;
upb_string_recycle(&status->str);
// Initializes "status" to the OK state with no message and no owned buffer.
// Both buffer fields are reset: upb_status_setf() hands &bufsize to
// upb_vrprintf(), which reads it, so it must not be left indeterminate.
void upb_status_init(upb_status *status) {
  status->buf = NULL;
  status->bufsize = 0;
  upb_status_clear(status);
}
// Releases the message buffer owned by "status".  The fields are reset
// afterwards so that the status holds no dangling pointers: "str" points into
// "buf" whenever a message has been set, so both must be cleared together.
// This makes a repeated uninit (or a print after uninit) harmless.
void upb_status_uninit(upb_status *status) {
  free(status->buf);
  status->buf = NULL;
  status->str = NULL;
  status->bufsize = 0;
}
void upb_status_setf(upb_status *s, enum upb_status_code code,
const char *msg, ...) {
s->code = code;
va_list args;
va_start(args, msg);
upb_string_vprintf(status->str, msg, args);
upb_vrprintf(&s->buf, &s->bufsize, 0, msg, args);
va_end(args);
s->str = s->buf;
}
void upb_copyerr(upb_status *to, upb_status *from)
{
void upb_status_copy(upb_status *to, upb_status *from) {
to->code = from->code;
if(from->str) to->str = upb_string_getref(from->str);
if (from->str) {
if (to->bufsize < from->bufsize) {
to->bufsize = from->bufsize;
to->buf = realloc(to->buf, to->bufsize);
to->str = to->buf;
}
memcpy(to->str, from->str, from->bufsize);
} else {
to->str = NULL;
}
}
void upb_clearerr(upb_status *status) {
void upb_status_clear(upb_status *status) {
status->code = UPB_OK;
if (status->str) upb_string_recycle(&status->str);
status->str = NULL;
}
void upb_printerr(upb_status *status) {
void upb_status_print(upb_status *status, FILE *f) {
if(status->str) {
fprintf(stderr, "code: %d, msg: " UPB_STRFMT "\n",
status->code, UPB_STRARG(status->str));
fprintf(f, "code: %d, msg: %s\n", status->code, status->str);
} else {
fprintf(stderr, "code: %d, no msg\n", status->code);
fprintf(f, "code: %d, no msg\n", status->code);
}
}
void upb_status_uninit(upb_status *status) {
upb_string_unref(status->str);
void upb_status_fromerrno(upb_status *status) {
upb_status_setf(status, UPB_ERROR, "%s", strerror(errno));
}
// Formats "fmt"/"args" into *buf starting at byte offset "ofs", growing the
// buffer with realloc() only when the formatted text (plus its terminator)
// does not fit in the space already allocated.
//
// *buf may be NULL, in which case a buffer is allocated.  On success *buf and
// *size describe the (possibly reallocated) buffer and the return value is the
// length of the formatted text, not counting the terminator.  On failure
// (formatting error, size overflow, or out of memory) returns -1 and leaves
// *buf and *size untouched, so the caller's buffer is still valid.
int upb_vrprintf(char **buf, size_t *size, size_t ofs,
                 const char *fmt, va_list args) {
  // Space usable past "ofs".  There is none when no buffer exists yet or when
  // "ofs" lies at/after the end of the current allocation.
  size_t avail = (*buf != NULL && *size > ofs) ? *size - ofs : 0;

  // Try once without reallocating.  We have to va_copy because we might have
  // to call vsnprintf again.
  va_list args_copy;
  va_copy(args_copy, args);
  int true_len = vsnprintf(avail ? *buf + ofs : NULL, avail, fmt, args_copy);
  va_end(args_copy);
  if (true_len < 0) return -1;  // vsnprintf reported an error.

  // vsnprintf will not write the entire string unless you give it space to
  // store the terminator also.
  size_t needed = (size_t)true_len + 1;
  if (needed > avail) {
    // Some characters were truncated: grow the buffer and print again.
    if (ofs > SIZE_MAX - needed) return -1;
    size_t required = ofs + needed;
    size_t newsize = *size;
    while (newsize < required) {
      if (newsize > SIZE_MAX / 2) {
        // Doubling would overflow; fall back to the exact size.
        newsize = required;
        break;
      }
      newsize = (newsize * 2 > 2) ? newsize * 2 : 2;
    }
    char *newbuf = realloc(*buf, newsize);
    if (!newbuf) return -1;
    // Commit the new buffer and size together, only after realloc succeeded.
    *buf = newbuf;
    *size = newsize;
    vsnprintf(newbuf + ofs, needed, fmt, args);
  }
  return true_len;
}

@ -30,9 +30,7 @@ extern "C" {
#define UPB_MIN(x, y) ((x) < (y) ? (x) : (y))
#define UPB_INDEX(base, i, m) (void*)((char*)(base) + ((i)*(m)))
INLINE void nop_printf(const char *fmt, ...) {
(void)fmt;
}
INLINE void nop_printf(const char *fmt, ...) { (void)fmt; }
#ifdef NDEBUG
#define DEBUGPRINTF nop_printf
@ -45,7 +43,6 @@ INLINE size_t upb_align_up(size_t val, size_t align) {
return val % align == 0 ? val : val + align - (val % align);
}
// The maximum that any submessages can be nested. Matches proto2's limit.
// At the moment this specifies the size of several statically-sized arrays
// and therefore setting it high will cause more memory to be used. Will
@ -122,31 +119,16 @@ typedef struct {
extern const upb_type_info upb_types[];
/* Polymorphic values of .proto types *****************************************/
/* upb_value ******************************************************************/
struct _upb_string;
typedef struct _upb_string upb_string;
struct _upb_array;
typedef struct _upb_array upb_array;
struct _upb_msg;
typedef struct _upb_msg upb_msg;
struct _upb_bytesrc;
typedef struct _upb_bytesrc upb_bytesrc;
struct _upb_strref;
struct _upb_fielddef;
typedef struct _upb_fielddef upb_fielddef;
typedef int32_t upb_strlen_t;
#define UPB_STRLEN_MAX INT32_MAX
// The type of a upb_value. This is like a upb_fieldtype_t, but adds the
// constant UPB_VALUETYPE_ARRAY to represent an array.
typedef uint8_t upb_valuetype_t;
#define UPB_TYPE_ENDGROUP 19 // Need to increase if more real types are added!
#define UPB_VALUETYPE_ARRAY 32
#define UPB_VALUETYPE_BYTESRC 32
#define UPB_VALUETYPE_RAW 33
#define UPB_VALUETYPE_FIELDDEF 34
#define UPB_VALUETYPE_PTR 35
// Special constants for the upb_value.type field. These must not conflict
// with any members of FieldDescriptorProto.Type.
#define UPB_TYPE_ENDGROUP 0
#define UPB_VALUETYPE_FIELDDEF 32
#define UPB_VALUETYPE_PTR 33
// A single .proto value. The owner must have an out-of-band way of knowing
// the type, so that it knows which union member to use.
@ -159,19 +141,15 @@ typedef struct {
int64_t int64;
uint32_t uint32;
bool _bool;
upb_string *str;
upb_bytesrc *bytesrc;
upb_msg *msg;
upb_array *arr;
upb_atomic_t *refcount;
upb_fielddef *fielddef;
struct _upb_strref *strref;
struct _upb_fielddef *fielddef;
void *_void;
} val;
#ifndef NDEBUG
// In debug mode we carry the value type around also so we can check accesses
// to be sure the right member is being read.
#ifndef NDEBUG
upb_valuetype_t type;
char type;
#endif
} upb_value;
@ -183,7 +161,7 @@ typedef struct {
#define UPB_VALUE_ACCESSORS(name, membername, ctype, proto_type) \
INLINE ctype upb_value_get ## name(upb_value val) { \
assert(val.type == proto_type || val.type == UPB_VALUETYPE_RAW); \
assert(val.type == proto_type); \
return val.val.membername; \
} \
INLINE void upb_value_set ## name(upb_value *val, ctype cval) { \
@ -197,18 +175,14 @@ UPB_VALUE_ACCESSORS(int64, int64, int64_t, UPB_TYPE(INT64));
UPB_VALUE_ACCESSORS(uint32, uint32, uint32_t, UPB_TYPE(UINT32));
UPB_VALUE_ACCESSORS(uint64, uint64, uint64_t, UPB_TYPE(UINT64));
UPB_VALUE_ACCESSORS(bool, _bool, bool, UPB_TYPE(BOOL));
UPB_VALUE_ACCESSORS(str, str, upb_string*, UPB_TYPE(STRING)); // Marked for destruction.
UPB_VALUE_ACCESSORS(fielddef, fielddef, upb_fielddef*, UPB_VALUETYPE_FIELDDEF);
UPB_VALUE_ACCESSORS(strref, strref, struct _upb_strref*, UPB_TYPE(STRING));
UPB_VALUE_ACCESSORS(fielddef, fielddef, struct _upb_fielddef*, UPB_VALUETYPE_FIELDDEF);
UPB_VALUE_ACCESSORS(ptr, _void, void*, UPB_VALUETYPE_PTR);
extern upb_value UPB_NO_VALUE;
INLINE upb_atomic_t *upb_value_getrefcount(upb_value val) {
assert(val.type == UPB_TYPE(MESSAGE) ||
val.type == UPB_TYPE(STRING) ||
val.type == UPB_VALUETYPE_ARRAY);
return val.val.refcount;
}
/* upb_status *****************************************************************/
// Status codes used as a return value. Codes >0 are not fatal and can be
// resumed.
@ -224,42 +198,38 @@ enum upb_status_code {
// An unrecoverable error occurred.
UPB_ERROR = -1,
// A recoverable error occurred (for example, data of the wrong type was
// encountered which we can skip over).
// UPB_STATUS_RECOVERABLE_ERROR = -2
};
// TODO: consider adding error space and code, to let ie. errno be stored
// as a proper code, or application-specific error codes.
struct _upb_status {
typedef struct {
char code;
upb_string *str;
};
typedef struct _upb_status upb_status;
#define UPB_STATUS_INIT {UPB_OK, NULL}
#define UPB_ERRORMSG_MAXLEN 256
char *str; // NULL when no message is present. NULL-terminated.
char *buf; // Owned by the status.
size_t bufsize;
} upb_status;
INLINE bool upb_ok(upb_status *status) {
return status->code == UPB_OK;
}
INLINE void upb_status_init(upb_status *status) {
status->code = UPB_OK;
status->str = NULL;
}
#define UPB_STATUS_INIT {UPB_OK, NULL, NULL, 0}
void upb_status_init(upb_status *status);
void upb_status_uninit(upb_status *status);
// Caller owns a ref on the returned string.
upb_string *upb_status_tostring(upb_status *status);
void upb_printerr(upb_status *status);
void upb_clearerr(upb_status *status);
void upb_seterr(upb_status *status, enum upb_status_code code, const char *msg,
...);
void upb_copyerr(upb_status *to, upb_status *from);
INLINE bool upb_ok(upb_status *status) { return status->code == UPB_OK; }
INLINE bool upb_iseof(upb_status *status) { return status->code == UPB_EOF; }
void upb_status_fromerrno(upb_status *status);
void upb_status_print(upb_status *status, FILE *f);
void upb_status_clear(upb_status *status);
void upb_status_setf(upb_status *status, enum upb_status_code code,
const char *fmt, ...);
void upb_status_copy(upb_status *to, upb_status *from);
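// Like vasprintf, but uses *buf (which can be NULL) as a starting point and
// reallocates it only if the new value will not fit.  Output is written
// starting at byte offset "ofs" within the buffer.  "size" is updated to
// reflect the allocated size of the buffer.  Returns the length of the
// formatted text (not counting the NULL terminator), or -1 on failure such as
// a memory alloc failure.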
int upb_vrprintf(char **buf, size_t *size, size_t ofs,
const char *fmt, va_list args);
#ifdef __cplusplus
} /* extern "C" */

@ -1,120 +1,195 @@
/*
* upb - a minimalist implementation of protocol buffers.
*
* Copyright (c) 2010-2011 Google Inc. See LICENSE for details.
* Copyright (c) 2011 Google Inc. See LICENSE for details.
* Author: Josh Haberman <jhaberman@gmail.com>
*
* Defines the interfaces upb_bytesrc and upb_bytesink, which are abstractions
* of read()/write() with useful buffering/sharing semantics.
* This file contains upb_bytesrc and upb_bytesink, which are abstractions of
* stdio (fread()/fwrite()/etc) that provide useful buffering/sharing
* semantics. They are virtual base classes so concrete implementations
* can get the data from a fd, a string, a cord, etc.
*
* Byte streams are NOT thread-safe! (Like f{read,write}_unlocked())
*/
#ifndef UPB_BYTESTREAM_H
#define UPB_BYTESTREAM_H
#include <stdarg.h>
#include <stdlib.h>
#include <string.h>
#include "upb.h"
#ifdef __cplusplus
extern "C" {
#endif
/* upb_bytesrc ****************************************************************/
// upb_bytesrc is a pull interface for streams of bytes, basically an
// abstraction of read()/fread(), but it avoids copies where possible.
/* upb_bytesrc ****************************************************************/
typedef upb_strlen_t (*upb_bytesrc_read_fptr)(
upb_bytesrc *src, void *buf, upb_strlen_t count, upb_status *status);
typedef bool (*upb_bytesrc_getstr_fptr)(
upb_bytesrc *src, upb_string *str, upb_status *status);
// A upb_bytesrc allows the consumer of a stream of bytes to obtain buffers as
// they become available, and to preserve some trailing amount of data.
typedef size_t upb_bytesrc_fetch_func(void*, uint64_t, upb_status*);
typedef void upb_bytesrc_read_func(void*, uint64_t, size_t, char*);
typedef const char *upb_bytesrc_getptr_func(void*, uint64_t, size_t*);
typedef void upb_bytesrc_refregion_func(void*, uint64_t, size_t);
typedef void upb_bytesrc_ref_func(void*);
typedef struct _upb_bytesrc_vtbl {
upb_bytesrc_fetch_func *fetch;
upb_bytesrc_read_func *read;
upb_bytesrc_getptr_func *getptr;
upb_bytesrc_refregion_func *refregion;
upb_bytesrc_refregion_func *unrefregion;
upb_bytesrc_ref_func *ref;
upb_bytesrc_ref_func *unref;
} upb_bytesrc_vtbl;
typedef struct {
upb_bytesrc_read_fptr read;
upb_bytesrc_getstr_fptr getstr;
} upb_bytesrc_vtbl;
upb_bytesrc_vtbl *vtbl;
} upb_bytesrc;
struct _upb_bytesrc {
upb_bytesrc_vtbl *vtbl;
};
INLINE void upb_bytesrc_init(upb_bytesrc *src, upb_bytesrc_vtbl *vtbl) {
src->vtbl = vtbl;
}
INLINE void upb_bytesrc_init(upb_bytesrc *s, upb_bytesrc_vtbl *vtbl) {
s->vtbl = vtbl;
// Fetches at least minlen bytes starting at ofs, returning the actual number
// of bytes fetched (or 0 on error: see "s" for details). Gives caller a ref
// on the fetched region. It is safe to re-fetch existing regions but only if
// they are ref'd. "ofs" may not be greater than the end of the region that
// was previously fetched.
INLINE size_t upb_bytesrc_fetch(upb_bytesrc *src, uint64_t ofs, upb_status *s) {
return src->vtbl->fetch(src, ofs, s);
}
// Reads up to "count" bytes into "buf", returning the total number of bytes
// read. If 0, indicates error and puts details in "status".
INLINE upb_strlen_t upb_bytesrc_read(upb_bytesrc *src, void *buf,
upb_strlen_t count, upb_status *status) {
return src->vtbl->read(src, buf, count, status);
// Copies "len" bytes of data from offset src_ofs to "dst", which must be at
// least "len" bytes long. The caller must own a ref on the given region.
INLINE void upb_bytesrc_read(upb_bytesrc *src, uint64_t src_ofs, size_t len,
char *dst) {
src->vtbl->read(src, src_ofs, len, dst);
}
// Like upb_bytesrc_read(), but modifies "str" in-place. Caller must ensure
// that "str" is created or just recycled. Returns "false" if no data was
// returned, either due to error or EOF (check status for details).
// Returns a pointer to the bytesrc's internal buffer, returning how much data
// was actually returned (which may be less than "len" if the given region is
// not contiguous). The caller must own refs on the entire region from [ofs,
// ofs+len]. The returned buffer is valid for as long as the region remains
// ref'd.
//
// In comparison to upb_bytesrc_read(), this call can possibly alias existing
// string data (which avoids a copy). On the other hand, if the data was *not*
// already in an existing string, this copies it into a upb_string, and if the
// data needs to be put in a specific range of memory (because eg. you need to
// put it into a different kind of string object) then upb_bytesrc_get() could
// save you a copy.
INLINE bool upb_bytesrc_getstr(upb_bytesrc *src, upb_string *str,
upb_status *status) {
return src->vtbl->getstr(src, str, status);
// TODO: is "len" really required here?
INLINE const char *upb_bytesrc_getptr(upb_bytesrc *src, uint64_t ofs,
size_t *len) {
return src->vtbl->getptr(src, ofs, len);
}
// Gives the caller a ref on the given region. The caller must know that the
// given region is already ref'd.
INLINE void upb_bytesrc_refregion(upb_bytesrc *src, uint64_t ofs, size_t len) {
src->vtbl->refregion(src, ofs, len);
}
// Releases a ref on the given region, which the caller must have previously
// ref'd.
INLINE void upb_bytesrc_unrefregion(upb_bytesrc *src, uint64_t ofs, size_t len) {
src->vtbl->unrefregion(src, ofs, len);
}
// Takes a ref on the bytesrc itself if it provides a "ref" operation.
// Returns true when a ref was taken and false when this bytesrc is not
// ref-able (its vtable has no "ref" entry).
INLINE bool upb_bytesrc_tryref(upb_bytesrc *src) {
  upb_bytesrc_ref_func *take_ref = src->vtbl->ref;
  if (!take_ref) return false;
  take_ref(src);
  return true;
}
// Unref's the bytesrc itself. May only be called when upb_bytesrc_tryref()
// has previously returned true.
INLINE void upb_bytesrc_unref(upb_bytesrc *src) {
assert(src->vtbl->unref);
src->vtbl->unref(src);
}
/* upb_strref *****************************************************************/
// The structure we pass for a string.
typedef struct _upb_strref {
// Pointer to the string data. NULL if the string spans multiple input
// buffers (in which case upb_bytesrc_getptr() must be called to obtain
// the actual pointers).
const char *ptr;
// Bytesrc from which this string data comes. This is only guaranteed to be
// alive from inside the callback; however if the handler knows more about
// its type and how to prolong its life, it may do so.
upb_bytesrc *bytesrc;
// Offset in the bytesrc that represents the beginning of this string.
uint32_t stream_offset;
// Length of the string.
uint32_t len;
// Possibly add optional members here like start_line, start_column, etc.
} upb_strref;
// Copies the contents of the strref into a newly-allocated, NULL-terminated
// string obtained from malloc().  Returns NULL if the allocation fails.
INLINE char *upb_strref_dup(struct _upb_strref *r) {
  // Widen before adding the terminator byte so a maximal 32-bit length cannot
  // wrap the size computation around to zero.
  char *ret = (char*)malloc((size_t)r->len + 1);
  if (ret == NULL) return NULL;
  upb_bytesrc_read(r->bytesrc, r->stream_offset, r->len, ret);
  ret[r->len] = '\0';
  return ret;
}
/* upb_bytesink ***************************************************************/
struct _upb_bytesink;
typedef struct _upb_bytesink upb_bytesink;
typedef upb_strlen_t (*upb_bytesink_putstr_fptr)(
upb_bytesink *bytesink, upb_string *str, upb_status *status);
typedef upb_strlen_t (*upb_bytesink_vprintf_fptr)(
upb_bytesink *bytesink, upb_status *status, const char *fmt, va_list args);
typedef bool upb_bytesink_write_func(void*, const char*, size_t, upb_status*);
typedef int32_t upb_bytesink_vprintf_func(
void*, upb_status*, const char *fmt, va_list args);
typedef struct {
upb_bytesink_putstr_fptr putstr;
upb_bytesink_vprintf_fptr vprintf;
upb_bytesink_write_func *write;
upb_bytesink_vprintf_func *vprintf;
} upb_bytesink_vtbl;
struct _upb_bytesink {
typedef struct {
upb_bytesink_vtbl *vtbl;
};
} upb_bytesink;
INLINE void upb_bytesink_init(upb_bytesink *s, upb_bytesink_vtbl *vtbl) {
s->vtbl = vtbl;
INLINE void upb_bytesink_init(upb_bytesink *sink, upb_bytesink_vtbl *vtbl) {
sink->vtbl = vtbl;
}
INLINE bool upb_bytesink_write(upb_bytesink *sink, const char *buf, size_t len,
upb_status *s) {
return sink->vtbl->write(sink, buf, len, s);
}
// TODO: Figure out how buffering should be handled. Should the caller buffer
// data and only call these functions when a buffer is full? Seems most
// efficient, but then buffering has to be configured in the caller, which
// could be anything, which makes it hard to have a standard interface for
// controlling buffering.
//
// The downside of having the bytesink buffer is efficiency: the caller is
// making more (virtual) function calls, and the caller can't arrange to have
// a big contiguous buffer. The bytesink can do this, but will have to copy
// to make the data contiguous.
// Returns the number of bytes written.
INLINE upb_strlen_t upb_bytesink_printf(upb_bytesink *sink, upb_status *status,
const char *fmt, ...) {
INLINE bool upb_bytesink_writestr(upb_bytesink *sink, const char *str,
upb_status *s) {
return upb_bytesink_write(sink, str, strlen(str), s);
}
// Returns the number of bytes written or -1 on error.
INLINE int32_t upb_bytesink_printf(upb_bytesink *sink, upb_status *status,
const char *fmt, ...) {
va_list args;
va_start(args, fmt);
upb_strlen_t ret = sink->vtbl->vprintf(sink, status, fmt, args);
uint32_t ret = sink->vtbl->vprintf(sink, status, fmt, args);
va_end(args);
return ret;
}
// Puts the given string, returning true if the operation was successful, otherwise
// check "status" for details. Ownership of the string is *not* passed; if
// the callee wants a reference he must call upb_string_getref() on it.
INLINE upb_strlen_t upb_bytesink_putstr(upb_bytesink *sink, upb_string *str,
upb_status *status) {
return sink->vtbl->putstr(sink, str, status);
}
// OPT: add getappendbuf()
// OPT: add writefrombytesrc()
// TODO: add flush()
/* upb_cbuf *******************************************************************/
// A circular buffer implementation for bytesrcs that do internal buffering.
#ifdef __cplusplus
} /* extern "C" */

@ -8,6 +8,7 @@
#include <inttypes.h>
#include <stddef.h>
#include <stdlib.h>
#include "bswap.h"
#include "upb_bytestream.h"
#include "upb_decoder.h"
#include "upb_varint.h"
@ -38,83 +39,97 @@ static void upb_decoder_exit2(void *_d) {
upb_decoder *d = _d;
upb_decoder_exit(d);
}
// Records "msg" as a UPB_ERROR on the decoder's status and then leaves the
// decoder via upb_decoder_exit().
static void upb_decoder_abort(upb_decoder *d, const char *msg) {
  // "msg" is passed as an argument, never as the format string itself, so a
  // '%' inside the message cannot be interpreted as a conversion.
  upb_status_setf(d->status, UPB_ERROR, "%s", msg);
  upb_decoder_exit(d);
}
/* Decoding/Buffering of wire types *******************************************/
#define UPB_MAX_VARINT_ENCODED_SIZE 10
static void upb_decoder_advance(upb_decoder *d, size_t len) { d->ptr += len; }
static size_t upb_decoder_bufleft(upb_decoder *d) { return d->end - d->ptr; }
static void upb_decoder_advance(upb_decoder *d, size_t len) {
assert((size_t)(d->end - d->ptr) >= len);
d->ptr += len;
}
size_t upb_decoder_offset(upb_decoder *d) {
size_t offset = d->buf_stream_offset;
if (d->buf) offset += (d->ptr - d->buf);
size_t offset = d->bufstart_ofs;
if (d->ptr) offset += (d->ptr - d->buf);
return offset;
}
static void upb_decoder_setmsgend(upb_decoder *d) {
uint32_t end = d->dispatcher.top->end_offset;
d->submsg_end = (end == UPB_NONDELIMITED) ? (void*)UINTPTR_MAX : d->buf + end;
upb_dispatcher_frame *f = d->dispatcher.top;
size_t delimlen = f->end_ofs - d->bufstart_ofs;
size_t buflen = d->end - d->buf;
if (f->end_ofs != UINT64_MAX && delimlen <= buflen) {
d->delim_end = (uintptr_t)(d->buf + delimlen);
} else {
// Buffers must not run up against the end of memory.
assert((uintptr_t)d->end < UINTPTR_MAX);
d->delim_end = UINTPTR_MAX;
}
}
// Pulls the next buffer from the bytesrc. Should be called only when the
// current buffer is completely empty.
static void upb_pullbuf(upb_decoder *d, bool need) {
static bool upb_trypullbuf(upb_decoder *d) {
assert(upb_decoder_bufleft(d) == 0);
int32_t last_buf_len = d->buf ? upb_string_len(d->bufstr) : -1;
upb_string_recycle(&d->bufstr);
if (!upb_bytesrc_getstr(d->bytesrc, d->bufstr, d->status)) {
d->buf = NULL;
d->end = NULL;
if (need) upb_seterr(d->status, UPB_ERROR, "Unexpected EOF.");
upb_decoder_exit(d);
}
if (last_buf_len != -1) {
d->buf_stream_offset += last_buf_len;
for (upb_dispatcher_frame *f = d->dispatcher.stack; f <= d->dispatcher.top; ++f)
if (f->end_offset != UPB_NONDELIMITED)
f->end_offset -= last_buf_len;
if (d->bufend_ofs == d->refend_ofs) {
d->refend_ofs += upb_bytesrc_fetch(d->bytesrc, d->refend_ofs, d->status);
if (!upb_ok(d->status)) {
d->ptr = NULL;
d->end = NULL;
if (upb_iseof(d->status)) return false;
upb_decoder_exit(d);
}
}
d->buf = upb_string_getrobuf(d->bufstr);
d->ptr = upb_string_getrobuf(d->bufstr);
d->end = d->buf + upb_string_len(d->bufstr);
d->bufstart_ofs = d->bufend_ofs;
size_t len;
d->buf = upb_bytesrc_getptr(d->bytesrc, d->bufstart_ofs, &len);
assert(len > 0);
d->bufend_ofs = d->bufstart_ofs + len;
d->ptr = d->buf;
d->end = d->buf + len;
#ifdef UPB_USE_JIT_X64
d->jit_end = d->end - 20;
upb_string_recycle(&d->tmp);
upb_string_substr(d->tmp, d->bufstr, 0, 0);
#endif
upb_decoder_setmsgend(d);
return true;
}
// Called only from the slow path, this function copies the next "len" bytes
// from the stream to "data", adjusting the decoder state appropriately.
NOINLINE void upb_getbuf(upb_decoder *d, void *data, size_t bytes, bool need) {
while (1) {
size_t to_copy = UPB_MIN(bytes, upb_decoder_bufleft(d));
memcpy(data, d->ptr, to_copy);
upb_decoder_advance(d, to_copy);
bytes -= to_copy;
if (bytes == 0) return;
upb_pullbuf(d, need);
static void upb_pullbuf(upb_decoder *d) {
if (!upb_trypullbuf(d)) upb_decoder_abort(d, "Unexpected EOF");
}
// Marks everything up to the current read position as completed by saving
// d->ptr in d->completed_ptr.  If the region the decoder holds a ref on still
// starts before the current buffer, the ref is moved forward so that it starts
// at the current buffer instead.
void upb_decoder_commit(upb_decoder *d) {
d->completed_ptr = d->ptr;
if (d->refstart_ofs < d->bufstart_ofs) {
// Drop our ref on the previous buf's region.
// The new ref is taken before the old one is released.
// NOTE(review): upb_bytesrc_refregion()/upb_bytesrc_unrefregion() declare
// their third parameter as a length ("len"), but refend_ofs, which appears to
// be a stream offset, is passed here -- confirm whether a length such as
// (refend_ofs - bufstart_ofs) was intended.
upb_bytesrc_refregion(d->bytesrc, d->bufstart_ofs, d->refend_ofs);
upb_bytesrc_unrefregion(d->bytesrc, d->refstart_ofs, d->refend_ofs);
d->refstart_ofs = d->bufstart_ofs;
}
}
NOINLINE uint64_t upb_decode_varint_slow(upb_decoder *d, bool need) {
NOINLINE uint64_t upb_decode_varint_slow(upb_decoder *d) {
uint8_t byte = 0x80;
uint64_t u64 = 0;
int bitpos;
const char *ptr = d->ptr;
for(bitpos = 0; bitpos < 70 && (byte & 0x80); bitpos += 7) {
upb_getbuf(d, &byte, 1, need);
u64 |= ((uint64_t)byte & 0x7F) << bitpos;
}
if(bitpos == 70 && (byte & 0x80)) {
upb_seterr(d->status, UPB_ERROR, "Unterminated varint.\n");
upb_decoder_exit(d);
if (upb_decoder_bufleft(d) == 0) {
upb_pullbuf(d);
ptr = d->ptr;
}
u64 |= ((uint64_t)(byte = *ptr++) & 0x7F) << bitpos;
}
if(bitpos == 70 && (byte & 0x80)) upb_decoder_abort(d, "Unterminated varint");
return u64;
}
// For tags and delimited lengths, which must be <=32bit and are usually small.
FORCEINLINE uint32_t upb_decode_varint32(upb_decoder *d, bool need) {
FORCEINLINE uint32_t upb_decode_varint32(upb_decoder *d) {
const char *p = d->ptr;
uint32_t ret;
uint64_t u64;
@ -125,11 +140,8 @@ FORCEINLINE uint32_t upb_decode_varint32(upb_decoder *d, bool need) {
ret |= (*p & 0x7f) << 7;
if ((*(p++) & 0x80) == 0) goto done; // likely
slow:
u64 = upb_decode_varint_slow(d, need);
if (u64 > 0xffffffff) {
upb_seterr(d->status, UPB_ERROR, "Unterminated 32-bit varint.\n");
upb_decoder_exit(d);
}
u64 = upb_decode_varint_slow(d);
if (u64 > 0xffffffff) upb_decoder_abort(d, "Unterminated 32-bit varint");
ret = (uint32_t)u64;
p = d->ptr; // Turn the next line into a nop.
done:
@ -137,57 +149,90 @@ done:
return ret;
}
// Attempts to decode a 32-bit varint into *val.  Returns false without
// touching *val when the current buffer is exhausted and no further input is
// available; otherwise stores the decoded value and returns true.
FORCEINLINE bool upb_trydecode_varint32(upb_decoder *d, uint32_t *val) {
  // With an empty buffer there are two normal end-of-message conditions:
  // the buffer ends exactly at the decoder's end offset, or pulling the next
  // buffer yields nothing.  The pull is attempted only if the first condition
  // does not hold.
  if (upb_decoder_bufleft(d) == 0 &&
      (d->bufend_ofs == d->end_ofs || !upb_trypullbuf(d))) {
    return false;
  }
  *val = upb_decode_varint32(d);
  return true;
}
// Decodes one varint and returns its value, choosing a strategy based on how
// many bytes remain in the current buffer: a direct upb_vdecode_fast() call
// when enough bytes are buffered, a padded-copy attempt when only some bytes
// remain, and upb_decode_varint_slow() otherwise.
FORCEINLINE uint64_t upb_decode_varint(upb_decoder *d) {
if (upb_decoder_bufleft(d) >= 16) {
// Common (fast) case.
if (upb_decoder_bufleft(d) >= 10) {
// Fast case.
upb_decoderet r = upb_vdecode_fast(d->ptr);
// A NULL r.p is treated as "no terminating byte found".
if (r.p == NULL) {
upb_seterr(d->status, UPB_ERROR, "Unterminated varint.\n");
upb_decoder_exit(d);
}
if (r.p == NULL) upb_decoder_abort(d, "Unterminated varint");
// Consume exactly the bytes the fast decoder walked over.
upb_decoder_advance(d, r.p - d->ptr);
return r.val;
} else {
return upb_decode_varint_slow(d, true);
} else if (upb_decoder_bufleft(d) > 0) {
// Intermediate case -- worth it?
// Copy what is left into a 10-byte scratch buffer pre-filled with 0x80
// (continuation) bytes, so a varint that is cut off by the end of the
// buffer does not terminate inside the scratch copy and falls through to
// the slow path below instead of being accepted.
char tmpbuf[10];
memset(tmpbuf, 0x80, 10);
memcpy(tmpbuf, d->ptr, upb_decoder_bufleft(d));
upb_decoderet r = upb_vdecode_fast(tmpbuf);
if (r.p != NULL) {
upb_decoder_advance(d, r.p - tmpbuf);
return r.val;
}
}
// Slow case -- varint spans buffer seam.
return upb_decode_varint_slow(d);
}
// Copies `bytes` raw bytes from the decoder's input into the caller's buffer
// and advances the decoder past them. When the current buffer holds at least
// `bytes` bytes this is a single memcpy; otherwise the copy is assembled
// piecewise.
FORCEINLINE void upb_decode_fixed(upb_decoder *d, void *val, size_t bytes) {
FORCEINLINE void upb_decode_fixed(upb_decoder *d, char *buf, size_t bytes) {
if (upb_decoder_bufleft(d) >= bytes) {
// Common (fast) case.
memcpy(val, d->ptr, bytes);
// Fast case.
memcpy(buf, d->ptr, bytes);
upb_decoder_advance(d, bytes);
} else {
upb_getbuf(d, val, bytes, true);
// Slow case.
// NOTE(review): this loop copies everything the buffer currently holds
// without clamping to (bytes - read), and no buffer pull is visible here.
// Unless upb_decoder_advance() refills the buffer, `avail` becomes 0 after
// the first pass and the loop cannot make progress; if it does refill, a
// later pass could copy more than `bytes` in total -- confirm against
// upb_decoder_advance().
size_t read = 0;
while (read < bytes) {
size_t avail = upb_decoder_bufleft(d);
memcpy(buf + read, d->ptr, avail);
upb_decoder_advance(d, avail);
read += avail;
}
}
}
// Reads sizeof(uint32_t) raw bytes via upb_decode_fixed() and returns them as
// a uint32_t; the le32toh() form converts the little-endian wire value to
// host byte order.
FORCEINLINE uint32_t upb_decode_fixed32(upb_decoder *d) {
uint32_t u32;
upb_decode_fixed(d, &u32, sizeof(uint32_t));
return u32;
upb_decode_fixed(d, (char*)&u32, sizeof(uint32_t));
return le32toh(u32);
}
// Reads sizeof(uint64_t) raw bytes via upb_decode_fixed() and returns them as
// a uint64_t; the le64toh() form converts the little-endian wire value to
// host byte order.
FORCEINLINE uint64_t upb_decode_fixed64(upb_decoder *d) {
uint64_t u64;
upb_decode_fixed(d, &u64, sizeof(uint64_t));
return u64;
upb_decode_fixed(d, (char*)&u64, sizeof(uint64_t));
return le64toh(u64);
}
// Decodes a length-delimited value: reads a 32-bit varint length, then
// consumes that many bytes of input. The strref form records the stream
// offset and length in d->strref and returns a pointer to that decoder-owned
// struct, so the result is overwritten by the next call.
//
// Note that the local variable `strlen` shadows the C library function of the
// same name inside this function.
INLINE upb_string *upb_decode_string(upb_decoder *d) {
upb_string_recycle(&d->tmp);
uint32_t strlen = upb_decode_varint32(d, true);
INLINE upb_strref *upb_decode_string(upb_decoder *d) {
uint32_t strlen = upb_decode_varint32(d);
d->strref.stream_offset = upb_decoder_offset(d);
d->strref.len = strlen;
if (upb_decoder_bufleft(d) == 0) upb_pullbuf(d);
if (upb_decoder_bufleft(d) >= strlen) {
// Common (fast) case.
upb_string_substr(d->tmp, d->bufstr, d->ptr - d->buf, strlen);
// Fast case.
// The whole string is in the current buffer: point directly into it.
d->strref.ptr = d->ptr;
upb_decoder_advance(d, strlen);
} else {
upb_getbuf(d, upb_string_getrwbuf(d->tmp, strlen), strlen, true);
// Slow case.
// The string spans buffers: skip over it one buffer at a time. This path
// does not assign d->strref.ptr; only stream_offset and len (set above)
// describe the string.
while (1) {
size_t consume = UPB_MIN(upb_decoder_bufleft(d), strlen);
upb_decoder_advance(d, consume);
strlen -= consume;
if (strlen == 0) break;
upb_pullbuf(d);
}
}
return d->tmp;
return &d->strref;
}
// Starts a submessage for field handlers `f` on the dispatcher, stores `end`
// as the end offset of the frame returned by upb_dispatch_startsubmsg(), and
// then calls upb_decoder_setmsgend() to refresh the decoder's end-of-message
// state.
INLINE void upb_push(upb_decoder *d, upb_fhandlers *f, uint32_t end) {
upb_dispatch_startsubmsg(&d->dispatcher, f)->end_offset = end;
upb_dispatch_startsubmsg(&d->dispatcher, f)->end_ofs = end;
upb_decoder_setmsgend(d);
}
@ -224,7 +269,7 @@ T(DOUBLE, fixed64, double, upb_asdouble)
T(FLOAT, fixed32, float, upb_asfloat)
T(SINT32, varint, int32, upb_zzdec_32)
T(SINT64, varint, int64, upb_zzdec_64)
T(STRING, string, str, upb_string*)
T(STRING, string, strref, upb_strref*)
static void upb_decode_GROUP(upb_decoder *d, upb_fhandlers *f) {
upb_push(d, f, UPB_NONDELIMITED);
@ -235,28 +280,24 @@ static void upb_endgroup(upb_decoder *d, upb_fhandlers *f) {
upb_decoder_setmsgend(d);
}
// Field decoder for length-delimited submessages: reads the 32-bit varint
// length and pushes a frame whose end is that length added to the current
// position within the buffer (d->ptr - d->buf).
//
// NOTE(review): C does not specify which operand of `+` is evaluated first,
// and upb_decode_varint32() presumably moves d->ptr; the computed end offset
// therefore may depend on whether (d->ptr - d->buf) is read before or after
// the length is decoded -- confirm the intended order.
static void upb_decode_MESSAGE(upb_decoder *d, upb_fhandlers *f) {
upb_push(d, f, upb_decode_varint32(d, true) + (d->ptr - d->buf));
upb_push(d, f, upb_decode_varint32(d) + (d->ptr - d->buf));
}
/* The main decoding loop *****************************************************/
// Called when a user callback returns something other than UPB_CONTINUE.
// This should unwind one or more stack frames, skipping the corresponding
// data in the input.
// End-of-delimited-region handling (upb_decoder_checkdelim / upb_delimend).
// While the read position has reached the end of the current delimited
// region, the top dispatcher frame is closed: a sequence frame via
// upb_dispatch_endseq(), any other frame via upb_dispatch_endsubmsg(). After
// each close, upb_decoder_setmsgend() recomputes the end for the new top
// frame. A read position strictly past the region end is reported as
// "Bad submessage end".
static void upb_decoder_checkdelim(upb_decoder *d) {
while ((uintptr_t)d->ptr >= d->delim_end) {
if ((uintptr_t)d->ptr > d->delim_end)
upb_decoder_abort(d, "Bad submessage end");
static void upb_delimend(upb_decoder *d) {
if (d->ptr > d->submsg_end) {
upb_seterr(d->status, UPB_ERROR, "Bad submessage end.");
upb_decoder_exit(d);
}
if (d->dispatcher.top->is_sequence) {
upb_dispatch_endseq(&d->dispatcher);
} else {
upb_dispatch_endsubmsg(&d->dispatcher);
if (d->dispatcher.top->is_sequence) {
upb_dispatch_endseq(&d->dispatcher);
} else {
upb_dispatch_endsubmsg(&d->dispatcher);
}
upb_decoder_setmsgend(d);
}
upb_decoder_setmsgend(d);
}
static void upb_decoder_enterjit(upb_decoder *d) {
@ -273,7 +314,8 @@ static void upb_decoder_enterjit(upb_decoder *d) {
INLINE upb_fhandlers *upb_decode_tag(upb_decoder *d) {
while (1) {
uint32_t tag = upb_decode_varint32(d, false);
uint32_t tag;
if (!upb_trydecode_varint32(d, &tag)) return NULL;
upb_fhandlers *f = upb_dispatcher_lookup(&d->dispatcher, tag);
// There are no explicit "startseq" or "endseq" markers in protobuf
@ -287,8 +329,8 @@ INLINE upb_fhandlers *upb_decode_tag(upb_decoder *d) {
// TODO: support packed.
assert(upb_issubmsgtype(f->type) || upb_isstringtype(f->type) ||
(tag & 0x7) != UPB_WIRE_TYPE_DELIMITED);
uint32_t end = d->dispatcher.top->end_offset;
upb_dispatch_startseq(&d->dispatcher, f)->end_offset = end;
uint32_t end = d->dispatcher.top->end_ofs;
upb_dispatch_startseq(&d->dispatcher, f)->end_ofs = end;
upb_decoder_setmsgend(d);
}
if (f) return f;
@ -299,11 +341,13 @@ INLINE upb_fhandlers *upb_decode_tag(upb_decoder *d) {
case UPB_WIRE_TYPE_32BIT: upb_decoder_advance(d, 4); break;
case UPB_WIRE_TYPE_64BIT: upb_decoder_advance(d, 8); break;
case UPB_WIRE_TYPE_DELIMITED:
upb_decoder_advance(d, upb_decode_varint32(d, true));
break;
upb_decoder_advance(d, upb_decode_varint32(d)); break;
default:
upb_decoder_abort(d, "Invavlid wire type");
}
// TODO: deliver to unknown field callback.
while (d->ptr >= d->submsg_end) upb_delimend(d);
upb_decoder_commit(d);
upb_decoder_checkdelim(d);
}
}
@ -311,11 +355,11 @@ void upb_decoder_onexit(upb_decoder *d) {
if (d->dispatcher.top->is_sequence) upb_dispatch_endseq(&d->dispatcher);
if (d->status->code == UPB_EOF && upb_dispatcher_stackempty(&d->dispatcher)) {
// Normal end-of-file.
upb_clearerr(d->status);
upb_status_clear(d->status);
upb_dispatch_endmsg(&d->dispatcher, d->status);
} else {
if (d->status->code == UPB_EOF)
upb_seterr(d->status, UPB_ERROR, "Input ended mid-submessage.");
upb_status_setf(d->status, UPB_ERROR, "Input ended mid-submessage.");
}
}
@ -325,26 +369,32 @@ void upb_decoder_decode(upb_decoder *d, upb_status *status) {
return;
}
d->status = status;
upb_pullbuf(d, true);
upb_dispatch_startmsg(&d->dispatcher);
while(1) { // Main loop: executed once per tag/field pair.
while (d->ptr >= d->submsg_end) upb_delimend(d);
upb_decoder_checkdelim(d);
upb_decoder_enterjit(d);
// if (!d->dispatcher.top->is_packed)
upb_fhandlers *f = upb_decode_tag(d);
if (!f) upb_decoder_exit2(d);
f->decode(d, f);
upb_decoder_commit(d);
}
}
// Skip callback taking the decoder (as void*) and the top and bottom
// dispatcher frames of the range to skip. The body is currently compiled out
// with #if 0, so the function does nothing; the (void) casts only silence
// unused-parameter warnings.
static void upb_decoder_skip(void *_d, upb_dispatcher_frame *top,
upb_dispatcher_frame *bottom) {
(void)top;
(void)bottom;
(void)_d;
#if 0
// Disabled draft: move the read pointer to the end offset of the bottom
// frame; non-delimited frames (groups) are not handled and abort().
upb_decoder *d = _d;
// TODO
if (bottom->end_offset == UPB_NONDELIMITED) {
// TODO: support skipping groups.
abort();
}
d->ptr = d->buf + bottom->end_offset;
d->ptr = d->buf.ptr + bottom->end_offset;
#endif
}
void upb_decoder_initforhandlers(upb_decoder *d, upb_handlers *handlers) {
@ -354,10 +404,6 @@ void upb_decoder_initforhandlers(upb_decoder *d, upb_handlers *handlers) {
d->jit_code = NULL;
if (d->dispatcher.handlers->should_jit) upb_decoder_makejit(d);
#endif
d->bufstr = NULL;
d->tmp = NULL;
upb_string_recycle(&d->tmp);
// Set function pointers for each field's decode function.
for (int i = 0; i < handlers->msgs_len; i++) {
upb_mhandlers *m = handlers->msgs[i];
@ -396,19 +442,27 @@ void upb_decoder_initformsgdef(upb_decoder *d, upb_msgdef *m) {
upb_handlers_unref(h);
}
// Resets the decoder so that it reads from `bytesrc` and delivers to
// `closure`. The dispatcher is reset and its initial frame is given the
// top-level end offset. In the start_ofs/end_ofs form, all ref'd-region and
// buffer offsets start at start_ofs, and the buffer pointers are cleared so
// that no data is considered buffered yet.
void upb_decoder_reset(upb_decoder *d, upb_bytesrc *bytesrc, void *closure) {
upb_dispatcher_reset(&d->dispatcher, closure)->end_offset = UPB_NONDELIMITED;
void upb_decoder_reset(upb_decoder *d, upb_bytesrc *bytesrc, uint64_t start_ofs,
uint64_t end_ofs, void *closure) {
upb_dispatcher_frame *f = upb_dispatcher_reset(&d->dispatcher, closure);
f->end_ofs = end_ofs;
d->end_ofs = end_ofs;
// Nothing is ref'd or buffered yet: both regions are empty at start_ofs.
d->refstart_ofs = start_ofs;
d->refend_ofs = start_ofs;
d->bufstart_ofs = start_ofs;
d->bufend_ofs = start_ofs;
d->bytesrc = bytesrc;
d->buf = NULL;
d->ptr = NULL;
d->end = NULL; // Force a buffer pull.
d->submsg_end = (void*)0x1; // But don't let end-of-message get triggered.
d->buf_stream_offset = 0;
#ifdef UPB_USE_JIT_X64
d->jit_end = NULL;
#endif
d->delim_end = UINTPTR_MAX; // But don't let end-of-message get triggered.
// The strref handed to string callbacks refers back to the same bytesrc.
d->strref.bytesrc = bytesrc;
}
void upb_decoder_uninit(upb_decoder *d) {
upb_string_unref(d->bufstr);
upb_string_unref(d->tmp);
#ifdef UPB_USE_JIT_X64
if (d->dispatcher.handlers->should_jit) upb_decoder_freejit(d);
#endif

@ -30,44 +30,33 @@ extern "C" {
struct dasm_State;
struct _upb_decoder {
// Bytesrc from which we pull serialized data.
upb_bytesrc *bytesrc;
typedef struct _upb_decoder {
upb_bytesrc *bytesrc; // Source of our serialized data.
upb_dispatcher dispatcher; // Dispatcher to which we push parsed data.
upb_status *status; // Where we will store any errors that occur.
upb_strref strref; // For passing string data to callbacks.
// String to hold our input buffer; is only active if d->buf != NULL.
upb_string *bufstr;
// Offsets for the region we currently have ref'd.
uint64_t refstart_ofs, refend_ofs;
// Temporary string for passing string data to callbacks.
upb_string *tmp;
// Current buffer and its stream offset.
const char *buf, *ptr, *end;
uint64_t bufstart_ofs, bufend_ofs;
// The offset within the overall stream represented by the *beginning* of buf.
size_t buf_stream_offset;
// Stream offset for the end of the top-level message, if any.
uint64_t end_ofs;
// Pointer to the beginning of our current data buffer, or NULL if none.
const char *buf;
// Buf offset as of which we've delivered callbacks; needed for rollback on
// UPB_TRYAGAIN (or in the future, UPB_SUSPEND).
const char *completed_ptr;
// End of this buffer, relative to *ptr.
const char *end;
const char *jit_end;
// End of the delimited region, relative to ptr, or UINTPTR_MAX if not in
// this buf.
uintptr_t delim_end;
// Members which may also be written by the JIT:
// Our current position in the data buffer.
const char *ptr;
// End of this submessage, relative to *ptr.
const char *submsg_end;
// MIN(end, submsg_end)
const char *effective_end;
upb_fhandlers *f;
// Where we will store any errors that occur.
upb_status *status;
// Dispatcher to which we push parsed data.
upb_dispatcher dispatcher;
#ifdef UPB_USE_JIT_X64
// For JIT, which doesn't do bounds checks in the middle of parsing a field.
const char *jit_end, *effective_end; // == MIN(jit_end, submsg_end)
// JIT-generated machine code (else NULL).
char *jit_code;
@ -75,21 +64,10 @@ struct _upb_decoder {
char *debug_info;
struct dasm_State *dynasm;
sigjmp_buf exitjmp;
};
// For use in the upb_dispatcher's stack.
typedef struct {
// Relative to the beginning of this buffer.
// For groups and the top-level: UINT32_MAX.
uint32_t end_offset;
bool is_packed; // == !upb_issubmsg(f) && end_offset != UPB_REPATEDEND
} upb_decoder_srcdata;
#endif
// A upb_decoder decodes the binary protocol buffer format, writing the data it
// decodes to a upb_sink.
struct _upb_decoder;
typedef struct _upb_decoder upb_decoder;
sigjmp_buf exitjmp;
} upb_decoder;
// Initializes/uninitializes a decoder for calling into the given handlers
// (or to write into the given msgdef, given its accessors). Takes a ref
@ -107,7 +85,10 @@ void upb_decoder_uninit(upb_decoder *d);
// state where it has not seen any data, and expects the next data to be from
// the beginning of a new protobuf. Parsers must be reset before they can be
// used. A decoder can be reset multiple times.
void upb_decoder_reset(upb_decoder *d, upb_bytesrc *bytesrc, void *closure);
//
// Pass UINT64_MAX for end_ofs to indicate a non-delimited top-level message.
void upb_decoder_reset(upb_decoder *d, upb_bytesrc *src, uint64_t start_ofs,
uint64_t end_ofs, void *closure);
void upb_decoder_decode(upb_decoder *d, upb_status *status);

@ -120,7 +120,7 @@ void upb_reg_jit_gdb(upb_decoder *d) {
|.define PTR, rbx
|.define CLOSURE, r12
|.type FRAME, upb_dispatcher_frame, r13
|.type STRING, upb_string, r14
|.type STRREF, upb_strref, r14
|.type DECODER, upb_decoder, r15
|
|.macro callp, addr
@ -199,7 +199,7 @@ void upb_reg_jit_gdb(upb_decoder *d) {
| jae ->exit_jit // Frame stack overflow.
| mov qword FRAME:rax->f, f
| mov qword FRAME:rax->closure, closure_
| mov dword FRAME:rax->end_offset, end_offset_
| mov dword FRAME:rax->end_ofs, end_offset_
| mov byte FRAME:rax->is_sequence, is_sequence_
| mov CLOSURE, rdx
| mov DECODER->dispatcher.top, rax
@ -217,17 +217,17 @@ void upb_reg_jit_gdb(upb_decoder *d) {
| mov rsi, DECODER->jit_end
|| if (m->is_group) {
| mov64 rax, 0xffffffffffffffff
| mov qword DECODER->submsg_end, rax
| mov qword DECODER->delim_end, rax
| mov DECODER->effective_end, rsi
|| } else {
| // Could store a correctly-biased version in the frame, at the cost of
| // a larger stack.
| mov eax, dword FRAME->end_offset
| mov eax, dword FRAME->end_ofs
| add rax, qword DECODER->buf
| mov DECODER->submsg_end, rax // submsg_end = d->buf + f->end_offset
| mov DECODER->delim_end, rax // delim_end = d->buf + f->end_ofs
| cmp rax, rsi
| jb >8
| mov rax, rsi // effective_end = min(d->submsg_end, d->jit_end)
| mov rax, rsi // effective_end = min(d->delim_end, d->jit_end)
|8:
| mov DECODER->effective_end, rax
|| }
@ -293,7 +293,7 @@ static void upb_decoder_jit_field(upb_decoder *d, uint32_t tag, uint32_t next_ta
} else {
| mov rdx, CLOSURE
}
| mov esi, FRAME->end_offset
| mov esi, FRAME->end_ofs
| pushframe f, rdx, esi, true
}
@ -357,10 +357,14 @@ static void upb_decoder_jit_field(upb_decoder *d, uint32_t tag, uint32_t next_ta
// buf, which sidesteps any security problems. The C path has more
// robust checks.
| decode_varint tag_size
| mov STRING->len, ARG3_32
| mov STRING->ptr, PTR
| mov STRREF->len, ARG3_32
| mov STRREF->ptr, PTR
| mov rax, PTR
| sub rax, DECODER->buf
| add eax, DECODER->bufstart_ofs // = d->ptr - d->buf + d->bufstart_ofs
| mov STRREF->stream_offset, eax
| add PTR, ARG3_64
| mov ARG3_64, STRING
| mov ARG3_64, STRREF
| cmp PTR, DECODER->effective_end
| ja ->exit_jit // Can't deliver, whole string not in buf.
break;
@ -514,7 +518,7 @@ static void upb_decoder_jit_msg(upb_decoder *d, upb_mhandlers *m) {
// This case doesn't exist for groups, because there eob really means
// eob, so that case just exits the jit directly.
|=>m->jit_endofbuf_pclabel:
| cmp PTR, DECODER->submsg_end
| cmp PTR, DECODER->delim_end
| jb ->exit_jit // We are at eob, but not end-of-submsg.
}
@ -550,7 +554,7 @@ static void upb_decoder_jit(upb_decoder *d) {
| push rbx
| mov DECODER, ARG1_64
| mov FRAME, DECODER:ARG1_64->dispatcher.top
| mov STRING, DECODER:ARG1_64->tmp
| lea STRREF, DECODER:ARG1_64->strref
| mov CLOSURE, FRAME->closure
| mov PTR, DECODER->ptr

@ -7,18 +7,11 @@
#include <stdlib.h>
#include <stddef.h>
#include <string.h>
#include "upb_def.h"
#define alignof(t) offsetof(struct { char c; t x; }, x)
/* Scans data[len-1] down to data[1] for the byte c and returns the index of
 * the first match found in that backwards scan.
 *
 * Index 0 is never inspected: when no match is found at an index >= 1 the
 * result is 0, so "found at index 0" and "not found" are indistinguishable.
 * With len == 0 the loop does not run and the converted value of len - 1 is
 * returned. */
static int my_memrchr(char *data, char c, size_t len)
{
  int pos = len - 1;
  for (; pos > 0; --pos) {
    if (data[pos] == c) {
      break;
    }
  }
  return pos;
}
void upb_deflist_init(upb_deflist *l) {
l->size = 8;
l->defs = malloc(l->size * sizeof(void*));
@ -105,7 +98,8 @@ static void upb_def_init(upb_def *def, upb_deftype_t type) {
}
// Releases the resources held by the upb_def base: its fully qualified name.
// The def struct itself is not freed here.
static void upb_def_uninit(upb_def *def) {
upb_string_unref(def->fqname);
//fprintf(stderr, "Freeing def: %p\n", def);
free(def->fqname);
}
@ -120,19 +114,19 @@ typedef struct _upb_unresolveddef {
// The target type name. This may or may not be fully qualified. It is
// tempting to want to use base.fqname for this, but that will be qualified
// which is inappropriate for a name we still have to resolve.
upb_string *name;
char *name;
} upb_unresolveddef;
// Allocates a placeholder def of type UPB_DEF_UNRESOLVED that remembers the
// not-yet-resolved type name `str`.
// Is passed a ref on the string.
// (In the `const char *` form the name is duplicated with strdup(), so the
// new def holds its own copy, released in upb_unresolveddef_free().)
// NOTE(review): the results of malloc() and strdup() are not checked.
static upb_unresolveddef *upb_unresolveddef_new(upb_string *str) {
static upb_unresolveddef *upb_unresolveddef_new(const char *str) {
upb_unresolveddef *def = malloc(sizeof(*def));
upb_def_init(&def->base, UPB_DEF_UNRESOLVED);
def->name = upb_string_getref(str);
def->name = strdup(str);
return def;
}
// Destroys an unresolved def: releases its stored type name, uninitializes
// the upb_def base, then frees the struct itself.
static void upb_unresolveddef_free(struct _upb_unresolveddef *def) {
upb_string_unref(def->name);
free(def->name);
upb_def_uninit(&def->base);
free(def);
}
@ -152,7 +146,7 @@ static void upb_enumdef_free(upb_enumdef *e) {
upb_enum_iter i;
for(i = upb_enum_begin(e); !upb_enum_done(i); i = upb_enum_next(e, i)) {
// Frees the ref taken when the string was parsed.
upb_string_unref(upb_enum_iter_name(i));
free(upb_enum_iter_name(i));
}
upb_strtable_free(&e->ntoi);
upb_inttable_free(&e->iton);
@ -170,12 +164,11 @@ upb_enumdef *upb_enumdef_dup(upb_enumdef *e) {
return new_e;
}
// Adds the (name, num) pair to the enum. Returns false and adds nothing when
// either the number or the name is already present; otherwise records the
// pair in both the name->number (ntoi) and number->name (iton) tables and
// returns true.
bool upb_enumdef_addval(upb_enumdef *e, upb_string *name, int32_t num) {
if (upb_enumdef_iton(e, num) || upb_enumdef_ntoi(e, name, NULL)) return false;
upb_ntoi_ent ntoi_ent = {{name, 0}, num};
upb_iton_ent iton_ent = {0, name};
upb_strtable_insert(&e->ntoi, &ntoi_ent.e);
upb_inttable_insert(&e->iton, num, &iton_ent); // Uses strtable's ref on name
bool upb_enumdef_addval(upb_enumdef *e, char *name, int32_t num) {
if (upb_enumdef_iton(e, num) || upb_enumdef_ntoi(e, name, NULL))
return false;
upb_strtable_insert(&e->ntoi, name, &num);
// NOTE(review): the other upb_inttable_insert() call in this function passes
// a pointer to an entry struct (&iton_ent), while this one passes the
// strdup()'d string itself as the value argument -- confirm this is what
// upb_enumdef_iton() expects to read back. The strdup() result is also
// unchecked.
upb_inttable_insert(&e->iton, num, strdup(name));
return true;
}
@ -193,19 +186,22 @@ upb_enum_iter upb_enum_next(upb_enumdef *e, upb_enum_iter iter) {
return upb_inttable_next(&e->iton, iter);
}
// Looks up the name registered for enum number `num` in the iton table.
// Returns the stored name, or NULL when the number has no entry.
upb_string *upb_enumdef_iton(upb_enumdef *def, int32_t num) {
upb_iton_ent *e =
(upb_iton_ent*)upb_inttable_fastlookup(&def->iton, num, sizeof(*e));
return e ? e->string : NULL;
const char *upb_enumdef_iton(upb_enumdef *def, int32_t num) {
upb_iton_ent *e = upb_inttable_fastlookup(&def->iton, num, sizeof(*e));
return e ? e->str : NULL;
}
// Looks up an enum value by name in the ntoi table; the `ntoil` form takes
// the name as a pointer plus explicit length. Returns false when the name is
// unknown. On success returns true and, if `num` is non-NULL, stores the
// value in *num (so passing NULL performs a pure existence check).
bool upb_enumdef_ntoi(upb_enumdef *def, upb_string *name, int32_t *num) {
upb_ntoi_ent *e = (upb_ntoi_ent*)upb_strtable_lookup(&def->ntoi, name);
bool upb_enumdef_ntoil(upb_enumdef *def, char *name, size_t len, int32_t *num) {
upb_ntoi_ent *e = upb_strtable_lookupl(&def->ntoi, name, len);
if (!e) return false;
if (num) *num = e->value;
return true;
}
// NUL-terminated convenience wrapper around upb_enumdef_ntoil(): measures
// `name` with strlen() and forwards the lookup, returning its result
// unchanged.
bool upb_enumdef_ntoi(upb_enumdef *e, char *name, int32_t *num) {
  const size_t name_len = strlen(name);
  return upb_enumdef_ntoil(e, name, name_len, num);
}
/* upb_fielddef ***************************************************************/
@ -228,9 +224,9 @@ upb_fielddef *upb_fielddef_new() {
// Destroys a fielddef: for string fields releases the default value, then
// releases the field name and finally the struct itself.
static void upb_fielddef_free(upb_fielddef *f) {
if (upb_isstring(f)) {
upb_string_unref(upb_value_getstr(f->defaultval));
free(upb_value_getptr(f->defaultval));
}
upb_string_unref(f->name);
free(f->name);
free(f);
}
@ -270,18 +266,18 @@ static bool upb_fielddef_resolve(upb_fielddef *f, upb_def *def, upb_status *s) {
f->def = def;
if (f->type == UPB_TYPE(ENUM)) {
// Resolve the enum's default from a string to an integer.
upb_string *str = upb_value_getstr(f->defaultval);
char *str = upb_value_getptr(f->defaultval);
assert(str); // Should point to either a real default or the empty string.
upb_enumdef *e = upb_downcast_enumdef(f->def);
int32_t val = 0;
if (str == upb_emptystring()) {
if (str[0] == '\0') {
upb_value_setint32(&f->defaultval, e->defaultval);
} else {
bool success = upb_enumdef_ntoi(e, str, &val);
upb_string_unref(str);
free(str);
if (!success) {
upb_seterr(s, UPB_ERROR, "Default enum value (" UPB_STRFMT ") is not a "
"member of the enum", UPB_STRARG(str));
upb_status_setf(s, UPB_ERROR, "Default enum value (%s) is not a "
"member of the enum", str);
return false;
}
upb_value_setint32(&f->defaultval, val);
@ -295,9 +291,9 @@ void upb_fielddef_setnumber(upb_fielddef *f, int32_t number) {
f->number = number;
}
// Sets the field's name. May only be called while the field does not yet
// belong to a msgdef (asserted). In the `const char *` form the name is
// copied with strdup().
// NOTE(review): a name that was set earlier is overwritten without being
// released, and the strdup() result is unchecked.
void upb_fielddef_setname(upb_fielddef *f, upb_string *name) {
void upb_fielddef_setname(upb_fielddef *f, const char *name) {
assert(f->msgdef == NULL);
f->name = upb_string_getref(name);
f->name = strdup(name);
}
void upb_fielddef_settype(upb_fielddef *f, uint8_t type) {
@ -326,7 +322,7 @@ void upb_fielddef_setaccessor(upb_fielddef *f, struct _upb_accessor_vtbl *vtbl)
f->accessor = vtbl;
}
// Records the name of the message or enum type this field refers to: drops
// the reference to the field's current def and replaces it with a new
// unresolved-def placeholder carrying `name`.
void upb_fielddef_settypename(upb_fielddef *f, upb_string *name) {
void upb_fielddef_settypename(upb_fielddef *f, const char *name) {
upb_def_unref(f->def);
f->def = UPB_UPCAST(upb_unresolveddef_new(name));
}
@ -424,9 +420,8 @@ bool upb_msgdef_addfield(upb_msgdef *m, upb_fielddef *f) {
assert(f->msgdef == NULL);
f->msgdef = m;
upb_itof_ent itof_ent = {0, f};
upb_ntof_ent ntof_ent = {{f->name, 0}, f};
upb_inttable_insert(&m->itof, f->number, &itof_ent);
upb_strtable_insert(&m->ntof, &ntof_ent.e);
upb_strtable_insert(&m->ntof, f->name, &f);
return true;
}
@ -493,7 +488,6 @@ upb_msg_iter upb_msg_next(upb_msgdef *m, upb_msg_iter iter) {
/* upb_symtabtxn **************************************************************/
// Value type stored in the symbol tables (the symtab and a transaction's
// deftab): the def registered under a fully qualified name.
typedef struct {
upb_strtable_entry e;
upb_def *def;
} upb_symtab_ent;
@ -503,16 +497,19 @@ void upb_symtabtxn_init(upb_symtabtxn *t) {
// Tears down a transaction: walks every entry of its def table, releases the
// def each entry holds, then frees the table.
void upb_symtabtxn_uninit(upb_symtabtxn *txn) {
upb_strtable *t = &txn->deftab;
upb_symtab_ent *e;
for(e = upb_strtable_begin(t); e; e = upb_strtable_next(t, &e->e))
upb_def_unref(e->def);
upb_strtable_iter i;
for(upb_strtable_begin(&i, t); !upb_strtable_done(&i); upb_strtable_next(&i)) {
const upb_symtab_ent *e = upb_strtable_iter_value(&i);
// NOTE(review): this calls free() directly on the def where the pointer-based
// loop above calls upb_def_unref(); a bare free() would not release what the
// def itself owns (e.g. its fqname) -- confirm this is intended.
free(e->def);
}
upb_strtable_free(t);
}
// Adds `def` to the transaction's def table, keyed by its fully qualified
// name. Always returns true; no check for an existing entry of the same name
// is made yet (see TODO).
bool upb_symtabtxn_add(upb_symtabtxn *t, upb_def *def) {
// TODO: check if already present.
upb_symtab_ent e = {{def->fqname, 0}, def};
upb_strtable_insert(&t->deftab, &e.e);
upb_symtab_ent e = {def};
//fprintf(stderr, "txn Inserting: %p, ent: %p\n", e.def, &e);
upb_strtable_insert(&t->deftab, def->fqname, &e);
return true;
}
@ -531,59 +528,28 @@ err:
// Given a symbol and the base symbol inside which it is defined, find the
// symbol's definition in t.
//
// Returns NULL for an empty symbol. A symbol that begins with
// UPB_SYMBOL_SEPARATOR is absolute and is looked up once with the leading
// separator dropped. For relative symbols, the string-buffer implementation
// tries base + separator + sym and strips trailing components from base until
// a match is found or base is empty; the `const char *` implementation does
// not support relative symbols and asserts.
static upb_symtab_ent *upb_resolve(upb_strtable *t,
upb_string *base, upb_string *sym) {
if(upb_string_len(sym) == 0) return NULL;
if(upb_string_getrobuf(sym)[0] == UPB_SYMBOL_SEPARATOR) {
const char *base, const char *sym) {
if(strlen(sym) == 0) return NULL;
if(sym[0] == UPB_SYMBOL_SEPARATOR) {
// Symbols starting with '.' are absolute, so we do a single lookup.
// Slice to omit the leading '.'
upb_string *sym_str = upb_strslice(sym, 1, upb_string_len(sym) - 1);
upb_symtab_ent *e = upb_strtable_lookup(t, sym_str);
upb_string_unref(sym_str);
return e;
return upb_strtable_lookup(t, sym + 1);
} else {
// Remove components from base until we find an entry or run out.
// TODO: This branch is totally broken, but currently not used.
upb_string *sym_str = upb_string_new();
int baselen = upb_string_len(base);
upb_symtab_ent *ret = NULL;
while(1) {
// sym_str = base[0...base_len] + UPB_SYMBOL_SEPARATOR + sym
upb_strlen_t len = baselen + upb_string_len(sym) + 1;
char *buf = upb_string_getrwbuf(sym_str, len);
memcpy(buf, upb_string_getrobuf(base), baselen);
buf[baselen] = UPB_SYMBOL_SEPARATOR;
memcpy(buf + baselen + 1, upb_string_getrobuf(sym), upb_string_len(sym));
upb_symtab_ent *e = upb_strtable_lookup(t, sym_str);
if (e) {
ret = e;
break;
} else if(baselen == 0) {
// No more scopes to try.
ret = NULL;
break;
}
baselen = my_memrchr(buf, UPB_SYMBOL_SEPARATOR, baselen);
}
upb_string_unref(sym_str);
return ret;
// Relative lookup is unimplemented here: with assertions enabled this
// aborts, otherwise the symbol is reported as not found.
(void)base;
assert(false);
return NULL;
}
}
// Starts an iteration over the defs held by transaction `t` by beginning an
// iteration over its def table. In the out-parameter form the caller supplies
// the iterator `i` to initialize.
upb_symtabtxn_iter upb_symtabtxn_begin(upb_symtabtxn *t) {
return upb_strtable_begin(&t->deftab);
void upb_symtabtxn_begin(upb_symtabtxn_iter *i, upb_symtabtxn *t) {
upb_strtable_begin(i, &t->deftab);
}
// Remaining transaction-iterator operations, each a thin wrapper over the
// string-table iterator:
//   upb_symtabtxn_next     - advances to the next entry.
//   upb_symtabtxn_done     - true once the iteration has finished.
//   upb_symtabtxn_iter_def - returns the def stored in the current entry.
upb_symtabtxn_iter upb_symtabtxn_next(upb_symtabtxn *t, upb_symtabtxn_iter i) {
return upb_strtable_next(&t->deftab, i);
}
bool upb_symtabtxn_done(upb_symtabtxn_iter i) {
return i == NULL;
}
upb_def *upb_symtabtxn_iter_def(upb_symtabtxn_iter iter) {
upb_symtab_ent *e = iter;
void upb_symtabtxn_next(upb_symtabtxn_iter *i) { upb_strtable_next(i); }
bool upb_symtabtxn_done(upb_symtabtxn_iter *i) { return upb_strtable_done(i); }
upb_def *upb_symtabtxn_iter_def(upb_symtabtxn_iter *i) {
const upb_symtab_ent *e = upb_strtable_iter_value(i);
return e->def;
}
@ -591,8 +557,10 @@ upb_def *upb_symtabtxn_iter_def(upb_symtabtxn_iter iter) {
/* upb_symtab public interface ************************************************/
static void _upb_symtab_free(upb_strtable *t) {
upb_symtab_ent *e;
for (e = upb_strtable_begin(t); e; e = upb_strtable_next(t, &e->e)) {
upb_strtable_iter i;
upb_strtable_begin(&i, t);
for (; !upb_strtable_done(&i); upb_strtable_next(&i)) {
const upb_symtab_ent *e = upb_strtable_iter_value(&i);
assert(upb_atomic_read(&e->def->refcount) == 0);
upb_def_free(e->def);
}
@ -632,9 +600,11 @@ upb_def **upb_symtab_getdefs(upb_symtab *s, int *count, upb_deftype_t type) {
// We may only use part of this, depending on how many symbols are of the
// correct type.
upb_def **defs = malloc(sizeof(*defs) * total);
upb_symtab_ent *e = upb_strtable_begin(&s->symtab);
upb_strtable_iter iter;
upb_strtable_begin(&iter, &s->symtab);
int i = 0;
for(; e; e = upb_strtable_next(&s->symtab, &e->e)) {
for(; !upb_strtable_done(&iter); upb_strtable_next(&iter)) {
const upb_symtab_ent *e = upb_strtable_iter_value(&iter);
upb_def *def = e->def;
assert(def);
if(type == UPB_DEF_ANY || def->type == type)
@ -646,7 +616,7 @@ upb_def **upb_symtab_getdefs(upb_symtab *s, int *count, upb_deftype_t type) {
return defs;
}
upb_def *upb_symtab_lookup(upb_symtab *s, upb_string *sym) {
upb_def *upb_symtab_lookup(upb_symtab *s, const char *sym) {
upb_rwlock_rdlock(&s->lock);
upb_symtab_ent *e = upb_strtable_lookup(&s->symtab, sym);
upb_def *ret = NULL;
@ -658,9 +628,9 @@ upb_def *upb_symtab_lookup(upb_symtab *s, upb_string *sym) {
return ret;
}
upb_def *upb_symtab_resolve(upb_symtab *s, upb_string *base, upb_string *symbol) {
upb_def *upb_symtab_resolve(upb_symtab *s, const char *base, const char *sym) {
upb_rwlock_rdlock(&s->lock);
upb_symtab_ent *e = upb_resolve(&s->symtab, base, symbol);
upb_symtab_ent *e = upb_resolve(&s->symtab, base, sym);
upb_def *ret = NULL;
if(e) {
ret = e->def;
@ -692,8 +662,9 @@ bool upb_symtab_dfs(upb_def *def, upb_def **open_defs, int n,
bool replacing = (upb_strtable_lookup(&txn->deftab, m->base.fqname) != NULL);
if (needcopy && !replacing) {
upb_symtab_ent e = {{def->fqname, 0}, upb_def_dup(def)};
upb_strtable_insert(&txn->deftab, &e.e);
upb_symtab_ent e = {upb_def_dup(def)};
//fprintf(stderr, "Replacing def: %p\n", e.def);
upb_strtable_insert(&txn->deftab, def->fqname, &e);
replacing = true;
}
return replacing;
@ -706,25 +677,29 @@ bool upb_symtab_commit(upb_symtab *s, upb_symtabtxn *txn, upb_status *status) {
// themselves be replaced with versions that will point to the new defs.
// Do a DFS -- any path that finds a new def must replace all ancestors.
upb_strtable *symtab = &s->symtab;
upb_symtab_ent *e;
for(e = upb_strtable_begin(symtab); e; e = upb_strtable_next(symtab, &e->e)) {
upb_strtable_iter i;
upb_strtable_begin(&i, symtab);
for(; !upb_strtable_done(&i); upb_strtable_next(&i)) {
upb_def *open_defs[UPB_MAX_TYPE_DEPTH];
const upb_symtab_ent *e = upb_strtable_iter_value(&i);
upb_symtab_dfs(e->def, open_defs, 0, txn);
}
// Resolve all refs.
upb_strtable *txntab = &txn->deftab;
for(e = upb_strtable_begin(txntab); e; e = upb_strtable_next(txntab, &e->e)) {
upb_strtable_begin(&i, txntab);
for(; !upb_strtable_done(&i); upb_strtable_next(&i)) {
const upb_symtab_ent *e = upb_strtable_iter_value(&i);
upb_msgdef *m = upb_dyncast_msgdef(e->def);
if(!m) continue;
// Type names are resolved relative to the message in which they appear.
upb_string *base = m->base.fqname;
const char *base = m->base.fqname;
upb_msg_iter i;
for(i = upb_msg_begin(m); !upb_msg_done(i); i = upb_msg_next(m, i)) {
upb_fielddef *f = upb_msg_iter_field(i);
upb_msg_iter j;
for(j = upb_msg_begin(m); !upb_msg_done(j); j = upb_msg_next(m, j)) {
upb_fielddef *f = upb_msg_iter_field(j);
if(!upb_hasdef(f)) continue; // No resolving necessary.
upb_string *name = upb_downcast_unresolveddef(f->def)->name;
const char *name = upb_downcast_unresolveddef(f->def)->name;
// Resolve from either the txntab (pending adds) or symtab (existing
// defs). If both exist, prefer the pending add, because it will be
@ -732,17 +707,18 @@ bool upb_symtab_commit(upb_symtab *s, upb_symtabtxn *txn, upb_status *status) {
upb_symtab_ent *found;
if(!(found = upb_resolve(txntab, base, name)) &&
!(found = upb_resolve(symtab, base, name))) {
upb_seterr(status, UPB_ERROR,
"could not resolve symbol '" UPB_STRFMT "'"
" in context '" UPB_STRFMT "'",
UPB_STRARG(name), UPB_STRARG(base));
upb_status_setf(status, UPB_ERROR, "could not resolve symbol '%s' "
"in context '%s'", name, base);
return false;
}
// Check the type of the found def.
upb_fieldtype_t expected = upb_issubmsg(f) ? UPB_DEF_MSG : UPB_DEF_ENUM;
//fprintf(stderr, "found: %p\n", found);
//fprintf(stderr, "found->def: %p\n", found->def);
//fprintf(stderr, "found->def->type: %d\n", found->def->type);
if(found->def->type != expected) {
upb_seterr(status, UPB_ERROR, "Unexpected type");
upb_status_setf(status, UPB_ERROR, "Unexpected type");
return false;
}
if (!upb_fielddef_resolve(f, found->def, status)) return false;
@ -751,9 +727,9 @@ bool upb_symtab_commit(upb_symtab *s, upb_symtabtxn *txn, upb_status *status) {
// The defs in the transaction have been vetted, and can be moved to the
// symtab without causing errors.
upb_symtab_ent *tmptab_e;
for(tmptab_e = upb_strtable_begin(txntab); tmptab_e;
tmptab_e = upb_strtable_next(txntab, &tmptab_e->e)) {
upb_strtable_begin(&i, txntab);
for(; !upb_strtable_done(&i); upb_strtable_next(&i)) {
const upb_symtab_ent *tmptab_e = upb_strtable_iter_value(&i);
upb_def_movetosymtab(tmptab_e->def, s);
upb_symtab_ent *symtab_e =
upb_strtable_lookup(&s->symtab, tmptab_e->def->fqname);
@ -761,7 +737,8 @@ bool upb_symtab_commit(upb_symtab *s, upb_symtabtxn *txn, upb_status *status) {
upb_deflist_push(&s->olddefs, symtab_e->def);
symtab_e->def = tmptab_e->def;
} else {
upb_strtable_insert(&s->symtab, &tmptab_e->e);
//fprintf(stderr, "Inserting def: %p\n", tmptab_e->def);
upb_strtable_insert(&s->symtab, tmptab_e->def->fqname, tmptab_e);
}
}

@ -32,7 +32,7 @@ typedef struct _upb_symtab upb_symtab;
// All the different kind of defs we support. These correspond 1:1 with
// declarations in a .proto file.
typedef enum {
UPB_DEF_MSG = 0,
UPB_DEF_MSG = 1,
UPB_DEF_ENUM,
UPB_DEF_SERVICE, // Not yet implemented.
@ -44,7 +44,7 @@ typedef enum {
/* upb_def: base class for defs **********************************************/
typedef struct {
upb_string *fqname; // Fully qualified.
char *fqname; // Fully qualified.
upb_symtab *symtab; // Def is mutable iff symtab == NULL.
upb_atomic_t refcount; // Owns a ref on symtab iff (symtab && refcount > 0).
upb_deftype_t type;
@ -66,7 +66,7 @@ upb_def *upb_def_dup(upb_def *def);
// A upb_fielddef describes a single field in a message. It isn't a full def
// in the sense that it derives from upb_def. It cannot stand on its own; it
// must be part of a upb_msgdef. It is also reference-counted.
struct _upb_fielddef {
typedef struct _upb_fielddef {
struct _upb_msgdef *msgdef;
upb_def *def; // if upb_hasdef(f)
upb_atomic_t refcount;
@ -78,11 +78,11 @@ struct _upb_fielddef {
int16_t hasbit;
uint16_t offset;
int32_t number;
upb_string *name;
char *name;
upb_value defaultval; // Only meaningful for non-repeated scalars and strings.
upb_value fval;
struct _upb_accessor_vtbl *accessor;
};
} upb_fielddef;
upb_fielddef *upb_fielddef_new();
void upb_fielddef_ref(upb_fielddef *f);
@ -93,7 +93,7 @@ upb_fielddef *upb_fielddef_dup(upb_fielddef *f);
// Read-only accessors; each returns the correspondingly named member of the
// fielddef unchanged. upb_fielddef_name() returns the fielddef's own name
// pointer, not a copy.
INLINE uint8_t upb_fielddef_type(upb_fielddef *f) { return f->type; }
INLINE uint8_t upb_fielddef_label(upb_fielddef *f) { return f->label; }
INLINE int32_t upb_fielddef_number(upb_fielddef *f) { return f->number; }
INLINE upb_string *upb_fielddef_name(upb_fielddef *f) { return f->name; }
INLINE char *upb_fielddef_name(upb_fielddef *f) { return f->name; }
INLINE upb_value upb_fielddef_default(upb_fielddef *f) { return f->defaultval; }
INLINE upb_value upb_fielddef_fval(upb_fielddef *f) { return f->fval; }
INLINE bool upb_fielddef_finalized(upb_fielddef *f) { return f->finalized; }
@ -114,7 +114,7 @@ upb_def *upb_fielddef_subdef(upb_fielddef *f);
// added to a msgdef. For the moment we do not allow these to be set once
// the fielddef is added to a msgdef -- this could be relaxed in the future.
void upb_fielddef_setnumber(upb_fielddef *f, int32_t number);
void upb_fielddef_setname(upb_fielddef *f, upb_string *name);
void upb_fielddef_setname(upb_fielddef *f, const char *name);
// These writers may be called at any time prior to being put in a symtab.
void upb_fielddef_settype(upb_fielddef *f, uint8_t type);
@ -124,7 +124,7 @@ void upb_fielddef_setfval(upb_fielddef *f, upb_value fval);
void upb_fielddef_setaccessor(upb_fielddef *f, struct _upb_accessor_vtbl *vtbl);
// The name of the message or enum this field is referring to. Must be found
// at name resolution time (when the symtabtxn is committed to the symtab).
void upb_fielddef_settypename(upb_fielddef *f, upb_string *name);
void upb_fielddef_settypename(upb_fielddef *f, const char *name);
// A variety of tests about the type of a field.
INLINE bool upb_issubmsgtype(upb_fieldtype_t type) {
@ -227,7 +227,7 @@ INLINE upb_fielddef *upb_msgdef_itof(upb_msgdef *m, uint32_t i) {
return e ? e->f : NULL;
}
INLINE upb_fielddef *upb_msgdef_ntof(upb_msgdef *m, upb_string *name) {
INLINE upb_fielddef *upb_msgdef_ntof(upb_msgdef *m, char *name) {
upb_ntof_ent *e = (upb_ntof_ent*)upb_strtable_lookup(&m->ntof, name);
return e ? e->f : NULL;
}
@ -272,7 +272,7 @@ typedef struct {
typedef struct {
bool junk;
upb_string *string;
char *str;
} upb_iton_ent;
upb_enumdef *upb_enumdef_new();
@ -288,12 +288,13 @@ void upb_enumdef_setdefault(upb_enumdef *e, int32_t val);
// Adds a value to the enumdef. Requires that no existing val has this
// name or number (returns false and does not add if there is). May only
// be called before the enumdef is in a symtab.
bool upb_enumdef_addval(upb_enumdef *e, upb_string *name, int32_t num);
bool upb_enumdef_addval(upb_enumdef *e, char *name, int32_t num);
// Lookups from name to integer and vice-versa.
bool upb_enumdef_ntoi(upb_enumdef *e, upb_string *name, int32_t *num);
// Caller does not own a ref on the returned string.
upb_string *upb_enumdef_iton(upb_enumdef *e, int32_t num);
bool upb_enumdef_ntoil(upb_enumdef *e, char *name, size_t len, int32_t *num);
bool upb_enumdef_ntoi(upb_enumdef *e, char *name, int32_t *num);
// Caller does not own the returned string.
const char *upb_enumdef_iton(upb_enumdef *e, int32_t num);
// Iteration over name/value pairs. The order is undefined.
// Adding an enum val invalidates any iterators.
@ -308,9 +309,9 @@ upb_enum_iter upb_enum_next(upb_enumdef *e, upb_enum_iter iter);
// Reports whether enum iteration has finished; forwards directly to
// upb_inttable_done() on the same iterator value.
INLINE bool upb_enum_done(upb_enum_iter iter) { return upb_inttable_done(iter); }
// Iterator accessors.
INLINE upb_string *upb_enum_iter_name(upb_enum_iter iter) {
INLINE char *upb_enum_iter_name(upb_enum_iter iter) {
upb_iton_ent *e = (upb_iton_ent*)upb_inttable_iter_value(iter);
return e->string;
return e->str;
}
INLINE int32_t upb_enum_iter_number(upb_enum_iter iter) {
return upb_inttable_iter_key(iter);
@ -340,7 +341,7 @@ bool upb_symtabtxn_add(upb_symtabtxn *t, upb_def *def);
// Gets the def (if any) that is associated with this name in the symtab.
// Caller does *not* inherit a ref on the def.
upb_def *upb_symtabtxn_get(upb_symtabtxn *t, upb_string *name);
upb_def *upb_symtabtxn_get(upb_symtabtxn *t, char *name);
// Iterate over the defs that are part of the transaction.
// The order is undefined.
@ -350,12 +351,12 @@ upb_def *upb_symtabtxn_get(upb_symtabtxn *t, upb_string *name);
// i = upb_symtabtxn_next(t, i)) {
// upb_def *def = upb_symtabtxn_iter_def(i);
// }
typedef void* upb_symtabtxn_iter;
typedef upb_strtable_iter upb_symtabtxn_iter;
upb_symtabtxn_iter upb_symtabtxn_begin(upb_symtabtxn *t);
upb_symtabtxn_iter upb_symtabtxn_next(upb_symtabtxn *t, upb_symtabtxn_iter i);
bool upb_symtabtxn_done(upb_symtabtxn_iter i);
upb_def *upb_symtabtxn_iter_def(upb_symtabtxn_iter iter);
void upb_symtabtxn_begin(upb_symtabtxn_iter* i, upb_symtabtxn *t);
void upb_symtabtxn_next(upb_symtabtxn_iter *i);
bool upb_symtabtxn_done(upb_symtabtxn_iter *i);
upb_def *upb_symtabtxn_iter_def(upb_symtabtxn_iter *iter);
/* upb_symtab *****************************************************************/
@ -397,12 +398,12 @@ void upb_symtab_unref(upb_symtab *s);
// If a def is found, the caller owns one ref on the returned def. Otherwise
// returns NULL.
// TODO: make return const
upb_def *upb_symtab_resolve(upb_symtab *s, upb_string *base, upb_string *sym);
upb_def *upb_symtab_resolve(upb_symtab *s, const char *base, const char *sym);
// Find an entry in the symbol table with this exact name. If a def is found,
// the caller owns one ref on the returned def. Otherwise returns NULL.
// TODO: make return const
upb_def *upb_symtab_lookup(upb_symtab *s, upb_string *sym);
upb_def *upb_symtab_lookup(upb_symtab *s, const char *sym);
// Gets an array of pointers to all currently active defs in this symtab. The
// caller owns the returned array (which is of length *count) as well as a ref

@ -9,19 +9,22 @@
#include <stdlib.h>
#include <errno.h>
#include "upb_string.h"
#include "upb_def.h"
/* Joins strings together, for example:
* join("Foo.Bar", "Baz") -> "Foo.Bar.Baz"
* join("", "Baz") -> "Baz"
* Caller owns a ref on the returned string. */
static upb_string *upb_join(upb_string *base, upb_string *name) {
if (!base || upb_string_len(base) == 0) {
return upb_string_getref(name);
// Returns a newly allocated string that joins input strings together, for example:
// join("Foo.Bar", "Baz") -> "Foo.Bar.Baz"
// join("", "Baz") -> "Baz"
// Caller owns the returned string and must free() it.
static char *upb_join(char *base, char *name) {
if (!base || strlen(base) == 0) {
return strdup(name);
} else {
return upb_string_asprintf(UPB_STRFMT "." UPB_STRFMT,
UPB_STRARG(base), UPB_STRARG(name));
char *ret = malloc(strlen(base) + strlen(name) + 2);
ret[0] = '\0';
strcat(ret, base);
strcat(ret, ".");
strcat(ret, name);
return ret;
}
}
@ -36,12 +39,12 @@ static upb_def *upb_deflist_last(upb_deflist *l) {
}
// Qualify the defname for all defs starting with offset "start" with "str".
static void upb_deflist_qualify(upb_deflist *l, upb_string *str, int32_t start) {
static void upb_deflist_qualify(upb_deflist *l, char *str, int32_t start) {
for(uint32_t i = start; i < l->len; i++) {
upb_def *def = l->defs[i];
upb_string *name = def->fqname;
char *name = def->fqname;
def->fqname = upb_join(str, name);
upb_string_unref(name);
free(name);
}
}
@ -59,13 +62,13 @@ void upb_descreader_init(upb_descreader *r, upb_symtabtxn *txn) {
}
void upb_descreader_uninit(upb_descreader *r) {
upb_string_unref(r->name);
free(r->name);
upb_status_uninit(&r->status);
upb_deflist_uninit(&r->defs);
upb_string_unref(r->default_string);
free(r->default_string);
while (r->stack_len > 0) {
upb_descreader_frame *f = &r->stack[--r->stack_len];
upb_string_unref(f->name);
free(f->name);
}
}
@ -91,13 +94,14 @@ void upb_descreader_startcontainer(upb_descreader *r) {
void upb_descreader_endcontainer(upb_descreader *r) {
upb_descreader_frame *f = &r->stack[--r->stack_len];
upb_deflist_qualify(&r->defs, f->name, f->start);
upb_string_unref(f->name);
free(f->name);
f->name = NULL;
}
void upb_descreader_setscopename(upb_descreader *r, upb_string *str) {
void upb_descreader_setscopename(upb_descreader *r, char *str) {
upb_descreader_frame *f = &r->stack[r->stack_len-1];
upb_string_unref(f->name);
f->name = upb_string_getref(str);
free(f->name);
f->name = str;
}
// Handlers for google.protobuf.FileDescriptorProto.
@ -119,7 +123,7 @@ static upb_flow_t upb_descreader_FileDescriptorProto_package(void *_r,
upb_value val) {
(void)fval;
upb_descreader *r = _r;
upb_descreader_setscopename(r, upb_value_getstr(val));
upb_descreader_setscopename(r, upb_strref_dup(upb_value_getstrref(val)));
return UPB_CONTINUE;
}
@ -190,8 +194,8 @@ static upb_flow_t upb_enumdef_EnumValueDescriptorProto_name(void *_r,
upb_value val) {
(void)fval;
upb_descreader *r = _r;
upb_string_unref(r->name);
r->name = upb_string_getref(upb_value_getstr(val));
free(r->name);
r->name = upb_strref_dup(upb_value_getstrref(val));
r->saw_name = true;
return UPB_CONTINUE;
}
@ -210,7 +214,7 @@ static void upb_enumdef_EnumValueDescriptorProto_endmsg(void *_r,
upb_status *status) {
upb_descreader *r = _r;
if(!r->saw_number || !r->saw_name) {
upb_seterr(status, UPB_ERROR, "Enum value missing name or number.");
upb_status_setf(status, UPB_ERROR, "Enum value missing name or number.");
return;
}
upb_enumdef *e = upb_downcast_enumdef(upb_descreader_last(r));
@ -220,7 +224,7 @@ static void upb_enumdef_EnumValueDescriptorProto_endmsg(void *_r,
upb_enumdef_setdefault(e, r->number);
}
upb_enumdef_addval(e, r->name, r->number);
upb_string_unref(r->name);
free(r->name);
r->name = NULL;
}
@ -254,11 +258,11 @@ static void upb_enumdef_EnumDescriptorProto_endmsg(void *_r, upb_status *status)
upb_descreader *r = _r;
upb_enumdef *e = upb_downcast_enumdef(upb_descreader_last(r));
if (upb_descreader_last((upb_descreader*)_r)->fqname == NULL) {
upb_seterr(status, UPB_ERROR, "Enum had no name.");
upb_status_setf(status, UPB_ERROR, "Enum had no name.");
return;
}
if (upb_inttable_count(&e->iton) == 0) {
upb_seterr(status, UPB_ERROR, "Enum had no values.");
upb_status_setf(status, UPB_ERROR, "Enum had no values.");
return;
}
}
@ -269,8 +273,8 @@ static upb_flow_t upb_enumdef_EnumDescriptorProto_name(void *_r,
(void)fval;
upb_descreader *r = _r;
upb_enumdef *e = upb_downcast_enumdef(upb_descreader_last(r));
upb_string_unref(e->base.fqname);
e->base.fqname = upb_string_getref(upb_value_getstr(val));
free(e->base.fqname);
e->base.fqname = upb_strref_dup(upb_value_getstrref(val));
return UPB_CONTINUE;
}
@ -298,99 +302,73 @@ static upb_flow_t upb_fielddef_startmsg(void *_r) {
return UPB_CONTINUE;
}
// Converts the default value in string "dstr" into "d". Passes a ref on dstr.
// Converts the default value in string "str" into "d". Takes ownership of str.
// Returns true on success.
static bool upb_fielddef_parsedefault(upb_string *dstr, upb_value *d, int type) {
static bool upb_fielddef_parsedefault(char *str, upb_value *d, int type) {
bool success = true;
if (type == UPB_TYPE(STRING) || type == UPB_TYPE(BYTES) || type == UPB_TYPE(ENUM)) {
// We'll keep the ref we had on it. We include enums in this case because
// we need the enumdef to resolve the name, but we may not have it yet.
// We'll resolve it later.
if (dstr) {
upb_value_setstr(d, dstr);
} else {
upb_value_setstr(d, upb_emptystring());
}
if (!str) str = strdup("");
upb_value_setptr(d, str);
} else if (type == UPB_TYPE(MESSAGE) || type == UPB_TYPE(GROUP)) {
// We don't expect to get a default value.
upb_string_unref(dstr);
if (dstr != NULL) success = false;
free(str);
if (str != NULL) success = false;
} else if (type == UPB_TYPE(BOOL)) {
if (!str || strcmp(str, "false") == 0)
upb_value_setbool(d, false);
else if (strcmp(str, "true") == 0)
upb_value_setbool(d, true);
else
success = false;
free(str);
} else {
// The strto* functions need the string to be NULL-terminated.
char *strz = upb_string_isempty(dstr) ? NULL : upb_string_newcstr(dstr);
if (!str) str = strdup("0");
char *end;
upb_string_unref(dstr);
switch (type) {
case UPB_TYPE(INT32):
case UPB_TYPE(SINT32):
case UPB_TYPE(SFIXED32):
if (strz) {
long val = strtol(strz, &end, 0);
if (val > INT32_MAX || val < INT32_MIN || errno == ERANGE || *end)
success = false;
else
upb_value_setint32(d, val);
} else {
upb_value_setint32(d, 0);
}
case UPB_TYPE(SFIXED32): {
long val = strtol(str, &end, 0);
if (val > INT32_MAX || val < INT32_MIN || errno == ERANGE || *end)
success = false;
else
upb_value_setint32(d, val);
break;
}
case UPB_TYPE(INT64):
case UPB_TYPE(SINT64):
case UPB_TYPE(SFIXED64):
if (strz) {
upb_value_setint64(d, strtoll(strz, &end, 0));
if (errno == ERANGE || *end) success = false;
} else {
upb_value_setint64(d, 0);
}
upb_value_setint64(d, strtoll(str, &end, 0));
if (errno == ERANGE || *end) success = false;
break;
case UPB_TYPE(UINT32):
case UPB_TYPE(FIXED32):
if (strz) {
unsigned long val = strtoul(strz, &end, 0);
if (val > UINT32_MAX || errno == ERANGE || *end)
success = false;
else
upb_value_setuint32(d, val);
} else {
upb_value_setuint32(d, 0);
}
case UPB_TYPE(FIXED32): {
unsigned long val = strtoul(str, &end, 0);
if (val > UINT32_MAX || errno == ERANGE || *end)
success = false;
else
upb_value_setuint32(d, val);
break;
}
case UPB_TYPE(UINT64):
case UPB_TYPE(FIXED64):
if (strz) {
upb_value_setuint64(d, strtoull(strz, &end, 0));
if (errno == ERANGE || *end) success = false;
} else {
upb_value_setuint64(d, 0);
}
upb_value_setuint64(d, strtoull(str, &end, 0));
if (errno == ERANGE || *end) success = false;
break;
case UPB_TYPE(DOUBLE):
if (strz) {
upb_value_setdouble(d, strtod(strz, &end));
if (errno == ERANGE || *end) success = false;
} else {
upb_value_setdouble(d, 0.0);
}
upb_value_setdouble(d, strtod(str, &end));
if (errno == ERANGE || *end) success = false;
break;
case UPB_TYPE(FLOAT):
if (strz) {
upb_value_setfloat(d, strtof(strz, &end));
if (errno == ERANGE || *end) success = false;
} else {
upb_value_setfloat(d, 0.0);
}
break;
case UPB_TYPE(BOOL):
if (!strz || strcmp(strz, "false") == 0)
upb_value_setbool(d, false);
else if (strcmp(strz, "true") == 0)
upb_value_setbool(d, true);
else
success = false;
upb_value_setfloat(d, strtof(str, &end));
if (errno == ERANGE || *end) success = false;
break;
}
free(strz);
free(str);
}
return success;
}
@ -405,13 +383,13 @@ static void upb_fielddef_endmsg(void *_r, upb_status *status) {
// Field was successfully read, add it as a field of the msgdef.
upb_msgdef *m = upb_descreader_top(r);
upb_msgdef_addfield(m, f);
upb_string *dstr = r->default_string;
char *dstr = r->default_string;
r->default_string = NULL;
upb_value val;
if (!upb_fielddef_parsedefault(dstr, &val, f->type)) {
// We don't worry too much about giving a great error message since the
// compiler should have ensured this was correct.
upb_seterr(status, UPB_ERROR, "Error converting default value.");
upb_status_setf(status, UPB_ERROR, "Error converting default value.");
return;
}
upb_fielddef_setdefault(f, val);
@ -441,7 +419,9 @@ static upb_flow_t upb_fielddef_onnumber(void *_r, upb_value fval, upb_value val)
static upb_flow_t upb_fielddef_onname(void *_r, upb_value fval, upb_value val) {
(void)fval;
upb_descreader *r = _r;
upb_fielddef_setname(r->f, upb_value_getstr(val));
char *name = upb_strref_dup(upb_value_getstrref(val));
upb_fielddef_setname(r->f, name);
free(name);
return UPB_CONTINUE;
}
@ -449,7 +429,9 @@ static upb_flow_t upb_fielddef_ontypename(void *_r, upb_value fval,
upb_value val) {
(void)fval;
upb_descreader *r = _r;
upb_fielddef_settypename(r->f, upb_value_getstr(val));
char *name = upb_strref_dup(upb_value_getstrref(val));
upb_fielddef_settypename(r->f, name);
free(name);
return UPB_CONTINUE;
}
@ -459,8 +441,8 @@ static upb_flow_t upb_fielddef_ondefaultval(void *_r, upb_value fval,
upb_descreader *r = _r;
// Have to convert from string to the correct type, but we might not know the
// type yet.
upb_string_unref(r->default_string);
r->default_string = upb_string_getref(upb_value_getstr(val));
free(r->default_string);
r->default_string = upb_strref_dup(upb_value_getstrref(val));
return UPB_CONTINUE;
}
@ -501,7 +483,7 @@ static void upb_msgdef_endmsg(void *_r, upb_status *status) {
upb_descreader *r = _r;
upb_msgdef *m = upb_descreader_top(r);
if(!m->base.fqname) {
upb_seterr(status, UPB_ERROR, "Encountered message with no name.");
upb_status_setf(status, UPB_ERROR, "Encountered message with no name.");
return;
}
@ -514,9 +496,9 @@ static upb_flow_t upb_msgdef_onname(void *_r, upb_value fval, upb_value val) {
upb_descreader *r = _r;
assert(val.type == UPB_TYPE(STRING));
upb_msgdef *m = upb_descreader_top(r);
upb_string_unref(m->base.fqname);
m->base.fqname = upb_string_getref(upb_value_getstr(val));
upb_descreader_setscopename(r, upb_value_getstr(val));
free(m->base.fqname);
m->base.fqname = upb_strref_dup(upb_value_getstrref(val));
upb_descreader_setscopename(r, strdup(m->base.fqname));
return UPB_CONTINUE;
}

@ -28,7 +28,7 @@ extern "C" {
// definitions that are contained inside. "name" tracks the name of the
// message or package (a bare name -- not qualified by any enclosing scopes).
typedef struct {
upb_string *name;
char *name;
// Index of the first def that is under this scope. For msgdefs, the
// msgdef itself is at start-1.
int start;
@ -42,11 +42,11 @@ typedef struct {
upb_status status;
uint32_t number;
upb_string *name;
char *name;
bool saw_number;
bool saw_name;
upb_string *default_string;
char *default_string;
upb_fielddef *f;
} upb_descreader;

@ -12,15 +12,15 @@
#include "upb_strstream.h"
#include "upb_textprinter.h"
void upb_strtomsg(upb_string *str, void *msg, upb_msgdef *md,
void upb_strtomsg(const char *str, size_t len, void *msg, upb_msgdef *md,
upb_status *status) {
upb_stringsrc strsrc;
upb_stringsrc_init(&strsrc);
upb_stringsrc_reset(&strsrc, str);
upb_stringsrc_reset(&strsrc, str, len);
upb_decoder d;
upb_decoder_initformsgdef(&d, md);
upb_decoder_reset(&d, upb_stringsrc_bytesrc(&strsrc), msg);
upb_decoder_reset(&d, upb_stringsrc_bytesrc(&strsrc), 0, UINT64_MAX, msg);
upb_decoder_decode(&d, status);
upb_stringsrc_uninit(&strsrc);
@ -53,10 +53,11 @@ void upb_msgtotext(upb_string *str, upb_msg *msg, upb_msgdef *md,
#endif
// TODO: read->load.
void upb_read_descriptor(upb_symtab *symtab, upb_string *str, upb_status *status) {
void upb_read_descriptor(upb_symtab *symtab, const char *str, size_t len,
upb_status *status) {
upb_stringsrc strsrc;
upb_stringsrc_init(&strsrc);
upb_stringsrc_reset(&strsrc, str);
upb_stringsrc_reset(&strsrc, str, len);
upb_handlers *h = upb_handlers_new();
upb_descreader_reghandlers(h);
@ -68,16 +69,16 @@ void upb_read_descriptor(upb_symtab *symtab, upb_string *str, upb_status *status
upb_symtabtxn txn;
upb_symtabtxn_init(&txn);
upb_descreader_init(&r, &txn);
upb_decoder_reset(&d, upb_stringsrc_bytesrc(&strsrc), &r);
upb_decoder_reset(&d, upb_stringsrc_bytesrc(&strsrc), 0, UINT64_MAX, &r);
upb_decoder_decode(&d, status);
// Set default accessors and layouts on all messages.
// for msgdef in symtabtxn:
upb_symtabtxn_iter i;
for(i = upb_symtabtxn_begin(&txn); !upb_symtabtxn_done(i);
i = upb_symtabtxn_next(&txn, i)) {
upb_def *def = upb_symtabtxn_iter_def(i);
upb_symtabtxn_begin(&i, &txn);
for(; !upb_symtabtxn_done(&i); upb_symtabtxn_next(&i)) {
upb_def *def = upb_symtabtxn_iter_def(&i);
upb_msgdef *md = upb_dyncast_msgdef(def);
if (!md) return;
// For field in msgdef:
@ -96,3 +97,33 @@ void upb_read_descriptor(upb_symtab *symtab, upb_string *str, upb_status *status
upb_stringsrc_uninit(&strsrc);
upb_decoder_uninit(&d);
}
// Reads the entire contents of "filename" into a newly allocated buffer.
//
// On success returns the buffer, which the caller owns and must free(), and
// stores the file's size in bytes in *len when len is non-NULL.  One extra NUL
// byte is appended after the data as a convenience, but the data itself may
// contain embedded NULs, so *len is the authoritative length.  An empty file
// yields a valid zero-length buffer rather than an error.
//
// Returns NULL if the file cannot be opened, sized or read in full, or if
// memory cannot be allocated; *len is left untouched in that case.
char *upb_readfile(const char *filename, size_t *len) {
  char *buf = NULL;
  FILE *f = fopen(filename, "rb");
  if (!f) return NULL;
  if (fseek(f, 0, SEEK_END) != 0) goto error;
  long size = ftell(f);
  if (size < 0) goto error;
  if (fseek(f, 0, SEEK_SET) != 0) goto error;
  // The +1 keeps the allocation non-empty for zero-length files and leaves
  // room for the trailing NUL.
  buf = malloc((size_t)size + 1);
  if (!buf) goto error;
  // Read byte-wise so that an empty file (0 items requested, 0 returned) is
  // not mistaken for a short read.
  if (fread(buf, 1, (size_t)size, f) != (size_t)size) goto error;
  buf[size] = '\0';
  fclose(f);
  if (len) *len = (size_t)size;
  return buf;

error:
  free(buf);  // free(NULL) is a no-op, so this is safe on every error path.
  fclose(f);
  return NULL;
}
// Loads the file named "fname" with upb_readfile() and passes its bytes to
// upb_read_descriptor() together with "symtab" and "status".  If the file
// cannot be read, "status" is set to UPB_ERROR with a message naming the file
// and upb_read_descriptor() is not called.  The temporary buffer is freed
// before returning.
void upb_read_descriptorfile(upb_symtab *symtab, const char *fname,
                             upb_status *status) {
  size_t nbytes;
  char *contents = upb_readfile(fname, &nbytes);
  if (contents) {
    upb_read_descriptor(symtab, contents, nbytes, status);
    free(contents);
  } else {
    upb_status_setf(status, UPB_ERROR, "Couldn't read file: %s", fname);
  }
}

@ -27,6 +27,7 @@
#define UPB_GLUE_H
#include <stdbool.h>
#include "upb.h"
#ifdef __cplusplus
extern "C" {
@ -36,20 +37,23 @@ extern "C" {
// Clients should use the regular, typedef'd names (eg. upb_symtab).
struct _upb_msg;
struct _upb_msgdef;
struct _upb_status;
struct _upb_string;
struct _upb_symtab;
// Decodes the given string, which must be in protobuf binary format, to the
// given upb_msg with msgdef "md", storing the status of the operation in "s".
void upb_strtomsg(struct _upb_string *str, void *msg,
struct _upb_msgdef *md, struct _upb_status *s);
void upb_strtomsg(const char *str, size_t len, void *msg,
struct _upb_msgdef *md, upb_status *s);
void upb_msgtotext(struct _upb_string *str, void *msg,
struct _upb_msgdef *md, bool single_line);
//void upb_msgtotext(struct _upb_string *str, void *msg,
// struct _upb_msgdef *md, bool single_line);
void upb_read_descriptor(struct _upb_symtab *symtab, struct _upb_string *str,
struct _upb_status *status);
void upb_read_descriptor(struct _upb_symtab *symtab, const char *str, size_t len,
upb_status *status);
void upb_read_descriptorfile(struct _upb_symtab *symtab, const char *fname,
upb_status *status);
char *upb_readfile(const char *filename, size_t *len);
#ifdef __cplusplus
} /* extern "C" */

@ -96,7 +96,6 @@ upb_mhandlers *upb_handlers_newmhandlers(upb_handlers *h) {
}
typedef struct {
upb_strtable_entry e;
upb_mhandlers *mh;
} upb_mtab_ent;
@ -105,8 +104,8 @@ static upb_mhandlers *upb_regmsg_dfs(upb_handlers *h, upb_msgdef *m,
upb_onfieldreg *fieldreg_cb,
void *closure, upb_strtable *mtab) {
upb_mhandlers *mh = upb_handlers_newmhandlers(h);
upb_mtab_ent e = {{m->base.fqname, 0}, mh};
upb_strtable_insert(mtab, &e.e);
upb_mtab_ent e = {mh};
upb_strtable_insert(mtab, m->base.fqname, &e);
if (msgreg_cb) msgreg_cb(closure, mh, m);
upb_msg_iter i;
for(i = upb_msg_begin(m); !upb_msg_done(i); i = upb_msg_next(m, i)) {
@ -153,7 +152,7 @@ static upb_fhandlers toplevel_f = {
#ifdef NDEBUG
{{0}},
#else
{{0}, UPB_VALUETYPE_RAW},
{{0}, -1},
#endif
NULL, NULL, NULL, NULL, NULL, 0, 0, 0, NULL};
@ -198,23 +197,23 @@ void upb_dispatch_endmsg(upb_dispatcher *d, upb_status *status) {
assert(d->top == d->stack);
if (d->msgent->endmsg) d->msgent->endmsg(d->top->closure, &d->status);
// TODO: should we avoid this copy by passing client's status obj to cbs?
upb_copyerr(status, &d->status);
upb_status_copy(status, &d->status);
}
void indent(upb_dispatcher *d) {
for (int i = 0; i < (d->top - d->stack); i++) printf(" ");
for (int i = 0; i < (d->top - d->stack); i++) fprintf(stderr, " ");
}
void indentm1(upb_dispatcher *d) {
for (int i = 0; i < (d->top - d->stack - 1); i++) printf(" ");
for (int i = 0; i < (d->top - d->stack - 1); i++) fprintf(stderr, " ");
}
upb_dispatcher_frame *upb_dispatch_startseq(upb_dispatcher *d,
upb_fhandlers *f) {
//indent(d);
//printf("START SEQ: %d\n", f->number);
//fprintf(stderr, "START SEQ: %d\n", f->number);
if((d->top+1) >= d->limit) {
upb_seterr(&d->status, UPB_ERROR, "Nesting too deep.");
upb_status_setf(&d->status, UPB_ERROR, "Nesting too deep.");
_upb_dispatcher_unwind(d, UPB_BREAK);
return d->top; // Dummy.
}
@ -235,7 +234,7 @@ upb_dispatcher_frame *upb_dispatch_startseq(upb_dispatcher *d,
upb_dispatcher_frame *upb_dispatch_endseq(upb_dispatcher *d) {
//indentm1(d);
//printf("END SEQ\n");
//fprintf(stderr, "END SEQ\n");
assert(d->top > d->stack);
assert(d->top->is_sequence);
upb_fhandlers *f = d->top->f;
@ -255,9 +254,9 @@ upb_dispatcher_frame *upb_dispatch_endseq(upb_dispatcher *d) {
upb_dispatcher_frame *upb_dispatch_startsubmsg(upb_dispatcher *d,
upb_fhandlers *f) {
//indent(d);
//printf("START SUBMSG: %d\n", f->number);
//fprintf(stderr, "START SUBMSG: %d\n", f->number);
if((d->top+1) >= d->limit) {
upb_seterr(&d->status, UPB_ERROR, "Nesting too deep.");
upb_status_setf(&d->status, UPB_ERROR, "Nesting too deep.");
_upb_dispatcher_unwind(d, UPB_BREAK);
return d->top; // Dummy.
}
@ -281,7 +280,7 @@ upb_dispatcher_frame *upb_dispatch_startsubmsg(upb_dispatcher *d,
upb_dispatcher_frame *upb_dispatch_endsubmsg(upb_dispatcher *d) {
//indentm1(d);
//printf("END SUBMSG\n");
//fprintf(stderr, "END SUBMSG\n");
assert(d->top > d->stack);
assert(!d->top->is_sequence);
upb_fhandlers *f = d->top->f;

@ -17,6 +17,7 @@
#include <limits.h>
#include "upb.h"
#include "upb_def.h"
#include "upb_bytestream.h"
#ifdef __cplusplus
extern "C" {
@ -303,14 +304,12 @@ typedef struct {
// Members to use as the data source requires.
void *srcclosure;
uint64_t end_ofs;
uint16_t msgindex;
uint16_t fieldindex;
uint32_t end_offset;
// Does this frame represent a sequence or a submsg (f might be both).
// We only need a single bit here, but this will make each individual
// frame grow from 32 to 40 bytes on LP64, which is a bit excessive.
bool is_sequence;
bool is_sequence; // frame represents seq or submsg? (f might be both).
bool is_packed; // !upb_issubmsg(f) && end_ofs != UINT64_MAX (strings aren't pushed)
} upb_dispatcher_frame;
// Called when some of the input needs to be skipped. All frames from

@ -7,6 +7,7 @@
* Data structure for storing a message of protobuf data.
*/
#include "upb.h"
#include "upb_msg.h"
void upb_msg_clear(void *msg, upb_msgdef *md) {
@ -132,23 +133,23 @@ UPB_ACCESSORS(bool, bool)
UPB_ACCESSORS(ptr, void*)
#undef UPB_ACCESSORS
static void _upb_stdmsg_setstr(void *_dst, upb_value _src) {
// We do:
// - upb_string_recycle(), upb_string_substr() instead of
// - upb_string_unref(), upb_string_getref()
// because we can conveniently cache these upb_string objects in the
// upb_msg, whereas the upb_src who is sending us these strings may not
// have a good way of caching them. This saves the upb_src from allocating
// new upb_strings all the time to give us.
//
// If you were using this to copy one upb_msg to another this would
// allocate string objects whereas a upb_string_getref could have avoided
// those allocations completely; if this is an issue, we could make it an
// option of the upb_msgsink which behavior is desired.
upb_string **dst = _dst;
upb_string *src = upb_value_getstr(_src);
upb_string_recycle(dst);
upb_string_substr(*dst, src, 0, upb_string_len(src));
// Copies the string referenced by "src" into the upb_stdarray pointed to by
// *_dst, where _dst is really a upb_stdarray**.
//
// The array object is allocated on first use and its buffer is only grown,
// never shrunk, so storage is reused across calls.  The bytes are pulled from
// the strref's bytesrc with upb_bytesrc_read().
//
// This function has no way to report failure, so on out-of-memory it degrades
// instead of dereferencing NULL: if the array object cannot be allocated the
// field is left unset, and if the buffer cannot be grown the previous buffer
// is kept and the string is left with length 0.
static void _upb_stdmsg_setstr(void *_dst, upb_value src) {
  upb_stdarray **dstp = _dst;
  upb_stdarray *dst = *dstp;
  if (!dst) {
    dst = malloc(sizeof(*dst));
    if (!dst) return;  // Out of memory: leave the field unset.
    dst->size = 0;
    dst->ptr = NULL;
    *dstp = dst;
  }
  dst->len = 0;
  upb_strref *ref = upb_value_getstrref(src);
  if (ref->len > dst->size) {
    // Grow through a temporary so the old buffer is not leaked (and
    // dst->size stays truthful) when realloc() fails.
    char *grown = realloc(dst->ptr, ref->len);
    if (!grown) return;  // Out of memory: string stays empty.
    dst->ptr = grown;
    dst->size = ref->len;
  }
  dst->len = ref->len;
  upb_bytesrc_read(ref->bytesrc, ref->stream_offset, ref->len, dst->ptr);
}
upb_flow_t upb_stdmsg_setstr(void *_m, upb_value fval, upb_value val) {
@ -166,15 +167,11 @@ upb_flow_t upb_stdmsg_setstr_r(void *a, upb_value fval, upb_value val) {
}
upb_value upb_stdmsg_getstr(void *m, upb_value fval) {
upb_value val = upb_stdmsg_getptr(m, fval);
upb_value_setstr(&val, upb_value_getptr(val));
return val;
return upb_stdmsg_getptr(m, fval);
}
upb_value upb_stdmsg_seqgetstr(void *i) {
upb_value val = upb_stdmsg_seqgetptr(i);
upb_value_setstr(&val, upb_value_getptr(val));
return val;
return upb_stdmsg_seqgetptr(i);
}
void *upb_stdmsg_new(upb_msgdef *md) {
@ -188,11 +185,13 @@ void upb_stdseq_free(void *s, upb_fielddef *f) {
upb_stdarray *a = s;
if (upb_issubmsg(f) || upb_isstring(f)) {
void **p = (void**)a->ptr;
for (int i = 0; i < a->size; i++) {
for (uint32_t i = 0; i < a->size; i++) {
if (upb_issubmsg(f)) {
upb_stdmsg_free(p[i], upb_downcast_msgdef(f->def));
} else {
upb_string_unref(p[i]);
upb_stdarray *str = p[i];
free(str->ptr);
free(str);
}
}
}
@ -213,7 +212,9 @@ void upb_stdmsg_free(void *m, upb_msgdef *md) {
} else if (upb_issubmsg(f)) {
upb_stdmsg_free(subp, upb_downcast_msgdef(f->def));
} else {
upb_string_unref(subp);
upb_stdarray *str = subp;
free(str->ptr);
free(str);
}
}
free(m);

@ -148,7 +148,7 @@ typedef struct {
void upb_msgvisitor_init(upb_msgvisitor *v, upb_msgdef *md, upb_handlers *h);
void upb_msgvisitor_uninit(upb_msgvisitor *v);
void upb_msgvisitor_reset(upb_msgvisitor *v, upb_msg *m);
void upb_msgvisitor_reset(upb_msgvisitor *v, void *m);
void upb_msgvisitor_visit(upb_msgvisitor *v, upb_status *status);
@ -183,8 +183,8 @@ upb_flow_t upb_stdmsg_setbool(void *c, upb_value fval, upb_value val);
// if necessary.
typedef struct {
char *ptr;
int32_t len; // Number of elements present.
int32_t size; // Number of elements allocated.
uint32_t len; // Number of elements present.
uint32_t size; // Number of elements allocated.
} upb_stdarray;
upb_flow_t upb_stdmsg_setint64_r(void *c, upb_value fval, upb_value val);

@ -9,96 +9,158 @@
#include <stddef.h>
#include <stdlib.h>
#include "upb_string.h"
#include <string.h>
// We can make this configurable if necessary.
#define BLOCK_SIZE 4096
#define BUF_SIZE 32768
struct upb_stdio {
upb_bytesrc bytesrc;
upb_bytesink bytesink;
FILE *file;
};
void upb_stdio_reset(upb_stdio *stdio, FILE* file) {
stdio->file = file;
/* upb_bytesrc methods ********************************************************/
// bsearch() comparator used to locate the buffer that holds a stream offset.
// "_key" points to a uint64_t stream offset and "_elem" to a upb_stdio_buf.
// Both offsets are reduced to their BUF_SIZE-sized block index, so any offset
// inside a buffer's block compares equal to that buffer.
// Returns <0, 0 or >0 as the key's block is before, equal to or after the
// buffer's block.
int upb_stdio_cmpbuf(const void *_key, const void *_elem) {
  const uint64_t *ofs = _key;
  const upb_stdio_buf *buf = _elem;
  uint64_t key_block = *ofs / BUF_SIZE;
  uint64_t buf_block = buf->ofs / BUF_SIZE;
  // Compare explicitly rather than returning the difference: the difference
  // is an unsigned 64-bit value, and narrowing it to int does not reliably
  // preserve its sign.
  return (key_block > buf_block) - (key_block < buf_block);
}
// Binary-searches the s->nbuf entries of s->bufs, using upb_stdio_cmpbuf as
// the ordering, for the buffer whose block contains stream offset "ofs".
// Returns whatever bsearch() returns, i.e. NULL when no entry matches.
static upb_stdio_buf *upb_stdio_findbuf(upb_stdio *s, uint64_t ofs) {
  // TODO: it is probably faster to linear search short lists, and to
  // special-case the last one or two bufs.
  const uint64_t key = ofs;
  void *hit = bsearch(&key, s->bufs, s->nbuf, sizeof(s->bufs[0]),
                      upb_stdio_cmpbuf);
  return hit;
}
/* upb_bytesrc methods ********************************************************/
//static upb_strlen_t upb_stdio_read(void *src, uint32_t ofs, upb_buf *b,
// upb_status *status) {
// upb_stdio *stdio = (upb_stdio*)src;
// size_t read = fread(buf, 1, BLOCK_SIZE, stdio->file);
// if(read < (size_t)BLOCK_SIZE) {
// // Error or EOF.
// if(feof(stdio->file)) {
// upb_seterr(status, UPB_EOF, "");
// } else if(ferror(stdio->file)) {
// upb_status_fromerrno(s);
// return 0;
// }
// }
// b->len = read;
// stdio->next_ofs += read;
// return stdio->next_ofs;
//}
// Placeholder implementation: every argument is ignored and the function
// always returns 0.
size_t upb_stdio_fetch(void *src, uint64_t ofs, upb_status *s) {
  // Explicitly discard the parameters to silence unused-parameter warnings.
  (void)s;
  (void)ofs;
  (void)src;
  return 0;
}
static upb_strlen_t upb_stdio_read(upb_bytesrc *src, void *buf,
upb_strlen_t count, upb_status *status) {
upb_stdio *stdio = (upb_stdio*)src;
assert(count > 0);
size_t read = fread(buf, 1, count, stdio->file);
if(read < (size_t)count) {
// Error or EOF.
if(feof(stdio->file)) {
upb_seterr(status, UPB_EOF, "");
return read;
} else if(ferror(stdio->file)) {
upb_seterr(status, UPB_ERROR, "Error reading from stdio stream.");
return -1;
}
void upb_stdio_read(void *src, uint64_t src_ofs, size_t len, char *dst) {
upb_stdio_buf *buf = upb_stdio_findbuf(src, src_ofs);
src_ofs -= buf->ofs;
memcpy(dst, &buf->data[src_ofs], BUF_SIZE - src_ofs);
len -= (BUF_SIZE - src_ofs);
dst += (BUF_SIZE - src_ofs);
while (len > 0) {
++buf;
size_t bytes = UPB_MIN(len, BUF_SIZE);
memcpy(dst, buf->data, bytes);
len -= bytes;
dst += bytes;
}
return read;
}
static bool upb_stdio_getstr(upb_bytesrc *src, upb_string *str,
upb_status *status) {
upb_strlen_t read = upb_stdio_read(
src, upb_string_getrwbuf(str, BLOCK_SIZE), BLOCK_SIZE, status);
if (read <= 0) return false;
upb_string_getrwbuf(str, read);
return true;
// Returns a pointer to the byte at stream offset "ofs" inside the buffer that
// upb_stdio_findbuf() locates for it, and stores in *len the number of bytes
// from that position to the end of a BUF_SIZE-byte buffer.
// NOTE(review): the lookup result is used without a NULL check, so this
// presumably requires that the offset is already buffered -- confirm against
// the callers.
const char *upb_stdio_getptr(void *src, uint64_t ofs, size_t *len) {
  upb_stdio_buf *buf = upb_stdio_findbuf(src, ofs);
  // Position of the requested byte relative to the start of its buffer.
  ofs -= buf->ofs;
  *len = BUF_SIZE - ofs;
  return buf->data + ofs;
}
void upb_stdio_refregion(void *src, uint64_t ofs, size_t len) {
upb_stdio_buf *buf = upb_stdio_findbuf(src, ofs);
len -= (BUF_SIZE - ofs);
++buf->refcount;
while (len > 0) {
++buf;
++buf->refcount;
}
}
// Placeholder implementation: every argument is ignored and nothing is done.
void upb_stdio_unrefregion(void *src, uint64_t ofs, size_t len) {
  // Explicitly discard the parameters to silence unused-parameter warnings.
  (void)len;
  (void)ofs;
  (void)src;
}
/* upb_bytesink methods *******************************************************/
#if 0
upb_strlen_t upb_stdio_putstr(upb_bytesink *sink, upb_string *str, upb_status *status) {
upb_stdio *stdio = (upb_stdio*)((char*)sink - offsetof(upb_stdio, bytesink));
upb_stdio *stdio = (upb_stdio*)((char*)sink - offsetof(upb_stdio, sink));
upb_strlen_t len = upb_string_len(str);
upb_strlen_t written = fwrite(upb_string_getrobuf(str), 1, len, stdio->file);
if(written < len) {
upb_seterr(status, UPB_ERROR, "Error writing to stdio stream.");
upb_status_setf(status, UPB_ERROR, "Error writing to stdio stream.");
return -1;
}
return written;
}
#endif
upb_strlen_t upb_stdio_vprintf(upb_bytesink *sink, upb_status *status,
const char *fmt, va_list args) {
upb_stdio *stdio = (upb_stdio*)((char*)sink - offsetof(upb_stdio, bytesink));
upb_strlen_t written = vfprintf(stdio->file, fmt, args);
uint32_t upb_stdio_vprintf(upb_bytesink *sink, upb_status *status,
const char *fmt, va_list args) {
upb_stdio *stdio = (upb_stdio*)((char*)sink - offsetof(upb_stdio, sink));
int written = vfprintf(stdio->file, fmt, args);
if (written < 0) {
upb_seterr(status, UPB_ERROR, "Error writing to stdio stream.");
upb_status_setf(status, UPB_ERROR, "Error writing to stdio stream.");
return -1;
}
return written;
}
upb_stdio *upb_stdio_new() {
void upb_stdio_init(upb_stdio *stdio) {
static upb_bytesrc_vtbl bytesrc_vtbl = {
upb_stdio_fetch,
upb_stdio_read,
upb_stdio_getstr,
upb_stdio_getptr,
upb_stdio_refregion,
upb_stdio_unrefregion,
NULL,
NULL
};
upb_bytesrc_init(&stdio->src, &bytesrc_vtbl);
static upb_bytesink_vtbl bytesink_vtbl = {
upb_stdio_putstr,
upb_stdio_vprintf
};
//static upb_bytesink_vtbl bytesink_vtbl = {
// upb_stdio_putstr,
// upb_stdio_vprintf
//};
//upb_bytesink_init(&stdio->bytesink, &bytesink_vtbl);
}
upb_stdio *stdio = malloc(sizeof(*stdio));
upb_bytesrc_init(&stdio->bytesrc, &bytesrc_vtbl);
upb_bytesink_init(&stdio->bytesink, &bytesink_vtbl);
return stdio;
// Points the object at "file" for subsequent I/O. The file is marked as not
// owned by this object, so upb_stdio_uninit() will not fclose() it.
void upb_stdio_reset(upb_stdio* stdio, FILE *file) {
  stdio->should_close = false;
  stdio->file = file;
}
// Opens "filename" with the given fopen() mode and resets the object to use
// the resulting stream, which upb_stdio_uninit() will then close. On failure
// the error is recorded in "s" from errno and the object is left unchanged.
void upb_stdio_open(upb_stdio *stdio, const char *filename, const char *mode,
                    upb_status *s) {
  FILE *f = fopen(filename, mode);
  if (!f) {
    upb_status_fromerrno(s);
    return;
  }
  // Disable buffering; we do our own. This must be applied to the stream we
  // just opened: stdio->file is not assigned until upb_stdio_reset() below,
  // so using it here would touch a stale or uninitialized FILE*.
  setvbuf(f, NULL, _IONBF, 0);
  upb_stdio_reset(stdio, f);
  // upb_stdio_reset() clears should_close, so set it afterwards.
  stdio->should_close = true;
}
void upb_stdio_free(upb_stdio *stdio) {
free(stdio);
void upb_stdio_uninit(upb_stdio *stdio) {
// Can't report status; caller should flush() to ensure data is written.
if (stdio->should_close) fclose(stdio->file);
stdio->file = NULL;
}
upb_bytesrc* upb_stdio_bytesrc(upb_stdio *stdio) { return &stdio->bytesrc; }
upb_bytesink* upb_stdio_bytesink(upb_stdio *stdio) { return &stdio->bytesink; }
upb_bytesrc* upb_stdio_bytesrc(upb_stdio *stdio) { return &stdio->src; }
upb_bytesink* upb_stdio_bytesink(upb_stdio *stdio) { return &stdio->sink; }

@ -5,7 +5,12 @@
* Author: Josh Haberman <jhaberman@gmail.com>
*
* This file provides upb_bytesrc and upb_bytesink implementations for
* ANSI C stdio.
* ANSI C stdio, which is less efficient than posixfd, but more portable.
*
* Specifically, stdio functions acquire locks on every operation (unless you
* use the f{read,write,...}_unlocked variants, which are not standard) and
* perform redundant buffering (unless you disable it with setvbuf(), but we
* can only do this on newly-opened filehandles).
*/
#include <stdio.h>
@ -18,21 +23,44 @@
extern "C" {
#endif
struct upb_stdio;
typedef struct upb_stdio upb_stdio;
typedef struct {
uint64_t ofs;
uint32_t refcount;
char data[];
} upb_stdio_buf;
// We use a single object for both bytesrc and bytesink for simplicity.
// The object is still not thread-safe, and may only be used by one reader
// and one writer at a time.
typedef struct {
upb_bytesrc src;
upb_bytesink sink;
FILE *file;
bool should_close;
upb_stdio_buf **bufs;
uint32_t nbuf, szbuf;
} upb_stdio;
void upb_stdio_init(upb_stdio *stdio);
// Caller should call upb_stdio_flush prior to calling this to ensure that
// all data is flushed, otherwise data can be silently dropped if an error
// occurs flushing the remaining buffers.
void upb_stdio_uninit(upb_stdio *stdio);
// Resets the object to read/write to the given "file." The caller is
// responsible for closing the file, which must outlive this object.
void upb_stdio_reset(upb_stdio *stdio, FILE *file);
// Creation/deletion.
upb_stdio *upb_stdio_new();
void upb_stdio_free(upb_stdio *stdio);
// As an alternative to upb_stdio_reset(), initializes the object by opening a
// file, and will handle closing it. This may result in more efficient I/O
// than the previous since we can call setvbuf() to disable buffering.
void upb_stdio_open(upb_stdio *stdio, const char *filename, const char *mode,
upb_status *s);
// Reset/initialize the object for use. The src or sink will call
// fread()/fwrite()/etc. on the given FILE*.
void upb_stdio_reset(upb_stdio *stdio, FILE* file);
// Must be called to cleanup after the object, including closing the file if
// it was opened with upb_stdio_open() (which can fail, hence the status).
//
// Gets a bytesrc or bytesink for the given stdio. The returned pointer is
// invalidated by upb_stdio_reset above. It is perfectly valid to get both
// a bytesrc and a bytesink for the same stdio if the FILE* is open for reading
// and writing.
upb_bytesrc *upb_stdio_bytesrc(upb_stdio *stdio);
upb_bytesink *upb_stdio_bytesink(upb_stdio *stdio);

@ -1,164 +0,0 @@
/*
* upb - a minimalist implementation of protocol buffers.
*
* Copyright (c) 2010 Google Inc. See LICENSE for details.
* Author: Josh Haberman <jhaberman@gmail.com>
*/
#include "upb_string.h"
#include <stdlib.h>
#ifdef __GLIBC__
#include <malloc.h>
#elif defined(__APPLE__)
#include <malloc/malloc.h>
#endif
// Rounds v up to the nearest power of two; a value that is already a power of
// two is returned unchanged. An input of 0, or any input above 2^31, wraps
// around and yields 0.
static uint32_t upb_round_up_pow2(uint32_t v) {
  // http://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2
  // Smear the highest set bit of (v - 1) into every lower bit position, then
  // add one to land on the next power of two.
  uint32_t smeared = v - 1;
  for (unsigned shift = 1; shift < 32; shift <<= 1) {
    smeared |= smeared >> shift;
  }
  return smeared + 1;
}
upb_string *upb_string_new() {
upb_string *str = malloc(sizeof(*str));
str->ptr = NULL;
str->cached_mem = NULL;
str->len = 0;
#ifndef UPB_HAVE_MSIZE
str->size = 0;
#endif
str->src = NULL;
upb_atomic_init(&str->refcount, 1);
return str;
}
// Returns how many bytes are available in the string's own allocation
// (cached_mem), as opposed to the string's current length.
uint32_t upb_string_size(upb_string *str) {
#ifdef __GLIBC__
// glibc: ask the allocator for the usable size of the block.
return malloc_usable_size(str->cached_mem);
#elif defined(__APPLE__)
// Apple libc: same idea, different function name.
return malloc_size(str->cached_mem);
#else
// No allocator query available: use the explicitly tracked size.
// NOTE(review): the "size" member only exists when UPB_HAVE_MSIZE is not
// defined -- presumably UPB_HAVE_MSIZE is defined exactly on the platforms
// handled above; confirm where it is set.
return str->size;
#endif
}
void _upb_string_free(upb_string *str) {
free(str->cached_mem);
_upb_string_release(str);
free(str);
}
// Returns a writable buffer of at least "len" bytes that is owned by the
// string, and sets the string's length to "len". The cached allocation is
// grown to the next power of two when it is too small; it is never shrunk.
char *upb_string_getrwbuf(upb_string *str, upb_strlen_t len) {
// assert(str->ptr == NULL);
upb_strlen_t size = upb_string_size(str);
if (size < len) {
size = upb_round_up_pow2(len);
// NOTE(review): realloc()'s result is not checked; on failure cached_mem
// becomes NULL and the previous block is leaked.
str->cached_mem = realloc(str->cached_mem, size);
#ifndef UPB_HAVE_MSIZE
str->size = size;
#endif
}
str->len = len;
// Point the active data at our own memory; "ptr" may previously have
// referred to memory we do not own.
str->ptr = str->cached_mem;
return str->cached_mem;
}
void upb_string_substr(upb_string *str, upb_string *target_str,
upb_strlen_t start, upb_strlen_t len) {
assert(str->ptr == NULL);
assert(start + len <= upb_string_len(target_str));
if (target_str->src) {
start += (target_str->ptr - target_str->src->ptr);
target_str = target_str->src;
}
str->src = upb_string_getref(target_str);
str->ptr = upb_string_getrobuf(target_str) + start;
str->len = len;
}
// Formats into "str" starting at byte "offset", growing the string's buffer
// as needed, and sets the string's length to offset plus the formatted
// length. Returns the number of bytes formatted, not counting the NUL
// terminator that vsnprintf() appends.
size_t upb_string_vprintf_at(upb_string *str, size_t offset, const char *format,
va_list args) {
// Try once without reallocating. We have to va_copy because we might have
// to call vsnprintf again.
// NOTE(review): both operands of the subtraction are unsigned, so it wraps
// if "offset" exceeds the current allocation -- confirm callers never pass
// an offset beyond upb_string_size(str).
uint32_t size = UPB_MAX(upb_string_size(str) - offset, 16);
char *buf = upb_string_getrwbuf(str, offset + size) + offset;
va_list args_copy;
va_copy(args_copy, args);
// NOTE(review): vsnprintf() returns a negative value on error; storing it in
// a uint32_t turns that into a very large size, which is not handled below.
uint32_t true_size = vsnprintf(buf, size, format, args_copy);
va_end(args_copy);
// Resize to be the correct size.
if (true_size >= size) {
// Need to print again, because some characters were truncated. vsnprintf
// has weird behavior (and contrary IMO to what the standard says): it will
// not write the entire string unless you give it space to store the NULL
// terminator also. So we can't give it space for the string itself and
// let NULL get truncated (after all, we don't care about it): we *must*
// give it space for NULL.
buf = upb_string_getrwbuf(str, offset + true_size + 1) + offset;
vsnprintf(buf, true_size + 1, format, args);
}
// getrwbuf() set the length to the buffer size requested; trim it back to
// the bytes actually formatted.
str->len = offset + true_size;
return true_size;
}
upb_string *upb_string_asprintf(const char *format, ...) {
upb_string *str = upb_string_new();
va_list args;
va_start(args, format);
upb_string_vprintf(str, format, args);
va_end(args);
return str;
}
upb_string *upb_strdup(upb_string *s) {
upb_string *str = upb_string_new();
upb_strcpy(str, s);
return str;
}
void upb_strcat(upb_string *s, upb_string *append) {
uint32_t old_size = upb_string_len(s);
uint32_t append_size = upb_string_len(append);
uint32_t new_size = old_size + append_size;
char *buf = upb_string_getrwbuf(s, new_size);
memcpy(buf + old_size, upb_string_getrobuf(append), append_size);
}
// Reads the entire contents of "filename" into a newly-created string, on
// which the caller owns a ref. An empty file yields an empty string. Returns
// NULL if the file cannot be opened, sized, or read in full.
upb_string *upb_strreadfile(const char *filename) {
  upb_string *s = NULL;
  FILE *f = fopen(filename, "rb");
  if(!f) return NULL;
  if(fseek(f, 0, SEEK_END) != 0) goto error;
  long size = ftell(f);
  if(size < 0) goto error;
  if(fseek(f, 0, SEEK_SET) != 0) goto error;
  s = upb_string_new();
  char *buf = upb_string_getrwbuf(s, size);
  // fread() with a zero item size always returns 0, so only require one
  // complete item when there is actually data to read.
  if(size > 0 && fread(buf, size, 1, f) != 1) goto error;
  fclose(f);
  return s;
error:
  // Release the string if it was already created (unref of NULL is a no-op),
  // so a failed read does not leak it.
  upb_string_unref(s);
  fclose(f);
  return NULL;
}
upb_string *upb_emptystring() {
static upb_string empty = UPB_STATIC_STRING("");
return &empty;
}
// Returns a newly-allocated, NUL-terminated copy of the string's bytes, which
// the caller must free(). Returns NULL if the allocation fails.
char *upb_string_newcstr(upb_string *str) {
  upb_strlen_t len = upb_string_len(str);
  char *ret = malloc(len+1);
  // Don't write through a failed allocation.
  if (!ret) return NULL;
  memcpy(ret, upb_string_getrobuf(str), len);
  ret[len] = '\0';
  return ret;
}

@ -1,394 +0,0 @@
/*
* upb - a minimalist implementation of protocol buffers.
*
* Copyright (c) 2010 Google Inc. See LICENSE for details.
* Author: Josh Haberman <jhaberman@gmail.com>
*
* This file defines a simple string type which is length-delimited instead
* of NULL-terminated, and which has useful sharing semantics.
*
* The overriding goal of upb_string is to avoid memcpy(), malloc(), and free()
* wherever possible, while keeping both CPU and memory overhead low.
* Throughout upb there are situations where one wants to reference all or part
* of another string without copying. upb_string provides APIs for doing this,
* and allows the referenced string to be kept alive for as long as anyone is
* referencing it.
*
* Characteristics of upb_string:
* - strings are reference-counted.
* - strings are immutable (can be mutated only when first created or recycled).
* - if a string has no other referents, it can be "recycled" into a new string
* without having to reallocate the upb_string.
* - strings can be substrings of other strings (owning a ref on the source
* string).
*
* Reference-counted strings have recently fallen out of favor because of the
* performance impacts of doing thread-safe reference counting with atomic
* operations. We side-step this issue by not performing atomic operations
* unless the string has been marked thread-safe. Time will tell whether this
* scheme is easy and convenient enough to be practical.
*
* Strings are expected to be 8-bit-clean, but "char*" is such an entrenched
* idiom that we go with it instead of making our pointers uint8_t*.
*
* WARNING: THE GETREF, UNREF, AND RECYCLE OPERATIONS ARE NOT THREAD_SAFE
* UNLESS THE STRING HAS BEEN MARKED SYNCHRONIZED! What this means is that if
* you are logically passing a reference to a upb_string to another thread
* (which implies that the other thread must eventually call unref or recycle),
* you have two options:
*
* - create a copy of the string that will be used in the other thread only.
* - call upb_string_get_synchronized_ref(), which will make getref, unref, and
* recycle thread-safe for this upb_string.
*/
#ifndef UPB_STRING_H
#define UPB_STRING_H
#include <assert.h>
#include <string.h>
#include <stdarg.h>
#include "upb_atomic.h"
#include "upb.h"
#ifdef __cplusplus
extern "C" {
#endif
// All members of this struct are private, and may only be read/written through
// the associated functions.
struct _upb_string {
// The string's refcount.
upb_atomic_t refcount;
// The pointer to our currently active data. This may be memory we own
// or a pointer into memory we don't own.
const char *ptr;
// If non-NULL, this is a block of memory we own. We keep this cached even
// if "ptr" is currently aliasing memory we don't own.
char *cached_mem;
// The effective length of the string (the bytes at ptr).
int32_t len;
#ifndef UPB_HAVE_MSIZE
// How many bytes are allocated in cached_mem.
//
// Many platforms have a function that can tell you the size of a block
// that was previously malloc'd. In this case we can avoid storing the
// size explicitly.
uint32_t size;
#endif
// Used if this is a slice of another string, NULL otherwise. We own a ref
// on src.
struct _upb_string *src;
};
// Internal-only initializer for upb_string instances.
#ifdef UPB_HAVE_MSIZE
#define _UPB_STRING_INIT(str, len, refcount) {{refcount}, (char*)str, NULL, len, NULL}
#else
#define _UPB_STRING_INIT(str, len, refcount) {{refcount}, (char*)str, NULL, len, 0, NULL}
#endif
// Special pseudo-refcounts for static/stack-allocated strings, respectively.
#define _UPB_STRING_REFCOUNT_STATIC -1
#define _UPB_STRING_REFCOUNT_STACK -2
// Returns a newly-created, empty, non-finalized string. When the string is no
// longer needed, it should be unref'd, never freed directly.
upb_string *upb_string_new();
// Internal-only; clients should call upb_string_unref().
void _upb_string_free(upb_string *str);
// Releases a ref on the given string, which may free the memory. "str"
// can be NULL, in which case this is a no-op. WARNING: NOT THREAD_SAFE
// UNLESS THE STRING IS SYNCHRONIZED.
INLINE void upb_string_unref(upb_string *str) {
  // Only positive refcounts are decremented: static and stack strings carry
  // negative pseudo-refcounts and are never freed here. The string is freed
  // once upb_atomic_unref() reports that the last reference was dropped.
  if (str && upb_atomic_read(&str->refcount) > 0 &&
      upb_atomic_unref(&str->refcount)) {
    _upb_string_free(str);
  }
}
static void _upb_string_release(upb_string *str) {
if(str->src) {
upb_string_unref(str->src);
str->src = NULL;
}
}
upb_string *upb_strdup(upb_string *s); // Forward-declare.
// Returns a string with the same contents as "str". The caller owns a ref on
// the returned string, which may or may not be the same object as "str".
// WARNING: NOT THREAD-SAFE UNLESS THE STRING IS SYNCHRONIZED!
INLINE upb_string *upb_string_getref(upb_string *str) {
int refcount = upb_atomic_read(&str->refcount);
if (refcount == _UPB_STRING_REFCOUNT_STACK) return upb_strdup(str);
// We don't ref the special <0 refcount for static strings.
if (refcount > 0) {
upb_atomic_ref(&str->refcount);
}
return str;
}
// Returns the length of the string.
INLINE upb_strlen_t upb_string_len(upb_string *str) { return str->len; }
INLINE bool upb_string_isempty(upb_string *str) {
return !str || upb_string_len(str) == 0;
}
// Use to read the bytes of the string. The caller *must* call
// upb_string_endread() after the data has been read. The window between
// upb_string_getrobuf() and upb_string_endread() should be kept as short as
// possible, because any pending upb_string_detach() may be blocked until
// upb_string_endread is called(). No other functions may be called on the
// string during this window except upb_string_len().
INLINE const char *upb_string_getrobuf(upb_string *str) { return str->ptr; }
INLINE void upb_string_endread(upb_string *str) { (void)str; }
// Convenience method for getting the end of the string. Calls
// upb_string_getrobuf() so inherits the caveats of calling that function.
INLINE const char *upb_string_getbufend(upb_string *str) {
return upb_string_getrobuf(str) + upb_string_len(str);
}
// Attempts to recycle the string "str" so it may be reused and have different
// data written to it. The caller MUST own a reference on the given string
// prior to making this call (ie. the caller must have either created the
// string or obtained a reference with upb_string_getref()).
//
// After the function returns, "str" points to a writable string, which is
// either the original string if it had no other references or a newly created
// string if it did have other references.
//
// As a special case, passing a pointer to NULL will allocate a new string.
// This is convenient for the pattern:
//
// upb_string *str = NULL;
// while (x) {
// if (y) {
// upb_string_recycle(&str);
// upb_src_getstr(str);
// }
// }
INLINE void upb_string_recycle(upb_string **_str) {
  upb_string *str = *_str;
  // The string can be reused in place only when we hold the sole reference or
  // it is a stack string.
  bool reusable = false;
  if (str) {
    int r = upb_atomic_read(&str->refcount);
    reusable = (r == 1) || (r == _UPB_STRING_REFCOUNT_STACK);
  }
  if (reusable) {
    // Clear the contents and drop any ref held on a source string.
    str->ptr = NULL;
    str->len = 0;
    _upb_string_release(str);
    return;
  }
  // NULL or shared with other owners: give up our ref (a no-op for NULL) and
  // hand the caller a brand-new string instead.
  upb_string_unref(str);
  *_str = upb_string_new();
}
// The options for setting the contents of a string. These may only be called
// when a string is first created or recycled; once other functions have been
// called on the string, these functions are not allowed until the string is
// recycled.
// Gets a pointer suitable for writing to the string, which is guaranteed to
// have at least "len" bytes of data available. The size of the string will
// become "len".
char *upb_string_getrwbuf(upb_string *str, upb_strlen_t len);
// Replaces the contents of str with the contents of the given printf.
size_t upb_string_vprintf_at(upb_string *str, size_t offset, const char *format,
va_list args);
INLINE size_t upb_string_vprintf(upb_string *str, const char *format,
va_list args) {
return upb_string_vprintf_at(str, 0, format, args);
}
INLINE size_t upb_string_printf(upb_string *str, const char *format, ...) {
va_list args;
va_start(args, format);
size_t written = upb_string_vprintf(str, format, args);
va_end(args);
return written;
}
// Sets the contents of "str" to be the given substring of "target_str", to
// which the caller must own a ref.
void upb_string_substr(upb_string *str, upb_string *target_str,
upb_strlen_t start, upb_strlen_t len);
// Sketch of an API for allowing upb_strings to reference external, unowned
// data. Waiting for a clear use case before actually implementing it.
//
// Makes the string "str" a reference to the given string data. The caller
// guarantees that the given string data will not change or be deleted until a
// matching call to upb_string_detach(), which may block until any concurrent
// readers have finished reading. upb_string_detach() preserves the contents
// of the string by copying the referenced data if there are any other
// referents.
// void upb_string_attach(upb_string *str, char *ptr, upb_strlen_t len);
// void upb_string_detach(upb_string *str);
// Allows using upb_strings in printf, ie:
// upb_strptr str = UPB_STRLIT("Hello, World!\n");
// printf("String is: " UPB_STRFMT, UPB_STRARG(str)); */
#define UPB_STRARG(str) upb_string_len(str), upb_string_getrobuf(str)
#define UPB_STRFMT "%.*s"
// Macros for constructing upb_string objects statically or on the stack. These
// can be used like:
//
// upb_string static_str = UPB_STATIC_STRING("Foo");
//
// int main() {
// upb_string stack_str = UPB_STACK_STRING("Foo");
// // Now:
// // upb_streql(&static_str, &stack_str) == true
// // upb_streql(&static_str, UPB_STRLIT("Foo")) == true
// }
//
// You can also use UPB_STACK_STRING or UPB_STATIC_STRING with character arrays,
// but you must not change the underlying data once you've passed the string on:
//
// void foo() {
// char data[] = "ABC123";
// upb_string stack_str = UPB_STACK_STRING(data);
// bar(&stack_str);
// data[0] = 'B'; // NOT ALLOWED!!
// }
//
// TODO: should the stack business just be like attach/detach? The latter seems
// more flexible, though it does require a stack allocation. Maybe put this off
// until there is a clear use case.
#define UPB_STATIC_STRING(str) \
_UPB_STRING_INIT(str, sizeof(str)-1, _UPB_STRING_REFCOUNT_STATIC)
#define UPB_STATIC_STRING_ARRAY(str) \
_UPB_STRING_INIT(str, sizeof(str), _UPB_STRING_REFCOUNT_STATIC)
#define UPB_STATIC_STRING_LEN(str, len) \
_UPB_STRING_INIT(str, len, _UPB_STRING_REFCOUNT_STATIC)
#define UPB_STACK_STRING(str) \
_UPB_STRING_INIT(str, sizeof(str)-1, _UPB_STRING_REFCOUNT_STACK)
#define UPB_STACK_STRING_LEN(str, len) \
_UPB_STRING_INIT(str, len, _UPB_STRING_REFCOUNT_STACK)
// A convenient way of specifying upb_strings as literals, like:
//
// upb_streql(UPB_STRLIT("expected"), other_str);
//
// However, this requires either C99 compound initializers or C++.
// Must ONLY be called with a string literal as its argument!
//#ifdef __cplusplus
//namespace upb {
//class String : public upb_string {
// // This constructor must ONLY be called with a string literal.
// String(const char *str) : upb_string(UPB_STATIC_STRING(str)) {}
//};
//}
//#define UPB_STRLIT(str) upb::String(str)
//#endif
#define UPB_STRLIT(str) &(upb_string)UPB_STATIC_STRING(str)
// Returns a singleton empty string.
upb_string *upb_emptystring();
/* upb_string library functions ***********************************************/
// Named like their <string.h> counterparts, these are all safe against buffer
// overflow. For the most part these only use the public upb_string interface.
// More efficient than upb_strcmp if all you need is to test equality.
INLINE bool upb_streql(upb_string *s1, upb_string *s2) {
  // Strings of different lengths can never be equal, so skip the memcmp().
  upb_strlen_t n = upb_string_len(s1);
  if (n != upb_string_len(s2)) return false;
  const char *lhs = upb_string_getrobuf(s1);
  const char *rhs = upb_string_getrobuf(s2);
  bool equal = (memcmp(lhs, rhs, n) == 0);
  // Close the read window opened by getrobuf() on both strings.
  upb_string_endread(s1);
  upb_string_endread(s2);
  return equal;
}
// Like strcmp().
int upb_strcmp(upb_string *s1, upb_string *s2);
// Compare a upb_string with memory or a NULL-terminated C string.
INLINE bool upb_streqllen(upb_string *str, const void *buf, upb_strlen_t len) {
return len == upb_string_len(str) &&
memcmp(upb_string_getrobuf(str), buf, len) == 0;
}
INLINE bool upb_streqlc(upb_string *str, const void *buf) {
// Could be made one-pass.
return upb_streqllen(str, buf, strlen((const char*)buf));
}
// Like upb_strcpy, but copies from a buffer and length.
INLINE void upb_strcpylen(upb_string *dest, const void *src, upb_strlen_t len) {
memcpy(upb_string_getrwbuf(dest, len), src, len);
}
// Replaces the contents of "dest" with the contents of "src".
INLINE void upb_strcpy(upb_string *dest, upb_string *src) {
upb_strcpylen(dest, upb_string_getrobuf(src), upb_string_len(src));
upb_string_endread(src);
}
// Like upb_strcpy, but copies from a NULL-terminated string.
INLINE void upb_strcpyc(upb_string *dest, const void *src) {
// This does two passes over src, but that is necessary unless we want to
// repeatedly re-allocate dst, which seems worse.
upb_strcpylen(dest, src, strlen((const char*)src));
}
// Returns a new string whose contents are a copy of s.
upb_string *upb_strdup(upb_string *s);
// Like upb_strdup(), but duplicates a given buffer and length.
INLINE upb_string *upb_strduplen(const void *src, upb_strlen_t len) {
upb_string *s = upb_string_new();
upb_strcpylen(s, src, len);
return s;
}
// Like upb_strdup(), but duplicates a C NULL-terminated string.
INLINE upb_string *upb_strdupc(const char *src) {
return upb_strduplen(src, strlen(src));
}
// Returns a newly-allocated NULL-terminated copy of str.
char *upb_string_newcstr(upb_string *str);
// Appends 'append' to 's' in-place, resizing s if necessary.
void upb_strcat(upb_string *s, upb_string *append);
// Returns a new string that is a substring of the given string.
INLINE upb_string *upb_strslice(upb_string *s, int offset, int len) {
upb_string *str = upb_string_new();
upb_string_substr(str, s, offset, len);
return str;
}
// Reads an entire file into a newly-allocated string.
upb_string *upb_strreadfile(const char *filename);
// Returns a new string with the contents of the given printf.
upb_string *upb_string_asprintf(const char *format, ...);
#ifdef __cplusplus
} /* extern "C" */
#endif
#endif

@ -8,61 +8,45 @@
#include "upb_strstream.h"
#include <stdlib.h>
#include "upb_string.h"
/* upb_stringsrc **************************************************************/
static upb_strlen_t upb_stringsrc_read(upb_bytesrc *_src, void *buf,
upb_strlen_t count, upb_status *status) {
upb_stringsrc *src = (upb_stringsrc*)_src;
if (src->offset == upb_string_len(src->str)) {
status->code = UPB_EOF;
return -1;
} else {
upb_strlen_t to_read = UPB_MIN(count, upb_string_len(src->str) - src->offset);
memcpy(buf, upb_string_getrobuf(src->str) + src->offset, to_read);
src->offset += to_read;
return to_read;
}
size_t upb_stringsrc_fetch(void *_src, uint64_t ofs, upb_status *s) {
upb_stringsrc *src = _src;
size_t bytes = src->len - ofs;
if (bytes == 0) s->code = UPB_EOF;
return bytes;
}
static bool upb_stringsrc_getstr(upb_bytesrc *_src, upb_string *str,
upb_status *status) {
upb_stringsrc *src = (upb_stringsrc*)_src;
if (src->offset == upb_string_len(src->str)) {
status->code = UPB_EOF;
return false;
} else {
upb_strlen_t len = upb_string_len(src->str) - src->offset;
upb_string_substr(str, src->str, src->offset, len);
src->offset += len;
assert(src->offset == upb_string_len(src->str));
return true;
}
void upb_stringsrc_read(void *_src, uint64_t src_ofs, size_t len, char *dst) {
upb_stringsrc *src = _src;
memcpy(dst, src->str + src_ofs, len);
}
const char *upb_stringsrc_getptr(void *_src, uint64_t ofs, size_t *len) {
upb_stringsrc *src = _src;
*len = src->len - ofs;
return src->str + ofs;
}
void upb_stringsrc_init(upb_stringsrc *s) {
static upb_bytesrc_vtbl vtbl = {
upb_stringsrc_read,
upb_stringsrc_getstr,
&upb_stringsrc_fetch,
&upb_stringsrc_read,
&upb_stringsrc_getptr,
NULL, NULL, NULL, NULL
};
upb_bytesrc_init(&s->bytesrc, &vtbl);
s->str = NULL;
}
void upb_stringsrc_reset(upb_stringsrc *s, upb_string *str) {
if (str != s->str) {
upb_string_unref(s->str);
s->str = upb_string_getref(str);
}
s->offset = 0;
}
void upb_stringsrc_uninit(upb_stringsrc *s) {
upb_string_unref(s->str);
void upb_stringsrc_reset(upb_stringsrc *s, const char *str, size_t len) {
s->str = str;
s->len = len;
}
void upb_stringsrc_uninit(upb_stringsrc *s) { (void)s; }
upb_bytesrc *upb_stringsrc_bytesrc(upb_stringsrc *s) {
return &s->bytesrc;
@ -72,44 +56,49 @@ upb_bytesrc *upb_stringsrc_bytesrc(upb_stringsrc *s) {
/* upb_stringsink *************************************************************/
void upb_stringsink_uninit(upb_stringsink *s) {
upb_string_unref(s->str);
free(s->str);
}
// Resets the stringsink to a state where it will append to the given string.
// The string must be newly created or recycled. The stringsink will take a
// reference on the string, so the caller need not ensure that it outlives the
// stringsink. A stringsink can be reset multiple times.
void upb_stringsink_reset(upb_stringsink *s, upb_string *str) {
if (str != s->str) {
upb_string_unref(s->str);
s->str = upb_string_getref(str);
}
// Resize to 0.
upb_string_getrwbuf(s->str, 0);
void upb_stringsink_reset(upb_stringsink *s, char *str, size_t size) {
free(s->str);
s->str = str;
s->len = 0;
s->size = size;
}
upb_bytesink *upb_stringsink_bytesink(upb_stringsink *s) {
return &s->bytesink;
}
static upb_strlen_t upb_stringsink_vprintf(upb_bytesink *_sink, upb_status *s,
const char *fmt, va_list args) {
(void)s; // No errors can occur.
upb_stringsink *sink = (upb_stringsink*)_sink;
return upb_string_vprintf_at(sink->str, upb_string_len(sink->str), fmt, args);
static int32_t upb_stringsink_vprintf(void *_s, upb_status *status,
const char *fmt, va_list args) {
(void)status; // TODO: report realloc() errors.
upb_stringsink *s = _s;
int ret = upb_vrprintf(&s->str, &s->size, s->len, fmt, args);
if (ret >= 0) s->len += ret;
return ret;
}
static upb_strlen_t upb_stringsink_putstr(upb_bytesink *_sink, upb_string *str,
upb_status *s) {
(void)s; // No errors can occur.
upb_stringsink *sink = (upb_stringsink*)_sink;
upb_strcat(sink->str, str);
return upb_string_len(str);
// Appends "len" bytes from "buf" to the sink's string, growing the buffer by
// doubling when needed. Returns false (leaving the sink unchanged) if the
// buffer cannot be grown, true otherwise.
bool upb_stringsink_write(void *_s, const char *buf, size_t len,
                          upb_status *status) {
  (void)status;  // TODO: report realloc() errors.
  upb_stringsink *s = _s;
  size_t needed = s->len + len;
  if (needed > s->size) {
    // Start from a non-zero capacity: a sink that was reset with no buffer
    // has size 0, and doubling 0 would never reach "needed".
    size_t newsize = s->size ? s->size : 16;
    while (newsize < needed) newsize *= 2;
    // Keep the old pointer until realloc() succeeds so nothing is lost or
    // leaked on failure.
    char *newstr = realloc(s->str, newsize);
    if (!newstr) return false;
    s->str = newstr;
    s->size = newsize;
  }
  // Skip the copy for empty writes, where s->str may still be NULL.
  if (len > 0) memcpy(s->str + s->len, buf, len);
  s->len += len;
  return true;
}
void upb_stringsink_init(upb_stringsink *s) {
static upb_bytesink_vtbl vtbl = {
upb_stringsink_putstr,
upb_stringsink_write,
upb_stringsink_vprintf
};
upb_bytesink_init(&s->bytesink, &vtbl);

@ -21,8 +21,8 @@ extern "C" {
struct _upb_stringsrc {
upb_bytesrc bytesrc;
upb_string *str;
upb_strlen_t offset;
const char *str;
size_t len;
};
typedef struct _upb_stringsrc upb_stringsrc;
@ -33,9 +33,9 @@ void upb_stringsrc_uninit(upb_stringsrc *s);
// Resets the stringsrc to a state where it will vend the given string. The
// stringsrc only stores the pointer and length (it does not copy or take
// ownership), so the caller must ensure the data outlives the stringsrc's use
// of it. A stringsrc can be reset multiple times.
void upb_stringsrc_reset(upb_stringsrc *s, upb_string *str);
void upb_stringsrc_reset(upb_stringsrc *s, const char *str, size_t len);
// Returns the upb_bytesrc* for this stringsrc. Invalidated by reset above.
// Returns the upb_bytesrc* for this stringsrc.
upb_bytesrc *upb_stringsrc_bytesrc(upb_stringsrc *s);
@ -43,7 +43,8 @@ upb_bytesrc *upb_stringsrc_bytesrc(upb_stringsrc *s);
struct _upb_stringsink {
upb_bytesink bytesink;
upb_string *str;
char *str;
size_t len, size;
};
typedef struct _upb_stringsink upb_stringsink;
@ -51,11 +52,14 @@ typedef struct _upb_stringsink upb_stringsink;
void upb_stringsink_init(upb_stringsink *s);
void upb_stringsink_uninit(upb_stringsink *s);
// Resets the stringsink to a state where it will append to the given string.
// The string must be newly created or recycled. The stringsink will take a
// reference on the string, so the caller need not ensure that it outlives the
// stringsink. A stringsink can be reset multiple times.
void upb_stringsink_reset(upb_stringsink *s, upb_string *str);
// Resets the sink's string to "str", which the sink takes ownership of.
// "str" may be NULL, which will make the sink allocate a new string.
void upb_stringsink_reset(upb_stringsink *s, char *str, size_t size);
// Releases ownership of the returned string (which is "len" bytes long) and
// resets the internal string to be empty again (as if reset were called with
// NULL).
const char *upb_stringsink_release(upb_stringsink *s, size_t *len);
// Returns the upb_bytesink* for this stringsink. Invalidated by reset above.
upb_bytesink *upb_stringsink_bytesink();

@ -97,7 +97,7 @@ static uint32_t empty_intbucket(upb_inttable *table)
// The insert routines have a lot more code duplication between int/string
// variants than I would like, but there's just a bit too much that varies to
// parameterize them.
static void intinsert(upb_inttable *t, upb_inttable_key_t key, void *val) {
static void intinsert(upb_inttable *t, uint32_t key, const void *val) {
assert(upb_inttable_lookup(t, key) == NULL);
upb_inttable_value *table_val;
if (_upb_inttable_isarrkey(t, key)) {
@ -160,7 +160,7 @@ static void upb_inttable_insertall(upb_inttable *dst, upb_inttable *src) {
}
}
void upb_inttable_insert(upb_inttable *t, upb_inttable_key_t key, void *val) {
void upb_inttable_insert(upb_inttable *t, uint32_t key, const void *val) {
if((double)(t->t.count + 1) / upb_inttable_hashtablesize(t) > MAX_LOAD) {
//printf("RESIZE!\n");
// Need to resize. Allocate new table with double the size of however many
@ -181,7 +181,7 @@ void upb_inttable_insert(upb_inttable *t, upb_inttable_key_t key, void *val) {
void upb_inttable_compact(upb_inttable *t) {
// Find the largest array part we can that satisfies the MIN_DENSITY
// definition. For now we just count down powers of two.
upb_inttable_key_t largest_key = 0;
uint32_t largest_key = 0;
for(upb_inttable_iter i = upb_inttable_begin(t); !upb_inttable_done(i);
i = upb_inttable_next(t, i)) {
largest_key = UPB_MAX(largest_key, upb_inttable_iter_key(i));
@ -260,6 +260,8 @@ upb_inttable_iter upb_inttable_next(upb_inttable *t, upb_inttable_iter iter) {
/* upb_strtable ***************************************************************/
static upb_strtable_entry *strent(upb_strtable *t, int32_t i) {
//fprintf(stderr, "i: %d, table_size: %d\n", i, upb_table_size(&t->t));
assert(i <= (int32_t)upb_table_size(&t->t));
return UPB_INDEX(t->t.entries, i, t->t.entry_size);
}
@ -267,121 +269,134 @@ static uint32_t upb_strtable_size(upb_strtable *t) {
return upb_table_size(&t->t);
}
void upb_strtable_init(upb_strtable *t, uint32_t size, uint16_t entsize) {
void upb_strtable_init(upb_strtable *t, uint32_t size, uint16_t valuesize) {
t->t.value_size = valuesize;
size_t entsize = upb_align_up(sizeof(upb_strtable_header) + valuesize, 8);
upb_table_init(&t->t, size, entsize);
for (uint32_t i = 0; i < upb_table_size(&t->t); i++) {
upb_strtable_entry *e = strent(t, i);
e->key = NULL;
e->next = UPB_END_OF_CHAIN;
e->hdr.key = NULL;
e->hdr.next = UPB_END_OF_CHAIN;
}
}
void upb_strtable_free(upb_strtable *t) {
// Free refs from the strtable.
upb_strtable_entry *e = upb_strtable_begin(t);
for(; e; e = upb_strtable_next(t, e)) {
upb_string_unref(e->key);
}
// Free keys from the strtable.
upb_strtable_iter i;
for(upb_strtable_begin(&i, t); !upb_strtable_done(&i); upb_strtable_next(&i))
free((char*)upb_strtable_iter_key(&i));
upb_table_free(&t->t);
}
static uint32_t strtable_bucket(upb_strtable *t, upb_string *key)
{
uint32_t hash = MurmurHash2(upb_string_getrobuf(key), upb_string_len(key), 0);
static uint32_t strtable_bucket(upb_strtable *t, const char *key) {
uint32_t hash = MurmurHash2(key, strlen(key), 0);
return (hash & t->t.mask);
}
void *upb_strtable_lookup(upb_strtable *t, upb_string *key)
{
void *upb_strtable_lookup(upb_strtable *t, const char *key) {
uint32_t bucket = strtable_bucket(t, key);
upb_strtable_entry *e;
do {
e = strent(t, bucket);
if(e->key && upb_streql(e->key, key)) return e;
} while((bucket = e->next) != UPB_END_OF_CHAIN);
if(e->hdr.key && strcmp(e->hdr.key, key) == 0) return &e->val;
} while((bucket = e->hdr.next) != UPB_END_OF_CHAIN);
return NULL;
}
static uint32_t empty_strbucket(upb_strtable *table)
{
void *upb_strtable_lookupl(upb_strtable *t, const char *key, size_t len) {
// TODO: improve.
char key2[len+1];
memcpy(key2, key, len);
key2[len] = '\0';
return upb_strtable_lookup(t, key2);
}
static uint32_t empty_strbucket(upb_strtable *table) {
// TODO: does it matter that this is biased towards the front of the table?
for(uint32_t i = 0; i < upb_strtable_size(table); i++) {
upb_strtable_entry *e = strent(table, i);
if(!e->key) return i;
if(!e->hdr.key) return i;
}
assert(false);
return 0;
}
static void strinsert(upb_strtable *t, upb_strtable_entry *e)
{
assert(upb_strtable_lookup(t, e->key) == NULL);
e->key = upb_string_getref(e->key);
static void strinsert(upb_strtable *t, const char *key, const void *val) {
assert(upb_strtable_lookup(t, key) == NULL);
t->t.count++;
uint32_t bucket = strtable_bucket(t, e->key);
uint32_t bucket = strtable_bucket(t, key);
upb_strtable_entry *table_e = strent(t, bucket);
if(table_e->key) { /* Collision. */
if(bucket == strtable_bucket(t, table_e->key)) {
if(table_e->hdr.key) { /* Collision. */
if(bucket == strtable_bucket(t, table_e->hdr.key)) {
/* Existing element is in its main position. Find an empty slot to
* place our new element and append it to this key's chain. */
uint32_t empty_bucket = empty_strbucket(t);
while (table_e->next != UPB_END_OF_CHAIN)
table_e = strent(t, table_e->next);
table_e->next = empty_bucket;
while (table_e->hdr.next != UPB_END_OF_CHAIN)
table_e = strent(t, table_e->hdr.next);
table_e->hdr.next = empty_bucket;
table_e = strent(t, empty_bucket);
} else {
/* Existing element is not in its main position. Move it to an empty
* slot and put our element in its main position. */
uint32_t empty_bucket = empty_strbucket(t);
uint32_t evictee_bucket = strtable_bucket(t, table_e->key);
uint32_t evictee_bucket = strtable_bucket(t, table_e->hdr.key);
memcpy(strent(t, empty_bucket), table_e, t->t.entry_size); /* copies next */
upb_strtable_entry *evictee_e = strent(t, evictee_bucket);
while(1) {
assert(evictee_e->key);
assert(evictee_e->next != UPB_END_OF_CHAIN);
if(evictee_e->next == bucket) {
evictee_e->next = empty_bucket;
assert(evictee_e->hdr.key);
assert(evictee_e->hdr.next != UPB_END_OF_CHAIN);
if(evictee_e->hdr.next == bucket) {
evictee_e->hdr.next = empty_bucket;
break;
}
evictee_e = strent(t, evictee_e->next);
evictee_e = strent(t, evictee_e->hdr.next);
}
/* table_e remains set to our mainpos. */
}
}
memcpy(table_e, e, t->t.entry_size);
table_e->next = UPB_END_OF_CHAIN;
//printf("Looking up, string=" UPB_STRFMT "...\n", UPB_STRARG(e->key));
assert(upb_strtable_lookup(t, e->key) == table_e);
//fprintf(stderr, "val: %p\n", val);
//fprintf(stderr, "val size: %d\n", t->t.value_size);
memcpy(&table_e->val, val, t->t.value_size);
table_e->hdr.key = strdup(key);
table_e->hdr.next = UPB_END_OF_CHAIN;
//fprintf(stderr, "Looking up, string=%s...\n", key);
assert(upb_strtable_lookup(t, key) == &table_e->val);
//printf("Yay!\n");
}
void upb_strtable_insert(upb_strtable *t, upb_strtable_entry *e)
{
void upb_strtable_insert(upb_strtable *t, const char *key, const void *val) {
if((double)(t->t.count + 1) / upb_strtable_size(t) > MAX_LOAD) {
// Need to resize. New table of double the size, add old elements to it.
//printf("RESIZE!!\n");
upb_strtable new_table;
upb_strtable_init(&new_table, upb_strtable_size(t)*2, t->t.entry_size);
upb_strtable_entry *old_e;
for(old_e = upb_strtable_begin(t); old_e; old_e = upb_strtable_next(t, old_e))
strinsert(&new_table, old_e);
upb_strtable_init(&new_table, upb_strtable_size(t)*2, t->t.value_size);
upb_strtable_iter i;
upb_strtable_begin(&i, t);
for(; !upb_strtable_done(&i); upb_strtable_next(&i)) {
strinsert(&new_table,
upb_strtable_iter_key(&i),
upb_strtable_iter_value(&i));
}
upb_strtable_free(t);
*t = new_table;
}
strinsert(t, e);
strinsert(t, key, val);
}
void *upb_strtable_begin(upb_strtable *t) {
return upb_strtable_next(t, strent(t, -1));
void upb_strtable_begin(upb_strtable_iter *i, upb_strtable *t) {
i->e = strent(t, -1);
i->t = t;
upb_strtable_next(i);
}
void *upb_strtable_next(upb_strtable *t, upb_strtable_entry *cur) {
upb_strtable_entry *end = strent(t, upb_strtable_size(t));
void upb_strtable_next(upb_strtable_iter *i) {
upb_strtable_entry *end = strent(i->t, upb_strtable_size(i->t));
upb_strtable_entry *cur = i->e;
do {
cur = (void*)((char*)cur + t->t.entry_size);
if(cur == end) return NULL;
} while(cur->key == NULL);
return cur;
cur = (void*)((char*)cur + i->t->t.entry_size);
if(cur == end) { i->e = NULL; return; }
} while(cur->hdr.key == NULL);
i->e = cur;
}
#ifdef UPB_UNALIGNED_READS_OK

@ -18,14 +18,11 @@
#include <assert.h>
#include "upb.h"
#include "upb_string.h"
#ifdef __cplusplus
extern "C" {
#endif
typedef uint32_t upb_inttable_key_t;
#define UPB_END_OF_CHAIN (uint32_t)-1
typedef struct {
@ -34,7 +31,7 @@ typedef struct {
} upb_inttable_value;
typedef struct {
upb_inttable_key_t key;
uint32_t key;
uint32_t next; // Internal chaining.
} upb_inttable_header;
@ -48,8 +45,13 @@ typedef struct {
// performance by letting us compare hashes before comparing lengths or the
// strings themselves.
typedef struct {
upb_string *key; // We own a ref.
uint32_t next; // Internal chaining.
char *key; // We own, nullz. TODO: store explicit len?
uint32_t next; // Internal chaining.
} upb_strtable_header;
typedef struct {
upb_strtable_header hdr;
uint32_t val; // Val is at least 32 bits.
} upb_strtable_entry;
typedef struct {
@ -81,7 +83,7 @@ typedef struct {
// when looked up!
void upb_inttable_init(upb_inttable *table, uint32_t size, uint16_t value_size);
void upb_inttable_free(upb_inttable *table);
void upb_strtable_init(upb_strtable *table, uint32_t size, uint16_t entry_size); // TODO: update
void upb_strtable_init(upb_strtable *table, uint32_t size, uint16_t value_size);
void upb_strtable_free(upb_strtable *table);
// Number of values in the hash table.
@ -97,11 +99,13 @@ INLINE uint32_t upb_strtable_count(upb_strtable *t) {
// not already exist in the hash table. The data will be copied from val into
// the hashtable (the amount of data copied comes from value_size when the
// table was constructed). Therefore the data at val may be freed once the
// call returns. For string tables, the table takes a ref on str.
// call returns. For string tables, the table takes ownership of the string.
//
// WARNING: the lowest bit of val is reserved and will be overwritten!
void upb_inttable_insert(upb_inttable *t, upb_inttable_key_t key, void *val);
void upb_strtable_insert(upb_strtable *t, upb_strtable_entry *ent); // TODO: update
void upb_inttable_insert(upb_inttable *t, uint32_t key, const void *val);
// TODO: may want to allow for more complex keys with custom hash/comparison
// functions.
void upb_strtable_insert(upb_strtable *t, const char *key, const void *val);
void upb_inttable_compact(upb_inttable *t);
INLINE void upb_strtable_clear(upb_strtable *t) {
// TODO: improve.
@ -110,14 +114,14 @@ INLINE void upb_strtable_clear(upb_strtable *t) {
upb_strtable_init(t, 8, entry_size);
}
INLINE uint32_t _upb_inttable_bucket(upb_inttable *t, upb_inttable_key_t k) {
INLINE uint32_t _upb_inttable_bucket(upb_inttable *t, uint32_t k) {
uint32_t bucket = k & t->t.mask; // Identity hash for ints.
assert(bucket != UPB_END_OF_CHAIN);
return bucket;
}
// Returns true if this key belongs in the array part of the table.
INLINE bool _upb_inttable_isarrkey(upb_inttable *t, upb_inttable_key_t k) {
INLINE bool _upb_inttable_isarrkey(upb_inttable *t, uint32_t k) {
return (k < t->array_size);
}
@ -162,21 +166,44 @@ INLINE void *upb_inttable_lookup(upb_inttable *t, uint32_t key) {
return _upb_inttable_fastlookup(t, key, t->t.entry_size, t->t.value_size);
}
void *upb_strtable_lookup(upb_strtable *t, upb_string *key);
void *upb_strtable_lookupl(upb_strtable *t, const char *key, size_t len);
void *upb_strtable_lookup(upb_strtable *t, const char *key);
/* upb_strtable_iter **********************************************************/
// Strtable iteration. Order is undefined. Insertions invalidate iterators.
// upb_strtable_iter i;
// for(upb_strtable_begin(&i, t); !upb_strtable_done(&i); upb_strtable_next(&i)) {
// const char *key = upb_strtable_iter_key(&i);
// const myval *val = upb_strtable_iter_value(&i);
// // ...
// }
typedef struct {
upb_strtable *t;
upb_strtable_entry *e;
} upb_strtable_iter;
void upb_strtable_begin(upb_strtable_iter *i, upb_strtable *t);
void upb_strtable_next(upb_strtable_iter *i);
INLINE bool upb_strtable_done(upb_strtable_iter *i) { return i->e == NULL; }
INLINE const char *upb_strtable_iter_key(upb_strtable_iter *i) {
return i->e->hdr.key;
}
INLINE const void *upb_strtable_iter_value(upb_strtable_iter *i) {
return &i->e->val;
}
// Provides iteration over the table. The order in which the entries are
// returned is undefined. Insertions invalidate iterators.
void *upb_strtable_begin(upb_strtable *t);
void *upb_strtable_next(upb_strtable *t, upb_strtable_entry *cur);
/* upb_inttable_iter **********************************************************/
// Inttable iteration (should update strtable iteration to use this scheme too).
// The order is undefined.
// Inttable iteration. Order is undefined. Insertions invalidate iterators.
// for(upb_inttable_iter i = upb_inttable_begin(t); !upb_inttable_done(i);
// i = upb_inttable_next(t, i)) {
// // ...
// }
typedef struct {
upb_inttable_key_t key;
uint32_t key;
upb_inttable_value *value;
bool array_part;
} upb_inttable_iter;
@ -184,7 +211,7 @@ typedef struct {
upb_inttable_iter upb_inttable_begin(upb_inttable *t);
upb_inttable_iter upb_inttable_next(upb_inttable *t, upb_inttable_iter iter);
INLINE bool upb_inttable_done(upb_inttable_iter iter) { return iter.value == NULL; }
INLINE upb_inttable_key_t upb_inttable_iter_key(upb_inttable_iter iter) {
INLINE uint32_t upb_inttable_iter_key(upb_inttable_iter iter) {
return iter.key;
}
INLINE void *upb_inttable_iter_value(upb_inttable_iter iter) {

@ -21,12 +21,15 @@ struct _upb_textprinter {
#define CHECK(x) if ((x) < 0) goto err;
static int upb_textprinter_putescaped(upb_textprinter *p, upb_string *str,
static int upb_textprinter_putescaped(upb_textprinter *p, upb_strref *strref,
bool preserve_utf8) {
// Based on CEscapeInternal() from Google's protobuf release.
// TODO: we could read directly from a bytesrc's buffer instead.
// TODO: we could write directly into a bytesink's buffer instead.
char dstbuf[4096], *dst = dstbuf, *dstend = dstbuf + sizeof(dstbuf);
const char *src = upb_string_getrobuf(str), *end = src + upb_string_len(str);
char buf[strref->len], *src = buf;
char *end = src + strref->len;
upb_strref_read(strref, src);
// I think hex is prettier and more useful, but proto2 uses octal; should
// investigate whether it can parse hex also.
@ -35,8 +38,7 @@ static int upb_textprinter_putescaped(upb_textprinter *p, upb_string *str,
for (; src < end; src++) {
if (dstend - dst < 4) {
upb_string str = UPB_STACK_STRING_LEN(dstbuf, dst - dstbuf);
CHECK(upb_bytesink_putstr(p->bytesink, &str, &p->status));
CHECK(upb_bytesink_write(p->bytesink, dstbuf, dst - dstbuf, &p->status));
dst = dstbuf;
}
@ -64,8 +66,7 @@ static int upb_textprinter_putescaped(upb_textprinter *p, upb_string *str,
last_hex_escape = is_hex_escape;
}
// Flush remaining data.
upb_string outstr = UPB_STACK_STRING_LEN(dstbuf, dst - dstbuf);
CHECK(upb_bytesink_putstr(p->bytesink, &outstr, &p->status));
CHECK(upb_bytesink_write(p->bytesink, dst, dst - dstbuf, &p->status));
return 0;
err:
return -1;
@ -74,7 +75,7 @@ err:
static int upb_textprinter_indent(upb_textprinter *p) {
if(!p->single_line)
for(int i = 0; i < p->indent_depth; i++)
CHECK(upb_bytesink_putstr(p->bytesink, UPB_STRLIT(" "), &p->status));
CHECK(upb_bytesink_writestr(p->bytesink, " ", &p->status));
return 0;
err:
return -1;
@ -82,9 +83,9 @@ err:
static int upb_textprinter_endfield(upb_textprinter *p) {
if(p->single_line) {
CHECK(upb_bytesink_putstr(p->bytesink, UPB_STRLIT(" "), &p->status));
CHECK(upb_bytesink_writestr(p->bytesink, " ", &p->status));
} else {
CHECK(upb_bytesink_putstr(p->bytesink, UPB_STRLIT("\n"), &p->status));
CHECK(upb_bytesink_writestr(p->bytesink, "\n", &p->status));
}
return 0;
err:
@ -96,7 +97,7 @@ static upb_flow_t upb_textprinter_value(void *_p, upb_value fval,
upb_textprinter *p = _p;
upb_fielddef *f = upb_value_getfielddef(fval);
upb_textprinter_indent(p);
CHECK(upb_bytesink_printf(p->bytesink, &p->status, UPB_STRFMT ": ", UPB_STRARG(f->name)));
CHECK(upb_bytesink_printf(p->bytesink, &p->status, "%s: ", f->name));
#define CASE(fmtstr, member) \
CHECK(upb_bytesink_printf(p->bytesink, &p->status, fmtstr, upb_value_get ## member(val))); break;
switch(f->type) {
@ -118,12 +119,11 @@ static upb_flow_t upb_textprinter_value(void *_p, upb_value fval,
CASE("%" PRIu32, uint32);
case UPB_TYPE(ENUM): {
upb_enumdef *enum_def = upb_downcast_enumdef(f->def);
upb_string *enum_label =
upb_enumdef_iton(enum_def, upb_value_getint32(val));
if (enum_label) {
const char *label = upb_enumdef_iton(enum_def, upb_value_getint32(val));
if (label) {
// We found a corresponding string for this enum. Otherwise we fall
// through to the int32 code path.
CHECK(upb_bytesink_putstr(p->bytesink, enum_label, &p->status));
CHECK(upb_bytesink_writestr(p->bytesink, label, &p->status));
break;
}
}
@ -134,12 +134,13 @@ static upb_flow_t upb_textprinter_value(void *_p, upb_value fval,
case UPB_TYPE(BOOL):
CASE("%hhu", bool);
case UPB_TYPE(STRING):
case UPB_TYPE(BYTES):
CHECK(upb_bytesink_putstr(p->bytesink, UPB_STRLIT("\""), &p->status));
CHECK(upb_textprinter_putescaped(p, upb_value_getstr(val),
case UPB_TYPE(BYTES): {
CHECK(upb_bytesink_writestr(p->bytesink, "\"", &p->status));
CHECK(upb_textprinter_putescaped(p, upb_value_getstrref(val),
f->type == UPB_TYPE(STRING)));
CHECK(upb_bytesink_putstr(p->bytesink, UPB_STRLIT("\""), &p->status));
CHECK(upb_bytesink_writestr(p->bytesink, "\"", &p->status));
break;
}
}
upb_textprinter_endfield(p);
return UPB_CONTINUE;
@ -151,11 +152,10 @@ static upb_sflow_t upb_textprinter_startsubmsg(void *_p, upb_value fval) {
upb_textprinter *p = _p;
upb_fielddef *f = upb_value_getfielddef(fval);
upb_textprinter_indent(p);
bool ret = upb_bytesink_printf(p->bytesink, &p->status,
UPB_STRFMT " {", UPB_STRARG(f->name));
bool ret = upb_bytesink_printf(p->bytesink, &p->status, "%s {", f->name);
if (!ret) return UPB_SBREAK;
if (!p->single_line)
upb_bytesink_putstr(p->bytesink, UPB_STRLIT("\n"), &p->status);
upb_bytesink_writestr(p->bytesink, "\n", &p->status);
p->indent_depth++;
return UPB_CONTINUE_WITH(_p);
}
@ -165,7 +165,7 @@ static upb_flow_t upb_textprinter_endsubmsg(void *_p, upb_value fval) {
upb_textprinter *p = _p;
p->indent_depth--;
upb_textprinter_indent(p);
upb_bytesink_putstr(p->bytesink, UPB_STRLIT("}"), &p->status);
upb_bytesink_writestr(p->bytesink, "}", &p->status);
upb_textprinter_endfield(p);
return UPB_CONTINUE;
}

@ -83,16 +83,13 @@ upb_decoderet upb_vdecode_max8_massimino(upb_decoderet r);
// Template for a function that checks the first two bytes with branching
// and dispatches 2-10 bytes with a separate function.
#define UPB_VARINT_DECODER_CHECK2(name, decode_max8_function) \
INLINE upb_decoderet upb_vdecode_check2_ ## name(const char *p) { \
uint64_t b = 0; \
upb_decoderet r = {p, 0}; \
memcpy(&b, r.p, 2); \
if ((b & 0x80) == 0) { r.val = (b & 0x7f); r.p = p + 1; return r; } \
r.val = (b & 0x7f) | ((b & 0x7f00) >> 1); \
r.p = p + 2; \
if ((b & 0x8000) == 0) return r; \
return decode_max8_function(r); \
#define UPB_VARINT_DECODER_CHECK2(name, decode_max8_function) \
INLINE upb_decoderet upb_vdecode_check2_ ## name(const char *_p) { \
uint8_t *p = (uint8_t*)_p; \
if ((*p & 0x80) == 0) { upb_decoderet r = {_p + 1, *p & 0x7f}; return r; } \
upb_decoderet r = {_p + 2, (*p & 0x7f) | ((*(p + 1) & 0x7f) << 7)}; \
if ((*(p + 1) & 0x80) == 0) return r; \
return decode_max8_function(r); \
}
UPB_VARINT_DECODER_CHECK2(wright, upb_vdecode_max8_wright);

@ -1,4 +1,5 @@
#include <stdlib.h>
#include "upb_decoder.h"
#include "upb_textprinter.h"
#include "upb_stdio.h"
@ -11,20 +12,21 @@ int main(int argc, char *argv[]) {
}
upb_symtab *symtab = upb_symtab_new();
upb_string *desc = upb_strreadfile(argv[1]);
size_t desc_len;
const char *desc = upb_readfile(argv[1], &desc_len);
if (!desc) {
fprintf(stderr, "Couldn't open descriptor file: %s\n", argv[1]);
return 1;
}
upb_status status = UPB_STATUS_INIT;
upb_read_descriptor(symtab, desc, &status);
upb_read_descriptor(symtab, desc, desc_len, &status);
if (!upb_ok(&status)) {
fprintf(stderr, "Error parsing descriptor: ");
upb_printerr(&status);
return 1;
}
upb_string_unref(desc);
free((void*)desc);
upb_string *name = upb_strdupc(argv[2]);
upb_def *md = upb_symtab_lookup(symtab, name);
@ -40,19 +42,20 @@ int main(int argc, char *argv[]) {
return 1;
}
upb_stdio *in = upb_stdio_new();
upb_stdio_reset(in, stdin);
upb_stdio *out = upb_stdio_new();
upb_stdio_reset(out, stdout);
upb_stdio in, out;
upb_stdio_init(&in);
upb_stdio_init(&out);
upb_stdio_reset(&in, stdin);
upb_stdio_reset(&out, stdout);
upb_handlers *handlers = upb_handlers_new();
upb_textprinter *p = upb_textprinter_new();
upb_textprinter_reset(p, upb_stdio_bytesink(out), false);
upb_textprinter_reset(p, upb_stdio_bytesink(&out), false);
upb_textprinter_reghandlers(handlers, m);
upb_decoder d;
upb_decoder_initforhandlers(&d, handlers);
upb_decoder_reset(&d, upb_stdio_bytesrc(in), p);
upb_decoder_reset(&d, upb_stdio_bytesrc(&in), 0, UINT64_MAX, p);
upb_clearerr(&status);
upb_decoder_decode(&d, &status);
@ -63,8 +66,8 @@ int main(int argc, char *argv[]) {
}
upb_status_uninit(&status);
upb_stdio_free(in);
upb_stdio_free(out);
upb_stdio_uninit(&in);
upb_stdio_uninit(&out);
upb_decoder_uninit(&d);
upb_textprinter_free(p);
upb_def_unref(UPB_UPCAST(m));

@ -1,126 +0,0 @@
#undef NDEBUG /* ensure tests always assert. */
#include "upb_string.h"
char static_str[] = "Static string.";
upb_string static_upbstr = UPB_STATIC_STRING(static_str);
static void test_static() {
// Static string is initialized appropriately.
assert(upb_streql(&static_upbstr, UPB_STRLIT("Static string.")));
// Taking a ref on a static string returns the same string, and repeated
// refs don't get the string in a confused state.
assert(upb_string_getref(&static_upbstr) == &static_upbstr);
assert(upb_string_getref(&static_upbstr) == &static_upbstr);
assert(upb_string_getref(&static_upbstr) == &static_upbstr);
// Unreffing a static string does nothing (is not harmful).
upb_string_unref(&static_upbstr);
upb_string_unref(&static_upbstr);
upb_string_unref(&static_upbstr);
upb_string_unref(&static_upbstr);
upb_string_unref(&static_upbstr);
// Recycling a static string returns a new string (that can be modified).
upb_string *str = &static_upbstr;
upb_string_recycle(&str);
assert(str != &static_upbstr);
upb_string_unref(str);
}
static void test_dynamic() {
upb_string *str = upb_string_new();
assert(str != NULL);
upb_string_unref(str);
// Can also create a string by recycle(NULL).
str = NULL;
upb_string_recycle(&str);
assert(str != NULL);
// Take a ref and recycle; should create a new string and release a ref
// on the old one.
upb_string *strcp = upb_string_getref(str);
assert(strcp == str);
assert(upb_atomic_read(&str->refcount) == 2);
upb_string_recycle(&str);
assert(strcp != str);
assert(upb_atomic_read(&str->refcount) == 1);
assert(upb_atomic_read(&strcp->refcount) == 1);
upb_string_unref(strcp);
upb_strcpyc(str, static_str);
assert(upb_string_len(str) == (sizeof(static_str) - 1));
const char *robuf = upb_string_getrobuf(str);
assert(robuf != NULL);
assert(upb_streqlc(str, static_str));
upb_string_endread(str);
upb_string *str2 = str;
upb_string_recycle(&str2);
// No other referents, so should return the same string.
assert(str2 == str);
// Write a shorter string, the same memory should be reused.
upb_strcpyc(str, "XX");
const char *robuf2 = upb_string_getrobuf(str);
assert(robuf2 == robuf);
assert(upb_streqlc(str, "XX"));
assert(upb_streql(str, UPB_STRLIT("XX")));
// Make string alias part of another string.
str2 = upb_strdupc("WXYZ");
upb_string_recycle(&str);
upb_string_substr(str, str2, 1, 2);
assert(upb_string_len(str) == 2);
assert(upb_string_len(str2) == 4);
// The two strings should be aliasing the same data.
const char *robuf3 = upb_string_getrobuf(str);
const char *robuf4 = upb_string_getrobuf(str2);
assert(robuf3 == robuf4 + 1);
// The aliased string should have an extra ref.
assert(upb_atomic_read(&str2->refcount) == 2);
// Recycling str should eliminate the extra ref.
upb_string_recycle(&str);
assert(upb_atomic_read(&str2->refcount) == 1);
// Resetting str should reuse its old data.
upb_strcpyc(str, "XX");
const char *robuf5 = upb_string_getrobuf(str);
assert(robuf5 == robuf);
// Resetting str to something very long should require new data to be
// allocated.
upb_string_recycle(&str);
const char longstring[] = "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX";
upb_strcpyc(str, longstring);
const char *robuf6 = upb_string_getrobuf(str);
assert(robuf6 != robuf);
assert(upb_streqlc(str, longstring));
// Test printf.
upb_string_recycle(&str);
upb_string_printf(str, "Number: %d, String: %s", 5, "YO!");
assert(upb_streqlc(str, "Number: 5, String: YO!"));
// Test asprintf
upb_string *str3 = upb_string_asprintf("Yo %s: " UPB_STRFMT "\n",
"Josh", UPB_STRARG(str));
const char expected[] = "Yo Josh: Number: 5, String: YO!\n";
assert(upb_streqlc(str3, expected));
upb_string_unref(str);
upb_string_unref(str2);
upb_string_unref(str3);
// Unref of NULL is harmless.
upb_string_unref(NULL);
}
int main() {
test_static();
test_dynamic();
}

@ -1,7 +1,6 @@
#undef NDEBUG /* ensure tests always assert. */
#include "upb_table.h"
#include "upb_string.h"
#include "test_util.h"
#include <assert.h>
#include <map>
@ -23,7 +22,6 @@ typedef struct {
} inttable_entry;
typedef struct {
upb_strtable_entry e;
int32_t value; /* ASCII Value of first letter */
} strtable_entry;
@ -47,34 +45,29 @@ void test_strtable(const vector<string>& keys, uint32_t num_to_insert)
all.insert(key);
strtable_entry e;
e.value = key[0];
upb_string *str = upb_strduplen(key.c_str(), key.size());
e.e.key = str;
upb_strtable_insert(&table, &e.e);
upb_string_unref(str); // The table still owns a ref.
upb_strtable_insert(&table, key.c_str(), &e);
m[key] = key[0];
}
/* Test correctness. */
for(uint32_t i = 0; i < keys.size(); i++) {
const string& key = keys[i];
upb_string *str = upb_strduplen(key.c_str(), key.size());
strtable_entry *e = (strtable_entry*)upb_strtable_lookup(&table, str);
printf("Looking up " UPB_STRFMT "...\n", UPB_STRARG(str));
strtable_entry *e = (strtable_entry*)upb_strtable_lookup(&table, key.c_str());
printf("Looking up %s...\n", key.c_str());
if(m.find(key) != m.end()) { /* Assume map implementation is correct. */
assert(e);
assert(upb_streql(e->e.key, str));
assert(e->value == key[0]);
assert(m[key] == key[0]);
} else {
assert(e == NULL);
}
upb_string_unref(str);
}
strtable_entry *e;
for(e = (strtable_entry*)upb_strtable_begin(&table); e;
e = (strtable_entry*)upb_strtable_next(&table, &e->e)) {
string tmp(upb_string_getrobuf(e->e.key), upb_string_len(e->e.key));
upb_strtable_iter iter;
for(upb_strtable_begin(&iter, &table); !upb_strtable_done(&iter);
upb_strtable_next(&iter)) {
const char *key = upb_strtable_iter_key(&iter);
string tmp(key, strlen(key));
std::set<string>::iterator i = all.find(tmp);
assert(i != all.end());
all.erase(i);

@ -71,18 +71,17 @@ void compare_arrays(const google::protobuf::Reflection *r,
case UPB_TYPE(STRING):
case UPB_TYPE(BYTES): {
std::string str = r->GetRepeatedString(proto2_msg, proto2_f, i);
upb_string *upbstr = upb_value_getstr(v);
std::string str2(upb_string_getrobuf(upbstr), upb_string_len(upbstr));
string_size += upb_string_len(upbstr);
upb_stdarray *upbstr = (upb_stdarray*)upb_value_getptr(v);
std::string str2(upbstr->ptr, upbstr->len);
string_size += upbstr->len;
ASSERT(str == str2);
break;
}
case UPB_TYPE(GROUP):
case UPB_TYPE(MESSAGE):
// XXX: getstr
ASSERT(upb_dyncast_msgdef(upb_f->def) != NULL);
compare(r->GetRepeatedMessage(proto2_msg, proto2_f, i),
upb_value_getstr(v), upb_downcast_msgdef(upb_f->def));
upb_value_getptr(v), upb_downcast_msgdef(upb_f->def));
}
}
ASSERT(upb_seq_done(iter));
@ -129,9 +128,9 @@ void compare_values(const google::protobuf::Reflection *r,
case UPB_TYPE(STRING):
case UPB_TYPE(BYTES): {
std::string str = r->GetString(proto2_msg, proto2_f);
upb_string *upbstr = upb_value_getstr(v);
std::string str2(upb_string_getrobuf(upbstr), upb_string_len(upbstr));
string_size += upb_string_len(upbstr);
upb_stdarray *upbstr = (upb_stdarray*)upb_value_getptr(v);
std::string str2(upbstr->ptr, upbstr->len);
string_size += upbstr->len;
ASSERT(str == str2);
break;
}
@ -139,7 +138,7 @@ void compare_values(const google::protobuf::Reflection *r,
case UPB_TYPE(MESSAGE):
// XXX: getstr
compare(r->GetMessage(proto2_msg, proto2_f),
upb_value_getstr(v), upb_downcast_msgdef(upb_f->def));
upb_value_getptr(v), upb_downcast_msgdef(upb_f->def));
}
}
@ -159,9 +158,7 @@ void compare(const google::protobuf::Message& proto2_msg,
ASSERT(upb_f);
ASSERT(proto2_f);
ASSERT(upb_f->number == proto2_f->number());
ASSERT(std::string(upb_string_getrobuf(upb_f->name),
upb_string_len(upb_f->name)) ==
proto2_f->name());
ASSERT(std::string(upb_f->name) == proto2_f->name());
ASSERT(upb_f->type == proto2_f->type());
ASSERT(upb_isseq(upb_f) == proto2_f->is_repeated());
@ -183,22 +180,22 @@ void compare(const google::protobuf::Message& proto2_msg,
void parse_and_compare(MESSAGE_CIDENT *proto2_msg,
void *upb_msg, upb_msgdef *upb_md,
upb_string *str)
const char *str, size_t len)
{
// Parse to both proto2 and upb.
ASSERT(proto2_msg->ParseFromArray(upb_string_getrobuf(str), upb_string_len(str)));
ASSERT(proto2_msg->ParseFromArray(str, len));
upb_status status = UPB_STATUS_INIT;
upb_msg_clear(upb_msg, upb_md);
upb_strtomsg(str, upb_msg, upb_md, &status);
upb_strtomsg(str, len, upb_msg, upb_md, &status);
if (!upb_ok(&status)) {
fprintf(stderr, "Error parsing test protobuf: ");
upb_printerr(&status);
upb_status_print(&status, stderr);
exit(1);
}
string_size = 0;
compare(*proto2_msg, upb_msg, upb_md);
printf("Total size: %d, string size: %zd (%0.2f%%)\n", upb_string_len(str),
string_size, (double)string_size / upb_string_len(str) * 100);
printf("Total size: %zd, string size: %zd (%0.2f%%)\n", len,
string_size, (double)string_size / len * 100);
upb_status_uninit(&status);
}
@ -221,31 +218,30 @@ int main(int argc, char *argv[])
// Initialize upb state, parse descriptor.
upb_status status = UPB_STATUS_INIT;
upb_symtab *symtab = upb_symtab_new();
upb_string *fds = upb_strreadfile(MESSAGE_DESCRIPTOR_FILE);
size_t fds_len;
const char *fds = upb_readfile(MESSAGE_DESCRIPTOR_FILE, &fds_len);
if(fds == NULL) {
fprintf(stderr, "Couldn't read " MESSAGE_DESCRIPTOR_FILE ".\n");
return 1;
}
upb_read_descriptor(symtab, fds, &status);
upb_read_descriptor(symtab, fds, fds_len, &status);
if(!upb_ok(&status)) {
fprintf(stderr, "Error importing " MESSAGE_DESCRIPTOR_FILE ": ");
upb_printerr(&status);
upb_status_print(&status, stderr);
return 1;
}
upb_string_unref(fds);
free((void*)fds);
upb_string *proto_name = upb_strdupc(MESSAGE_NAME);
upb_def *def = upb_symtab_lookup(symtab, proto_name);
upb_def *def = upb_symtab_lookup(symtab, MESSAGE_NAME);
upb_msgdef *msgdef;
if(!def || !(msgdef = upb_dyncast_msgdef(def))) {
fprintf(stderr, "Error finding symbol '" UPB_STRFMT "'.\n",
UPB_STRARG(proto_name));
fprintf(stderr, "Error finding symbol '%s'.\n", MESSAGE_NAME);
return 1;
}
upb_string_unref(proto_name);
// Read the message data itself.
upb_string *str = upb_strreadfile(MESSAGE_FILE);
size_t len;
const char *str = upb_readfile(MESSAGE_FILE, &len);
if(str == NULL) {
fprintf(stderr, "Error reading " MESSAGE_FILE "\n");
return 1;
@ -254,13 +250,13 @@ int main(int argc, char *argv[])
// Run twice to test proper object reuse.
MESSAGE_CIDENT proto2_msg;
void *upb_msg = upb_stdmsg_new(msgdef);
parse_and_compare(&proto2_msg, upb_msg, msgdef, str);
parse_and_compare(&proto2_msg, upb_msg, msgdef, str);
parse_and_compare(&proto2_msg, upb_msg, msgdef, str, len);
parse_and_compare(&proto2_msg, upb_msg, msgdef, str, len);
printf("All tests passed, %d assertions.\n", num_assertions);
upb_stdmsg_free(upb_msg, msgdef);
upb_def_unref(UPB_UPCAST(msgdef));
upb_string_unref(str);
free((void*)str);
upb_symtab_unref(symtab);
upb_status_uninit(&status);
google::protobuf::ShutdownProtobufLibrary();

@ -11,16 +11,18 @@
static upb_symtab *load_test_proto() {
upb_symtab *s = upb_symtab_new();
ASSERT(s);
upb_string *descriptor = upb_strreadfile("tests/test.proto.pb");
size_t len;
char *descriptor = upb_readfile("tests/test.proto.pb", &len);
if(!descriptor) {
fprintf(stderr, "Couldn't read input file tests/test.proto.pb\n");
exit(1);
}
upb_status status = UPB_STATUS_INIT;
upb_read_descriptor(s, descriptor, &status);
upb_read_descriptor(s, descriptor, len, &status);
upb_status_print(&status, stderr);
ASSERT(upb_ok(&status));
upb_status_uninit(&status);
upb_string_unref(descriptor);
free(descriptor);
return s;
}
@ -33,9 +35,7 @@ static upb_flow_t upb_test_onvalue(void *closure, upb_value fval, upb_value val)
static void test_upb_jit() {
upb_symtab *s = load_test_proto();
upb_string *symname = upb_strdupc("SimplePrimitives");
upb_def *def = upb_symtab_lookup(s, symname);
upb_string_unref(symname);
upb_def *def = upb_symtab_lookup(s, "SimplePrimitives");
ASSERT(def);
upb_handlers *h = upb_handlers_new();
@ -54,9 +54,7 @@ static void test_upb_symtab() {
// Test cycle detection by making a cyclic def's main refcount go to zero
// and then be incremented to one again.
upb_string *symname = upb_strdupc("A");
upb_def *def = upb_symtab_lookup(s, symname);
upb_string_unref(symname);
upb_def *def = upb_symtab_lookup(s, "A");
ASSERT(def);
upb_symtab_unref(s);
upb_msgdef *m = upb_downcast_msgdef(def);

Loading…
Cancel
Save