Major refactoring: upb_string is gone in favor of upb_strref.

pull/13171/head
Joshua Haberman 14 years ago
parent 559e23c796
commit 6a1f3a6693
  1. 3
      Makefile
  2. 35
      benchmarks/parsestream.upb_table.c
  3. 44
      benchmarks/parsetostruct.upb_table.c
  4. 80
      src/upb.c
  5. 110
      src/upb.h
  6. 213
      src/upb_bytestream.h
  7. 276
      src/upb_decoder.c
  8. 75
      src/upb_decoder.h
  9. 28
      src/upb_decoder_x86.dasc
  10. 205
      src/upb_def.c
  11. 49
      src/upb_def.h
  12. 188
      src/upb_descriptor.c
  13. 6
      src/upb_descriptor.h
  14. 49
      src/upb_glue.c
  15. 20
      src/upb_glue.h
  16. 25
      src/upb_handlers.c
  17. 9
      src/upb_handlers.h
  18. 53
      src/upb_msg.c
  19. 6
      src/upb_msg.h
  20. 168
      src/upb_stdio.c
  21. 54
      src/upb_stdio.h
  22. 164
      src/upb_string.c
  23. 394
      src/upb_string.h
  24. 105
      src/upb_strstream.c
  25. 24
      src/upb_strstream.h
  26. 127
      src/upb_table.c
  27. 69
      src/upb_table.h
  28. 44
      src/upb_textprinter.c
  29. 17
      src/upb_varint.h
  30. 25
      tests/test_decoder.c
  31. 126
      tests/test_string.c
  32. 23
      tests/test_table.cc
  33. 58
      tests/test_vs_proto2.cc
  34. 16
      tests/tests.c

@ -75,7 +75,6 @@ CORE= \
src/upb_handlers.c \ src/upb_handlers.c \
src/upb_descriptor.c \ src/upb_descriptor.c \
src/upb_table.c \ src/upb_table.c \
src/upb_string.c \
src/upb_def.c \ src/upb_def.c \
src/upb_msg.c \ src/upb_msg.c \
src/upb_varint.c \ src/upb_varint.c \
@ -100,7 +99,6 @@ BENCHMARKS_SRC= \
TESTS_SRC= \ TESTS_SRC= \
tests/test_decoder.c \ tests/test_decoder.c \
tests/test_def.c \ tests/test_def.c \
tests/test_string.c \
tests/tests.c \ tests/tests.c \
tests/tests_varint.c \ tests/tests_varint.c \
@ -212,7 +210,6 @@ tests/test.proto.pb: tests/test.proto
protoc tests/test.proto -otests/test.proto.pb protoc tests/test.proto -otests/test.proto.pb
SIMPLE_TESTS= \ SIMPLE_TESTS= \
tests/test_string \
tests/test_def \ tests/test_def \
tests/test_varint \ tests/test_varint \
tests/tests \ tests/tests \

@ -1,12 +1,14 @@
#include "main.c" #include "main.c"
#include <stdlib.h>
#include "upb_def.h" #include "upb_def.h"
#include "upb_decoder.h" #include "upb_decoder.h"
#include "upb_strstream.h" #include "upb_strstream.h"
#include "upb_glue.h" #include "upb_glue.h"
static upb_string *input_str; static char *input_str;
static size_t input_len;
static upb_msgdef *def; static upb_msgdef *def;
static upb_decoder decoder; static upb_decoder decoder;
static upb_stringsrc stringsrc; static upb_stringsrc stringsrc;
@ -29,32 +31,21 @@ static bool initialize()
// Initialize upb state, decode descriptor. // Initialize upb state, decode descriptor.
upb_status status = UPB_STATUS_INIT; upb_status status = UPB_STATUS_INIT;
upb_symtab *s = upb_symtab_new(); upb_symtab *s = upb_symtab_new();
upb_read_descriptorfile(s, MESSAGE_DESCRIPTOR_FILE, &status);
upb_string *fds_str = upb_strreadfile(MESSAGE_DESCRIPTOR_FILE);
if(fds_str == NULL) {
fprintf(stderr, "Couldn't read " MESSAGE_DESCRIPTOR_FILE ":"),
upb_printerr(&status);
return false;
}
upb_read_descriptor(s, fds_str, &status);
upb_string_unref(fds_str);
if(!upb_ok(&status)) { if(!upb_ok(&status)) {
fprintf(stderr, "Error importing " MESSAGE_DESCRIPTOR_FILE ":"); upb_status_print(&status, stderr);
upb_printerr(&status);
return false; return false;
} }
def = upb_dyncast_msgdef(upb_symtab_lookup(s, UPB_STRLIT(MESSAGE_NAME))); def = upb_dyncast_msgdef(upb_symtab_lookup(s, MESSAGE_NAME));
if(!def) { if(!def) {
fprintf(stderr, "Error finding symbol '" UPB_STRFMT "'.\n", fprintf(stderr, "Error finding symbol '%s'.\n", MESSAGE_NAME);
UPB_STRARG(UPB_STRLIT(MESSAGE_NAME)));
return false; return false;
} }
upb_symtab_unref(s); upb_symtab_unref(s);
// Read the message data itself. // Read the message data itself.
input_str = upb_strreadfile(MESSAGE_FILE); input_str = upb_readfile(MESSAGE_FILE, &input_len);
if(input_str == NULL) { if(input_str == NULL) {
fprintf(stderr, "Error reading " MESSAGE_FILE "\n"); fprintf(stderr, "Error reading " MESSAGE_FILE "\n");
return false; return false;
@ -72,7 +63,7 @@ static bool initialize()
static void cleanup() static void cleanup()
{ {
upb_string_unref(input_str); free(input_str);
upb_def_unref(UPB_UPCAST(def)); upb_def_unref(UPB_UPCAST(def));
upb_decoder_uninit(&decoder); upb_decoder_uninit(&decoder);
upb_stringsrc_uninit(&stringsrc); upb_stringsrc_uninit(&stringsrc);
@ -82,14 +73,14 @@ static size_t run(int i)
{ {
(void)i; (void)i;
upb_status status = UPB_STATUS_INIT; upb_status status = UPB_STATUS_INIT;
upb_stringsrc_reset(&stringsrc, input_str); upb_stringsrc_reset(&stringsrc, input_str, input_len);
upb_decoder_reset(&decoder, upb_stringsrc_bytesrc(&stringsrc), NULL); upb_decoder_reset(&decoder, upb_stringsrc_bytesrc(&stringsrc), 0, UINT64_MAX, NULL);
upb_decoder_decode(&decoder, &status); upb_decoder_decode(&decoder, &status);
if(!upb_ok(&status)) goto err; if(!upb_ok(&status)) goto err;
return upb_string_len(input_str); return input_len;
err: err:
fprintf(stderr, "Decode error: "); fprintf(stderr, "Decode error: ");
upb_printerr(&status); upb_status_print(&status, stderr);
return 0; return 0;
} }

@ -7,8 +7,8 @@
#include "upb_glue.h" #include "upb_glue.h"
#include "upb_msg.h" #include "upb_msg.h"
static upb_string *input_str;
static upb_msgdef *def; static upb_msgdef *def;
static size_t len;
static void *msg; static void *msg;
static upb_stringsrc strsrc; static upb_stringsrc strsrc;
static upb_decoder d; static upb_decoder d;
@ -18,33 +18,22 @@ static bool initialize()
// Initialize upb state, decode descriptor. // Initialize upb state, decode descriptor.
upb_status status = UPB_STATUS_INIT; upb_status status = UPB_STATUS_INIT;
upb_symtab *s = upb_symtab_new(); upb_symtab *s = upb_symtab_new();
upb_read_descriptorfile(s, MESSAGE_DESCRIPTOR_FILE, &status);
upb_string *fds_str = upb_strreadfile(MESSAGE_DESCRIPTOR_FILE);
if(fds_str == NULL) {
fprintf(stderr, "Couldn't read " MESSAGE_DESCRIPTOR_FILE ":"),
upb_printerr(&status);
return false;
}
upb_read_descriptor(s, fds_str, &status);
upb_string_unref(fds_str);
if(!upb_ok(&status)) { if(!upb_ok(&status)) {
fprintf(stderr, "Error importing " MESSAGE_DESCRIPTOR_FILE ":"); upb_status_print(&status, stderr);
upb_printerr(&status);
return false; return false;
} }
def = upb_dyncast_msgdef(upb_symtab_lookup(s, UPB_STRLIT(MESSAGE_NAME))); def = upb_dyncast_msgdef(upb_symtab_lookup(s, MESSAGE_NAME));
if(!def) { if(!def) {
fprintf(stderr, "Error finding symbol '" UPB_STRFMT "'.\n", fprintf(stderr, "Error finding symbol '%s'.\n", MESSAGE_NAME);
UPB_STRARG(UPB_STRLIT(MESSAGE_NAME)));
return false; return false;
} }
upb_symtab_unref(s); upb_symtab_unref(s);
// Read the message data itself. // Read the message data itself.
input_str = upb_strreadfile(MESSAGE_FILE); char *str = upb_readfile(MESSAGE_FILE, &len);
if(input_str == NULL) { if(str == NULL) {
fprintf(stderr, "Error reading " MESSAGE_FILE "\n"); fprintf(stderr, "Error reading " MESSAGE_FILE "\n");
return false; return false;
} }
@ -52,25 +41,17 @@ static bool initialize()
msg = upb_stdmsg_new(def); msg = upb_stdmsg_new(def);
upb_stringsrc_init(&strsrc); upb_stringsrc_init(&strsrc);
upb_stringsrc_reset(&strsrc, str, len);
upb_decoder_initformsgdef(&d, def); upb_decoder_initformsgdef(&d, def);
if (!BYREF) { if (!BYREF) {
// Pretend the input string is stack-allocated, which will force its data // TODO: use byref/byval accessors.
// to be copied instead of referenced. There is no good reason to do this,
// except to benchmark against proto2 more fairly, which in its open-source
// release does not support referencing the input string.
input_str->refcount.v = _UPB_STRING_REFCOUNT_STACK;
} }
return true; return true;
} }
static void cleanup() static void cleanup()
{ {
if (!BYREF) {
// Undo our fabrication from before.
input_str->refcount.v = 1;
}
upb_string_unref(input_str);
upb_stdmsg_free(msg, def); upb_stdmsg_free(msg, def);
upb_def_unref(UPB_UPCAST(def)); upb_def_unref(UPB_UPCAST(def));
upb_stringsrc_uninit(&strsrc); upb_stringsrc_uninit(&strsrc);
@ -82,14 +63,13 @@ static size_t run(int i)
(void)i; (void)i;
upb_status status = UPB_STATUS_INIT; upb_status status = UPB_STATUS_INIT;
upb_msg_clear(msg, def); upb_msg_clear(msg, def);
upb_stringsrc_reset(&strsrc, input_str); upb_decoder_reset(&d, upb_stringsrc_bytesrc(&strsrc), 0, UINT64_MAX, msg);
upb_decoder_reset(&d, upb_stringsrc_bytesrc(&strsrc), msg);
upb_decoder_decode(&d, &status); upb_decoder_decode(&d, &status);
if(!upb_ok(&status)) goto err; if(!upb_ok(&status)) goto err;
return upb_string_len(input_str); return len;
err: err:
fprintf(stderr, "Decode error: "); fprintf(stderr, "Decode error: ");
upb_printerr(&status); upb_status_print(&status, stderr);
return 0; return 0;
} }

@ -5,19 +5,21 @@
* Author: Josh Haberman <jhaberman@gmail.com> * Author: Josh Haberman <jhaberman@gmail.com>
*/ */
#include <errno.h>
#include <stdarg.h> #include <stdarg.h>
#include <stddef.h> #include <stddef.h>
#include <stdlib.h>
#include <string.h> #include <string.h>
#include "descriptor_const.h" #include "descriptor_const.h"
#include "upb.h" #include "upb.h"
#include "upb_string.h" #include "upb_bytestream.h"
#define alignof(t) offsetof(struct { char c; t x; }, x) #define alignof(t) offsetof(struct { char c; t x; }, x)
#define TYPE_INFO(wire_type, ctype, inmemory_type) \ #define TYPE_INFO(wire_type, ctype, inmemory_type) \
{alignof(ctype), sizeof(ctype), wire_type, UPB_TYPE(inmemory_type), #ctype}, {alignof(ctype), sizeof(ctype), wire_type, UPB_TYPE(inmemory_type), #ctype},
const upb_type_info upb_types[] = { const upb_type_info upb_types[] = {
{0, 0, 0, 0, ""}, // There is no type 0. TYPE_INFO(UPB_WIRE_TYPE_END_GROUP, void*, MESSAGE) // ENDGROUP (fake)
TYPE_INFO(UPB_WIRE_TYPE_64BIT, double, DOUBLE) // DOUBLE TYPE_INFO(UPB_WIRE_TYPE_64BIT, double, DOUBLE) // DOUBLE
TYPE_INFO(UPB_WIRE_TYPE_32BIT, float, FLOAT) // FLOAT TYPE_INFO(UPB_WIRE_TYPE_32BIT, float, FLOAT) // FLOAT
TYPE_INFO(UPB_WIRE_TYPE_VARINT, int64_t, INT64) // INT64 TYPE_INFO(UPB_WIRE_TYPE_VARINT, int64_t, INT64) // INT64
@ -42,39 +44,79 @@ const upb_type_info upb_types[] = {
#ifdef NDEBUG #ifdef NDEBUG
upb_value UPB_NO_VALUE = {{0}}; upb_value UPB_NO_VALUE = {{0}};
#else #else
upb_value UPB_NO_VALUE = {{0}, UPB_VALUETYPE_RAW}; upb_value UPB_NO_VALUE = {{0}, -1};
#endif #endif
void upb_seterr(upb_status *status, enum upb_status_code code, void upb_status_init(upb_status *status) {
const char *msg, ...) { status->buf = NULL;
status->code = code; upb_status_clear(status);
upb_string_recycle(&status->str); }
void upb_status_uninit(upb_status *status) {
free(status->buf);
}
void upb_status_setf(upb_status *s, enum upb_status_code code,
const char *msg, ...) {
s->code = code;
va_list args; va_list args;
va_start(args, msg); va_start(args, msg);
upb_string_vprintf(status->str, msg, args); upb_vrprintf(&s->buf, &s->bufsize, 0, msg, args);
va_end(args); va_end(args);
s->str = s->buf;
} }
void upb_copyerr(upb_status *to, upb_status *from) void upb_status_copy(upb_status *to, upb_status *from) {
{
to->code = from->code; to->code = from->code;
if(from->str) to->str = upb_string_getref(from->str); if (from->str) {
if (to->bufsize < from->bufsize) {
to->bufsize = from->bufsize;
to->buf = realloc(to->buf, to->bufsize);
to->str = to->buf;
}
memcpy(to->str, from->str, from->bufsize);
} else {
to->str = NULL;
}
} }
void upb_clearerr(upb_status *status) { void upb_status_clear(upb_status *status) {
status->code = UPB_OK; status->code = UPB_OK;
if (status->str) upb_string_recycle(&status->str); status->str = NULL;
} }
void upb_printerr(upb_status *status) { void upb_status_print(upb_status *status, FILE *f) {
if(status->str) { if(status->str) {
fprintf(stderr, "code: %d, msg: " UPB_STRFMT "\n", fprintf(f, "code: %d, msg: %s\n", status->code, status->str);
status->code, UPB_STRARG(status->str));
} else { } else {
fprintf(stderr, "code: %d, no msg\n", status->code); fprintf(f, "code: %d, no msg\n", status->code);
} }
} }
void upb_status_uninit(upb_status *status) { void upb_status_fromerrno(upb_status *status) {
upb_string_unref(status->str); upb_status_setf(status, UPB_ERROR, "%s", strerror(errno));
}
int upb_vrprintf(char **buf, size_t *size, size_t ofs,
const char *fmt, va_list args) {
// Try once without reallocating. We have to va_copy because we might have
// to call vsnprintf again.
uint32_t len = *size - ofs;
va_list args_copy;
va_copy(args_copy, args);
uint32_t true_len = vsnprintf(*buf + ofs, len, fmt, args_copy);
va_end(args_copy);
// Resize to be the correct size.
if (true_len >= len) {
// Need to print again, because some characters were truncated. vsnprintf
// will not write the entire string unless you give it space to store the
// NULL terminator also.
while (*size < (ofs + true_len + 1)) *size = UPB_MAX(*size * 2, 2);
char *newbuf = realloc(*buf, *size);
if (!newbuf) return -1;
vsnprintf(newbuf + ofs, true_len + 1, fmt, args);
*buf = newbuf;
}
return true_len;
} }

@ -30,9 +30,7 @@ extern "C" {
#define UPB_MIN(x, y) ((x) < (y) ? (x) : (y)) #define UPB_MIN(x, y) ((x) < (y) ? (x) : (y))
#define UPB_INDEX(base, i, m) (void*)((char*)(base) + ((i)*(m))) #define UPB_INDEX(base, i, m) (void*)((char*)(base) + ((i)*(m)))
INLINE void nop_printf(const char *fmt, ...) { INLINE void nop_printf(const char *fmt, ...) { (void)fmt; }
(void)fmt;
}
#ifdef NDEBUG #ifdef NDEBUG
#define DEBUGPRINTF nop_printf #define DEBUGPRINTF nop_printf
@ -45,7 +43,6 @@ INLINE size_t upb_align_up(size_t val, size_t align) {
return val % align == 0 ? val : val + align - (val % align); return val % align == 0 ? val : val + align - (val % align);
} }
// The maximum that any submessages can be nested. Matches proto2's limit. // The maximum that any submessages can be nested. Matches proto2's limit.
// At the moment this specifies the size of several statically-sized arrays // At the moment this specifies the size of several statically-sized arrays
// and therefore setting it high will cause more memory to be used. Will // and therefore setting it high will cause more memory to be used. Will
@ -122,31 +119,16 @@ typedef struct {
extern const upb_type_info upb_types[]; extern const upb_type_info upb_types[];
/* Polymorphic values of .proto types *****************************************/ /* upb_value ******************************************************************/
struct _upb_string; struct _upb_strref;
typedef struct _upb_string upb_string;
struct _upb_array;
typedef struct _upb_array upb_array;
struct _upb_msg;
typedef struct _upb_msg upb_msg;
struct _upb_bytesrc;
typedef struct _upb_bytesrc upb_bytesrc;
struct _upb_fielddef; struct _upb_fielddef;
typedef struct _upb_fielddef upb_fielddef;
typedef int32_t upb_strlen_t; // Special constants for the upb_value.type field. These must not conflict
#define UPB_STRLEN_MAX INT32_MAX // with any members of FieldDescriptorProto.Type.
#define UPB_TYPE_ENDGROUP 0
// The type of a upb_value. This is like a upb_fieldtype_t, but adds the #define UPB_VALUETYPE_FIELDDEF 32
// constant UPB_VALUETYPE_ARRAY to represent an array. #define UPB_VALUETYPE_PTR 33
typedef uint8_t upb_valuetype_t;
#define UPB_TYPE_ENDGROUP 19 // Need to increase if more real types are added!
#define UPB_VALUETYPE_ARRAY 32
#define UPB_VALUETYPE_BYTESRC 32
#define UPB_VALUETYPE_RAW 33
#define UPB_VALUETYPE_FIELDDEF 34
#define UPB_VALUETYPE_PTR 35
// A single .proto value. The owner must have an out-of-band way of knowing // A single .proto value. The owner must have an out-of-band way of knowing
// the type, so that it knows which union member to use. // the type, so that it knows which union member to use.
@ -159,19 +141,15 @@ typedef struct {
int64_t int64; int64_t int64;
uint32_t uint32; uint32_t uint32;
bool _bool; bool _bool;
upb_string *str; struct _upb_strref *strref;
upb_bytesrc *bytesrc; struct _upb_fielddef *fielddef;
upb_msg *msg;
upb_array *arr;
upb_atomic_t *refcount;
upb_fielddef *fielddef;
void *_void; void *_void;
} val; } val;
#ifndef NDEBUG
// In debug mode we carry the value type around also so we can check accesses // In debug mode we carry the value type around also so we can check accesses
// to be sure the right member is being read. // to be sure the right member is being read.
#ifndef NDEBUG char type;
upb_valuetype_t type;
#endif #endif
} upb_value; } upb_value;
@ -183,7 +161,7 @@ typedef struct {
#define UPB_VALUE_ACCESSORS(name, membername, ctype, proto_type) \ #define UPB_VALUE_ACCESSORS(name, membername, ctype, proto_type) \
INLINE ctype upb_value_get ## name(upb_value val) { \ INLINE ctype upb_value_get ## name(upb_value val) { \
assert(val.type == proto_type || val.type == UPB_VALUETYPE_RAW); \ assert(val.type == proto_type); \
return val.val.membername; \ return val.val.membername; \
} \ } \
INLINE void upb_value_set ## name(upb_value *val, ctype cval) { \ INLINE void upb_value_set ## name(upb_value *val, ctype cval) { \
@ -197,18 +175,14 @@ UPB_VALUE_ACCESSORS(int64, int64, int64_t, UPB_TYPE(INT64));
UPB_VALUE_ACCESSORS(uint32, uint32, uint32_t, UPB_TYPE(UINT32)); UPB_VALUE_ACCESSORS(uint32, uint32, uint32_t, UPB_TYPE(UINT32));
UPB_VALUE_ACCESSORS(uint64, uint64, uint64_t, UPB_TYPE(UINT64)); UPB_VALUE_ACCESSORS(uint64, uint64, uint64_t, UPB_TYPE(UINT64));
UPB_VALUE_ACCESSORS(bool, _bool, bool, UPB_TYPE(BOOL)); UPB_VALUE_ACCESSORS(bool, _bool, bool, UPB_TYPE(BOOL));
UPB_VALUE_ACCESSORS(str, str, upb_string*, UPB_TYPE(STRING)); // Marked for destruction. UPB_VALUE_ACCESSORS(strref, strref, struct _upb_strref*, UPB_TYPE(STRING));
UPB_VALUE_ACCESSORS(fielddef, fielddef, upb_fielddef*, UPB_VALUETYPE_FIELDDEF); UPB_VALUE_ACCESSORS(fielddef, fielddef, struct _upb_fielddef*, UPB_VALUETYPE_FIELDDEF);
UPB_VALUE_ACCESSORS(ptr, _void, void*, UPB_VALUETYPE_PTR); UPB_VALUE_ACCESSORS(ptr, _void, void*, UPB_VALUETYPE_PTR);
extern upb_value UPB_NO_VALUE; extern upb_value UPB_NO_VALUE;
INLINE upb_atomic_t *upb_value_getrefcount(upb_value val) {
assert(val.type == UPB_TYPE(MESSAGE) || /* upb_status *****************************************************************/
val.type == UPB_TYPE(STRING) ||
val.type == UPB_VALUETYPE_ARRAY);
return val.val.refcount;
}
// Status codes used as a return value. Codes >0 are not fatal and can be // Status codes used as a return value. Codes >0 are not fatal and can be
// resumed. // resumed.
@ -224,42 +198,38 @@ enum upb_status_code {
// An unrecoverable error occurred. // An unrecoverable error occurred.
UPB_ERROR = -1, UPB_ERROR = -1,
// A recoverable error occurred (for example, data of the wrong type was
// encountered which we can skip over).
// UPB_STATUS_RECOVERABLE_ERROR = -2
}; };
// TODO: consider adding error space and code, to let ie. errno be stored // TODO: consider adding error space and code, to let ie. errno be stored
// as a proper code, or application-specific error codes. // as a proper code, or application-specific error codes.
struct _upb_status { typedef struct {
char code; char code;
upb_string *str; char *str; // NULL when no message is present. NULL-terminated.
}; char *buf; // Owned by the status.
size_t bufsize;
typedef struct _upb_status upb_status; } upb_status;
#define UPB_STATUS_INIT {UPB_OK, NULL}
#define UPB_ERRORMSG_MAXLEN 256
INLINE bool upb_ok(upb_status *status) { #define UPB_STATUS_INIT {UPB_OK, NULL, NULL, 0}
return status->code == UPB_OK;
}
INLINE void upb_status_init(upb_status *status) {
status->code = UPB_OK;
status->str = NULL;
}
void upb_status_init(upb_status *status);
void upb_status_uninit(upb_status *status); void upb_status_uninit(upb_status *status);
// Caller owns a ref on the returned string. INLINE bool upb_ok(upb_status *status) { return status->code == UPB_OK; }
upb_string *upb_status_tostring(upb_status *status); INLINE bool upb_iseof(upb_status *status) { return status->code == UPB_EOF; }
void upb_printerr(upb_status *status);
void upb_clearerr(upb_status *status); void upb_status_fromerrno(upb_status *status);
void upb_seterr(upb_status *status, enum upb_status_code code, const char *msg, void upb_status_print(upb_status *status, FILE *f);
...); void upb_status_clear(upb_status *status);
void upb_copyerr(upb_status *to, upb_status *from); void upb_status_setf(upb_status *status, enum upb_status_code code,
const char *fmt, ...);
void upb_status_copy(upb_status *to, upb_status *from);
// Like vasprintf, but uses *buf (which can be NULL) as a starting point and
// reallocates it only if the new value will not fit. "size" is updated to
// reflect the allocated size of the buffer. Returns the length of the
// formatted string, or -1 on memory allocation failure.
int upb_vrprintf(char **buf, size_t *size, size_t ofs,
const char *fmt, va_list args);
#ifdef __cplusplus #ifdef __cplusplus
} /* extern "C" */ } /* extern "C" */

@ -1,120 +1,195 @@
/* /*
* upb - a minimalist implementation of protocol buffers. * upb - a minimalist implementation of protocol buffers.
* *
* Copyright (c) 2010-2011 Google Inc. See LICENSE for details. * Copyright (c) 2011 Google Inc. See LICENSE for details.
* Author: Josh Haberman <jhaberman@gmail.com> * Author: Josh Haberman <jhaberman@gmail.com>
* *
* Defines the interfaces upb_bytesrc and upb_bytesink, which are abstractions * This file contains upb_bytesrc and upb_bytesink, which are abstractions of
* of read()/write() with useful buffering/sharing semantics. * stdio (fread()/fwrite()/etc) that provide useful buffering/sharing
* semantics. They are virtual base classes so concrete implementations
* can get the data from a fd, a string, a cord, etc.
*
* Byte streams are NOT thread-safe! (Like f{read,write}_unlocked())
*/ */
#ifndef UPB_BYTESTREAM_H #ifndef UPB_BYTESTREAM_H
#define UPB_BYTESTREAM_H #define UPB_BYTESTREAM_H
#include <stdarg.h> #include <stdarg.h>
#include <stdlib.h>
#include <string.h>
#include "upb.h" #include "upb.h"
#ifdef __cplusplus #ifdef __cplusplus
extern "C" { extern "C" {
#endif #endif
/* upb_bytesrc ****************************************************************/
// upb_bytesrc is a pull interface for streams of bytes, basically an /* upb_bytesrc ****************************************************************/
// abstraction of read()/fread(), but it avoids copies where possible.
typedef upb_strlen_t (*upb_bytesrc_read_fptr)( // A upb_bytesrc allows the consumer of a stream of bytes to obtain buffers as
upb_bytesrc *src, void *buf, upb_strlen_t count, upb_status *status); // they become available, and to preserve some trailing amount of data.
typedef bool (*upb_bytesrc_getstr_fptr)( typedef size_t upb_bytesrc_fetch_func(void*, uint64_t, upb_status*);
upb_bytesrc *src, upb_string *str, upb_status *status); typedef void upb_bytesrc_read_func(void*, uint64_t, size_t, char*);
typedef const char *upb_bytesrc_getptr_func(void*, uint64_t, size_t*);
typedef void upb_bytesrc_refregion_func(void*, uint64_t, size_t);
typedef void upb_bytesrc_ref_func(void*);
typedef struct _upb_bytesrc_vtbl {
upb_bytesrc_fetch_func *fetch;
upb_bytesrc_read_func *read;
upb_bytesrc_getptr_func *getptr;
upb_bytesrc_refregion_func *refregion;
upb_bytesrc_refregion_func *unrefregion;
upb_bytesrc_ref_func *ref;
upb_bytesrc_ref_func *unref;
} upb_bytesrc_vtbl;
typedef struct { typedef struct {
upb_bytesrc_read_fptr read; upb_bytesrc_vtbl *vtbl;
upb_bytesrc_getstr_fptr getstr; } upb_bytesrc;
} upb_bytesrc_vtbl;
struct _upb_bytesrc { INLINE void upb_bytesrc_init(upb_bytesrc *src, upb_bytesrc_vtbl *vtbl) {
upb_bytesrc_vtbl *vtbl; src->vtbl = vtbl;
}; }
INLINE void upb_bytesrc_init(upb_bytesrc *s, upb_bytesrc_vtbl *vtbl) { // Fetches at least minlen bytes starting at ofs, returning the actual number
s->vtbl = vtbl; // of bytes fetched (or 0 on error: see "s" for details). Gives caller a ref
// on the fetched region. It is safe to re-fetch existing regions but only if
// they are ref'd. "ofs" may not be greater than the end of the region that was
// previously fetched.
INLINE size_t upb_bytesrc_fetch(upb_bytesrc *src, uint64_t ofs, upb_status *s) {
return src->vtbl->fetch(src, ofs, s);
} }
// Reads up to "count" bytes into "buf", returning the total number of bytes // Copies "len" bytes of data from offset src_ofs to "dst", which must be at
// read. If 0, indicates error and puts details in "status". // least "len" bytes long. The caller must own a ref on the given region.
INLINE upb_strlen_t upb_bytesrc_read(upb_bytesrc *src, void *buf, INLINE void upb_bytesrc_read(upb_bytesrc *src, uint64_t src_ofs, size_t len,
upb_strlen_t count, upb_status *status) { char *dst) {
return src->vtbl->read(src, buf, count, status); src->vtbl->read(src, src_ofs, len, dst);
} }
// Like upb_bytesrc_read(), but modifies "str" in-place. Caller must ensure // Returns a pointer to the bytesrc's internal buffer, returning how much data
// that "str" is created or just recycled. Returns "false" if no data was // was actually returned (which may be less than "len" if the given region is
// returned, either due to error or EOF (check status for details). // not contiguous). The caller must own refs on the entire region from [ofs,
// ofs+len]. The returned buffer is valid for as long as the region remains
// ref'd.
// //
// In comparison to upb_bytesrc_read(), this call can possibly alias existing // TODO: is "len" really required here?
// string data (which avoids a copy). On the other hand, if the data was *not* INLINE const char *upb_bytesrc_getptr(upb_bytesrc *src, uint64_t ofs,
// already in an existing string, this copies it into a upb_string, and if the size_t *len) {
// data needs to be put in a specific range of memory (because eg. you need to return src->vtbl->getptr(src, ofs, len);
// put it into a different kind of string object) then upb_bytesrc_get() could }
// save you a copy.
INLINE bool upb_bytesrc_getstr(upb_bytesrc *src, upb_string *str, // Gives the caller a ref on the given region. The caller must know that the
upb_status *status) { // given region is already ref'd.
return src->vtbl->getstr(src, str, status); INLINE void upb_bytesrc_refregion(upb_bytesrc *src, uint64_t ofs, size_t len) {
src->vtbl->refregion(src, ofs, len);
}
// Releases a ref on the given region, which the caller must have previously
// ref'd.
INLINE void upb_bytesrc_unrefregion(upb_bytesrc *src, uint64_t ofs, size_t len) {
src->vtbl->unrefregion(src, ofs, len);
}
// Attempts to ref the bytesrc itself, returning false if this bytesrc is
// not ref-able.
INLINE bool upb_bytesrc_tryref(upb_bytesrc *src) {
if (src->vtbl->ref) {
src->vtbl->ref(src);
return true;
} else {
return false;
}
}
// Unref's the bytesrc itself. May only be called when upb_bytesrc_tryref()
// has previously returned true.
INLINE void upb_bytesrc_unref(upb_bytesrc *src) {
assert(src->vtbl->unref);
src->vtbl->unref(src);
}
/* upb_strref *****************************************************************/
// The structure we pass for a string.
typedef struct _upb_strref {
// Pointer to the string data. NULL if the string spans multiple input
// buffers (in which case upb_bytesrc_getptr() must be called to obtain
// the actual pointers).
const char *ptr;
// Bytesrc from which this string data comes. This is only guaranteed to be
// alive from inside the callback; however if the handler knows more about
// its type and how to prolong its life, it may do so.
upb_bytesrc *bytesrc;
// Offset in the bytesrc that represents the beginning of this string.
uint32_t stream_offset;
// Length of the string.
uint32_t len;
// Possibly add optional members here like start_line, start_column, etc.
} upb_strref;
// Copies the contents of the strref into a newly-allocated, NUL-terminated
// string.
INLINE char *upb_strref_dup(struct _upb_strref *r) {
char *ret = (char*)malloc(r->len + 1);
upb_bytesrc_read(r->bytesrc, r->stream_offset, r->len, ret);
ret[r->len] = '\0';
return ret;
} }
/* upb_bytesink ***************************************************************/ /* upb_bytesink ***************************************************************/
struct _upb_bytesink; typedef bool upb_bytesink_write_func(void*, const char*, size_t, upb_status*);
typedef struct _upb_bytesink upb_bytesink; typedef int32_t upb_bytesink_vprintf_func(
typedef upb_strlen_t (*upb_bytesink_putstr_fptr)( void*, upb_status*, const char *fmt, va_list args);
upb_bytesink *bytesink, upb_string *str, upb_status *status);
typedef upb_strlen_t (*upb_bytesink_vprintf_fptr)(
upb_bytesink *bytesink, upb_status *status, const char *fmt, va_list args);
typedef struct { typedef struct {
upb_bytesink_putstr_fptr putstr; upb_bytesink_write_func *write;
upb_bytesink_vprintf_fptr vprintf; upb_bytesink_vprintf_func *vprintf;
} upb_bytesink_vtbl; } upb_bytesink_vtbl;
struct _upb_bytesink { typedef struct {
upb_bytesink_vtbl *vtbl; upb_bytesink_vtbl *vtbl;
}; } upb_bytesink;
INLINE void upb_bytesink_init(upb_bytesink *s, upb_bytesink_vtbl *vtbl) { INLINE void upb_bytesink_init(upb_bytesink *sink, upb_bytesink_vtbl *vtbl) {
s->vtbl = vtbl; sink->vtbl = vtbl;
} }
INLINE bool upb_bytesink_write(upb_bytesink *sink, const char *buf, size_t len,
upb_status *s) {
return sink->vtbl->write(sink, buf, len, s);
}
// TODO: Figure out how buffering should be handled. Should the caller buffer INLINE bool upb_bytesink_writestr(upb_bytesink *sink, const char *str,
// data and only call these functions when a buffer is full? Seems most upb_status *s) {
// efficient, but then buffering has to be configured in the caller, which return upb_bytesink_write(sink, str, strlen(str), s);
// could be anything, which makes it hard to have a standard interface for }
// controlling buffering.
// // Returns the number of bytes written or -1 on error.
// The downside of having the bytesink buffer is efficiency: the caller is INLINE int32_t upb_bytesink_printf(upb_bytesink *sink, upb_status *status,
// making more (virtual) function calls, and the caller can't arrange to have const char *fmt, ...) {
// a big contiguous buffer. The bytesink can do this, but will have to copy
// to make the data contiguous.
// Returns the number of bytes written.
INLINE upb_strlen_t upb_bytesink_printf(upb_bytesink *sink, upb_status *status,
const char *fmt, ...) {
va_list args; va_list args;
va_start(args, fmt); va_start(args, fmt);
upb_strlen_t ret = sink->vtbl->vprintf(sink, status, fmt, args); uint32_t ret = sink->vtbl->vprintf(sink, status, fmt, args);
va_end(args); va_end(args);
return ret; return ret;
} }
// Puts the given string, returning true if the operation was successful, otherwise // OPT: add getappendbuf()
// check "status" for details. Ownership of the string is *not* passed; if // OPT: add writefrombytesrc()
// the callee wants a reference he must call upb_string_getref() on it. // TODO: add flush()
INLINE upb_strlen_t upb_bytesink_putstr(upb_bytesink *sink, upb_string *str,
upb_status *status) {
return sink->vtbl->putstr(sink, str, status); /* upb_cbuf *******************************************************************/
}
// A circular buffer implementation for bytesrcs that do internal buffering.
#ifdef __cplusplus #ifdef __cplusplus
} /* extern "C" */ } /* extern "C" */

@ -8,6 +8,7 @@
#include <inttypes.h> #include <inttypes.h>
#include <stddef.h> #include <stddef.h>
#include <stdlib.h> #include <stdlib.h>
#include "bswap.h"
#include "upb_bytestream.h" #include "upb_bytestream.h"
#include "upb_decoder.h" #include "upb_decoder.h"
#include "upb_varint.h" #include "upb_varint.h"
@ -38,83 +39,97 @@ static void upb_decoder_exit2(void *_d) {
upb_decoder *d = _d; upb_decoder *d = _d;
upb_decoder_exit(d); upb_decoder_exit(d);
} }
static void upb_decoder_abort(upb_decoder *d, const char *msg) {
upb_status_setf(d->status, UPB_ERROR, msg);
upb_decoder_exit(d);
}
/* Decoding/Buffering of wire types *******************************************/ /* Decoding/Buffering of wire types *******************************************/
#define UPB_MAX_VARINT_ENCODED_SIZE 10
static void upb_decoder_advance(upb_decoder *d, size_t len) { d->ptr += len; }
static size_t upb_decoder_bufleft(upb_decoder *d) { return d->end - d->ptr; } static size_t upb_decoder_bufleft(upb_decoder *d) { return d->end - d->ptr; }
// Consumes "len" bytes of the current buffer by moving d->ptr forward.
// The caller must ensure at least "len" bytes remain before d->end; this is
// verified by assertion only.
static void upb_decoder_advance(upb_decoder *d, size_t len) {
  assert(len <= (size_t)(d->end - d->ptr));
  d->ptr = d->ptr + len;
}
size_t upb_decoder_offset(upb_decoder *d) { size_t upb_decoder_offset(upb_decoder *d) {
size_t offset = d->buf_stream_offset; size_t offset = d->bufstart_ofs;
if (d->buf) offset += (d->ptr - d->buf); if (d->ptr) offset += (d->ptr - d->buf);
return offset; return offset;
} }
static void upb_decoder_setmsgend(upb_decoder *d) { static void upb_decoder_setmsgend(upb_decoder *d) {
uint32_t end = d->dispatcher.top->end_offset; upb_dispatcher_frame *f = d->dispatcher.top;
d->submsg_end = (end == UPB_NONDELIMITED) ? (void*)UINTPTR_MAX : d->buf + end; size_t delimlen = f->end_ofs - d->bufstart_ofs;
size_t buflen = d->end - d->buf;
if (f->end_ofs != UINT64_MAX && delimlen <= buflen) {
d->delim_end = (uintptr_t)(d->buf + delimlen);
} else {
// Buffers must not run up against the end of memory.
assert((uintptr_t)d->end < UINTPTR_MAX);
d->delim_end = UINTPTR_MAX;
}
} }
// Pulls the next buffer from the bytesrc. Should be called only when the // Pulls the next buffer from the bytesrc. Should be called only when the
// current buffer is completely empty. // current buffer is completely empty.
static void upb_pullbuf(upb_decoder *d, bool need) { static bool upb_trypullbuf(upb_decoder *d) {
assert(upb_decoder_bufleft(d) == 0); assert(upb_decoder_bufleft(d) == 0);
int32_t last_buf_len = d->buf ? upb_string_len(d->bufstr) : -1; if (d->bufend_ofs == d->refend_ofs) {
upb_string_recycle(&d->bufstr); d->refend_ofs += upb_bytesrc_fetch(d->bytesrc, d->refend_ofs, d->status);
if (!upb_bytesrc_getstr(d->bytesrc, d->bufstr, d->status)) { if (!upb_ok(d->status)) {
d->buf = NULL; d->ptr = NULL;
d->end = NULL; d->end = NULL;
if (need) upb_seterr(d->status, UPB_ERROR, "Unexpected EOF."); if (upb_iseof(d->status)) return false;
upb_decoder_exit(d); upb_decoder_exit(d);
} }
if (last_buf_len != -1) {
d->buf_stream_offset += last_buf_len;
for (upb_dispatcher_frame *f = d->dispatcher.stack; f <= d->dispatcher.top; ++f)
if (f->end_offset != UPB_NONDELIMITED)
f->end_offset -= last_buf_len;
} }
d->buf = upb_string_getrobuf(d->bufstr); d->bufstart_ofs = d->bufend_ofs;
d->ptr = upb_string_getrobuf(d->bufstr); size_t len;
d->end = d->buf + upb_string_len(d->bufstr); d->buf = upb_bytesrc_getptr(d->bytesrc, d->bufstart_ofs, &len);
assert(len > 0);
d->bufend_ofs = d->bufstart_ofs + len;
d->ptr = d->buf;
d->end = d->buf + len;
#ifdef UPB_USE_JIT_X64
d->jit_end = d->end - 20; d->jit_end = d->end - 20;
upb_string_recycle(&d->tmp); #endif
upb_string_substr(d->tmp, d->bufstr, 0, 0);
upb_decoder_setmsgend(d); upb_decoder_setmsgend(d);
return true;
} }
// Called only from the slow path, this function copies the next "len" bytes static void upb_pullbuf(upb_decoder *d) {
// from the stream to "data", adjusting the decoder state appropriately. if (!upb_trypullbuf(d)) upb_decoder_abort(d, "Unexpected EOF");
NOINLINE void upb_getbuf(upb_decoder *d, void *data, size_t bytes, bool need) { }
while (1) {
size_t to_copy = UPB_MIN(bytes, upb_decoder_bufleft(d)); void upb_decoder_commit(upb_decoder *d) {
memcpy(data, d->ptr, to_copy); d->completed_ptr = d->ptr;
upb_decoder_advance(d, to_copy); if (d->refstart_ofs < d->bufstart_ofs) {
bytes -= to_copy; // Drop our ref on the previous buf's region.
if (bytes == 0) return; upb_bytesrc_refregion(d->bytesrc, d->bufstart_ofs, d->refend_ofs);
upb_pullbuf(d, need); upb_bytesrc_unrefregion(d->bytesrc, d->refstart_ofs, d->refend_ofs);
d->refstart_ofs = d->bufstart_ofs;
} }
} }
NOINLINE uint64_t upb_decode_varint_slow(upb_decoder *d, bool need) { NOINLINE uint64_t upb_decode_varint_slow(upb_decoder *d) {
uint8_t byte = 0x80; uint8_t byte = 0x80;
uint64_t u64 = 0; uint64_t u64 = 0;
int bitpos; int bitpos;
const char *ptr = d->ptr;
for(bitpos = 0; bitpos < 70 && (byte & 0x80); bitpos += 7) { for(bitpos = 0; bitpos < 70 && (byte & 0x80); bitpos += 7) {
upb_getbuf(d, &byte, 1, need); if (upb_decoder_bufleft(d) == 0) {
u64 |= ((uint64_t)byte & 0x7F) << bitpos; upb_pullbuf(d);
} ptr = d->ptr;
}
if(bitpos == 70 && (byte & 0x80)) { u64 |= ((uint64_t)(byte = *ptr++) & 0x7F) << bitpos;
upb_seterr(d->status, UPB_ERROR, "Unterminated varint.\n");
upb_decoder_exit(d);
} }
if(bitpos == 70 && (byte & 0x80)) upb_decoder_abort(d, "Unterminated varint");
return u64; return u64;
} }
// For tags and delimited lengths, which must be <=32bit and are usually small. // For tags and delimited lengths, which must be <=32bit and are usually small.
FORCEINLINE uint32_t upb_decode_varint32(upb_decoder *d, bool need) { FORCEINLINE uint32_t upb_decode_varint32(upb_decoder *d) {
const char *p = d->ptr; const char *p = d->ptr;
uint32_t ret; uint32_t ret;
uint64_t u64; uint64_t u64;
@ -125,11 +140,8 @@ FORCEINLINE uint32_t upb_decode_varint32(upb_decoder *d, bool need) {
ret |= (*p & 0x7f) << 7; ret |= (*p & 0x7f) << 7;
if ((*(p++) & 0x80) == 0) goto done; // likely if ((*(p++) & 0x80) == 0) goto done; // likely
slow: slow:
u64 = upb_decode_varint_slow(d, need); u64 = upb_decode_varint_slow(d);
if (u64 > 0xffffffff) { if (u64 > 0xffffffff) upb_decoder_abort(d, "Unterminated 32-bit varint");
upb_seterr(d->status, UPB_ERROR, "Unterminated 32-bit varint.\n");
upb_decoder_exit(d);
}
ret = (uint32_t)u64; ret = (uint32_t)u64;
p = d->ptr; // Turn the next line into a nop. p = d->ptr; // Turn the next line into a nop.
done: done:
@ -137,57 +149,90 @@ done:
return ret; return ret;
} }
// Attempts to decode a 32-bit varint into *val.
//
// Returns false, leaving *val untouched, when the current buffer is exhausted
// and either the buffer already ends at d->end_ofs or upb_trypullbuf() could
// not supply more data.  Otherwise stores the result of upb_decode_varint32()
// in *val and returns true.
FORCEINLINE bool upb_trydecode_varint32(upb_decoder *d, uint32_t *val) {
  const bool buf_empty = (upb_decoder_bufleft(d) == 0);
  // Evaluation order matters: a pull is only attempted when the buffer is
  // empty and we are not already sitting at the end offset.
  if (buf_empty && (d->bufend_ofs == d->end_ofs || !upb_trypullbuf(d))) {
    return false;
  }
  *val = upb_decode_varint32(d);
  return true;
}
FORCEINLINE uint64_t upb_decode_varint(upb_decoder *d) { FORCEINLINE uint64_t upb_decode_varint(upb_decoder *d) {
if (upb_decoder_bufleft(d) >= 16) { if (upb_decoder_bufleft(d) >= 10) {
// Common (fast) case. // Fast case.
upb_decoderet r = upb_vdecode_fast(d->ptr); upb_decoderet r = upb_vdecode_fast(d->ptr);
if (r.p == NULL) { if (r.p == NULL) upb_decoder_abort(d, "Unterminated varint");
upb_seterr(d->status, UPB_ERROR, "Unterminated varint.\n");
upb_decoder_exit(d);
}
upb_decoder_advance(d, r.p - d->ptr); upb_decoder_advance(d, r.p - d->ptr);
return r.val; return r.val;
} else { } else if (upb_decoder_bufleft(d) > 0) {
return upb_decode_varint_slow(d, true); // Intermediate case -- worth it?
char tmpbuf[10];
memset(tmpbuf, 0x80, 10);
memcpy(tmpbuf, d->ptr, upb_decoder_bufleft(d));
upb_decoderet r = upb_vdecode_fast(tmpbuf);
if (r.p != NULL) {
upb_decoder_advance(d, r.p - tmpbuf);
return r.val;
}
} }
// Slow case -- varint spans buffer seam.
return upb_decode_varint_slow(d);
} }
FORCEINLINE void upb_decode_fixed(upb_decoder *d, void *val, size_t bytes) { FORCEINLINE void upb_decode_fixed(upb_decoder *d, char *buf, size_t bytes) {
if (upb_decoder_bufleft(d) >= bytes) { if (upb_decoder_bufleft(d) >= bytes) {
// Common (fast) case. // Fast case.
memcpy(val, d->ptr, bytes); memcpy(buf, d->ptr, bytes);
upb_decoder_advance(d, bytes); upb_decoder_advance(d, bytes);
} else { } else {
upb_getbuf(d, val, bytes, true); // Slow case.
size_t read = 0;
while (read < bytes) {
size_t avail = upb_decoder_bufleft(d);
memcpy(buf + read, d->ptr, avail);
upb_decoder_advance(d, avail);
read += avail;
}
} }
} }
FORCEINLINE uint32_t upb_decode_fixed32(upb_decoder *d) { FORCEINLINE uint32_t upb_decode_fixed32(upb_decoder *d) {
uint32_t u32; uint32_t u32;
upb_decode_fixed(d, &u32, sizeof(uint32_t)); upb_decode_fixed(d, (char*)&u32, sizeof(uint32_t));
return u32; return le32toh(u32);
} }
FORCEINLINE uint64_t upb_decode_fixed64(upb_decoder *d) { FORCEINLINE uint64_t upb_decode_fixed64(upb_decoder *d) {
uint64_t u64; uint64_t u64;
upb_decode_fixed(d, &u64, sizeof(uint64_t)); upb_decode_fixed(d, (char*)&u64, sizeof(uint64_t));
return u64; return le64toh(u64);
} }
INLINE upb_string *upb_decode_string(upb_decoder *d) { INLINE upb_strref *upb_decode_string(upb_decoder *d) {
upb_string_recycle(&d->tmp); uint32_t strlen = upb_decode_varint32(d);
uint32_t strlen = upb_decode_varint32(d, true); d->strref.stream_offset = upb_decoder_offset(d);
d->strref.len = strlen;
if (upb_decoder_bufleft(d) == 0) upb_pullbuf(d);
if (upb_decoder_bufleft(d) >= strlen) { if (upb_decoder_bufleft(d) >= strlen) {
// Common (fast) case. // Fast case.
upb_string_substr(d->tmp, d->bufstr, d->ptr - d->buf, strlen); d->strref.ptr = d->ptr;
upb_decoder_advance(d, strlen); upb_decoder_advance(d, strlen);
} else { } else {
upb_getbuf(d, upb_string_getrwbuf(d->tmp, strlen), strlen, true); // Slow case.
while (1) {
size_t consume = UPB_MIN(upb_decoder_bufleft(d), strlen);
upb_decoder_advance(d, consume);
strlen -= consume;
if (strlen == 0) break;
upb_pullbuf(d);
}
} }
return d->tmp; return &d->strref;
} }
INLINE void upb_push(upb_decoder *d, upb_fhandlers *f, uint32_t end) { INLINE void upb_push(upb_decoder *d, upb_fhandlers *f, uint32_t end) {
upb_dispatch_startsubmsg(&d->dispatcher, f)->end_offset = end; upb_dispatch_startsubmsg(&d->dispatcher, f)->end_ofs = end;
upb_decoder_setmsgend(d); upb_decoder_setmsgend(d);
} }
@ -224,7 +269,7 @@ T(DOUBLE, fixed64, double, upb_asdouble)
T(FLOAT, fixed32, float, upb_asfloat) T(FLOAT, fixed32, float, upb_asfloat)
T(SINT32, varint, int32, upb_zzdec_32) T(SINT32, varint, int32, upb_zzdec_32)
T(SINT64, varint, int64, upb_zzdec_64) T(SINT64, varint, int64, upb_zzdec_64)
T(STRING, string, str, upb_string*) T(STRING, string, strref, upb_strref*)
static void upb_decode_GROUP(upb_decoder *d, upb_fhandlers *f) { static void upb_decode_GROUP(upb_decoder *d, upb_fhandlers *f) {
upb_push(d, f, UPB_NONDELIMITED); upb_push(d, f, UPB_NONDELIMITED);
@ -235,28 +280,24 @@ static void upb_endgroup(upb_decoder *d, upb_fhandlers *f) {
upb_decoder_setmsgend(d); upb_decoder_setmsgend(d);
} }
static void upb_decode_MESSAGE(upb_decoder *d, upb_fhandlers *f) { static void upb_decode_MESSAGE(upb_decoder *d, upb_fhandlers *f) {
upb_push(d, f, upb_decode_varint32(d, true) + (d->ptr - d->buf)); upb_push(d, f, upb_decode_varint32(d) + (d->ptr - d->buf));
} }
/* The main decoding loop *****************************************************/ /* The main decoding loop *****************************************************/
// Called when a user callback returns something other than UPB_CONTINUE. static void upb_decoder_checkdelim(upb_decoder *d) {
// This should unwind one or more stack frames, skipping the corresponding while ((uintptr_t)d->ptr >= d->delim_end) {
// data in the input. if ((uintptr_t)d->ptr > d->delim_end)
upb_decoder_abort(d, "Bad submessage end");
static void upb_delimend(upb_decoder *d) { if (d->dispatcher.top->is_sequence) {
if (d->ptr > d->submsg_end) { upb_dispatch_endseq(&d->dispatcher);
upb_seterr(d->status, UPB_ERROR, "Bad submessage end."); } else {
upb_decoder_exit(d); upb_dispatch_endsubmsg(&d->dispatcher);
} }
upb_decoder_setmsgend(d);
if (d->dispatcher.top->is_sequence) {
upb_dispatch_endseq(&d->dispatcher);
} else {
upb_dispatch_endsubmsg(&d->dispatcher);
} }
upb_decoder_setmsgend(d);
} }
static void upb_decoder_enterjit(upb_decoder *d) { static void upb_decoder_enterjit(upb_decoder *d) {
@ -273,7 +314,8 @@ static void upb_decoder_enterjit(upb_decoder *d) {
INLINE upb_fhandlers *upb_decode_tag(upb_decoder *d) { INLINE upb_fhandlers *upb_decode_tag(upb_decoder *d) {
while (1) { while (1) {
uint32_t tag = upb_decode_varint32(d, false); uint32_t tag;
if (!upb_trydecode_varint32(d, &tag)) return NULL;
upb_fhandlers *f = upb_dispatcher_lookup(&d->dispatcher, tag); upb_fhandlers *f = upb_dispatcher_lookup(&d->dispatcher, tag);
// There are no explicit "startseq" or "endseq" markers in protobuf // There are no explicit "startseq" or "endseq" markers in protobuf
@ -287,8 +329,8 @@ INLINE upb_fhandlers *upb_decode_tag(upb_decoder *d) {
// TODO: support packed. // TODO: support packed.
assert(upb_issubmsgtype(f->type) || upb_isstringtype(f->type) || assert(upb_issubmsgtype(f->type) || upb_isstringtype(f->type) ||
(tag & 0x7) != UPB_WIRE_TYPE_DELIMITED); (tag & 0x7) != UPB_WIRE_TYPE_DELIMITED);
uint32_t end = d->dispatcher.top->end_offset; uint32_t end = d->dispatcher.top->end_ofs;
upb_dispatch_startseq(&d->dispatcher, f)->end_offset = end; upb_dispatch_startseq(&d->dispatcher, f)->end_ofs = end;
upb_decoder_setmsgend(d); upb_decoder_setmsgend(d);
} }
if (f) return f; if (f) return f;
@ -299,11 +341,13 @@ INLINE upb_fhandlers *upb_decode_tag(upb_decoder *d) {
case UPB_WIRE_TYPE_32BIT: upb_decoder_advance(d, 4); break; case UPB_WIRE_TYPE_32BIT: upb_decoder_advance(d, 4); break;
case UPB_WIRE_TYPE_64BIT: upb_decoder_advance(d, 8); break; case UPB_WIRE_TYPE_64BIT: upb_decoder_advance(d, 8); break;
case UPB_WIRE_TYPE_DELIMITED: case UPB_WIRE_TYPE_DELIMITED:
upb_decoder_advance(d, upb_decode_varint32(d, true)); upb_decoder_advance(d, upb_decode_varint32(d)); break;
break; default:
upb_decoder_abort(d, "Invavlid wire type");
} }
// TODO: deliver to unknown field callback. // TODO: deliver to unknown field callback.
while (d->ptr >= d->submsg_end) upb_delimend(d); upb_decoder_commit(d);
upb_decoder_checkdelim(d);
} }
} }
@ -311,11 +355,11 @@ void upb_decoder_onexit(upb_decoder *d) {
if (d->dispatcher.top->is_sequence) upb_dispatch_endseq(&d->dispatcher); if (d->dispatcher.top->is_sequence) upb_dispatch_endseq(&d->dispatcher);
if (d->status->code == UPB_EOF && upb_dispatcher_stackempty(&d->dispatcher)) { if (d->status->code == UPB_EOF && upb_dispatcher_stackempty(&d->dispatcher)) {
// Normal end-of-file. // Normal end-of-file.
upb_clearerr(d->status); upb_status_clear(d->status);
upb_dispatch_endmsg(&d->dispatcher, d->status); upb_dispatch_endmsg(&d->dispatcher, d->status);
} else { } else {
if (d->status->code == UPB_EOF) if (d->status->code == UPB_EOF)
upb_seterr(d->status, UPB_ERROR, "Input ended mid-submessage."); upb_status_setf(d->status, UPB_ERROR, "Input ended mid-submessage.");
} }
} }
@ -325,26 +369,32 @@ void upb_decoder_decode(upb_decoder *d, upb_status *status) {
return; return;
} }
d->status = status; d->status = status;
upb_pullbuf(d, true);
upb_dispatch_startmsg(&d->dispatcher); upb_dispatch_startmsg(&d->dispatcher);
while(1) { // Main loop: executed once per tag/field pair. while(1) { // Main loop: executed once per tag/field pair.
while (d->ptr >= d->submsg_end) upb_delimend(d); upb_decoder_checkdelim(d);
upb_decoder_enterjit(d); upb_decoder_enterjit(d);
// if (!d->dispatcher.top->is_packed) // if (!d->dispatcher.top->is_packed)
upb_fhandlers *f = upb_decode_tag(d); upb_fhandlers *f = upb_decode_tag(d);
if (!f) upb_decoder_exit2(d);
f->decode(d, f); f->decode(d, f);
upb_decoder_commit(d);
} }
} }
static void upb_decoder_skip(void *_d, upb_dispatcher_frame *top, static void upb_decoder_skip(void *_d, upb_dispatcher_frame *top,
upb_dispatcher_frame *bottom) { upb_dispatcher_frame *bottom) {
(void)top; (void)top;
(void)bottom;
(void)_d;
#if 0
upb_decoder *d = _d; upb_decoder *d = _d;
// TODO
if (bottom->end_offset == UPB_NONDELIMITED) { if (bottom->end_offset == UPB_NONDELIMITED) {
// TODO: support skipping groups. // TODO: support skipping groups.
abort(); abort();
} }
d->ptr = d->buf + bottom->end_offset; d->ptr = d->buf.ptr + bottom->end_offset;
#endif
} }
void upb_decoder_initforhandlers(upb_decoder *d, upb_handlers *handlers) { void upb_decoder_initforhandlers(upb_decoder *d, upb_handlers *handlers) {
@ -354,10 +404,6 @@ void upb_decoder_initforhandlers(upb_decoder *d, upb_handlers *handlers) {
d->jit_code = NULL; d->jit_code = NULL;
if (d->dispatcher.handlers->should_jit) upb_decoder_makejit(d); if (d->dispatcher.handlers->should_jit) upb_decoder_makejit(d);
#endif #endif
d->bufstr = NULL;
d->tmp = NULL;
upb_string_recycle(&d->tmp);
// Set function pointers for each field's decode function. // Set function pointers for each field's decode function.
for (int i = 0; i < handlers->msgs_len; i++) { for (int i = 0; i < handlers->msgs_len; i++) {
upb_mhandlers *m = handlers->msgs[i]; upb_mhandlers *m = handlers->msgs[i];
@ -396,19 +442,27 @@ void upb_decoder_initformsgdef(upb_decoder *d, upb_msgdef *m) {
upb_handlers_unref(h); upb_handlers_unref(h);
} }
void upb_decoder_reset(upb_decoder *d, upb_bytesrc *bytesrc, void *closure) { void upb_decoder_reset(upb_decoder *d, upb_bytesrc *bytesrc, uint64_t start_ofs,
upb_dispatcher_reset(&d->dispatcher, closure)->end_offset = UPB_NONDELIMITED; uint64_t end_ofs, void *closure) {
upb_dispatcher_frame *f = upb_dispatcher_reset(&d->dispatcher, closure);
f->end_ofs = end_ofs;
d->end_ofs = end_ofs;
d->refstart_ofs = start_ofs;
d->refend_ofs = start_ofs;
d->bufstart_ofs = start_ofs;
d->bufend_ofs = start_ofs;
d->bytesrc = bytesrc; d->bytesrc = bytesrc;
d->buf = NULL; d->buf = NULL;
d->ptr = NULL; d->ptr = NULL;
d->end = NULL; // Force a buffer pull. d->end = NULL; // Force a buffer pull.
d->submsg_end = (void*)0x1; // But don't let end-of-message get triggered. #ifdef UPB_USE_JIT_X64
d->buf_stream_offset = 0; d->jit_end = NULL;
#endif
d->delim_end = UINTPTR_MAX; // But don't let end-of-message get triggered.
d->strref.bytesrc = bytesrc;
} }
void upb_decoder_uninit(upb_decoder *d) { void upb_decoder_uninit(upb_decoder *d) {
upb_string_unref(d->bufstr);
upb_string_unref(d->tmp);
#ifdef UPB_USE_JIT_X64 #ifdef UPB_USE_JIT_X64
if (d->dispatcher.handlers->should_jit) upb_decoder_freejit(d); if (d->dispatcher.handlers->should_jit) upb_decoder_freejit(d);
#endif #endif

@ -30,44 +30,33 @@ extern "C" {
struct dasm_State; struct dasm_State;
struct _upb_decoder { typedef struct _upb_decoder {
// Bytesrc from which we pull serialized data. upb_bytesrc *bytesrc; // Source of our serialized data.
upb_bytesrc *bytesrc; upb_dispatcher dispatcher; // Dispatcher to which we push parsed data.
upb_status *status; // Where we will store any errors that occur.
upb_strref strref; // For passing string data to callbacks.
// String to hold our input buffer; is only active if d->buf != NULL. // Offsets for the region we currently have ref'd.
upb_string *bufstr; uint64_t refstart_ofs, refend_ofs;
// Temporary string for passing string data to callbacks. // Current buffer and its stream offset.
upb_string *tmp; const char *buf, *ptr, *end;
uint64_t bufstart_ofs, bufend_ofs;
// The offset within the overall stream represented by the *beginning* of buf. // Stream offset for the end of the top-level message, if any.
size_t buf_stream_offset; uint64_t end_ofs;
// Pointer to the beginning of our current data buffer, or NULL if none. // Buf offset as of which we've delivered callbacks; needed for rollback on
const char *buf; // UPB_TRYAGAIN (or in the future, UPB_SUSPEND).
const char *completed_ptr;
// End of this buffer, relative to *ptr. // End of the delimited region, relative to ptr, or UINTPTR_MAX if not in
const char *end; // this buf.
const char *jit_end; uintptr_t delim_end;
// Members which may also be written by the JIT: #ifdef UPB_USE_JIT_X64
// For JIT, which doesn't do bounds checks in the middle of parsing a field.
// Our current position in the data buffer. const char *jit_end, *effective_end; // == MIN(jit_end, submsg_end)
const char *ptr;
// End of this submessage, relative to *ptr.
const char *submsg_end;
// MIN(end, submsg_end)
const char *effective_end;
upb_fhandlers *f;
// Where we will store any errors that occur.
upb_status *status;
// Dispatcher to which we push parsed data.
upb_dispatcher dispatcher;
// JIT-generated machine code (else NULL). // JIT-generated machine code (else NULL).
char *jit_code; char *jit_code;
@ -75,21 +64,10 @@ struct _upb_decoder {
char *debug_info; char *debug_info;
struct dasm_State *dynasm; struct dasm_State *dynasm;
sigjmp_buf exitjmp; #endif
};
// For use in the upb_dispatcher's stack.
typedef struct {
// Relative to the beginning of this buffer.
// For groups and the top-level: UINT32_MAX.
uint32_t end_offset;
bool is_packed; // == !upb_issubmsg(f) && end_offset != UPB_REPATEDEND
} upb_decoder_srcdata;
// A upb_decoder decodes the binary protocol buffer format, writing the data it sigjmp_buf exitjmp;
// decodes to a upb_sink. } upb_decoder;
struct _upb_decoder;
typedef struct _upb_decoder upb_decoder;
// Initializes/uninitializes a decoder for calling into the given handlers // Initializes/uninitializes a decoder for calling into the given handlers
// (or to write into the given msgdef, given its accessors). Takes a ref // (or to write into the given msgdef, given its accessors). Takes a ref
@ -107,7 +85,10 @@ void upb_decoder_uninit(upb_decoder *d);
// state where it has not seen any data, and expects the next data to be from // state where it has not seen any data, and expects the next data to be from
// the beginning of a new protobuf. Parsers must be reset before they can be // the beginning of a new protobuf. Parsers must be reset before they can be
// used. A decoder can be reset multiple times. // used. A decoder can be reset multiple times.
void upb_decoder_reset(upb_decoder *d, upb_bytesrc *bytesrc, void *closure); //
// Pass UINT64_MAX for end_ofs to indicate a non-delimited top-level message.
void upb_decoder_reset(upb_decoder *d, upb_bytesrc *src, uint64_t start_ofs,
uint64_t end_ofs, void *closure);
void upb_decoder_decode(upb_decoder *d, upb_status *status); void upb_decoder_decode(upb_decoder *d, upb_status *status);

@ -120,7 +120,7 @@ void upb_reg_jit_gdb(upb_decoder *d) {
|.define PTR, rbx |.define PTR, rbx
|.define CLOSURE, r12 |.define CLOSURE, r12
|.type FRAME, upb_dispatcher_frame, r13 |.type FRAME, upb_dispatcher_frame, r13
|.type STRING, upb_string, r14 |.type STRREF, upb_strref, r14
|.type DECODER, upb_decoder, r15 |.type DECODER, upb_decoder, r15
| |
|.macro callp, addr |.macro callp, addr
@ -199,7 +199,7 @@ void upb_reg_jit_gdb(upb_decoder *d) {
| jae ->exit_jit // Frame stack overflow. | jae ->exit_jit // Frame stack overflow.
| mov qword FRAME:rax->f, f | mov qword FRAME:rax->f, f
| mov qword FRAME:rax->closure, closure_ | mov qword FRAME:rax->closure, closure_
| mov dword FRAME:rax->end_offset, end_offset_ | mov dword FRAME:rax->end_ofs, end_offset_
| mov byte FRAME:rax->is_sequence, is_sequence_ | mov byte FRAME:rax->is_sequence, is_sequence_
| mov CLOSURE, rdx | mov CLOSURE, rdx
| mov DECODER->dispatcher.top, rax | mov DECODER->dispatcher.top, rax
@ -217,17 +217,17 @@ void upb_reg_jit_gdb(upb_decoder *d) {
| mov rsi, DECODER->jit_end | mov rsi, DECODER->jit_end
|| if (m->is_group) { || if (m->is_group) {
| mov64 rax, 0xffffffffffffffff | mov64 rax, 0xffffffffffffffff
| mov qword DECODER->submsg_end, rax | mov qword DECODER->delim_end, rax
| mov DECODER->effective_end, rsi | mov DECODER->effective_end, rsi
|| } else { || } else {
| // Could store a correctly-biased version in the frame, at the cost of | // Could store a correctly-biased version in the frame, at the cost of
| // a larger stack. | // a larger stack.
| mov eax, dword FRAME->end_offset | mov eax, dword FRAME->end_ofs
| add rax, qword DECODER->buf | add rax, qword DECODER->buf
| mov DECODER->submsg_end, rax // submsg_end = d->buf + f->end_offset | mov DECODER->delim_end, rax // delim_end = d->buf + f->end_ofs
| cmp rax, rsi | cmp rax, rsi
| jb >8 | jb >8
| mov rax, rsi // effective_end = min(d->submsg_end, d->jit_end) | mov rax, rsi // effective_end = min(d->delim_end, d->jit_end)
|8: |8:
| mov DECODER->effective_end, rax | mov DECODER->effective_end, rax
|| } || }
@ -293,7 +293,7 @@ static void upb_decoder_jit_field(upb_decoder *d, uint32_t tag, uint32_t next_ta
} else { } else {
| mov rdx, CLOSURE | mov rdx, CLOSURE
} }
| mov esi, FRAME->end_offset | mov esi, FRAME->end_ofs
| pushframe f, rdx, esi, true | pushframe f, rdx, esi, true
} }
@ -357,10 +357,14 @@ static void upb_decoder_jit_field(upb_decoder *d, uint32_t tag, uint32_t next_ta
// buf, which sidesteps any security problems. The C path has more // buf, which sidesteps any security problems. The C path has more
// robust checks. // robust checks.
| decode_varint tag_size | decode_varint tag_size
| mov STRING->len, ARG3_32 | mov STRREF->len, ARG3_32
| mov STRING->ptr, PTR | mov STRREF->ptr, PTR
| mov rax, PTR
| sub rax, DECODER->buf
| add eax, DECODER->bufstart_ofs // = d->ptr - d->buf + d->bufstart_ofs
| mov STRREF->stream_offset, eax
| add PTR, ARG3_64 | add PTR, ARG3_64
| mov ARG3_64, STRING | mov ARG3_64, STRREF
| cmp PTR, DECODER->effective_end | cmp PTR, DECODER->effective_end
| ja ->exit_jit // Can't deliver, whole string not in buf. | ja ->exit_jit // Can't deliver, whole string not in buf.
break; break;
@ -514,7 +518,7 @@ static void upb_decoder_jit_msg(upb_decoder *d, upb_mhandlers *m) {
// This case doesn't exist for groups, because there eob really means // This case doesn't exist for groups, because there eob really means
// eob, so that case just exits the jit directly. // eob, so that case just exits the jit directly.
|=>m->jit_endofbuf_pclabel: |=>m->jit_endofbuf_pclabel:
| cmp PTR, DECODER->submsg_end | cmp PTR, DECODER->delim_end
| jb ->exit_jit // We are at eob, but not end-of-submsg. | jb ->exit_jit // We are at eob, but not end-of-submsg.
} }
@ -550,7 +554,7 @@ static void upb_decoder_jit(upb_decoder *d) {
| push rbx | push rbx
| mov DECODER, ARG1_64 | mov DECODER, ARG1_64
| mov FRAME, DECODER:ARG1_64->dispatcher.top | mov FRAME, DECODER:ARG1_64->dispatcher.top
| mov STRING, DECODER:ARG1_64->tmp | lea STRREF, DECODER:ARG1_64->strref
| mov CLOSURE, FRAME->closure | mov CLOSURE, FRAME->closure
| mov PTR, DECODER->ptr | mov PTR, DECODER->ptr

@ -7,18 +7,11 @@
#include <stdlib.h> #include <stdlib.h>
#include <stddef.h> #include <stddef.h>
#include <string.h>
#include "upb_def.h" #include "upb_def.h"
#define alignof(t) offsetof(struct { char c; t x; }, x) #define alignof(t) offsetof(struct { char c; t x; }, x)
/* Search for a character in a string, in reverse. */
static int my_memrchr(char *data, char c, size_t len)
{
int off = len-1;
while(off > 0 && data[off] != c) --off;
return off;
}
void upb_deflist_init(upb_deflist *l) { void upb_deflist_init(upb_deflist *l) {
l->size = 8; l->size = 8;
l->defs = malloc(l->size * sizeof(void*)); l->defs = malloc(l->size * sizeof(void*));
@ -105,7 +98,8 @@ static void upb_def_init(upb_def *def, upb_deftype_t type) {
} }
static void upb_def_uninit(upb_def *def) { static void upb_def_uninit(upb_def *def) {
upb_string_unref(def->fqname); //fprintf(stderr, "Freeing def: %p\n", def);
free(def->fqname);
} }
@ -120,19 +114,19 @@ typedef struct _upb_unresolveddef {
// The target type name. This may or may not be fully qualified. It is // The target type name. This may or may not be fully qualified. It is
// tempting to want to use base.fqname for this, but that will be qualified // tempting to want to use base.fqname for this, but that will be qualified
// which is inappropriate for a name we still have to resolve. // which is inappropriate for a name we still have to resolve.
upb_string *name; char *name;
} upb_unresolveddef; } upb_unresolveddef;
// Is passed a ref on the string. // Is passed a ref on the string.
static upb_unresolveddef *upb_unresolveddef_new(upb_string *str) { static upb_unresolveddef *upb_unresolveddef_new(const char *str) {
upb_unresolveddef *def = malloc(sizeof(*def)); upb_unresolveddef *def = malloc(sizeof(*def));
upb_def_init(&def->base, UPB_DEF_UNRESOLVED); upb_def_init(&def->base, UPB_DEF_UNRESOLVED);
def->name = upb_string_getref(str); def->name = strdup(str);
return def; return def;
} }
static void upb_unresolveddef_free(struct _upb_unresolveddef *def) { static void upb_unresolveddef_free(struct _upb_unresolveddef *def) {
upb_string_unref(def->name); free(def->name);
upb_def_uninit(&def->base); upb_def_uninit(&def->base);
free(def); free(def);
} }
@ -152,7 +146,7 @@ static void upb_enumdef_free(upb_enumdef *e) {
upb_enum_iter i; upb_enum_iter i;
for(i = upb_enum_begin(e); !upb_enum_done(i); i = upb_enum_next(e, i)) { for(i = upb_enum_begin(e); !upb_enum_done(i); i = upb_enum_next(e, i)) {
// Frees the ref taken when the string was parsed. // Frees the ref taken when the string was parsed.
upb_string_unref(upb_enum_iter_name(i)); free(upb_enum_iter_name(i));
} }
upb_strtable_free(&e->ntoi); upb_strtable_free(&e->ntoi);
upb_inttable_free(&e->iton); upb_inttable_free(&e->iton);
@ -170,12 +164,11 @@ upb_enumdef *upb_enumdef_dup(upb_enumdef *e) {
return new_e; return new_e;
} }
bool upb_enumdef_addval(upb_enumdef *e, upb_string *name, int32_t num) { bool upb_enumdef_addval(upb_enumdef *e, char *name, int32_t num) {
if (upb_enumdef_iton(e, num) || upb_enumdef_ntoi(e, name, NULL)) return false; if (upb_enumdef_iton(e, num) || upb_enumdef_ntoi(e, name, NULL))
upb_ntoi_ent ntoi_ent = {{name, 0}, num}; return false;
upb_iton_ent iton_ent = {0, name}; upb_strtable_insert(&e->ntoi, name, &num);
upb_strtable_insert(&e->ntoi, &ntoi_ent.e); upb_inttable_insert(&e->iton, num, strdup(name));
upb_inttable_insert(&e->iton, num, &iton_ent); // Uses strtable's ref on name
return true; return true;
} }
@ -193,19 +186,22 @@ upb_enum_iter upb_enum_next(upb_enumdef *e, upb_enum_iter iter) {
return upb_inttable_next(&e->iton, iter); return upb_inttable_next(&e->iton, iter);
} }
upb_string *upb_enumdef_iton(upb_enumdef *def, int32_t num) { const char *upb_enumdef_iton(upb_enumdef *def, int32_t num) {
upb_iton_ent *e = upb_iton_ent *e = upb_inttable_fastlookup(&def->iton, num, sizeof(*e));
(upb_iton_ent*)upb_inttable_fastlookup(&def->iton, num, sizeof(*e)); return e ? e->str : NULL;
return e ? e->string : NULL;
} }
bool upb_enumdef_ntoi(upb_enumdef *def, upb_string *name, int32_t *num) { bool upb_enumdef_ntoil(upb_enumdef *def, char *name, size_t len, int32_t *num) {
upb_ntoi_ent *e = (upb_ntoi_ent*)upb_strtable_lookup(&def->ntoi, name); upb_ntoi_ent *e = upb_strtable_lookupl(&def->ntoi, name, len);
if (!e) return false; if (!e) return false;
if (num) *num = e->value; if (num) *num = e->value;
return true; return true;
} }
// Convenience wrapper around upb_enumdef_ntoil() for NUL-terminated names:
// measures "name" with strlen() and forwards the lookup, returning whatever
// upb_enumdef_ntoil() returns.
bool upb_enumdef_ntoi(upb_enumdef *e, char *name, int32_t *num) {
  const size_t name_len = strlen(name);
  return upb_enumdef_ntoil(e, name, name_len, num);
}
/* upb_fielddef ***************************************************************/ /* upb_fielddef ***************************************************************/
@ -228,9 +224,9 @@ upb_fielddef *upb_fielddef_new() {
static void upb_fielddef_free(upb_fielddef *f) { static void upb_fielddef_free(upb_fielddef *f) {
if (upb_isstring(f)) { if (upb_isstring(f)) {
upb_string_unref(upb_value_getstr(f->defaultval)); free(upb_value_getptr(f->defaultval));
} }
upb_string_unref(f->name); free(f->name);
free(f); free(f);
} }
@ -270,18 +266,18 @@ static bool upb_fielddef_resolve(upb_fielddef *f, upb_def *def, upb_status *s) {
f->def = def; f->def = def;
if (f->type == UPB_TYPE(ENUM)) { if (f->type == UPB_TYPE(ENUM)) {
// Resolve the enum's default from a string to an integer. // Resolve the enum's default from a string to an integer.
upb_string *str = upb_value_getstr(f->defaultval); char *str = upb_value_getptr(f->defaultval);
assert(str); // Should point to either a real default or the empty string. assert(str); // Should point to either a real default or the empty string.
upb_enumdef *e = upb_downcast_enumdef(f->def); upb_enumdef *e = upb_downcast_enumdef(f->def);
int32_t val = 0; int32_t val = 0;
if (str == upb_emptystring()) { if (str[0] == '\0') {
upb_value_setint32(&f->defaultval, e->defaultval); upb_value_setint32(&f->defaultval, e->defaultval);
} else { } else {
bool success = upb_enumdef_ntoi(e, str, &val); bool success = upb_enumdef_ntoi(e, str, &val);
upb_string_unref(str); free(str);
if (!success) { if (!success) {
upb_seterr(s, UPB_ERROR, "Default enum value (" UPB_STRFMT ") is not a " upb_status_setf(s, UPB_ERROR, "Default enum value (%s) is not a "
"member of the enum", UPB_STRARG(str)); "member of the enum", str);
return false; return false;
} }
upb_value_setint32(&f->defaultval, val); upb_value_setint32(&f->defaultval, val);
@ -295,9 +291,9 @@ void upb_fielddef_setnumber(upb_fielddef *f, int32_t number) {
f->number = number; f->number = number;
} }
void upb_fielddef_setname(upb_fielddef *f, upb_string *name) { void upb_fielddef_setname(upb_fielddef *f, const char *name) {
assert(f->msgdef == NULL); assert(f->msgdef == NULL);
f->name = upb_string_getref(name); f->name = strdup(name);
} }
void upb_fielddef_settype(upb_fielddef *f, uint8_t type) { void upb_fielddef_settype(upb_fielddef *f, uint8_t type) {
@ -326,7 +322,7 @@ void upb_fielddef_setaccessor(upb_fielddef *f, struct _upb_accessor_vtbl *vtbl)
f->accessor = vtbl; f->accessor = vtbl;
} }
void upb_fielddef_settypename(upb_fielddef *f, upb_string *name) { void upb_fielddef_settypename(upb_fielddef *f, const char *name) {
upb_def_unref(f->def); upb_def_unref(f->def);
f->def = UPB_UPCAST(upb_unresolveddef_new(name)); f->def = UPB_UPCAST(upb_unresolveddef_new(name));
} }
@ -424,9 +420,8 @@ bool upb_msgdef_addfield(upb_msgdef *m, upb_fielddef *f) {
assert(f->msgdef == NULL); assert(f->msgdef == NULL);
f->msgdef = m; f->msgdef = m;
upb_itof_ent itof_ent = {0, f}; upb_itof_ent itof_ent = {0, f};
upb_ntof_ent ntof_ent = {{f->name, 0}, f};
upb_inttable_insert(&m->itof, f->number, &itof_ent); upb_inttable_insert(&m->itof, f->number, &itof_ent);
upb_strtable_insert(&m->ntof, &ntof_ent.e); upb_strtable_insert(&m->ntof, f->name, &f);
return true; return true;
} }
@ -493,7 +488,6 @@ upb_msg_iter upb_msg_next(upb_msgdef *m, upb_msg_iter iter) {
/* upb_symtabtxn **************************************************************/ /* upb_symtabtxn **************************************************************/
typedef struct { typedef struct {
upb_strtable_entry e;
upb_def *def; upb_def *def;
} upb_symtab_ent; } upb_symtab_ent;
@ -503,16 +497,19 @@ void upb_symtabtxn_init(upb_symtabtxn *t) {
void upb_symtabtxn_uninit(upb_symtabtxn *txn) { void upb_symtabtxn_uninit(upb_symtabtxn *txn) {
upb_strtable *t = &txn->deftab; upb_strtable *t = &txn->deftab;
upb_symtab_ent *e; upb_strtable_iter i;
for(e = upb_strtable_begin(t); e; e = upb_strtable_next(t, &e->e)) for(upb_strtable_begin(&i, t); !upb_strtable_done(&i); upb_strtable_next(&i)) {
upb_def_unref(e->def); const upb_symtab_ent *e = upb_strtable_iter_value(&i);
free(e->def);
}
upb_strtable_free(t); upb_strtable_free(t);
} }
bool upb_symtabtxn_add(upb_symtabtxn *t, upb_def *def) { bool upb_symtabtxn_add(upb_symtabtxn *t, upb_def *def) {
// TODO: check if already present. // TODO: check if already present.
upb_symtab_ent e = {{def->fqname, 0}, def}; upb_symtab_ent e = {def};
upb_strtable_insert(&t->deftab, &e.e); //fprintf(stderr, "txn Inserting: %p, ent: %p\n", e.def, &e);
upb_strtable_insert(&t->deftab, def->fqname, &e);
return true; return true;
} }
@ -531,59 +528,28 @@ err:
// Given a symbol and the base symbol inside which it is defined, find the // Given a symbol and the base symbol inside which it is defined, find the
// symbol's definition in t. // symbol's definition in t.
static upb_symtab_ent *upb_resolve(upb_strtable *t, static upb_symtab_ent *upb_resolve(upb_strtable *t,
upb_string *base, upb_string *sym) { const char *base, const char *sym) {
if(upb_string_len(sym) == 0) return NULL; if(strlen(sym) == 0) return NULL;
if(upb_string_getrobuf(sym)[0] == UPB_SYMBOL_SEPARATOR) { if(sym[0] == UPB_SYMBOL_SEPARATOR) {
// Symbols starting with '.' are absolute, so we do a single lookup. // Symbols starting with '.' are absolute, so we do a single lookup.
// Slice to omit the leading '.' // Slice to omit the leading '.'
upb_string *sym_str = upb_strslice(sym, 1, upb_string_len(sym) - 1); return upb_strtable_lookup(t, sym + 1);
upb_symtab_ent *e = upb_strtable_lookup(t, sym_str);
upb_string_unref(sym_str);
return e;
} else { } else {
// Remove components from base until we find an entry or run out. // Remove components from base until we find an entry or run out.
// TODO: This branch is totally broken, but currently not used. // TODO: This branch is totally broken, but currently not used.
upb_string *sym_str = upb_string_new(); (void)base;
int baselen = upb_string_len(base); assert(false);
upb_symtab_ent *ret = NULL; return NULL;
while(1) {
// sym_str = base[0...base_len] + UPB_SYMBOL_SEPARATOR + sym
upb_strlen_t len = baselen + upb_string_len(sym) + 1;
char *buf = upb_string_getrwbuf(sym_str, len);
memcpy(buf, upb_string_getrobuf(base), baselen);
buf[baselen] = UPB_SYMBOL_SEPARATOR;
memcpy(buf + baselen + 1, upb_string_getrobuf(sym), upb_string_len(sym));
upb_symtab_ent *e = upb_strtable_lookup(t, sym_str);
if (e) {
ret = e;
break;
} else if(baselen == 0) {
// No more scopes to try.
ret = NULL;
break;
}
baselen = my_memrchr(buf, UPB_SYMBOL_SEPARATOR, baselen);
}
upb_string_unref(sym_str);
return ret;
} }
} }
upb_symtabtxn_iter upb_symtabtxn_begin(upb_symtabtxn *t) { void upb_symtabtxn_begin(upb_symtabtxn_iter *i, upb_symtabtxn *t) {
return upb_strtable_begin(&t->deftab); upb_strtable_begin(i, &t->deftab);
} }
void upb_symtabtxn_next(upb_symtabtxn_iter *i) { upb_strtable_next(i); }
upb_symtabtxn_iter upb_symtabtxn_next(upb_symtabtxn *t, upb_symtabtxn_iter i) { bool upb_symtabtxn_done(upb_symtabtxn_iter *i) { return upb_strtable_done(i); }
return upb_strtable_next(&t->deftab, i); upb_def *upb_symtabtxn_iter_def(upb_symtabtxn_iter *i) {
} const upb_symtab_ent *e = upb_strtable_iter_value(i);
bool upb_symtabtxn_done(upb_symtabtxn_iter i) {
return i == NULL;
}
upb_def *upb_symtabtxn_iter_def(upb_symtabtxn_iter iter) {
upb_symtab_ent *e = iter;
return e->def; return e->def;
} }
@ -591,8 +557,10 @@ upb_def *upb_symtabtxn_iter_def(upb_symtabtxn_iter iter) {
/* upb_symtab public interface ************************************************/ /* upb_symtab public interface ************************************************/
static void _upb_symtab_free(upb_strtable *t) { static void _upb_symtab_free(upb_strtable *t) {
upb_symtab_ent *e; upb_strtable_iter i;
for (e = upb_strtable_begin(t); e; e = upb_strtable_next(t, &e->e)) { upb_strtable_begin(&i, t);
for (; !upb_strtable_done(&i); upb_strtable_next(&i)) {
const upb_symtab_ent *e = upb_strtable_iter_value(&i);
assert(upb_atomic_read(&e->def->refcount) == 0); assert(upb_atomic_read(&e->def->refcount) == 0);
upb_def_free(e->def); upb_def_free(e->def);
} }
@ -632,9 +600,11 @@ upb_def **upb_symtab_getdefs(upb_symtab *s, int *count, upb_deftype_t type) {
// We may only use part of this, depending on how many symbols are of the // We may only use part of this, depending on how many symbols are of the
// correct type. // correct type.
upb_def **defs = malloc(sizeof(*defs) * total); upb_def **defs = malloc(sizeof(*defs) * total);
upb_symtab_ent *e = upb_strtable_begin(&s->symtab); upb_strtable_iter iter;
upb_strtable_begin(&iter, &s->symtab);
int i = 0; int i = 0;
for(; e; e = upb_strtable_next(&s->symtab, &e->e)) { for(; !upb_strtable_done(&iter); upb_strtable_next(&iter)) {
const upb_symtab_ent *e = upb_strtable_iter_value(&iter);
upb_def *def = e->def; upb_def *def = e->def;
assert(def); assert(def);
if(type == UPB_DEF_ANY || def->type == type) if(type == UPB_DEF_ANY || def->type == type)
@ -646,7 +616,7 @@ upb_def **upb_symtab_getdefs(upb_symtab *s, int *count, upb_deftype_t type) {
return defs; return defs;
} }
upb_def *upb_symtab_lookup(upb_symtab *s, upb_string *sym) { upb_def *upb_symtab_lookup(upb_symtab *s, const char *sym) {
upb_rwlock_rdlock(&s->lock); upb_rwlock_rdlock(&s->lock);
upb_symtab_ent *e = upb_strtable_lookup(&s->symtab, sym); upb_symtab_ent *e = upb_strtable_lookup(&s->symtab, sym);
upb_def *ret = NULL; upb_def *ret = NULL;
@ -658,9 +628,9 @@ upb_def *upb_symtab_lookup(upb_symtab *s, upb_string *sym) {
return ret; return ret;
} }
upb_def *upb_symtab_resolve(upb_symtab *s, upb_string *base, upb_string *symbol) { upb_def *upb_symtab_resolve(upb_symtab *s, const char *base, const char *sym) {
upb_rwlock_rdlock(&s->lock); upb_rwlock_rdlock(&s->lock);
upb_symtab_ent *e = upb_resolve(&s->symtab, base, symbol); upb_symtab_ent *e = upb_resolve(&s->symtab, base, sym);
upb_def *ret = NULL; upb_def *ret = NULL;
if(e) { if(e) {
ret = e->def; ret = e->def;
@ -692,8 +662,9 @@ bool upb_symtab_dfs(upb_def *def, upb_def **open_defs, int n,
bool replacing = (upb_strtable_lookup(&txn->deftab, m->base.fqname) != NULL); bool replacing = (upb_strtable_lookup(&txn->deftab, m->base.fqname) != NULL);
if (needcopy && !replacing) { if (needcopy && !replacing) {
upb_symtab_ent e = {{def->fqname, 0}, upb_def_dup(def)}; upb_symtab_ent e = {upb_def_dup(def)};
upb_strtable_insert(&txn->deftab, &e.e); //fprintf(stderr, "Replacing def: %p\n", e.def);
upb_strtable_insert(&txn->deftab, def->fqname, &e);
replacing = true; replacing = true;
} }
return replacing; return replacing;
@ -706,25 +677,29 @@ bool upb_symtab_commit(upb_symtab *s, upb_symtabtxn *txn, upb_status *status) {
// themselves be replaced with versions that will point to the new defs. // themselves be replaced with versions that will point to the new defs.
// Do a DFS -- any path that finds a new def must replace all ancestors. // Do a DFS -- any path that finds a new def must replace all ancestors.
upb_strtable *symtab = &s->symtab; upb_strtable *symtab = &s->symtab;
upb_symtab_ent *e; upb_strtable_iter i;
for(e = upb_strtable_begin(symtab); e; e = upb_strtable_next(symtab, &e->e)) { upb_strtable_begin(&i, symtab);
for(; !upb_strtable_done(&i); upb_strtable_next(&i)) {
upb_def *open_defs[UPB_MAX_TYPE_DEPTH]; upb_def *open_defs[UPB_MAX_TYPE_DEPTH];
const upb_symtab_ent *e = upb_strtable_iter_value(&i);
upb_symtab_dfs(e->def, open_defs, 0, txn); upb_symtab_dfs(e->def, open_defs, 0, txn);
} }
// Resolve all refs. // Resolve all refs.
upb_strtable *txntab = &txn->deftab; upb_strtable *txntab = &txn->deftab;
for(e = upb_strtable_begin(txntab); e; e = upb_strtable_next(txntab, &e->e)) { upb_strtable_begin(&i, txntab);
for(; !upb_strtable_done(&i); upb_strtable_next(&i)) {
const upb_symtab_ent *e = upb_strtable_iter_value(&i);
upb_msgdef *m = upb_dyncast_msgdef(e->def); upb_msgdef *m = upb_dyncast_msgdef(e->def);
if(!m) continue; if(!m) continue;
// Type names are resolved relative to the message in which they appear. // Type names are resolved relative to the message in which they appear.
upb_string *base = m->base.fqname; const char *base = m->base.fqname;
upb_msg_iter i; upb_msg_iter j;
for(i = upb_msg_begin(m); !upb_msg_done(i); i = upb_msg_next(m, i)) { for(j = upb_msg_begin(m); !upb_msg_done(j); j = upb_msg_next(m, j)) {
upb_fielddef *f = upb_msg_iter_field(i); upb_fielddef *f = upb_msg_iter_field(j);
if(!upb_hasdef(f)) continue; // No resolving necessary. if(!upb_hasdef(f)) continue; // No resolving necessary.
upb_string *name = upb_downcast_unresolveddef(f->def)->name; const char *name = upb_downcast_unresolveddef(f->def)->name;
// Resolve from either the txntab (pending adds) or symtab (existing // Resolve from either the txntab (pending adds) or symtab (existing
// defs). If both exist, prefer the pending add, because it will be // defs). If both exist, prefer the pending add, because it will be
@ -732,17 +707,18 @@ bool upb_symtab_commit(upb_symtab *s, upb_symtabtxn *txn, upb_status *status) {
upb_symtab_ent *found; upb_symtab_ent *found;
if(!(found = upb_resolve(txntab, base, name)) && if(!(found = upb_resolve(txntab, base, name)) &&
!(found = upb_resolve(symtab, base, name))) { !(found = upb_resolve(symtab, base, name))) {
upb_seterr(status, UPB_ERROR, upb_status_setf(status, UPB_ERROR, "could not resolve symbol '%s' "
"could not resolve symbol '" UPB_STRFMT "'" "in context '%s'", name, base);
" in context '" UPB_STRFMT "'",
UPB_STRARG(name), UPB_STRARG(base));
return false; return false;
} }
// Check the type of the found def. // Check the type of the found def.
upb_fieldtype_t expected = upb_issubmsg(f) ? UPB_DEF_MSG : UPB_DEF_ENUM; upb_fieldtype_t expected = upb_issubmsg(f) ? UPB_DEF_MSG : UPB_DEF_ENUM;
//fprintf(stderr, "found: %p\n", found);
//fprintf(stderr, "found->def: %p\n", found->def);
//fprintf(stderr, "found->def->type: %d\n", found->def->type);
if(found->def->type != expected) { if(found->def->type != expected) {
upb_seterr(status, UPB_ERROR, "Unexpected type"); upb_status_setf(status, UPB_ERROR, "Unexpected type");
return false; return false;
} }
if (!upb_fielddef_resolve(f, found->def, status)) return false; if (!upb_fielddef_resolve(f, found->def, status)) return false;
@ -751,9 +727,9 @@ bool upb_symtab_commit(upb_symtab *s, upb_symtabtxn *txn, upb_status *status) {
// The defs in the transaction have been vetted, and can be moved to the // The defs in the transaction have been vetted, and can be moved to the
// symtab without causing errors. // symtab without causing errors.
upb_symtab_ent *tmptab_e; upb_strtable_begin(&i, txntab);
for(tmptab_e = upb_strtable_begin(txntab); tmptab_e; for(; !upb_strtable_done(&i); upb_strtable_next(&i)) {
tmptab_e = upb_strtable_next(txntab, &tmptab_e->e)) { const upb_symtab_ent *tmptab_e = upb_strtable_iter_value(&i);
upb_def_movetosymtab(tmptab_e->def, s); upb_def_movetosymtab(tmptab_e->def, s);
upb_symtab_ent *symtab_e = upb_symtab_ent *symtab_e =
upb_strtable_lookup(&s->symtab, tmptab_e->def->fqname); upb_strtable_lookup(&s->symtab, tmptab_e->def->fqname);
@ -761,7 +737,8 @@ bool upb_symtab_commit(upb_symtab *s, upb_symtabtxn *txn, upb_status *status) {
upb_deflist_push(&s->olddefs, symtab_e->def); upb_deflist_push(&s->olddefs, symtab_e->def);
symtab_e->def = tmptab_e->def; symtab_e->def = tmptab_e->def;
} else { } else {
upb_strtable_insert(&s->symtab, &tmptab_e->e); //fprintf(stderr, "Inserting def: %p\n", tmptab_e->def);
upb_strtable_insert(&s->symtab, tmptab_e->def->fqname, tmptab_e);
} }
} }

@ -32,7 +32,7 @@ typedef struct _upb_symtab upb_symtab;
// All the different kind of defs we support. These correspond 1:1 with // All the different kind of defs we support. These correspond 1:1 with
// declarations in a .proto file. // declarations in a .proto file.
typedef enum { typedef enum {
UPB_DEF_MSG = 0, UPB_DEF_MSG = 1,
UPB_DEF_ENUM, UPB_DEF_ENUM,
UPB_DEF_SERVICE, // Not yet implemented. UPB_DEF_SERVICE, // Not yet implemented.
@ -44,7 +44,7 @@ typedef enum {
/* upb_def: base class for defs **********************************************/ /* upb_def: base class for defs **********************************************/
typedef struct { typedef struct {
upb_string *fqname; // Fully qualified. char *fqname; // Fully qualified.
upb_symtab *symtab; // Def is mutable iff symtab == NULL. upb_symtab *symtab; // Def is mutable iff symtab == NULL.
upb_atomic_t refcount; // Owns a ref on symtab iff (symtab && refcount > 0). upb_atomic_t refcount; // Owns a ref on symtab iff (symtab && refcount > 0).
upb_deftype_t type; upb_deftype_t type;
@ -66,7 +66,7 @@ upb_def *upb_def_dup(upb_def *def);
// A upb_fielddef describes a single field in a message. It isn't a full def // A upb_fielddef describes a single field in a message. It isn't a full def
// in the sense that it derives from upb_def. It cannot stand on its own; it // in the sense that it derives from upb_def. It cannot stand on its own; it
// must be part of a upb_msgdef. It is also reference-counted. // must be part of a upb_msgdef. It is also reference-counted.
struct _upb_fielddef { typedef struct _upb_fielddef {
struct _upb_msgdef *msgdef; struct _upb_msgdef *msgdef;
upb_def *def; // if upb_hasdef(f) upb_def *def; // if upb_hasdef(f)
upb_atomic_t refcount; upb_atomic_t refcount;
@ -78,11 +78,11 @@ struct _upb_fielddef {
int16_t hasbit; int16_t hasbit;
uint16_t offset; uint16_t offset;
int32_t number; int32_t number;
upb_string *name; char *name;
upb_value defaultval; // Only meaningful for non-repeated scalars and strings. upb_value defaultval; // Only meaningful for non-repeated scalars and strings.
upb_value fval; upb_value fval;
struct _upb_accessor_vtbl *accessor; struct _upb_accessor_vtbl *accessor;
}; } upb_fielddef;
upb_fielddef *upb_fielddef_new(); upb_fielddef *upb_fielddef_new();
void upb_fielddef_ref(upb_fielddef *f); void upb_fielddef_ref(upb_fielddef *f);
@ -93,7 +93,7 @@ upb_fielddef *upb_fielddef_dup(upb_fielddef *f);
INLINE uint8_t upb_fielddef_type(upb_fielddef *f) { return f->type; } INLINE uint8_t upb_fielddef_type(upb_fielddef *f) { return f->type; }
INLINE uint8_t upb_fielddef_label(upb_fielddef *f) { return f->label; } INLINE uint8_t upb_fielddef_label(upb_fielddef *f) { return f->label; }
INLINE int32_t upb_fielddef_number(upb_fielddef *f) { return f->number; } INLINE int32_t upb_fielddef_number(upb_fielddef *f) { return f->number; }
INLINE upb_string *upb_fielddef_name(upb_fielddef *f) { return f->name; } INLINE char *upb_fielddef_name(upb_fielddef *f) { return f->name; }
INLINE upb_value upb_fielddef_default(upb_fielddef *f) { return f->defaultval; } INLINE upb_value upb_fielddef_default(upb_fielddef *f) { return f->defaultval; }
INLINE upb_value upb_fielddef_fval(upb_fielddef *f) { return f->fval; } INLINE upb_value upb_fielddef_fval(upb_fielddef *f) { return f->fval; }
INLINE bool upb_fielddef_finalized(upb_fielddef *f) { return f->finalized; } INLINE bool upb_fielddef_finalized(upb_fielddef *f) { return f->finalized; }
@ -114,7 +114,7 @@ upb_def *upb_fielddef_subdef(upb_fielddef *f);
// added to a msgdef. For the moment we do not allow these to be set once // added to a msgdef. For the moment we do not allow these to be set once
// the fielddef is added to a msgdef -- this could be relaxed in the future. // the fielddef is added to a msgdef -- this could be relaxed in the future.
void upb_fielddef_setnumber(upb_fielddef *f, int32_t number); void upb_fielddef_setnumber(upb_fielddef *f, int32_t number);
void upb_fielddef_setname(upb_fielddef *f, upb_string *name); void upb_fielddef_setname(upb_fielddef *f, const char *name);
// These writers may be called at any time prior to being put in a symtab. // These writers may be called at any time prior to being put in a symtab.
void upb_fielddef_settype(upb_fielddef *f, uint8_t type); void upb_fielddef_settype(upb_fielddef *f, uint8_t type);
@ -124,7 +124,7 @@ void upb_fielddef_setfval(upb_fielddef *f, upb_value fval);
void upb_fielddef_setaccessor(upb_fielddef *f, struct _upb_accessor_vtbl *vtbl); void upb_fielddef_setaccessor(upb_fielddef *f, struct _upb_accessor_vtbl *vtbl);
// The name of the message or enum this field is referring to. Must be found // The name of the message or enum this field is referring to. Must be found
// at name resolution time (when the symtabtxn is committed to the symtab). // at name resolution time (when the symtabtxn is committed to the symtab).
void upb_fielddef_settypename(upb_fielddef *f, upb_string *name); void upb_fielddef_settypename(upb_fielddef *f, const char *name);
// A variety of tests about the type of a field. // A variety of tests about the type of a field.
INLINE bool upb_issubmsgtype(upb_fieldtype_t type) { INLINE bool upb_issubmsgtype(upb_fieldtype_t type) {
@ -227,7 +227,7 @@ INLINE upb_fielddef *upb_msgdef_itof(upb_msgdef *m, uint32_t i) {
return e ? e->f : NULL; return e ? e->f : NULL;
} }
INLINE upb_fielddef *upb_msgdef_ntof(upb_msgdef *m, upb_string *name) { INLINE upb_fielddef *upb_msgdef_ntof(upb_msgdef *m, char *name) {
upb_ntof_ent *e = (upb_ntof_ent*)upb_strtable_lookup(&m->ntof, name); upb_ntof_ent *e = (upb_ntof_ent*)upb_strtable_lookup(&m->ntof, name);
return e ? e->f : NULL; return e ? e->f : NULL;
} }
@ -272,7 +272,7 @@ typedef struct {
typedef struct { typedef struct {
bool junk; bool junk;
upb_string *string; char *str;
} upb_iton_ent; } upb_iton_ent;
upb_enumdef *upb_enumdef_new(); upb_enumdef *upb_enumdef_new();
@ -288,12 +288,13 @@ void upb_enumdef_setdefault(upb_enumdef *e, int32_t val);
// Adds a value to the enumdef. Requires that no existing val has this // Adds a value to the enumdef. Requires that no existing val has this
// name or number (returns false and does not add if there is). May only // name or number (returns false and does not add if there is). May only
// be called before the enumdef is in a symtab. // be called before the enumdef is in a symtab.
bool upb_enumdef_addval(upb_enumdef *e, upb_string *name, int32_t num); bool upb_enumdef_addval(upb_enumdef *e, char *name, int32_t num);
// Lookups from name to integer and vice-versa. // Lookups from name to integer and vice-versa.
bool upb_enumdef_ntoi(upb_enumdef *e, upb_string *name, int32_t *num); bool upb_enumdef_ntoil(upb_enumdef *e, char *name, size_t len, int32_t *num);
// Caller does not own a ref on the returned string. bool upb_enumdef_ntoi(upb_enumdef *e, char *name, int32_t *num);
upb_string *upb_enumdef_iton(upb_enumdef *e, int32_t num); // Caller does not own the returned string.
const char *upb_enumdef_iton(upb_enumdef *e, int32_t num);
// Iteration over name/value pairs. The order is undefined. // Iteration over name/value pairs. The order is undefined.
// Adding an enum val invalidates any iterators. // Adding an enum val invalidates any iterators.
@ -308,9 +309,9 @@ upb_enum_iter upb_enum_next(upb_enumdef *e, upb_enum_iter iter);
INLINE bool upb_enum_done(upb_enum_iter iter) { return upb_inttable_done(iter); } INLINE bool upb_enum_done(upb_enum_iter iter) { return upb_inttable_done(iter); }
// Iterator accessors. // Iterator accessors.
INLINE upb_string *upb_enum_iter_name(upb_enum_iter iter) { INLINE char *upb_enum_iter_name(upb_enum_iter iter) {
upb_iton_ent *e = (upb_iton_ent*)upb_inttable_iter_value(iter); upb_iton_ent *e = (upb_iton_ent*)upb_inttable_iter_value(iter);
return e->string; return e->str;
} }
INLINE int32_t upb_enum_iter_number(upb_enum_iter iter) { INLINE int32_t upb_enum_iter_number(upb_enum_iter iter) {
return upb_inttable_iter_key(iter); return upb_inttable_iter_key(iter);
@ -340,7 +341,7 @@ bool upb_symtabtxn_add(upb_symtabtxn *t, upb_def *def);
// Gets the def (if any) that is associated with this name in the symtab. // Gets the def (if any) that is associated with this name in the symtab.
// Caller does *not* inherit a ref on the def. // Caller does *not* inherit a ref on the def.
upb_def *upb_symtabtxn_get(upb_symtabtxn *t, upb_string *name); upb_def *upb_symtabtxn_get(upb_symtabtxn *t, char *name);
// Iterate over the defs that are part of the transaction. // Iterate over the defs that are part of the transaction.
// The order is undefined. // The order is undefined.
@ -350,12 +351,12 @@ upb_def *upb_symtabtxn_get(upb_symtabtxn *t, upb_string *name);
// i = upb_symtabtxn_next(t, i)) { // i = upb_symtabtxn_next(t, i)) {
// upb_def *def = upb_symtabtxn_iter_def(i); // upb_def *def = upb_symtabtxn_iter_def(i);
// } // }
typedef void* upb_symtabtxn_iter; typedef upb_strtable_iter upb_symtabtxn_iter;
upb_symtabtxn_iter upb_symtabtxn_begin(upb_symtabtxn *t); void upb_symtabtxn_begin(upb_symtabtxn_iter* i, upb_symtabtxn *t);
upb_symtabtxn_iter upb_symtabtxn_next(upb_symtabtxn *t, upb_symtabtxn_iter i); void upb_symtabtxn_next(upb_symtabtxn_iter *i);
bool upb_symtabtxn_done(upb_symtabtxn_iter i); bool upb_symtabtxn_done(upb_symtabtxn_iter *i);
upb_def *upb_symtabtxn_iter_def(upb_symtabtxn_iter iter); upb_def *upb_symtabtxn_iter_def(upb_symtabtxn_iter *iter);
/* upb_symtab *****************************************************************/ /* upb_symtab *****************************************************************/
@ -397,12 +398,12 @@ void upb_symtab_unref(upb_symtab *s);
// If a def is found, the caller owns one ref on the returned def. Otherwise // If a def is found, the caller owns one ref on the returned def. Otherwise
// returns NULL. // returns NULL.
// TODO: make return const // TODO: make return const
upb_def *upb_symtab_resolve(upb_symtab *s, upb_string *base, upb_string *sym); upb_def *upb_symtab_resolve(upb_symtab *s, const char *base, const char *sym);
// Find an entry in the symbol table with this exact name. If a def is found, // Find an entry in the symbol table with this exact name. If a def is found,
// the caller owns one ref on the returned def. Otherwise returns NULL. // the caller owns one ref on the returned def. Otherwise returns NULL.
// TODO: make return const // TODO: make return const
upb_def *upb_symtab_lookup(upb_symtab *s, upb_string *sym); upb_def *upb_symtab_lookup(upb_symtab *s, const char *sym);
// Gets an array of pointers to all currently active defs in this symtab. The // Gets an array of pointers to all currently active defs in this symtab. The
// caller owns the returned array (which is of length *count) as well as a ref // caller owns the returned array (which is of length *count) as well as a ref

@ -9,19 +9,22 @@
#include <stdlib.h> #include <stdlib.h>
#include <errno.h> #include <errno.h>
#include "upb_string.h"
#include "upb_def.h" #include "upb_def.h"
/* Joins strings together, for example: // Returns a newly allocated string that joins input strings together, for example:
* join("Foo.Bar", "Baz") -> "Foo.Bar.Baz" // join("Foo.Bar", "Baz") -> "Foo.Bar.Baz"
* join("", "Baz") -> "Baz" // join("", "Baz") -> "Baz"
* Caller owns a ref on the returned string. */ // Caller owns a ref on the returned string. */
static upb_string *upb_join(upb_string *base, upb_string *name) { static char *upb_join(char *base, char *name) {
if (!base || upb_string_len(base) == 0) { if (!base || strlen(base) == 0) {
return upb_string_getref(name); return strdup(name);
} else { } else {
return upb_string_asprintf(UPB_STRFMT "." UPB_STRFMT, char *ret = malloc(strlen(base) + strlen(name) + 2);
UPB_STRARG(base), UPB_STRARG(name)); ret[0] = '\0';
strcat(ret, base);
strcat(ret, ".");
strcat(ret, name);
return ret;
} }
} }
@ -36,12 +39,12 @@ static upb_def *upb_deflist_last(upb_deflist *l) {
} }
// Qualify the defname for all defs starting with offset "start" with "str". // Qualify the defname for all defs starting with offset "start" with "str".
static void upb_deflist_qualify(upb_deflist *l, upb_string *str, int32_t start) { static void upb_deflist_qualify(upb_deflist *l, char *str, int32_t start) {
for(uint32_t i = start; i < l->len; i++) { for(uint32_t i = start; i < l->len; i++) {
upb_def *def = l->defs[i]; upb_def *def = l->defs[i];
upb_string *name = def->fqname; char *name = def->fqname;
def->fqname = upb_join(str, name); def->fqname = upb_join(str, name);
upb_string_unref(name); free(name);
} }
} }
@ -59,13 +62,13 @@ void upb_descreader_init(upb_descreader *r, upb_symtabtxn *txn) {
} }
void upb_descreader_uninit(upb_descreader *r) { void upb_descreader_uninit(upb_descreader *r) {
upb_string_unref(r->name); free(r->name);
upb_status_uninit(&r->status); upb_status_uninit(&r->status);
upb_deflist_uninit(&r->defs); upb_deflist_uninit(&r->defs);
upb_string_unref(r->default_string); free(r->default_string);
while (r->stack_len > 0) { while (r->stack_len > 0) {
upb_descreader_frame *f = &r->stack[--r->stack_len]; upb_descreader_frame *f = &r->stack[--r->stack_len];
upb_string_unref(f->name); free(f->name);
} }
} }
@ -91,13 +94,14 @@ void upb_descreader_startcontainer(upb_descreader *r) {
void upb_descreader_endcontainer(upb_descreader *r) { void upb_descreader_endcontainer(upb_descreader *r) {
upb_descreader_frame *f = &r->stack[--r->stack_len]; upb_descreader_frame *f = &r->stack[--r->stack_len];
upb_deflist_qualify(&r->defs, f->name, f->start); upb_deflist_qualify(&r->defs, f->name, f->start);
upb_string_unref(f->name); free(f->name);
f->name = NULL;
} }
void upb_descreader_setscopename(upb_descreader *r, upb_string *str) { void upb_descreader_setscopename(upb_descreader *r, char *str) {
upb_descreader_frame *f = &r->stack[r->stack_len-1]; upb_descreader_frame *f = &r->stack[r->stack_len-1];
upb_string_unref(f->name); free(f->name);
f->name = upb_string_getref(str); f->name = str;
} }
// Handlers for google.protobuf.FileDescriptorProto. // Handlers for google.protobuf.FileDescriptorProto.
@ -119,7 +123,7 @@ static upb_flow_t upb_descreader_FileDescriptorProto_package(void *_r,
upb_value val) { upb_value val) {
(void)fval; (void)fval;
upb_descreader *r = _r; upb_descreader *r = _r;
upb_descreader_setscopename(r, upb_value_getstr(val)); upb_descreader_setscopename(r, upb_strref_dup(upb_value_getstrref(val)));
return UPB_CONTINUE; return UPB_CONTINUE;
} }
@ -190,8 +194,8 @@ static upb_flow_t upb_enumdef_EnumValueDescriptorProto_name(void *_r,
upb_value val) { upb_value val) {
(void)fval; (void)fval;
upb_descreader *r = _r; upb_descreader *r = _r;
upb_string_unref(r->name); free(r->name);
r->name = upb_string_getref(upb_value_getstr(val)); r->name = upb_strref_dup(upb_value_getstrref(val));
r->saw_name = true; r->saw_name = true;
return UPB_CONTINUE; return UPB_CONTINUE;
} }
@ -210,7 +214,7 @@ static void upb_enumdef_EnumValueDescriptorProto_endmsg(void *_r,
upb_status *status) { upb_status *status) {
upb_descreader *r = _r; upb_descreader *r = _r;
if(!r->saw_number || !r->saw_name) { if(!r->saw_number || !r->saw_name) {
upb_seterr(status, UPB_ERROR, "Enum value missing name or number."); upb_status_setf(status, UPB_ERROR, "Enum value missing name or number.");
return; return;
} }
upb_enumdef *e = upb_downcast_enumdef(upb_descreader_last(r)); upb_enumdef *e = upb_downcast_enumdef(upb_descreader_last(r));
@ -220,7 +224,7 @@ static void upb_enumdef_EnumValueDescriptorProto_endmsg(void *_r,
upb_enumdef_setdefault(e, r->number); upb_enumdef_setdefault(e, r->number);
} }
upb_enumdef_addval(e, r->name, r->number); upb_enumdef_addval(e, r->name, r->number);
upb_string_unref(r->name); free(r->name);
r->name = NULL; r->name = NULL;
} }
@ -254,11 +258,11 @@ static void upb_enumdef_EnumDescriptorProto_endmsg(void *_r, upb_status *status)
upb_descreader *r = _r; upb_descreader *r = _r;
upb_enumdef *e = upb_downcast_enumdef(upb_descreader_last(r)); upb_enumdef *e = upb_downcast_enumdef(upb_descreader_last(r));
if (upb_descreader_last((upb_descreader*)_r)->fqname == NULL) { if (upb_descreader_last((upb_descreader*)_r)->fqname == NULL) {
upb_seterr(status, UPB_ERROR, "Enum had no name."); upb_status_setf(status, UPB_ERROR, "Enum had no name.");
return; return;
} }
if (upb_inttable_count(&e->iton) == 0) { if (upb_inttable_count(&e->iton) == 0) {
upb_seterr(status, UPB_ERROR, "Enum had no values."); upb_status_setf(status, UPB_ERROR, "Enum had no values.");
return; return;
} }
} }
@ -269,8 +273,8 @@ static upb_flow_t upb_enumdef_EnumDescriptorProto_name(void *_r,
(void)fval; (void)fval;
upb_descreader *r = _r; upb_descreader *r = _r;
upb_enumdef *e = upb_downcast_enumdef(upb_descreader_last(r)); upb_enumdef *e = upb_downcast_enumdef(upb_descreader_last(r));
upb_string_unref(e->base.fqname); free(e->base.fqname);
e->base.fqname = upb_string_getref(upb_value_getstr(val)); e->base.fqname = upb_strref_dup(upb_value_getstrref(val));
return UPB_CONTINUE; return UPB_CONTINUE;
} }
@ -298,99 +302,73 @@ static upb_flow_t upb_fielddef_startmsg(void *_r) {
return UPB_CONTINUE; return UPB_CONTINUE;
} }
// Converts the default value in string "dstr" into "d". Passes a ref on dstr. // Converts the default value in string "str" into "d". Passes a ref on str.
// Returns true on success. // Returns true on success.
static bool upb_fielddef_parsedefault(upb_string *dstr, upb_value *d, int type) { static bool upb_fielddef_parsedefault(char *str, upb_value *d, int type) {
bool success = true; bool success = true;
if (type == UPB_TYPE(STRING) || type == UPB_TYPE(BYTES) || type == UPB_TYPE(ENUM)) { if (type == UPB_TYPE(STRING) || type == UPB_TYPE(BYTES) || type == UPB_TYPE(ENUM)) {
// We'll keep the ref we had on it. We include enums in this case because // We'll keep the ref we had on it. We include enums in this case because
// we need the enumdef to resolve the name, but we may not have it yet. // we need the enumdef to resolve the name, but we may not have it yet.
// We'll resolve it later. // We'll resolve it later.
if (dstr) { if (!str) str = strdup("");
upb_value_setstr(d, dstr); upb_value_setptr(d, str);
} else {
upb_value_setstr(d, upb_emptystring());
}
} else if (type == UPB_TYPE(MESSAGE) || type == UPB_TYPE(GROUP)) { } else if (type == UPB_TYPE(MESSAGE) || type == UPB_TYPE(GROUP)) {
// We don't expect to get a default value. // We don't expect to get a default value.
upb_string_unref(dstr); free(str);
if (dstr != NULL) success = false; if (str != NULL) success = false;
} else if (type == UPB_TYPE(BOOL)) {
if (!str || strcmp(str, "false") == 0)
upb_value_setbool(d, false);
else if (strcmp(str, "true") == 0)
upb_value_setbool(d, true);
else
success = false;
free(str);
} else { } else {
// The strto* functions need the string to be NULL-terminated. // The strto* functions need the string to be NULL-terminated.
char *strz = upb_string_isempty(dstr) ? NULL : upb_string_newcstr(dstr); if (!str) str = strdup("0");
char *end; char *end;
upb_string_unref(dstr);
switch (type) { switch (type) {
case UPB_TYPE(INT32): case UPB_TYPE(INT32):
case UPB_TYPE(SINT32): case UPB_TYPE(SINT32):
case UPB_TYPE(SFIXED32): case UPB_TYPE(SFIXED32): {
if (strz) { long val = strtol(str, &end, 0);
long val = strtol(strz, &end, 0); if (val > INT32_MAX || val < INT32_MIN || errno == ERANGE || *end)
if (val > INT32_MAX || val < INT32_MIN || errno == ERANGE || *end) success = false;
success = false; else
else upb_value_setint32(d, val);
upb_value_setint32(d, val);
} else {
upb_value_setint32(d, 0);
}
break; break;
}
case UPB_TYPE(INT64): case UPB_TYPE(INT64):
case UPB_TYPE(SINT64): case UPB_TYPE(SINT64):
case UPB_TYPE(SFIXED64): case UPB_TYPE(SFIXED64):
if (strz) { upb_value_setint64(d, strtoll(str, &end, 0));
upb_value_setint64(d, strtoll(strz, &end, 0)); if (errno == ERANGE || *end) success = false;
if (errno == ERANGE || *end) success = false;
} else {
upb_value_setint64(d, 0);
}
break; break;
case UPB_TYPE(UINT32): case UPB_TYPE(UINT32):
case UPB_TYPE(FIXED32): case UPB_TYPE(FIXED32): {
if (strz) { unsigned long val = strtoul(str, &end, 0);
unsigned long val = strtoul(strz, &end, 0); if (val > UINT32_MAX || errno == ERANGE || *end)
if (val > UINT32_MAX || errno == ERANGE || *end) success = false;
success = false; else
else upb_value_setuint32(d, val);
upb_value_setuint32(d, val);
} else {
upb_value_setuint32(d, 0);
}
break; break;
}
case UPB_TYPE(UINT64): case UPB_TYPE(UINT64):
case UPB_TYPE(FIXED64): case UPB_TYPE(FIXED64):
if (strz) { upb_value_setuint64(d, strtoull(str, &end, 0));
upb_value_setuint64(d, strtoull(strz, &end, 0)); if (errno == ERANGE || *end) success = false;
if (errno == ERANGE || *end) success = false;
} else {
upb_value_setuint64(d, 0);
}
break; break;
case UPB_TYPE(DOUBLE): case UPB_TYPE(DOUBLE):
if (strz) { upb_value_setdouble(d, strtod(str, &end));
upb_value_setdouble(d, strtod(strz, &end)); if (errno == ERANGE || *end) success = false;
if (errno == ERANGE || *end) success = false;
} else {
upb_value_setdouble(d, 0.0);
}
break; break;
case UPB_TYPE(FLOAT): case UPB_TYPE(FLOAT):
if (strz) { upb_value_setfloat(d, strtof(str, &end));
upb_value_setfloat(d, strtof(strz, &end)); if (errno == ERANGE || *end) success = false;
if (errno == ERANGE || *end) success = false;
} else {
upb_value_setfloat(d, 0.0);
}
break;
case UPB_TYPE(BOOL):
if (!strz || strcmp(strz, "false") == 0)
upb_value_setbool(d, false);
else if (strcmp(strz, "true") == 0)
upb_value_setbool(d, true);
else
success = false;
break; break;
} }
free(strz); free(str);
} }
return success; return success;
} }
@ -405,13 +383,13 @@ static void upb_fielddef_endmsg(void *_r, upb_status *status) {
// Field was successfully read, add it as a field of the msgdef. // Field was successfully read, add it as a field of the msgdef.
upb_msgdef *m = upb_descreader_top(r); upb_msgdef *m = upb_descreader_top(r);
upb_msgdef_addfield(m, f); upb_msgdef_addfield(m, f);
upb_string *dstr = r->default_string; char *dstr = r->default_string;
r->default_string = NULL; r->default_string = NULL;
upb_value val; upb_value val;
if (!upb_fielddef_parsedefault(dstr, &val, f->type)) { if (!upb_fielddef_parsedefault(dstr, &val, f->type)) {
// We don't worry too much about giving a great error message since the // We don't worry too much about giving a great error message since the
// compiler should have ensured this was correct. // compiler should have ensured this was correct.
upb_seterr(status, UPB_ERROR, "Error converting default value."); upb_status_setf(status, UPB_ERROR, "Error converting default value.");
return; return;
} }
upb_fielddef_setdefault(f, val); upb_fielddef_setdefault(f, val);
@ -441,7 +419,9 @@ static upb_flow_t upb_fielddef_onnumber(void *_r, upb_value fval, upb_value val)
static upb_flow_t upb_fielddef_onname(void *_r, upb_value fval, upb_value val) { static upb_flow_t upb_fielddef_onname(void *_r, upb_value fval, upb_value val) {
(void)fval; (void)fval;
upb_descreader *r = _r; upb_descreader *r = _r;
upb_fielddef_setname(r->f, upb_value_getstr(val)); char *name = upb_strref_dup(upb_value_getstrref(val));
upb_fielddef_setname(r->f, name);
free(name);
return UPB_CONTINUE; return UPB_CONTINUE;
} }
@ -449,7 +429,9 @@ static upb_flow_t upb_fielddef_ontypename(void *_r, upb_value fval,
upb_value val) { upb_value val) {
(void)fval; (void)fval;
upb_descreader *r = _r; upb_descreader *r = _r;
upb_fielddef_settypename(r->f, upb_value_getstr(val)); char *name = upb_strref_dup(upb_value_getstrref(val));
upb_fielddef_settypename(r->f, name);
free(name);
return UPB_CONTINUE; return UPB_CONTINUE;
} }
@ -459,8 +441,8 @@ static upb_flow_t upb_fielddef_ondefaultval(void *_r, upb_value fval,
upb_descreader *r = _r; upb_descreader *r = _r;
// Have to convert from string to the correct type, but we might not know the // Have to convert from string to the correct type, but we might not know the
// type yet. // type yet.
upb_string_unref(r->default_string); free(r->default_string);
r->default_string = upb_string_getref(upb_value_getstr(val)); r->default_string = upb_strref_dup(upb_value_getstrref(val));
return UPB_CONTINUE; return UPB_CONTINUE;
} }
@ -501,7 +483,7 @@ static void upb_msgdef_endmsg(void *_r, upb_status *status) {
upb_descreader *r = _r; upb_descreader *r = _r;
upb_msgdef *m = upb_descreader_top(r); upb_msgdef *m = upb_descreader_top(r);
if(!m->base.fqname) { if(!m->base.fqname) {
upb_seterr(status, UPB_ERROR, "Encountered message with no name."); upb_status_setf(status, UPB_ERROR, "Encountered message with no name.");
return; return;
} }
@ -514,9 +496,9 @@ static upb_flow_t upb_msgdef_onname(void *_r, upb_value fval, upb_value val) {
upb_descreader *r = _r; upb_descreader *r = _r;
assert(val.type == UPB_TYPE(STRING)); assert(val.type == UPB_TYPE(STRING));
upb_msgdef *m = upb_descreader_top(r); upb_msgdef *m = upb_descreader_top(r);
upb_string_unref(m->base.fqname); free(m->base.fqname);
m->base.fqname = upb_string_getref(upb_value_getstr(val)); m->base.fqname = upb_strref_dup(upb_value_getstrref(val));
upb_descreader_setscopename(r, upb_value_getstr(val)); upb_descreader_setscopename(r, strdup(m->base.fqname));
return UPB_CONTINUE; return UPB_CONTINUE;
} }

@ -28,7 +28,7 @@ extern "C" {
// definitions that are contained inside. "name" tracks the name of the // definitions that are contained inside. "name" tracks the name of the
// message or package (a bare name -- not qualified by any enclosing scopes). // message or package (a bare name -- not qualified by any enclosing scopes).
typedef struct { typedef struct {
upb_string *name; char *name;
// Index of the first def that is under this scope. For msgdefs, the // Index of the first def that is under this scope. For msgdefs, the
// msgdef itself is at start-1. // msgdef itself is at start-1.
int start; int start;
@ -42,11 +42,11 @@ typedef struct {
upb_status status; upb_status status;
uint32_t number; uint32_t number;
upb_string *name; char *name;
bool saw_number; bool saw_number;
bool saw_name; bool saw_name;
upb_string *default_string; char *default_string;
upb_fielddef *f; upb_fielddef *f;
} upb_descreader; } upb_descreader;

@ -12,15 +12,15 @@
#include "upb_strstream.h" #include "upb_strstream.h"
#include "upb_textprinter.h" #include "upb_textprinter.h"
void upb_strtomsg(upb_string *str, void *msg, upb_msgdef *md, void upb_strtomsg(const char *str, size_t len, void *msg, upb_msgdef *md,
upb_status *status) { upb_status *status) {
upb_stringsrc strsrc; upb_stringsrc strsrc;
upb_stringsrc_init(&strsrc); upb_stringsrc_init(&strsrc);
upb_stringsrc_reset(&strsrc, str); upb_stringsrc_reset(&strsrc, str, len);
upb_decoder d; upb_decoder d;
upb_decoder_initformsgdef(&d, md); upb_decoder_initformsgdef(&d, md);
upb_decoder_reset(&d, upb_stringsrc_bytesrc(&strsrc), msg); upb_decoder_reset(&d, upb_stringsrc_bytesrc(&strsrc), 0, UINT64_MAX, msg);
upb_decoder_decode(&d, status); upb_decoder_decode(&d, status);
upb_stringsrc_uninit(&strsrc); upb_stringsrc_uninit(&strsrc);
@ -53,10 +53,11 @@ void upb_msgtotext(upb_string *str, upb_msg *msg, upb_msgdef *md,
#endif #endif
// TODO: read->load. // TODO: read->load.
void upb_read_descriptor(upb_symtab *symtab, upb_string *str, upb_status *status) { void upb_read_descriptor(upb_symtab *symtab, const char *str, size_t len,
upb_status *status) {
upb_stringsrc strsrc; upb_stringsrc strsrc;
upb_stringsrc_init(&strsrc); upb_stringsrc_init(&strsrc);
upb_stringsrc_reset(&strsrc, str); upb_stringsrc_reset(&strsrc, str, len);
upb_handlers *h = upb_handlers_new(); upb_handlers *h = upb_handlers_new();
upb_descreader_reghandlers(h); upb_descreader_reghandlers(h);
@ -68,16 +69,16 @@ void upb_read_descriptor(upb_symtab *symtab, upb_string *str, upb_status *status
upb_symtabtxn txn; upb_symtabtxn txn;
upb_symtabtxn_init(&txn); upb_symtabtxn_init(&txn);
upb_descreader_init(&r, &txn); upb_descreader_init(&r, &txn);
upb_decoder_reset(&d, upb_stringsrc_bytesrc(&strsrc), &r); upb_decoder_reset(&d, upb_stringsrc_bytesrc(&strsrc), 0, UINT64_MAX, &r);
upb_decoder_decode(&d, status); upb_decoder_decode(&d, status);
// Set default accessors and layouts on all messages. // Set default accessors and layouts on all messages.
// for msgdef in symtabtxn: // for msgdef in symtabtxn:
upb_symtabtxn_iter i; upb_symtabtxn_iter i;
for(i = upb_symtabtxn_begin(&txn); !upb_symtabtxn_done(i); upb_symtabtxn_begin(&i, &txn);
i = upb_symtabtxn_next(&txn, i)) { for(; !upb_symtabtxn_done(&i); upb_symtabtxn_next(&i)) {
upb_def *def = upb_symtabtxn_iter_def(i); upb_def *def = upb_symtabtxn_iter_def(&i);
upb_msgdef *md = upb_dyncast_msgdef(def); upb_msgdef *md = upb_dyncast_msgdef(def);
if (!md) return; if (!md) return;
// For field in msgdef: // For field in msgdef:
@ -96,3 +97,33 @@ void upb_read_descriptor(upb_symtab *symtab, upb_string *str, upb_status *status
upb_stringsrc_uninit(&strsrc); upb_stringsrc_uninit(&strsrc);
upb_decoder_uninit(&d); upb_decoder_uninit(&d);
} }
char *upb_readfile(const char *filename, size_t *len) {
FILE *f = fopen(filename, "rb");
if(!f) return NULL;
if(fseek(f, 0, SEEK_END) != 0) goto error;
long size = ftell(f);
if(size < 0) goto error;
if(fseek(f, 0, SEEK_SET) != 0) goto error;
char *buf = malloc(size);
if(fread(buf, size, 1, f) != 1) goto error;
fclose(f);
if (len) *len = size;
return buf;
error:
fclose(f);
return NULL;
}
void upb_read_descriptorfile(upb_symtab *symtab, const char *fname,
upb_status *status) {
size_t len;
char *data = upb_readfile(fname, &len);
if (!data) {
upb_status_setf(status, UPB_ERROR, "Couldn't read file: %s", fname);
return;
}
upb_read_descriptor(symtab, data, len, status);
free(data);
}

@ -27,6 +27,7 @@
#define UPB_GLUE_H #define UPB_GLUE_H
#include <stdbool.h> #include <stdbool.h>
#include "upb.h"
#ifdef __cplusplus #ifdef __cplusplus
extern "C" { extern "C" {
@ -36,20 +37,23 @@ extern "C" {
// Clients should use the regular, typedef'd names (eg. upb_string). // Clients should use the regular, typedef'd names (eg. upb_string).
struct _upb_msg; struct _upb_msg;
struct _upb_msgdef; struct _upb_msgdef;
struct _upb_status;
struct _upb_string;
struct _upb_symtab; struct _upb_symtab;
// Decodes the given string, which must be in protobuf binary format, to the // Decodes the given string, which must be in protobuf binary format, to the
// given upb_msg with msgdef "md", storing the status of the operation in "s". // given upb_msg with msgdef "md", storing the status of the operation in "s".
void upb_strtomsg(struct _upb_string *str, void *msg, void upb_strtomsg(const char *str, size_t len, void *msg,
struct _upb_msgdef *md, struct _upb_status *s); struct _upb_msgdef *md, upb_status *s);
void upb_msgtotext(struct _upb_string *str, void *msg, //void upb_msgtotext(struct _upb_string *str, void *msg,
struct _upb_msgdef *md, bool single_line); // struct _upb_msgdef *md, bool single_line);
void upb_read_descriptor(struct _upb_symtab *symtab, struct _upb_string *str, void upb_read_descriptor(struct _upb_symtab *symtab, const char *str, size_t len,
struct _upb_status *status); upb_status *status);
void upb_read_descriptorfile(struct _upb_symtab *symtab, const char *fname,
upb_status *status);
char *upb_readfile(const char *filename, size_t *len);
#ifdef __cplusplus #ifdef __cplusplus
} /* extern "C" */ } /* extern "C" */

@ -96,7 +96,6 @@ upb_mhandlers *upb_handlers_newmhandlers(upb_handlers *h) {
} }
typedef struct { typedef struct {
upb_strtable_entry e;
upb_mhandlers *mh; upb_mhandlers *mh;
} upb_mtab_ent; } upb_mtab_ent;
@ -105,8 +104,8 @@ static upb_mhandlers *upb_regmsg_dfs(upb_handlers *h, upb_msgdef *m,
upb_onfieldreg *fieldreg_cb, upb_onfieldreg *fieldreg_cb,
void *closure, upb_strtable *mtab) { void *closure, upb_strtable *mtab) {
upb_mhandlers *mh = upb_handlers_newmhandlers(h); upb_mhandlers *mh = upb_handlers_newmhandlers(h);
upb_mtab_ent e = {{m->base.fqname, 0}, mh}; upb_mtab_ent e = {mh};
upb_strtable_insert(mtab, &e.e); upb_strtable_insert(mtab, m->base.fqname, &e);
if (msgreg_cb) msgreg_cb(closure, mh, m); if (msgreg_cb) msgreg_cb(closure, mh, m);
upb_msg_iter i; upb_msg_iter i;
for(i = upb_msg_begin(m); !upb_msg_done(i); i = upb_msg_next(m, i)) { for(i = upb_msg_begin(m); !upb_msg_done(i); i = upb_msg_next(m, i)) {
@ -153,7 +152,7 @@ static upb_fhandlers toplevel_f = {
#ifdef NDEBUG #ifdef NDEBUG
{{0}}, {{0}},
#else #else
{{0}, UPB_VALUETYPE_RAW}, {{0}, -1},
#endif #endif
NULL, NULL, NULL, NULL, NULL, 0, 0, 0, NULL}; NULL, NULL, NULL, NULL, NULL, 0, 0, 0, NULL};
@ -198,23 +197,23 @@ void upb_dispatch_endmsg(upb_dispatcher *d, upb_status *status) {
assert(d->top == d->stack); assert(d->top == d->stack);
if (d->msgent->endmsg) d->msgent->endmsg(d->top->closure, &d->status); if (d->msgent->endmsg) d->msgent->endmsg(d->top->closure, &d->status);
// TODO: should we avoid this copy by passing client's status obj to cbs? // TODO: should we avoid this copy by passing client's status obj to cbs?
upb_copyerr(status, &d->status); upb_status_copy(status, &d->status);
} }
void indent(upb_dispatcher *d) { void indent(upb_dispatcher *d) {
for (int i = 0; i < (d->top - d->stack); i++) printf(" "); for (int i = 0; i < (d->top - d->stack); i++) fprintf(stderr, " ");
} }
void indentm1(upb_dispatcher *d) { void indentm1(upb_dispatcher *d) {
for (int i = 0; i < (d->top - d->stack - 1); i++) printf(" "); for (int i = 0; i < (d->top - d->stack - 1); i++) fprintf(stderr, " ");
} }
upb_dispatcher_frame *upb_dispatch_startseq(upb_dispatcher *d, upb_dispatcher_frame *upb_dispatch_startseq(upb_dispatcher *d,
upb_fhandlers *f) { upb_fhandlers *f) {
//indent(d); //indent(d);
//printf("START SEQ: %d\n", f->number); //fprintf(stderr, "START SEQ: %d\n", f->number);
if((d->top+1) >= d->limit) { if((d->top+1) >= d->limit) {
upb_seterr(&d->status, UPB_ERROR, "Nesting too deep."); upb_status_setf(&d->status, UPB_ERROR, "Nesting too deep.");
_upb_dispatcher_unwind(d, UPB_BREAK); _upb_dispatcher_unwind(d, UPB_BREAK);
return d->top; // Dummy. return d->top; // Dummy.
} }
@ -235,7 +234,7 @@ upb_dispatcher_frame *upb_dispatch_startseq(upb_dispatcher *d,
upb_dispatcher_frame *upb_dispatch_endseq(upb_dispatcher *d) { upb_dispatcher_frame *upb_dispatch_endseq(upb_dispatcher *d) {
//indentm1(d); //indentm1(d);
//printf("END SEQ\n"); //fprintf(stderr, "END SEQ\n");
assert(d->top > d->stack); assert(d->top > d->stack);
assert(d->top->is_sequence); assert(d->top->is_sequence);
upb_fhandlers *f = d->top->f; upb_fhandlers *f = d->top->f;
@ -255,9 +254,9 @@ upb_dispatcher_frame *upb_dispatch_endseq(upb_dispatcher *d) {
upb_dispatcher_frame *upb_dispatch_startsubmsg(upb_dispatcher *d, upb_dispatcher_frame *upb_dispatch_startsubmsg(upb_dispatcher *d,
upb_fhandlers *f) { upb_fhandlers *f) {
//indent(d); //indent(d);
//printf("START SUBMSG: %d\n", f->number); //fprintf(stderr, "START SUBMSG: %d\n", f->number);
if((d->top+1) >= d->limit) { if((d->top+1) >= d->limit) {
upb_seterr(&d->status, UPB_ERROR, "Nesting too deep."); upb_status_setf(&d->status, UPB_ERROR, "Nesting too deep.");
_upb_dispatcher_unwind(d, UPB_BREAK); _upb_dispatcher_unwind(d, UPB_BREAK);
return d->top; // Dummy. return d->top; // Dummy.
} }
@ -281,7 +280,7 @@ upb_dispatcher_frame *upb_dispatch_startsubmsg(upb_dispatcher *d,
upb_dispatcher_frame *upb_dispatch_endsubmsg(upb_dispatcher *d) { upb_dispatcher_frame *upb_dispatch_endsubmsg(upb_dispatcher *d) {
//indentm1(d); //indentm1(d);
//printf("END SUBMSG\n"); //fprintf(stderr, "END SUBMSG\n");
assert(d->top > d->stack); assert(d->top > d->stack);
assert(!d->top->is_sequence); assert(!d->top->is_sequence);
upb_fhandlers *f = d->top->f; upb_fhandlers *f = d->top->f;

@ -17,6 +17,7 @@
#include <limits.h> #include <limits.h>
#include "upb.h" #include "upb.h"
#include "upb_def.h" #include "upb_def.h"
#include "upb_bytestream.h"
#ifdef __cplusplus #ifdef __cplusplus
extern "C" { extern "C" {
@ -303,14 +304,12 @@ typedef struct {
// Members to use as the data source requires. // Members to use as the data source requires.
void *srcclosure; void *srcclosure;
uint64_t end_ofs;
uint16_t msgindex; uint16_t msgindex;
uint16_t fieldindex; uint16_t fieldindex;
uint32_t end_offset;
// Does this frame represent a sequence or a submsg (f might be both). bool is_sequence; // frame represents seq or submsg? (f might be both).
// We only need a single bit here, but this will make each individual bool is_packed; // !upb_issubmsg(f) && end_ofs != UINT64_MAX (strings aren't pushed)
// frame grow from 32 to 40 bytes on LP64, which is a bit excessive.
bool is_sequence;
} upb_dispatcher_frame; } upb_dispatcher_frame;
// Called when some of the input needs to be skipped. All frames from // Called when some of the input needs to be skipped. All frames from

@ -7,6 +7,7 @@
* Data structure for storing a message of protobuf data. * Data structure for storing a message of protobuf data.
*/ */
#include "upb.h"
#include "upb_msg.h" #include "upb_msg.h"
void upb_msg_clear(void *msg, upb_msgdef *md) { void upb_msg_clear(void *msg, upb_msgdef *md) {
@ -132,23 +133,23 @@ UPB_ACCESSORS(bool, bool)
UPB_ACCESSORS(ptr, void*) UPB_ACCESSORS(ptr, void*)
#undef UPB_ACCESSORS #undef UPB_ACCESSORS
static void _upb_stdmsg_setstr(void *_dst, upb_value _src) { static void _upb_stdmsg_setstr(void *_dst, upb_value src) {
// We do: upb_stdarray **dstp = _dst;
// - upb_string_recycle(), upb_string_substr() instead of upb_stdarray *dst = *dstp;
// - upb_string_unref(), upb_string_getref() if (!dst) {
// because we can conveniently cache these upb_string objects in the dst = malloc(sizeof(*dst));
// upb_msg, whereas the upb_src who is sending us these strings may not dst->size = 0;
// have a good way of caching them. This saves the upb_src from allocating dst->ptr = NULL;
// new upb_strings all the time to give us. *dstp = dst;
// }
// If you were using this to copy one upb_msg to another this would dst->len = 0;
// allocate string objects whereas a upb_string_getref could have avoided upb_strref *ref = upb_value_getstrref(src);
// those allocations completely; if this is an issue, we could make it an if (ref->len > dst->size) {
// option of the upb_msgsink which behavior is desired. dst->size = ref->len;
upb_string **dst = _dst; dst->ptr = realloc(dst->ptr, dst->size);
upb_string *src = upb_value_getstr(_src); }
upb_string_recycle(dst); dst->len = ref->len;
upb_string_substr(*dst, src, 0, upb_string_len(src)); upb_bytesrc_read(ref->bytesrc, ref->stream_offset, ref->len, dst->ptr);
} }
upb_flow_t upb_stdmsg_setstr(void *_m, upb_value fval, upb_value val) { upb_flow_t upb_stdmsg_setstr(void *_m, upb_value fval, upb_value val) {
@ -166,15 +167,11 @@ upb_flow_t upb_stdmsg_setstr_r(void *a, upb_value fval, upb_value val) {
} }
upb_value upb_stdmsg_getstr(void *m, upb_value fval) { upb_value upb_stdmsg_getstr(void *m, upb_value fval) {
upb_value val = upb_stdmsg_getptr(m, fval); return upb_stdmsg_getptr(m, fval);
upb_value_setstr(&val, upb_value_getptr(val));
return val;
} }
upb_value upb_stdmsg_seqgetstr(void *i) { upb_value upb_stdmsg_seqgetstr(void *i) {
upb_value val = upb_stdmsg_seqgetptr(i); return upb_stdmsg_seqgetptr(i);
upb_value_setstr(&val, upb_value_getptr(val));
return val;
} }
void *upb_stdmsg_new(upb_msgdef *md) { void *upb_stdmsg_new(upb_msgdef *md) {
@ -188,11 +185,13 @@ void upb_stdseq_free(void *s, upb_fielddef *f) {
upb_stdarray *a = s; upb_stdarray *a = s;
if (upb_issubmsg(f) || upb_isstring(f)) { if (upb_issubmsg(f) || upb_isstring(f)) {
void **p = (void**)a->ptr; void **p = (void**)a->ptr;
for (int i = 0; i < a->size; i++) { for (uint32_t i = 0; i < a->size; i++) {
if (upb_issubmsg(f)) { if (upb_issubmsg(f)) {
upb_stdmsg_free(p[i], upb_downcast_msgdef(f->def)); upb_stdmsg_free(p[i], upb_downcast_msgdef(f->def));
} else { } else {
upb_string_unref(p[i]); upb_stdarray *str = p[i];
free(str->ptr);
free(str);
} }
} }
} }
@ -213,7 +212,9 @@ void upb_stdmsg_free(void *m, upb_msgdef *md) {
} else if (upb_issubmsg(f)) { } else if (upb_issubmsg(f)) {
upb_stdmsg_free(subp, upb_downcast_msgdef(f->def)); upb_stdmsg_free(subp, upb_downcast_msgdef(f->def));
} else { } else {
upb_string_unref(subp); upb_stdarray *str = subp;
free(str->ptr);
free(str);
} }
} }
free(m); free(m);

@ -148,7 +148,7 @@ typedef struct {
void upb_msgvisitor_init(upb_msgvisitor *v, upb_msgdef *md, upb_handlers *h); void upb_msgvisitor_init(upb_msgvisitor *v, upb_msgdef *md, upb_handlers *h);
void upb_msgvisitor_uninit(upb_msgvisitor *v); void upb_msgvisitor_uninit(upb_msgvisitor *v);
void upb_msgvisitor_reset(upb_msgvisitor *v, upb_msg *m); void upb_msgvisitor_reset(upb_msgvisitor *v, void *m);
void upb_msgvisitor_visit(upb_msgvisitor *v, upb_status *status); void upb_msgvisitor_visit(upb_msgvisitor *v, upb_status *status);
@ -183,8 +183,8 @@ upb_flow_t upb_stdmsg_setbool(void *c, upb_value fval, upb_value val);
// if necessary. // if necessary.
typedef struct { typedef struct {
char *ptr; char *ptr;
int32_t len; // Number of elements present. uint32_t len; // Number of elements present.
int32_t size; // Number of elements allocated. uint32_t size; // Number of elements allocated.
} upb_stdarray; } upb_stdarray;
upb_flow_t upb_stdmsg_setint64_r(void *c, upb_value fval, upb_value val); upb_flow_t upb_stdmsg_setint64_r(void *c, upb_value fval, upb_value val);

@ -9,96 +9,158 @@
#include <stddef.h> #include <stddef.h>
#include <stdlib.h> #include <stdlib.h>
#include "upb_string.h" #include <string.h>
// We can make this configurable if necessary. // We can make this configurable if necessary.
#define BLOCK_SIZE 4096 #define BUF_SIZE 32768
struct upb_stdio {
upb_bytesrc bytesrc;
upb_bytesink bytesink;
FILE *file;
};
void upb_stdio_reset(upb_stdio *stdio, FILE* file) { /* upb_bytesrc methods ********************************************************/
stdio->file = file;
int upb_stdio_cmpbuf(const void *_key, const void *_elem) {
const uint64_t *ofs = _key;
const upb_stdio_buf *buf = _elem;
return (*ofs / BUF_SIZE) - (buf->ofs / BUF_SIZE);
} }
static upb_stdio_buf *upb_stdio_findbuf(upb_stdio *s, uint64_t ofs) {
// TODO: it is probably faster to linear search short lists, and to
// special-case the last one or two bufs.
return bsearch(&ofs, s->bufs, s->nbuf, sizeof(*s->bufs), &upb_stdio_cmpbuf);
}
/* upb_bytesrc methods ********************************************************/ //static upb_strlen_t upb_stdio_read(void *src, uint32_t ofs, upb_buf *b,
// upb_status *status) {
// upb_stdio *stdio = (upb_stdio*)src;
// size_t read = fread(buf, 1, BLOCK_SIZE, stdio->file);
// if(read < (size_t)BLOCK_SIZE) {
// // Error or EOF.
// if(feof(stdio->file)) {
// upb_seterr(status, UPB_EOF, "");
// } else if(ferror(stdio->file)) {
// upb_status_fromerrno(s);
// return 0;
// }
// }
// b->len = read;
// stdio->next_ofs += read;
// return stdio->next_ofs;
//}
size_t upb_stdio_fetch(void *src, uint64_t ofs, upb_status *s) {
(void)src;
(void)ofs;
(void)s;
return 0;
}
static upb_strlen_t upb_stdio_read(upb_bytesrc *src, void *buf, void upb_stdio_read(void *src, uint64_t src_ofs, size_t len, char *dst) {
upb_strlen_t count, upb_status *status) { upb_stdio_buf *buf = upb_stdio_findbuf(src, src_ofs);
upb_stdio *stdio = (upb_stdio*)src; src_ofs -= buf->ofs;
assert(count > 0); memcpy(dst, &buf->data[src_ofs], BUF_SIZE - src_ofs);
size_t read = fread(buf, 1, count, stdio->file); len -= (BUF_SIZE - src_ofs);
if(read < (size_t)count) { dst += (BUF_SIZE - src_ofs);
// Error or EOF. while (len > 0) {
if(feof(stdio->file)) { ++buf;
upb_seterr(status, UPB_EOF, ""); size_t bytes = UPB_MIN(len, BUF_SIZE);
return read; memcpy(dst, buf->data, bytes);
} else if(ferror(stdio->file)) { len -= bytes;
upb_seterr(status, UPB_ERROR, "Error reading from stdio stream."); dst += bytes;
return -1;
}
} }
return read;
} }
static bool upb_stdio_getstr(upb_bytesrc *src, upb_string *str, const char *upb_stdio_getptr(void *src, uint64_t ofs, size_t *len) {
upb_status *status) { upb_stdio_buf *buf = upb_stdio_findbuf(src, ofs);
upb_strlen_t read = upb_stdio_read( ofs -= buf->ofs;
src, upb_string_getrwbuf(str, BLOCK_SIZE), BLOCK_SIZE, status); *len = BUF_SIZE - ofs;
if (read <= 0) return false; return &buf->data[ofs];
upb_string_getrwbuf(str, read); }
return true;
void upb_stdio_refregion(void *src, uint64_t ofs, size_t len) {
upb_stdio_buf *buf = upb_stdio_findbuf(src, ofs);
len -= (BUF_SIZE - ofs);
++buf->refcount;
while (len > 0) {
++buf;
++buf->refcount;
}
}
void upb_stdio_unrefregion(void *src, uint64_t ofs, size_t len) {
(void)src;
(void)ofs;
(void)len;
} }
/* upb_bytesink methods *******************************************************/ /* upb_bytesink methods *******************************************************/
#if 0
upb_strlen_t upb_stdio_putstr(upb_bytesink *sink, upb_string *str, upb_status *status) { upb_strlen_t upb_stdio_putstr(upb_bytesink *sink, upb_string *str, upb_status *status) {
upb_stdio *stdio = (upb_stdio*)((char*)sink - offsetof(upb_stdio, bytesink)); upb_stdio *stdio = (upb_stdio*)((char*)sink - offsetof(upb_stdio, sink));
upb_strlen_t len = upb_string_len(str); upb_strlen_t len = upb_string_len(str);
upb_strlen_t written = fwrite(upb_string_getrobuf(str), 1, len, stdio->file); upb_strlen_t written = fwrite(upb_string_getrobuf(str), 1, len, stdio->file);
if(written < len) { if(written < len) {
upb_seterr(status, UPB_ERROR, "Error writing to stdio stream."); upb_status_setf(status, UPB_ERROR, "Error writing to stdio stream.");
return -1; return -1;
} }
return written; return written;
} }
#endif
upb_strlen_t upb_stdio_vprintf(upb_bytesink *sink, upb_status *status, uint32_t upb_stdio_vprintf(upb_bytesink *sink, upb_status *status,
const char *fmt, va_list args) { const char *fmt, va_list args) {
upb_stdio *stdio = (upb_stdio*)((char*)sink - offsetof(upb_stdio, bytesink)); upb_stdio *stdio = (upb_stdio*)((char*)sink - offsetof(upb_stdio, sink));
upb_strlen_t written = vfprintf(stdio->file, fmt, args); int written = vfprintf(stdio->file, fmt, args);
if (written < 0) { if (written < 0) {
upb_seterr(status, UPB_ERROR, "Error writing to stdio stream."); upb_status_setf(status, UPB_ERROR, "Error writing to stdio stream.");
return -1; return -1;
} }
return written; return written;
} }
upb_stdio *upb_stdio_new() { void upb_stdio_init(upb_stdio *stdio) {
static upb_bytesrc_vtbl bytesrc_vtbl = { static upb_bytesrc_vtbl bytesrc_vtbl = {
upb_stdio_fetch,
upb_stdio_read, upb_stdio_read,
upb_stdio_getstr, upb_stdio_getptr,
upb_stdio_refregion,
upb_stdio_unrefregion,
NULL,
NULL
}; };
upb_bytesrc_init(&stdio->src, &bytesrc_vtbl);
static upb_bytesink_vtbl bytesink_vtbl = { //static upb_bytesink_vtbl bytesink_vtbl = {
upb_stdio_putstr, // upb_stdio_putstr,
upb_stdio_vprintf // upb_stdio_vprintf
}; //};
//upb_bytesink_init(&stdio->bytesink, &bytesink_vtbl);
}
upb_stdio *stdio = malloc(sizeof(*stdio)); void upb_stdio_reset(upb_stdio* stdio, FILE *file) {
upb_bytesrc_init(&stdio->bytesrc, &bytesrc_vtbl); stdio->file = file;
upb_bytesink_init(&stdio->bytesink, &bytesink_vtbl); stdio->should_close = false;
return stdio; }
void upb_stdio_open(upb_stdio *stdio, const char *filename, const char *mode,
upb_status *s) {
FILE *f = fopen(filename, mode);
if (!f) {
upb_status_fromerrno(s);
return;
}
setvbuf(stdio->file, NULL, _IONBF, 0); // Disable buffering; we do our own.
upb_stdio_reset(stdio, f);
stdio->should_close = true;
} }
void upb_stdio_free(upb_stdio *stdio) { void upb_stdio_uninit(upb_stdio *stdio) {
free(stdio); // Can't report status; caller should flush() to ensure data is written.
if (stdio->should_close) fclose(stdio->file);
stdio->file = NULL;
} }
upb_bytesrc* upb_stdio_bytesrc(upb_stdio *stdio) { return &stdio->bytesrc; } upb_bytesrc* upb_stdio_bytesrc(upb_stdio *stdio) { return &stdio->src; }
upb_bytesink* upb_stdio_bytesink(upb_stdio *stdio) { return &stdio->bytesink; } upb_bytesink* upb_stdio_bytesink(upb_stdio *stdio) { return &stdio->sink; }

@ -5,7 +5,12 @@
* Author: Josh Haberman <jhaberman@gmail.com> * Author: Josh Haberman <jhaberman@gmail.com>
* *
* This file provides upb_bytesrc and upb_bytesink implementations for * This file provides upb_bytesrc and upb_bytesink implementations for
* ANSI C stdio. * ANSI C stdio, which is less efficient than posixfd, but more portable.
*
* Specifically, stdio functions acquire locks on every operation (unless you
* use the f{read,write,...}_unlocked variants, which are not standard) and
* performs redundant buffering (unless you disable it with setvbuf(), but we
* can only do this on newly-opened filehandles).
*/ */
#include <stdio.h> #include <stdio.h>
@ -18,21 +23,44 @@
extern "C" { extern "C" {
#endif #endif
struct upb_stdio; typedef struct {
typedef struct upb_stdio upb_stdio; uint64_t ofs;
uint32_t refcount;
char data[];
} upb_stdio_buf;
// We use a single object for both bytesrc and bytesink for simplicity.
// The object is still not thread-safe, and may only be used by one reader
// and one writer at a time.
typedef struct {
upb_bytesrc src;
upb_bytesink sink;
FILE *file;
bool should_close;
upb_stdio_buf **bufs;
uint32_t nbuf, szbuf;
} upb_stdio;
void upb_stdio_init(upb_stdio *stdio);
// Caller should call upb_stdio_flush prior to calling this to ensure that
// all data is flushed, otherwise data can be silently dropped if an error
// occurs flushing the remaining buffers.
void upb_stdio_uninit(upb_stdio *stdio);
// Resets the object to read/write to the given "file." The caller is
// responsible for closing the file, which must outlive this object.
void upb_stdio_reset(upb_stdio *stdio, FILE *file);
// Creation/deletion. // As an alternative to upb_stdio_reset(), initializes the object by opening a
upb_stdio *upb_stdio_new(); // file, and will handle closing it. This may result in more efficient I/O
void upb_stdio_free(upb_stdio *stdio); // than the previous since we can call setvbuf() to disable buffering.
void upb_stdio_open(upb_stdio *stdio, const char *filename, const char *mode,
upb_status *s);
// Reset/initialize the object for use. The src or sink will call // Must be called to cleanup after the object, including closing the file if
// fread()/fwrite()/etc. on the given FILE*. // it was opened with upb_stdio_open() (which can fail, hence the status).
void upb_stdio_reset(upb_stdio *stdio, FILE* file); //
// Gets a bytesrc or bytesink for the given stdio. The returned pointer is
// invalidated by upb_stdio_reset above. It is perfectly valid to get both
// a bytesrc and a bytesink for the same stdio if the FILE* is open for reading
// and writing.
upb_bytesrc *upb_stdio_bytesrc(upb_stdio *stdio); upb_bytesrc *upb_stdio_bytesrc(upb_stdio *stdio);
upb_bytesink *upb_stdio_bytesink(upb_stdio *stdio); upb_bytesink *upb_stdio_bytesink(upb_stdio *stdio);

@ -1,164 +0,0 @@
/*
* upb - a minimalist implementation of protocol buffers.
*
* Copyright (c) 2010 Google Inc. See LICENSE for details.
* Author: Josh Haberman <jhaberman@gmail.com>
*/
#include "upb_string.h"
#include <stdlib.h>
#ifdef __GLIBC__
#include <malloc.h>
#elif defined(__APPLE__)
#include <malloc/malloc.h>
#endif
// Rounds v up to the nearest power of two; a value that already is a power of
// two is returned unchanged.  Uses the bit-smearing technique described at
// http://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2
//
// The arithmetic is modulo 2^32, so an input of 0 and any input above 2^31
// both yield 0.
static uint32_t upb_round_up_pow2(uint32_t v) {
  uint32_t smeared = v - 1;
  // Copy the highest set bit into every lower bit position.
  for (unsigned shift = 1; shift < 32; shift <<= 1) {
    smeared |= smeared >> shift;
  }
  return smeared + 1;
}
// Allocates a new, empty string: no active data, no cached buffer, no source
// string, and a refcount of 1 that is owned by the caller.
//
// Returns NULL if the allocation fails.
upb_string *upb_string_new() {
  upb_string *str = malloc(sizeof(*str));
  // Do not initialize through a NULL pointer when we are out of memory.
  if (!str) return NULL;
  str->ptr = NULL;
  str->cached_mem = NULL;
  str->len = 0;
#ifndef UPB_HAVE_MSIZE
  // Only tracked explicitly when the struct carries a size member.
  str->size = 0;
#endif
  str->src = NULL;
  upb_atomic_init(&str->refcount, 1);
  return str;
}
// Returns the capacity, in bytes, of the buffer owned by str (cached_mem).
// On glibc and Apple platforms the allocator is asked directly; elsewhere the
// explicitly stored size member is used.
uint32_t upb_string_size(upb_string *str) {
  uint32_t capacity;
#ifdef __GLIBC__
  capacity = malloc_usable_size(str->cached_mem);
#elif defined(__APPLE__)
  capacity = malloc_size(str->cached_mem);
#else
  capacity = str->size;
#endif
  return capacity;
}
// Destroys "str" unconditionally, without consulting its refcount; the header
// directs clients to call upb_string_unref() instead.
void _upb_string_free(upb_string *str) {
// Free the buffer this string owns (free(NULL) is a no-op).
free(str->cached_mem);
// Drop the ref held on the source string, if this string is a substring.
_upb_string_release(str);
free(str);
}
// Returns a writable buffer of at least "len" bytes owned by "str", and sets
// the string's length to "len".  The existing allocation is reused when it is
// large enough; otherwise it is grown with realloc() to "len" rounded up to a
// power of two.
//
// Returns NULL, leaving "str" unmodified, if the buffer cannot be grown.
char *upb_string_getrwbuf(upb_string *str, upb_strlen_t len) {
  // assert(str->ptr == NULL);
  upb_strlen_t size = upb_string_size(str);
  if (size < len) {
    size = upb_round_up_pow2(len);
    // Keep the old block reachable until realloc() is known to have
    // succeeded; assigning its result straight to cached_mem would leak the
    // old block on failure.
    char *grown = realloc(str->cached_mem, size);
    if (!grown) return NULL;
    str->cached_mem = grown;
#ifndef UPB_HAVE_MSIZE
    str->size = size;
#endif
  }
  str->len = len;
  str->ptr = str->cached_mem;
  return str->cached_mem;
}
// Makes "str" refer to "len" bytes of "target_str" starting at "start",
// without copying.  When target_str is itself a substring, the new string
// references target_str's source directly (with the offset adjusted), so "str"
// takes its ref on the string that owns the bytes.
void upb_string_substr(upb_string *str, upb_string *target_str,
                       upb_strlen_t start, upb_strlen_t len) {
  assert(str->ptr == NULL);
  assert(start + len <= upb_string_len(target_str));

  upb_string *owner = target_str;
  upb_strlen_t owner_ofs = start;
  if (target_str->src) {
    // Translate the offset so that it is relative to the source's data.
    owner = target_str->src;
    owner_ofs += (target_str->ptr - owner->ptr);
  }

  str->src = upb_string_getref(owner);
  str->ptr = upb_string_getrobuf(owner) + owner_ofs;
  str->len = len;
}
// Formats "format"/"args" into "str" starting at byte "offset", growing the
// string's buffer as needed, and sets the string's length to the end of the
// formatted text (the NUL terminator written by vsnprintf is not counted).
//
// Returns the number of bytes formatted.  Returns 0, with the string's length
// set to "offset", if formatting fails or the buffer cannot be obtained.
size_t upb_string_vprintf_at(upb_string *str, size_t offset, const char *format,
                             va_list args) {
  // Space already available past "offset".  Guard the subtraction so that an
  // offset beyond the current capacity cannot wrap around.
  uint32_t capacity = upb_string_size(str);
  uint32_t avail = capacity > offset ? (uint32_t)(capacity - offset) : 0;
  uint32_t size = UPB_MAX(avail, 16);
  char *base = upb_string_getrwbuf(str, offset + size);
  if (!base) return 0;

  // Try once without reallocating.  We have to va_copy because we might have
  // to call vsnprintf again.
  va_list args_copy;
  va_copy(args_copy, args);
  int written = vsnprintf(base + offset, size, format, args_copy);
  va_end(args_copy);

  if (written < 0) {
    // vsnprintf reports errors with a negative value; it must not be treated
    // as a (huge) unsigned length.
    str->len = offset;
    return 0;
  }

  uint32_t true_size = written;
  if (true_size >= size) {
    // Output was truncated.  vsnprintf always reserves one byte of the buffer
    // for the NUL terminator, so even though we don't care about the
    // terminator we must offer true_size + 1 bytes to get the whole text.
    base = upb_string_getrwbuf(str, offset + true_size + 1);
    if (!base) {
      str->len = offset;
      return 0;
    }
    vsnprintf(base + offset, true_size + 1, format, args);
  }
  // Resize to be the correct size.
  str->len = offset + true_size;
  return true_size;
}
// printf-style constructor: returns a newly created string holding the
// formatted text.
upb_string *upb_string_asprintf(const char *format, ...) {
  va_list ap;
  upb_string *result = upb_string_new();
  va_start(ap, format);
  upb_string_vprintf(result, format, ap);
  va_end(ap);
  return result;
}
// Returns a newly created string whose bytes are a copy of those in "s".
upb_string *upb_strdup(upb_string *s) {
  upb_string *copy = upb_string_new();
  upb_strcpy(copy, s);
  return copy;
}
// Appends the bytes of "append" to "s" in place, growing s's buffer if needed.
void upb_strcat(upb_string *s, upb_string *append) {
  const uint32_t head_len = upb_string_len(s);
  const uint32_t tail_len = upb_string_len(append);
  // Resizing sets s's length to the combined length.
  char *dest = upb_string_getrwbuf(s, head_len + tail_len);
  memcpy(dest + head_len, upb_string_getrobuf(append), tail_len);
}
// Reads the entire file "filename" (opened in binary mode) into a newly
// created string.  An empty file yields an empty string.
//
// Returns NULL if the file cannot be opened, sized, or fully read, or if the
// string cannot be allocated.  The caller owns a ref on the returned string.
upb_string *upb_strreadfile(const char *filename) {
  upb_string *s = NULL;
  FILE *f = fopen(filename, "rb");
  if(!f) return NULL;
  if(fseek(f, 0, SEEK_END) != 0) goto error;
  long size = ftell(f);
  if(size < 0) goto error;
  if(fseek(f, 0, SEEK_SET) != 0) goto error;
  s = upb_string_new();
  if(!s) goto error;
  char *buf = upb_string_getrwbuf(s, size);
  // Only read when there is something to read: a zero-length file is valid
  // input, and asking fread() for a single zero-sized item would report 0
  // items and be mistaken for a failure.
  if(size > 0 && (!buf || fread(buf, 1, size, f) != (size_t)size)) goto error;
  fclose(f);
  return s;
error:
  // Release the partially built string (a no-op while s is still NULL).
  upb_string_unref(s);
  fclose(f);
  return NULL;
}
// Returns a pointer to a single shared, statically allocated empty string.
// It is built with UPB_STATIC_STRING, so it carries the static
// pseudo-refcount: upb_string_getref() and upb_string_unref() leave such
// strings uncounted and never free them.
upb_string *upb_emptystring() {
static upb_string empty = UPB_STATIC_STRING("");
return &empty;
}
// Returns a newly malloc()'d, NUL-terminated copy of str's bytes, which the
// caller must free().  Returns NULL if the allocation fails.
char *upb_string_newcstr(upb_string *str) {
  upb_strlen_t len = upb_string_len(str);
  char *ret = malloc(len+1);
  if (!ret) return NULL;
  // A freshly created empty string has a NULL data pointer, which must not be
  // handed to memcpy() even for a zero-length copy.
  if (len > 0) memcpy(ret, upb_string_getrobuf(str), len);
  ret[len] = '\0';
  return ret;
}

@ -1,394 +0,0 @@
/*
* upb - a minimalist implementation of protocol buffers.
*
* Copyright (c) 2010 Google Inc. See LICENSE for details.
* Author: Josh Haberman <jhaberman@gmail.com>
*
* This file defines a simple string type which is length-delimited instead
* of NULL-terminated, and which has useful sharing semantics.
*
* The overriding goal of upb_string is to avoid memcpy(), malloc(), and free()
* wherever possible, while keeping both CPU and memory overhead low.
* Throughout upb there are situations where one wants to reference all or part
* of another string without copying. upb_string provides APIs for doing this,
* and allows the referenced string to be kept alive for as long as anyone is
* referencing it.
*
* Characteristics of upb_string:
* - strings are reference-counted.
* - strings are immutable (can be mutated only when first created or recycled).
* - if a string has no other referents, it can be "recycled" into a new string
* without having to reallocate the upb_string.
* - strings can be substrings of other strings (owning a ref on the source
* string).
*
* Reference-counted strings have recently fallen out of favor because of the
* performance impacts of doing thread-safe reference counting with atomic
* operations. We side-step this issue by not performing atomic operations
* unless the string has been marked thread-safe. Time will tell whether this
* scheme is easy and convenient enough to be practical.
*
* Strings are expected to be 8-bit-clean, but "char*" is such an entrenched
* idiom that we go with it instead of making our pointers uint8_t*.
*
* WARNING: THE GETREF, UNREF, AND RECYCLE OPERATIONS ARE NOT THREAD_SAFE
* UNLESS THE STRING HAS BEEN MARKED SYNCHRONIZED! What this means is that if
* you are logically passing a reference to a upb_string to another thread
* (which implies that the other thread must eventually call unref or recycle),
* you have two options:
*
* - create a copy of the string that will be used in the other thread only.
* - call upb_string_get_synchronized_ref(), which will make getref, unref, and
* recycle thread-safe for this upb_string.
*/
#ifndef UPB_STRING_H
#define UPB_STRING_H
#include <assert.h>
#include <string.h>
#include <stdarg.h>
#include "upb_atomic.h"
#include "upb.h"
#ifdef __cplusplus
extern "C" {
#endif
// All members of this struct are private, and may only be read/written through
// the associated functions.
//
// NOTE: _UPB_STRING_INIT initializes these members positionally, so the member
// order here must stay in sync with that macro.
struct _upb_string {
// The string's refcount.  Negative values are the pseudo-refcounts used for
// static and stack-allocated strings (_UPB_STRING_REFCOUNT_STATIC/_STACK).
upb_atomic_t refcount;
// The pointer to our currently active data. This may be memory we own
// or a pointer into memory we don't own.
const char *ptr;
// If non-NULL, this is a block of memory we own. We keep this cached even
// if "ptr" is currently aliasing memory we don't own.
char *cached_mem;
// The effective length of the string (the bytes at ptr).
int32_t len;
#ifndef UPB_HAVE_MSIZE
// How many bytes are allocated in cached_mem.
//
// Many platforms have a function that can tell you the size of a block
// that was previously malloc'd. In this case we can avoid storing the
// size explicitly.
uint32_t size;
#endif
// Used if this is a slice of another string, NULL otherwise. We own a ref
// on src.
struct _upb_string *src;
};
// Internal-only initializer for upb_string instances.  Expands to a positional
// brace initializer in struct _upb_string member order: refcount, ptr,
// cached_mem (NULL), len, size (0; present only when UPB_HAVE_MSIZE is not
// defined), src (NULL).
#ifdef UPB_HAVE_MSIZE
#define _UPB_STRING_INIT(str, len, refcount) {{refcount}, (char*)str, NULL, len, NULL}
#else
#define _UPB_STRING_INIT(str, len, refcount) {{refcount}, (char*)str, NULL, len, 0, NULL}
#endif
// Special pseudo-refcounts for static/stack-allocated strings, respectively.
// Both are negative; upb_string_getref() and upb_string_unref() only count
// strings whose refcount is positive.
#define _UPB_STRING_REFCOUNT_STATIC -1
#define _UPB_STRING_REFCOUNT_STACK -2
// Returns a newly-created, empty, non-finalized string. When the string is no
// longer needed, it should be unref'd, never freed directly.
upb_string *upb_string_new();
// Internal-only; clients should call upb_string_unref().
void _upb_string_free(upb_string *str);
// Releases a ref on the given string, which may free the memory.  "str"
// can be NULL, in which case this is a no-op.  WARNING: NOT THREAD_SAFE
// UNLESS THE STRING IS SYNCHRONIZED.
INLINE void upb_string_unref(upb_string *str) {
  // Only positive refcounts are counted: the negative pseudo-refcounts mark
  // static/stack strings, which are never decremented or freed here.
  if (str && upb_atomic_read(&str->refcount) > 0 &&
      upb_atomic_unref(&str->refcount)) {
    _upb_string_free(str);
  }
}
// If "str" is a substring, drops the ref it holds on its source string and
// clears the link; otherwise does nothing.
static void _upb_string_release(upb_string *str) {
  if (!str->src) return;
  upb_string_unref(str->src);
  str->src = NULL;
}
upb_string *upb_strdup(upb_string *s); // Forward-declare.
// Returns a string with the same contents as "str".  The caller owns a ref on
// the returned string, which may or may not be the same object as "str":
//   - a normally refcounted string gains a ref and is returned as-is;
//   - a stack string is duplicated;
//   - any other string (e.g. a static one) is returned without being counted.
// WARNING: NOT THREAD-SAFE UNLESS THE STRING IS SYNCHRONIZED!
INLINE upb_string *upb_string_getref(upb_string *str) {
  const int refs = upb_atomic_read(&str->refcount);
  if (refs > 0) {
    upb_atomic_ref(&str->refcount);
    return str;
  }
  if (refs == _UPB_STRING_REFCOUNT_STACK) return upb_strdup(str);
  return str;
}
// Returns the length of the string in bytes (the number of bytes at the
// string's data pointer).  "str" must not be NULL.
INLINE upb_strlen_t upb_string_len(upb_string *str) { return str->len; }
// Returns true if "str" is NULL or has zero length.
INLINE bool upb_string_isempty(upb_string *str) {
  if (!str) return true;
  return upb_string_len(str) == 0;
}
// Use to read the bytes of the string.  The caller *must* call
// upb_string_endread() after the data has been read.  The window between
// upb_string_getrobuf() and upb_string_endread() should be kept as short as
// possible, because any pending upb_string_detach() may be blocked until
// upb_string_endread() is called.  No other functions may be called on the
// string during this window except upb_string_len().
//
// The returned pointer is the string's current data pointer; the bytes are
// length-delimited (see upb_string_len()), not NUL-terminated.
INLINE const char *upb_string_getrobuf(upb_string *str) { return str->ptr; }
// Marks the end of a read window opened by upb_string_getrobuf().  Currently
// a no-op: the argument is only referenced to silence unused-parameter
// warnings.
INLINE void upb_string_endread(upb_string *str) { (void)str; }
// Convenience accessor for the end of the string: returns a pointer one past
// its last byte.  Calls upb_string_getrobuf(), so it inherits the caveats of
// that function.
INLINE const char *upb_string_getbufend(upb_string *str) {
  const char *begin = upb_string_getrobuf(str);
  return begin + upb_string_len(str);
}
// Attempts to recycle the string "*_str" so it may be reused and have
// different data written to it.  The caller MUST own a reference on the given
// string prior to making this call (ie. the caller must have either created
// the string or obtained a reference with upb_string_getref()).
//
// After the function returns, "*_str" points to a writable string:
//   - the original object, emptied in place, if the caller held the only ref
//     (or the string is a stack string);
//   - otherwise a newly created string, the caller's ref on the original
//     having been released.
//
// As a special case, passing a pointer to NULL will allocate a new string.
// This is convenient for the pattern:
//
//   upb_string *str = NULL;
//   while (x) {
//     if (y) {
//       upb_string_recycle(&str);
//       upb_src_getstr(str);
//     }
//   }
INLINE void upb_string_recycle(upb_string **_str) {
  upb_string *current = *_str;
  bool reusable = false;
  if (current) {
    int refs = upb_atomic_read(&current->refcount);
    reusable = (refs == 1) || (refs == _UPB_STRING_REFCOUNT_STACK);
  }

  if (!reusable) {
    // Shared (or absent): give up our ref and start over with a new object.
    upb_string_unref(current);
    *_str = upb_string_new();
    return;
  }

  // Sole owner: empty the string in place and detach it from any source.
  current->ptr = NULL;
  current->len = 0;
  _upb_string_release(current);
}
// The options for setting the contents of a string. These may only be called
// when a string is first created or recycled; once other functions have been
// called on the string, these functions are not allowed until the string is
// recycled.
// Gets a pointer suitable for writing to the string, which is guaranteed to
// have at least "len" bytes of data available. The size of the string will
// become "len".
char *upb_string_getrwbuf(upb_string *str, upb_strlen_t len);
// Replaces the contents of str with the contents of the given printf.
size_t upb_string_vprintf_at(upb_string *str, size_t offset, const char *format,
va_list args);
// Like upb_string_vprintf_at() with an offset of 0: the formatted text is
// written from the start of "str".  Returns upb_string_vprintf_at()'s result.
INLINE size_t upb_string_vprintf(upb_string *str, const char *format,
va_list args) {
return upb_string_vprintf_at(str, 0, format, args);
}
// Variadic front end for upb_string_vprintf(): formats the arguments into
// "str" and returns that function's result.
INLINE size_t upb_string_printf(upb_string *str, const char *format, ...) {
  va_list ap;
  va_start(ap, format);
  const size_t count = upb_string_vprintf(str, format, ap);
  va_end(ap);
  return count;
}
// Sets the contents of "str" to be the given substring of "target_str", to
// which the caller must own a ref.
void upb_string_substr(upb_string *str, upb_string *target_str,
upb_strlen_t start, upb_strlen_t len);
// Sketch of an API for allowing upb_strings to reference external, unowned
// data. Waiting for a clear use case before actually implementing it.
//
// Makes the string "str" a reference to the given string data. The caller
// guarantees that the given string data will not change or be deleted until a
// matching call to upb_string_detach(), which may block until any concurrent
// readers have finished reading. upb_string_detach() preserves the contents
// of the string by copying the referenced data if there are any other
// referents.
// void upb_string_attach(upb_string *str, char *ptr, upb_strlen_t len);
// void upb_string_detach(upb_string *str);
// Allows using upb_strings in printf, ie:
// upb_string *str = UPB_STRLIT("Hello, World!\n");
// printf("String is: " UPB_STRFMT, UPB_STRARG(str));
#define UPB_STRARG(str) upb_string_len(str), upb_string_getrobuf(str)
#define UPB_STRFMT "%.*s"
// Macros for constructing upb_string objects statically or on the stack. These
// can be used like:
//
// upb_string static_str = UPB_STATIC_STRING("Foo");
//
// int main() {
// upb_string stack_str = UPB_STACK_STRING("Foo");
// // Now:
// // upb_streql(&static_str, &stack_str) == true
// // upb_streql(&static_str, UPB_STRLIT("Foo")) == true
// }
//
// You can also use UPB_STACK_STRING or UPB_STATIC_STRING with character arrays,
// but you must not change the underlying data once you've passed the string on:
//
// void foo() {
// char data[] = "ABC123";
// upb_string stack_str = UPB_STACK_STRING(data);
// bar(&stack_str);
// data[0] = 'B'; // NOT ALLOWED!!
// }
//
// TODO: should the stack business just be like attach/detach? The latter seems
// more flexible, though it does require a stack allocation. Maybe put this off
// until there is a clear use case.
// Static string over a literal or char array; the length is sizeof(str)-1,
// i.e. it excludes the final byte (a literal's NUL terminator).
#define UPB_STATIC_STRING(str) \
_UPB_STRING_INIT(str, sizeof(str)-1, _UPB_STRING_REFCOUNT_STATIC)
// Static string over a whole array; the length is sizeof(str), so every byte
// of the array is included.
#define UPB_STATIC_STRING_ARRAY(str) \
_UPB_STRING_INIT(str, sizeof(str), _UPB_STRING_REFCOUNT_STATIC)
// Static string over "str" with an explicitly supplied length.
#define UPB_STATIC_STRING_LEN(str, len) \
_UPB_STRING_INIT(str, len, _UPB_STRING_REFCOUNT_STATIC)
// Stack string over a literal or char array; the length is sizeof(str)-1.
// upb_string_getref() duplicates stack strings instead of sharing them.
#define UPB_STACK_STRING(str) \
_UPB_STRING_INIT(str, sizeof(str)-1, _UPB_STRING_REFCOUNT_STACK)
// Stack string over "str" with an explicitly supplied length.
#define UPB_STACK_STRING_LEN(str, len) \
_UPB_STRING_INIT(str, len, _UPB_STRING_REFCOUNT_STACK)
// A convenient way of specifying upb_strings as literals, like:
//
//   upb_streql(UPB_STRLIT("expected"), other_str);
//
// However, this requires either C99 compound initializers or C++.
// Must ONLY be called with a string literal as its argument!
//
// Sketch of a possible C++ counterpart (not enabled):
//#ifdef __cplusplus
//namespace upb {
//class String : public upb_string {
//  // This constructor must ONLY be called with a string literal.
//  String(const char *str) : upb_string(UPB_STATIC_STRING(str)) {}
//};
//}
//#define UPB_STRLIT(str) upb::String(str)
//#endif
//
// The expansion is parenthesized so that it behaves as a single pointer
// expression wherever it is used: without the parentheses, a postfix operator
// written after the macro would bind to the compound literal instead of to
// the result of the address-of operator.
#define UPB_STRLIT(str) (&(upb_string)UPB_STATIC_STRING(str))
// Returns a singleton empty string.
upb_string *upb_emptystring();
/* upb_string library functions ***********************************************/
// Named like their <string.h> counterparts, these are all safe against buffer
// overflow. For the most part these only use the public upb_string interface.
// Tests two strings for equality.  More efficient than upb_strcmp when
// equality is all you need: strings of different lengths are rejected without
// their bytes being examined.
INLINE bool upb_streql(upb_string *s1, upb_string *s2) {
  const upb_strlen_t len = upb_string_len(s1);
  if (len != upb_string_len(s2)) return false;

  const bool same =
      memcmp(upb_string_getrobuf(s1), upb_string_getrobuf(s2), len) == 0;
  upb_string_endread(s1);
  upb_string_endread(s2);
  return same;
}
// Like strcmp().
int upb_strcmp(upb_string *s1, upb_string *s2);
// Compares a upb_string with a block of memory: true when "str" is exactly
// "len" bytes long and those bytes match the first "len" bytes at "buf".
INLINE bool upb_streqllen(upb_string *str, const void *buf, upb_strlen_t len) {
  if (len != upb_string_len(str)) return false;
  return memcmp(upb_string_getrobuf(str), buf, len) == 0;
}
// Compares a upb_string with the NUL-terminated C string at "buf".
INLINE bool upb_streqlc(upb_string *str, const void *buf) {
  // Could be made one-pass.
  const size_t buf_len = strlen((const char*)buf);
  return upb_streqllen(str, buf, buf_len);
}
// Like upb_strcpy, but copies from a buffer and length: "dest" is resized to
// "len" bytes and filled from "src".
INLINE void upb_strcpylen(upb_string *dest, const void *src, upb_strlen_t len) {
  char *out = upb_string_getrwbuf(dest, len);
  memcpy(out, src, len);
}
// Replaces the contents of "dest" with a copy of the contents of "src".
INLINE void upb_strcpy(upb_string *dest, upb_string *src) {
  const char *bytes = upb_string_getrobuf(src);
  upb_strlen_t count = upb_string_len(src);
  upb_strcpylen(dest, bytes, count);
  upb_string_endread(src);
}
// Like upb_strcpy, but copies from a NUL-terminated C string (the terminator
// itself is not copied).
INLINE void upb_strcpyc(upb_string *dest, const void *src) {
  // This makes two passes over src (strlen, then the copy), but that is
  // necessary unless we want to repeatedly re-allocate dest, which seems
  // worse.
  const size_t src_len = strlen((const char*)src);
  upb_strcpylen(dest, src, src_len);
}
// Returns a new string whose contents are a copy of s.
upb_string *upb_strdup(upb_string *s);
// Like upb_strdup(), but duplicates a given buffer and length: returns a newly
// created string holding a copy of the "len" bytes at "src".
INLINE upb_string *upb_strduplen(const void *src, upb_strlen_t len) {
  upb_string *copy = upb_string_new();
  upb_strcpylen(copy, src, len);
  return copy;
}
// Like upb_strdup(), but duplicates a NUL-terminated C string (without its
// terminator).
INLINE upb_string *upb_strdupc(const char *src) {
  const size_t src_len = strlen(src);
  return upb_strduplen(src, src_len);
}
// Returns a newly-allocated NULL-terminated copy of str.
char *upb_string_newcstr(upb_string *str);
// Appends 'append' to 's' in-place, resizing s if necessary.
void upb_strcat(upb_string *s, upb_string *append);
// Returns a newly created string that is a substring of "s": "len" bytes
// starting at "offset".  The bytes are referenced via upb_string_substr(), not
// copied.
INLINE upb_string *upb_strslice(upb_string *s, int offset, int len) {
  upb_string *slice = upb_string_new();
  upb_string_substr(slice, s, offset, len);
  return slice;
}
// Reads an entire file into a newly-allocated string.
upb_string *upb_strreadfile(const char *filename);
// Returns a new string with the contents of the given printf.
upb_string *upb_string_asprintf(const char *format, ...);
#ifdef __cplusplus
} /* extern "C" */
#endif
#endif

@ -8,61 +8,45 @@
#include "upb_strstream.h" #include "upb_strstream.h"
#include <stdlib.h> #include <stdlib.h>
#include "upb_string.h"
/* upb_stringsrc **************************************************************/ /* upb_stringsrc **************************************************************/
static upb_strlen_t upb_stringsrc_read(upb_bytesrc *_src, void *buf, size_t upb_stringsrc_fetch(void *_src, uint64_t ofs, upb_status *s) {
upb_strlen_t count, upb_status *status) { upb_stringsrc *src = _src;
upb_stringsrc *src = (upb_stringsrc*)_src; size_t bytes = src->len - ofs;
if (src->offset == upb_string_len(src->str)) { if (bytes == 0) s->code = UPB_EOF;
status->code = UPB_EOF; return bytes;
return -1;
} else {
upb_strlen_t to_read = UPB_MIN(count, upb_string_len(src->str) - src->offset);
memcpy(buf, upb_string_getrobuf(src->str) + src->offset, to_read);
src->offset += to_read;
return to_read;
}
} }
static bool upb_stringsrc_getstr(upb_bytesrc *_src, upb_string *str, void upb_stringsrc_read(void *_src, uint64_t src_ofs, size_t len, char *dst) {
upb_status *status) { upb_stringsrc *src = _src;
upb_stringsrc *src = (upb_stringsrc*)_src; memcpy(dst, src->str + src_ofs, len);
if (src->offset == upb_string_len(src->str)) { }
status->code = UPB_EOF;
return false; const char *upb_stringsrc_getptr(void *_src, uint64_t ofs, size_t *len) {
} else { upb_stringsrc *src = _src;
upb_strlen_t len = upb_string_len(src->str) - src->offset; *len = src->len - ofs;
upb_string_substr(str, src->str, src->offset, len); return src->str + ofs;
src->offset += len;
assert(src->offset == upb_string_len(src->str));
return true;
}
} }
void upb_stringsrc_init(upb_stringsrc *s) { void upb_stringsrc_init(upb_stringsrc *s) {
static upb_bytesrc_vtbl vtbl = { static upb_bytesrc_vtbl vtbl = {
upb_stringsrc_read, &upb_stringsrc_fetch,
upb_stringsrc_getstr, &upb_stringsrc_read,
&upb_stringsrc_getptr,
NULL, NULL, NULL, NULL
}; };
upb_bytesrc_init(&s->bytesrc, &vtbl); upb_bytesrc_init(&s->bytesrc, &vtbl);
s->str = NULL; s->str = NULL;
} }
void upb_stringsrc_reset(upb_stringsrc *s, upb_string *str) { void upb_stringsrc_reset(upb_stringsrc *s, const char *str, size_t len) {
if (str != s->str) { s->str = str;
upb_string_unref(s->str); s->len = len;
s->str = upb_string_getref(str);
}
s->offset = 0;
}
void upb_stringsrc_uninit(upb_stringsrc *s) {
upb_string_unref(s->str);
} }
void upb_stringsrc_uninit(upb_stringsrc *s) { (void)s; }
upb_bytesrc *upb_stringsrc_bytesrc(upb_stringsrc *s) { upb_bytesrc *upb_stringsrc_bytesrc(upb_stringsrc *s) {
return &s->bytesrc; return &s->bytesrc;
@ -72,44 +56,49 @@ upb_bytesrc *upb_stringsrc_bytesrc(upb_stringsrc *s) {
/* upb_stringsink *************************************************************/ /* upb_stringsink *************************************************************/
void upb_stringsink_uninit(upb_stringsink *s) { void upb_stringsink_uninit(upb_stringsink *s) {
upb_string_unref(s->str); free(s->str);
} }
// Resets the stringsink to a state where it will append to the given string. // Resets the stringsink to a state where it will append to the given string.
// The string must be newly created or recycled. The stringsink will take a // The string must be newly created or recycled. The stringsink will take a
// reference on the string, so the caller need not ensure that it outlives the // reference on the string, so the caller need not ensure that it outlives the
// stringsink. A stringsink can be reset multiple times. // stringsink. A stringsink can be reset multiple times.
void upb_stringsink_reset(upb_stringsink *s, upb_string *str) { void upb_stringsink_reset(upb_stringsink *s, char *str, size_t size) {
if (str != s->str) { free(s->str);
upb_string_unref(s->str); s->str = str;
s->str = upb_string_getref(str); s->len = 0;
} s->size = size;
// Resize to 0.
upb_string_getrwbuf(s->str, 0);
} }
upb_bytesink *upb_stringsink_bytesink(upb_stringsink *s) { upb_bytesink *upb_stringsink_bytesink(upb_stringsink *s) {
return &s->bytesink; return &s->bytesink;
} }
static upb_strlen_t upb_stringsink_vprintf(upb_bytesink *_sink, upb_status *s, static int32_t upb_stringsink_vprintf(void *_s, upb_status *status,
const char *fmt, va_list args) { const char *fmt, va_list args) {
(void)s; // No errors can occur. (void)status; // TODO: report realloc() errors.
upb_stringsink *sink = (upb_stringsink*)_sink; upb_stringsink *s = _s;
return upb_string_vprintf_at(sink->str, upb_string_len(sink->str), fmt, args); int ret = upb_vrprintf(&s->str, &s->size, s->len, fmt, args);
if (ret >= 0) s->len += ret;
return ret;
} }
static upb_strlen_t upb_stringsink_putstr(upb_bytesink *_sink, upb_string *str, bool upb_stringsink_write(void *_s, const char *buf, size_t len,
upb_status *s) { upb_status *status) {
(void)s; // No errors can occur. (void)status; // TODO: report realloc() errors.
upb_stringsink *sink = (upb_stringsink*)_sink; upb_stringsink *s = _s;
upb_strcat(sink->str, str); if (s->len + len > s->size) {
return upb_string_len(str); while(s->len + len > s->size) s->size *= 2;
s->str = realloc(s->str, s->size);
}
memcpy(s->str + s->len, buf, len);
s->len += len;
return true;
} }
void upb_stringsink_init(upb_stringsink *s) { void upb_stringsink_init(upb_stringsink *s) {
static upb_bytesink_vtbl vtbl = { static upb_bytesink_vtbl vtbl = {
upb_stringsink_putstr, upb_stringsink_write,
upb_stringsink_vprintf upb_stringsink_vprintf
}; };
upb_bytesink_init(&s->bytesink, &vtbl); upb_bytesink_init(&s->bytesink, &vtbl);

@ -21,8 +21,8 @@ extern "C" {
struct _upb_stringsrc { struct _upb_stringsrc {
upb_bytesrc bytesrc; upb_bytesrc bytesrc;
upb_string *str; const char *str;
upb_strlen_t offset; size_t len;
}; };
typedef struct _upb_stringsrc upb_stringsrc; typedef struct _upb_stringsrc upb_stringsrc;
@ -33,9 +33,9 @@ void upb_stringsrc_uninit(upb_stringsrc *s);
// Resets the stringsrc to a state where it will vend the given string. The // Resets the stringsrc to a state where it will vend the given string. The
// stringsrc will take a reference on the string, so the caller need not ensure // stringsrc will take a reference on the string, so the caller need not ensure
// that it outlives the stringsrc. A stringsrc can be reset multiple times. // that it outlives the stringsrc. A stringsrc can be reset multiple times.
void upb_stringsrc_reset(upb_stringsrc *s, upb_string *str); void upb_stringsrc_reset(upb_stringsrc *s, const char *str, size_t len);
// Returns the upb_bytesrc* for this stringsrc. Invalidated by reset above. // Returns the upb_bytesrc* for this stringsrc.
upb_bytesrc *upb_stringsrc_bytesrc(upb_stringsrc *s); upb_bytesrc *upb_stringsrc_bytesrc(upb_stringsrc *s);
@ -43,7 +43,8 @@ upb_bytesrc *upb_stringsrc_bytesrc(upb_stringsrc *s);
struct _upb_stringsink { struct _upb_stringsink {
upb_bytesink bytesink; upb_bytesink bytesink;
upb_string *str; char *str;
size_t len, size;
}; };
typedef struct _upb_stringsink upb_stringsink; typedef struct _upb_stringsink upb_stringsink;
@ -51,11 +52,14 @@ typedef struct _upb_stringsink upb_stringsink;
void upb_stringsink_init(upb_stringsink *s); void upb_stringsink_init(upb_stringsink *s);
void upb_stringsink_uninit(upb_stringsink *s); void upb_stringsink_uninit(upb_stringsink *s);
// Resets the stringsink to a state where it will append to the given string. // Resets the sink's string to "str", which the sink takes ownership of.
// The string must be newly created or recycled. The stringsink will take a // "str" may be NULL, which will make the sink allocate a new string.
// reference on the string, so the caller need not ensure that it outlives the void upb_stringsink_reset(upb_stringsink *s, char *str, size_t size);
// stringsink. A stringsink can be reset multiple times.
void upb_stringsink_reset(upb_stringsink *s, upb_string *str); // Releases ownership of the returned string (which is "len" bytes long) and
// resets the internal string to be empty again (as if reset were called with
// NULL).
const char *upb_stringsink_release(upb_stringsink *s, size_t *len);
// Returns the upb_bytesink* for this stringsrc. Invalidated by reset above. // Returns the upb_bytesink* for this stringsrc. Invalidated by reset above.
upb_bytesink *upb_stringsink_bytesink(); upb_bytesink *upb_stringsink_bytesink();

@ -97,7 +97,7 @@ static uint32_t empty_intbucket(upb_inttable *table)
// The insert routines have a lot more code duplication between int/string // The insert routines have a lot more code duplication between int/string
// variants than I would like, but there's just a bit too much that varies to // variants than I would like, but there's just a bit too much that varies to
// parameterize them. // parameterize them.
static void intinsert(upb_inttable *t, upb_inttable_key_t key, void *val) { static void intinsert(upb_inttable *t, uint32_t key, const void *val) {
assert(upb_inttable_lookup(t, key) == NULL); assert(upb_inttable_lookup(t, key) == NULL);
upb_inttable_value *table_val; upb_inttable_value *table_val;
if (_upb_inttable_isarrkey(t, key)) { if (_upb_inttable_isarrkey(t, key)) {
@ -160,7 +160,7 @@ static void upb_inttable_insertall(upb_inttable *dst, upb_inttable *src) {
} }
} }
void upb_inttable_insert(upb_inttable *t, upb_inttable_key_t key, void *val) { void upb_inttable_insert(upb_inttable *t, uint32_t key, const void *val) {
if((double)(t->t.count + 1) / upb_inttable_hashtablesize(t) > MAX_LOAD) { if((double)(t->t.count + 1) / upb_inttable_hashtablesize(t) > MAX_LOAD) {
//printf("RESIZE!\n"); //printf("RESIZE!\n");
// Need to resize. Allocate new table with double the size of however many // Need to resize. Allocate new table with double the size of however many
@ -181,7 +181,7 @@ void upb_inttable_insert(upb_inttable *t, upb_inttable_key_t key, void *val) {
void upb_inttable_compact(upb_inttable *t) { void upb_inttable_compact(upb_inttable *t) {
// Find the largest array part we can that satisfies the MIN_DENSITY // Find the largest array part we can that satisfies the MIN_DENSITY
// definition. For now we just count down powers of two. // definition. For now we just count down powers of two.
upb_inttable_key_t largest_key = 0; uint32_t largest_key = 0;
for(upb_inttable_iter i = upb_inttable_begin(t); !upb_inttable_done(i); for(upb_inttable_iter i = upb_inttable_begin(t); !upb_inttable_done(i);
i = upb_inttable_next(t, i)) { i = upb_inttable_next(t, i)) {
largest_key = UPB_MAX(largest_key, upb_inttable_iter_key(i)); largest_key = UPB_MAX(largest_key, upb_inttable_iter_key(i));
@ -260,6 +260,8 @@ upb_inttable_iter upb_inttable_next(upb_inttable *t, upb_inttable_iter iter) {
/* upb_strtable ***************************************************************/ /* upb_strtable ***************************************************************/
static upb_strtable_entry *strent(upb_strtable *t, int32_t i) { static upb_strtable_entry *strent(upb_strtable *t, int32_t i) {
//fprintf(stderr, "i: %d, table_size: %d\n", i, upb_table_size(&t->t));
assert(i <= (int32_t)upb_table_size(&t->t));
return UPB_INDEX(t->t.entries, i, t->t.entry_size); return UPB_INDEX(t->t.entries, i, t->t.entry_size);
} }
@ -267,121 +269,134 @@ static uint32_t upb_strtable_size(upb_strtable *t) {
return upb_table_size(&t->t); return upb_table_size(&t->t);
} }
void upb_strtable_init(upb_strtable *t, uint32_t size, uint16_t entsize) { void upb_strtable_init(upb_strtable *t, uint32_t size, uint16_t valuesize) {
t->t.value_size = valuesize;
size_t entsize = upb_align_up(sizeof(upb_strtable_header) + valuesize, 8);
upb_table_init(&t->t, size, entsize); upb_table_init(&t->t, size, entsize);
for (uint32_t i = 0; i < upb_table_size(&t->t); i++) { for (uint32_t i = 0; i < upb_table_size(&t->t); i++) {
upb_strtable_entry *e = strent(t, i); upb_strtable_entry *e = strent(t, i);
e->key = NULL; e->hdr.key = NULL;
e->next = UPB_END_OF_CHAIN; e->hdr.next = UPB_END_OF_CHAIN;
} }
} }
void upb_strtable_free(upb_strtable *t) { void upb_strtable_free(upb_strtable *t) {
// Free refs from the strtable. // Free keys from the strtable.
upb_strtable_entry *e = upb_strtable_begin(t); upb_strtable_iter i;
for(; e; e = upb_strtable_next(t, e)) { for(upb_strtable_begin(&i, t); !upb_strtable_done(&i); upb_strtable_next(&i))
upb_string_unref(e->key); free((char*)upb_strtable_iter_key(&i));
}
upb_table_free(&t->t); upb_table_free(&t->t);
} }
static uint32_t strtable_bucket(upb_strtable *t, upb_string *key) static uint32_t strtable_bucket(upb_strtable *t, const char *key) {
{ uint32_t hash = MurmurHash2(key, strlen(key), 0);
uint32_t hash = MurmurHash2(upb_string_getrobuf(key), upb_string_len(key), 0);
return (hash & t->t.mask); return (hash & t->t.mask);
} }
void *upb_strtable_lookup(upb_strtable *t, upb_string *key) void *upb_strtable_lookup(upb_strtable *t, const char *key) {
{
uint32_t bucket = strtable_bucket(t, key); uint32_t bucket = strtable_bucket(t, key);
upb_strtable_entry *e; upb_strtable_entry *e;
do { do {
e = strent(t, bucket); e = strent(t, bucket);
if(e->key && upb_streql(e->key, key)) return e; if(e->hdr.key && strcmp(e->hdr.key, key) == 0) return &e->val;
} while((bucket = e->next) != UPB_END_OF_CHAIN); } while((bucket = e->hdr.next) != UPB_END_OF_CHAIN);
return NULL; return NULL;
} }
// Looks up a key given as a pointer plus explicit length; |key| does not need
// to be NUL-terminated.
//
// Stored keys are NUL-terminated, so only the part of |key| before its first
// NUL byte (if any lies within |len|) takes part in the lookup.  This matches
// what copying the bytes into a terminated buffer and calling
// upb_strtable_lookup() would do, but avoids a stack buffer whose size is
// controlled by the caller.
//
// Returns a pointer to the entry's value, or NULL if there is no match.
void *upb_strtable_lookupl(upb_strtable *t, const char *key, size_t len) {
  const char *nul = memchr(key, '\0', len);
  if (nul) len = (size_t)(nul - key);

  uint32_t bucket = MurmurHash2(key, len, 0) & t->t.mask;
  upb_strtable_entry *e;
  do {
    e = strent(t, bucket);
    // strncmp() == 0 proves the stored key has at least |len| matching,
    // non-NUL bytes, so reading hdr.key[len] is in bounds; it must be the
    // terminator for the lengths to agree.
    if (e->hdr.key && strncmp(e->hdr.key, key, len) == 0 &&
        e->hdr.key[len] == '\0') {
      return &e->val;
    }
  } while ((bucket = e->hdr.next) != UPB_END_OF_CHAIN);
  return NULL;
}
// Returns the index of the first unoccupied slot (NULL key), scanning from the
// front of the table.  The caller must guarantee that a free slot exists; the
// assert fires otherwise.
static uint32_t empty_strbucket(upb_strtable *table) {
  // TODO: does it matter that this is biased towards the front of the table?
  for (uint32_t i = 0; i < upb_strtable_size(table); i++) {
    upb_strtable_entry *e = strent(table, i);
    if (!e->hdr.key) return i;
  }
  assert(false);
  return 0;
}
// Inserts |key| -> |val| into |t| without checking the load factor.
//
// |key| must not already be present (asserted).  The table stores its own
// strdup() copy of |key| and copies value_size bytes from |val|.
//
// Collisions are resolved with internal chaining: if the key's main bucket is
// taken by an entry that also belongs there, the new entry goes into a free
// slot appended to that chain; if the occupant belongs to a different chain,
// the occupant is moved to a free slot and the new entry takes the main bucket.
//
// NOTE(review): the result of strdup() is not checked here.
static void strinsert(upb_strtable *t, const char *key, const void *val) {
  assert(upb_strtable_lookup(t, key) == NULL);
  t->t.count++;
  uint32_t bucket = strtable_bucket(t, key);
  upb_strtable_entry *table_e = strent(t, bucket);
  if (table_e->hdr.key) {  /* Collision. */
    if (bucket == strtable_bucket(t, table_e->hdr.key)) {
      /* Existing element is in its main position.  Find an empty slot to
       * place our new element and append it to this key's chain. */
      uint32_t empty_bucket = empty_strbucket(t);
      while (table_e->hdr.next != UPB_END_OF_CHAIN)
        table_e = strent(t, table_e->hdr.next);
      table_e->hdr.next = empty_bucket;
      table_e = strent(t, empty_bucket);
    } else {
      /* Existing element is not in its main position.  Move it to an empty
       * slot and put our element in its main position. */
      uint32_t empty_bucket = empty_strbucket(t);
      uint32_t evictee_bucket = strtable_bucket(t, table_e->hdr.key);
      memcpy(strent(t, empty_bucket), table_e, t->t.entry_size);  /* copies next */
      /* Walk the evictee's chain to find its predecessor and repoint it at
       * the evictee's new slot. */
      upb_strtable_entry *evictee_e = strent(t, evictee_bucket);
      while (1) {
        assert(evictee_e->hdr.key);
        assert(evictee_e->hdr.next != UPB_END_OF_CHAIN);
        if (evictee_e->hdr.next == bucket) {
          evictee_e->hdr.next = empty_bucket;
          break;
        }
        evictee_e = strent(t, evictee_e->hdr.next);
      }
      /* table_e remains set to our mainpos. */
    }
  }
  memcpy(&table_e->val, val, t->t.value_size);
  table_e->hdr.key = strdup(key);
  table_e->hdr.next = UPB_END_OF_CHAIN;
  assert(upb_strtable_lookup(t, key) == &table_e->val);
}
// Inserts |key| -> |val| into |t|, growing the table first if the insertion
// would push the load factor above MAX_LOAD.
//
// Growing builds a table of twice the size, re-inserts every existing entry
// (each key is duplicated again by strinsert()), frees the old table together
// with its keys, and then replaces *t with the new table.
void upb_strtable_insert(upb_strtable *t, const char *key, const void *val) {
  if ((double)(t->t.count + 1) / upb_strtable_size(t) > MAX_LOAD) {
    // Need to resize.  New table of double the size, add old elements to it.
    upb_strtable new_table;
    upb_strtable_init(&new_table, upb_strtable_size(t)*2, t->t.value_size);
    upb_strtable_iter i;
    upb_strtable_begin(&i, t);
    for (; !upb_strtable_done(&i); upb_strtable_next(&i)) {
      strinsert(&new_table,
                upb_strtable_iter_key(&i),
                upb_strtable_iter_value(&i));
    }
    upb_strtable_free(t);
    *t = new_table;
  }
  strinsert(t, key, val);
}
void *upb_strtable_begin(upb_strtable *t) { void upb_strtable_begin(upb_strtable_iter *i, upb_strtable *t) {
return upb_strtable_next(t, strent(t, -1)); i->e = strent(t, -1);
i->t = t;
upb_strtable_next(i);
} }
void *upb_strtable_next(upb_strtable *t, upb_strtable_entry *cur) { void upb_strtable_next(upb_strtable_iter *i) {
upb_strtable_entry *end = strent(t, upb_strtable_size(t)); upb_strtable_entry *end = strent(i->t, upb_strtable_size(i->t));
upb_strtable_entry *cur = i->e;
do { do {
cur = (void*)((char*)cur + t->t.entry_size); cur = (void*)((char*)cur + i->t->t.entry_size);
if(cur == end) return NULL; if(cur == end) { i->e = NULL; return; }
} while(cur->key == NULL); } while(cur->hdr.key == NULL);
return cur; i->e = cur;
} }
#ifdef UPB_UNALIGNED_READS_OK #ifdef UPB_UNALIGNED_READS_OK

@ -18,14 +18,11 @@
#include <assert.h> #include <assert.h>
#include "upb.h" #include "upb.h"
#include "upb_string.h"
#ifdef __cplusplus #ifdef __cplusplus
extern "C" { extern "C" {
#endif #endif
typedef uint32_t upb_inttable_key_t;
#define UPB_END_OF_CHAIN (uint32_t)-1 #define UPB_END_OF_CHAIN (uint32_t)-1
typedef struct { typedef struct {
@ -34,7 +31,7 @@ typedef struct {
} upb_inttable_value; } upb_inttable_value;
// Per-entry header of an integer-keyed table.
typedef struct {
  uint32_t key;   // The entry's integer key.
  uint32_t next;  // Internal chaining.
} upb_inttable_header;
@ -48,8 +45,13 @@ typedef struct {
// performance by letting us compare hashes before comparing lengths or the // performance by letting us compare hashes before comparing lengths or the
// strings themselves. // strings themselves.
// Per-entry header of a string-keyed table.  A NULL key marks an empty slot.
typedef struct {
  char *key;         // We own, nullz.  TODO: store explicit len?
  uint32_t next;     // Internal chaining.
} upb_strtable_header;

// A string table entry: the header followed by the value bytes.  The real
// value may be larger than |val|; its size is the table's value_size.
typedef struct {
  upb_strtable_header hdr;
  uint32_t val;  // Val is at least 32 bits.
} upb_strtable_entry;
typedef struct { typedef struct {
@ -81,7 +83,7 @@ typedef struct {
// when looked up! // when looked up!
void upb_inttable_init(upb_inttable *table, uint32_t size, uint16_t value_size); void upb_inttable_init(upb_inttable *table, uint32_t size, uint16_t value_size);
void upb_inttable_free(upb_inttable *table); void upb_inttable_free(upb_inttable *table);
void upb_strtable_init(upb_strtable *table, uint32_t size, uint16_t entry_size); // TODO: update void upb_strtable_init(upb_strtable *table, uint32_t size, uint16_t value_size);
void upb_strtable_free(upb_strtable *table); void upb_strtable_free(upb_strtable *table);
// Number of values in the hash table. // Number of values in the hash table.
@ -97,11 +99,13 @@ INLINE uint32_t upb_strtable_count(upb_strtable *t) {
// not already exist in the hash table. The data will be copied from val into // not already exist in the hash table. The data will be copied from val into
// the hashtable (the amount of data copied comes from value_size when the // the hashtable (the amount of data copied comes from value_size when the
// table was constructed). Therefore the data at val may be freed once the // table was constructed). Therefore the data at val may be freed once the
// call returns. For string tables, the table takes a ref on str. // call returns. For string tables, the table stores its own copy of the key, so the caller keeps ownership of the string it passed in.
// //
// WARNING: the lowest bit of val is reserved and will be overwritten! // WARNING: the lowest bit of val is reserved and will be overwritten!
void upb_inttable_insert(upb_inttable *t, upb_inttable_key_t key, void *val); void upb_inttable_insert(upb_inttable *t, uint32_t key, const void *val);
void upb_strtable_insert(upb_strtable *t, upb_strtable_entry *ent); // TODO: update // TODO: may want to allow for more complex keys with custom hash/comparison
// functions.
void upb_strtable_insert(upb_strtable *t, const char *key, const void *val);
void upb_inttable_compact(upb_inttable *t); void upb_inttable_compact(upb_inttable *t);
INLINE void upb_strtable_clear(upb_strtable *t) { INLINE void upb_strtable_clear(upb_strtable *t) {
// TODO: improve. // TODO: improve.
@ -110,14 +114,14 @@ INLINE void upb_strtable_clear(upb_strtable *t) {
upb_strtable_init(t, 8, entry_size); upb_strtable_init(t, 8, entry_size);
} }
// Returns the bucket index for integer key |k|: the key masked with the
// table's mask.
INLINE uint32_t _upb_inttable_bucket(upb_inttable *t, uint32_t k) {
  uint32_t bucket = k & t->t.mask;  // Identity hash for ints.
  // UPB_END_OF_CHAIN is the chain terminator and must never be a bucket index.
  assert(bucket != UPB_END_OF_CHAIN);
  return bucket;
}
// Returns true if this key belongs in the array part of the table, i.e. it is
// smaller than the table's array_size.
INLINE bool _upb_inttable_isarrkey(upb_inttable *t, uint32_t k) {
  return (k < t->array_size);
}
@ -162,21 +166,44 @@ INLINE void *upb_inttable_lookup(upb_inttable *t, uint32_t key) {
return _upb_inttable_fastlookup(t, key, t->t.entry_size, t->t.value_size); return _upb_inttable_fastlookup(t, key, t->t.entry_size, t->t.value_size);
} }
void *upb_strtable_lookup(upb_strtable *t, upb_string *key); void *upb_strtable_lookupl(upb_strtable *t, const char *key, size_t len);
void *upb_strtable_lookup(upb_strtable *t, const char *key);
/* upb_strtable_iter **********************************************************/
// Strtable iteration. Order is undefined. Insertions invalidate iterators.
// upb_strtable_iter i;
// for(upb_strtable_begin(&i, t); !upb_strtable_done(&i); upb_strtable_next(&i)) {
// const char *key = upb_strtable_iter_key(&i);
// const myval *val = upb_strtable_iter_value(&i);
// // ...
// }
// Iterator state for walking a upb_strtable.
typedef struct {
// The table being iterated.
upb_strtable *t;
// The current entry, or NULL once iteration has finished.
upb_strtable_entry *e;
} upb_strtable_iter;
// Positions |i| on the first occupied entry of |t| (or marks it done if there
// is none).
void upb_strtable_begin(upb_strtable_iter *i, upb_strtable *t);
// Advances |i| to the next occupied entry (or marks it done at the end).
void upb_strtable_next(upb_strtable_iter *i);
// Returns true once the iterator has moved past the last entry.
INLINE bool upb_strtable_done(upb_strtable_iter *i) { return i->e == NULL; }
// Returns the key of the current entry.  Dereferences the current entry, so it
// must not be called once upb_strtable_done() is true.
INLINE const char *upb_strtable_iter_key(upb_strtable_iter *i) {
return i->e->hdr.key;
}
// Returns a pointer to the value stored in the current entry.  Must not be
// called once upb_strtable_done() is true.
INLINE const void *upb_strtable_iter_value(upb_strtable_iter *i) {
return &i->e->val;
}
// Provides iteration over the table. The order in which the entries are /* upb_inttable_iter **********************************************************/
// returned is undefined. Insertions invalidate iterators.
void *upb_strtable_begin(upb_strtable *t);
void *upb_strtable_next(upb_strtable *t, upb_strtable_entry *cur);
// Inttable iteration (should update strtable iteration to use this scheme too). // Inttable iteration. Order is undefined. Insertions invalidate iterators.
// The order is undefined.
// for(upb_inttable_iter i = upb_inttable_begin(t); !upb_inttable_done(i); // for(upb_inttable_iter i = upb_inttable_begin(t); !upb_inttable_done(i);
// i = upb_inttable_next(t, i)) { // i = upb_inttable_next(t, i)) {
// // ... // // ...
// } // }
typedef struct { typedef struct {
upb_inttable_key_t key; uint32_t key;
upb_inttable_value *value; upb_inttable_value *value;
bool array_part; bool array_part;
} upb_inttable_iter; } upb_inttable_iter;
@ -184,7 +211,7 @@ typedef struct {
upb_inttable_iter upb_inttable_begin(upb_inttable *t); upb_inttable_iter upb_inttable_begin(upb_inttable *t);
upb_inttable_iter upb_inttable_next(upb_inttable *t, upb_inttable_iter iter); upb_inttable_iter upb_inttable_next(upb_inttable *t, upb_inttable_iter iter);
// Returns true once the iterator no longer refers to an entry (NULL value).
INLINE bool upb_inttable_done(upb_inttable_iter iter) {
  return iter.value == NULL;
}
// Returns the key recorded in the iterator for the current entry.
INLINE uint32_t upb_inttable_iter_key(upb_inttable_iter iter) {
  return iter.key;
}
INLINE void *upb_inttable_iter_value(upb_inttable_iter iter) { INLINE void *upb_inttable_iter_value(upb_inttable_iter iter) {

@ -21,12 +21,15 @@ struct _upb_textprinter {
#define CHECK(x) if ((x) < 0) goto err; #define CHECK(x) if ((x) < 0) goto err;
static int upb_textprinter_putescaped(upb_textprinter *p, upb_string *str, static int upb_textprinter_putescaped(upb_textprinter *p, upb_strref *strref,
bool preserve_utf8) { bool preserve_utf8) {
// Based on CEscapeInternal() from Google's protobuf release. // Based on CEscapeInternal() from Google's protobuf release.
// TODO; we could read directly from a bytesrc's buffer instead.
// TODO; we could write directly into a bytesink's buffer instead. // TODO; we could write directly into a bytesink's buffer instead.
char dstbuf[4096], *dst = dstbuf, *dstend = dstbuf + sizeof(dstbuf); char dstbuf[4096], *dst = dstbuf, *dstend = dstbuf + sizeof(dstbuf);
const char *src = upb_string_getrobuf(str), *end = src + upb_string_len(str); char buf[strref->len], *src = buf;
char *end = src + strref->len;
upb_strref_read(strref, src);
// I think hex is prettier and more useful, but proto2 uses octal; should // I think hex is prettier and more useful, but proto2 uses octal; should
// investigate whether it can parse hex also. // investigate whether it can parse hex also.
@ -35,8 +38,7 @@ static int upb_textprinter_putescaped(upb_textprinter *p, upb_string *str,
for (; src < end; src++) { for (; src < end; src++) {
if (dstend - dst < 4) { if (dstend - dst < 4) {
upb_string str = UPB_STACK_STRING_LEN(dstbuf, dst - dstbuf); CHECK(upb_bytesink_write(p->bytesink, dstbuf, dst - dstbuf, &p->status));
CHECK(upb_bytesink_putstr(p->bytesink, &str, &p->status));
dst = dstbuf; dst = dstbuf;
} }
@ -64,8 +66,7 @@ static int upb_textprinter_putescaped(upb_textprinter *p, upb_string *str,
last_hex_escape = is_hex_escape; last_hex_escape = is_hex_escape;
} }
// Flush remaining data. // Flush remaining data.
upb_string outstr = UPB_STACK_STRING_LEN(dstbuf, dst - dstbuf); CHECK(upb_bytesink_write(p->bytesink, dst, dst - dstbuf, &p->status));
CHECK(upb_bytesink_putstr(p->bytesink, &outstr, &p->status));
return 0; return 0;
err: err:
return -1; return -1;
@ -74,7 +75,7 @@ err:
static int upb_textprinter_indent(upb_textprinter *p) { static int upb_textprinter_indent(upb_textprinter *p) {
if(!p->single_line) if(!p->single_line)
for(int i = 0; i < p->indent_depth; i++) for(int i = 0; i < p->indent_depth; i++)
CHECK(upb_bytesink_putstr(p->bytesink, UPB_STRLIT(" "), &p->status)); CHECK(upb_bytesink_writestr(p->bytesink, " ", &p->status));
return 0; return 0;
err: err:
return -1; return -1;
@ -82,9 +83,9 @@ err:
static int upb_textprinter_endfield(upb_textprinter *p) { static int upb_textprinter_endfield(upb_textprinter *p) {
if(p->single_line) { if(p->single_line) {
CHECK(upb_bytesink_putstr(p->bytesink, UPB_STRLIT(" "), &p->status)); CHECK(upb_bytesink_writestr(p->bytesink, " ", &p->status));
} else { } else {
CHECK(upb_bytesink_putstr(p->bytesink, UPB_STRLIT("\n"), &p->status)); CHECK(upb_bytesink_writestr(p->bytesink, "\n", &p->status));
} }
return 0; return 0;
err: err:
@ -96,7 +97,7 @@ static upb_flow_t upb_textprinter_value(void *_p, upb_value fval,
upb_textprinter *p = _p; upb_textprinter *p = _p;
upb_fielddef *f = upb_value_getfielddef(fval); upb_fielddef *f = upb_value_getfielddef(fval);
upb_textprinter_indent(p); upb_textprinter_indent(p);
CHECK(upb_bytesink_printf(p->bytesink, &p->status, UPB_STRFMT ": ", UPB_STRARG(f->name))); CHECK(upb_bytesink_printf(p->bytesink, &p->status, "%s: ", f->name));
#define CASE(fmtstr, member) \ #define CASE(fmtstr, member) \
CHECK(upb_bytesink_printf(p->bytesink, &p->status, fmtstr, upb_value_get ## member(val))); break; CHECK(upb_bytesink_printf(p->bytesink, &p->status, fmtstr, upb_value_get ## member(val))); break;
switch(f->type) { switch(f->type) {
@ -118,12 +119,11 @@ static upb_flow_t upb_textprinter_value(void *_p, upb_value fval,
CASE("%" PRIu32, uint32); CASE("%" PRIu32, uint32);
case UPB_TYPE(ENUM): { case UPB_TYPE(ENUM): {
upb_enumdef *enum_def = upb_downcast_enumdef(f->def); upb_enumdef *enum_def = upb_downcast_enumdef(f->def);
upb_string *enum_label = const char *label = upb_enumdef_iton(enum_def, upb_value_getint32(val));
upb_enumdef_iton(enum_def, upb_value_getint32(val)); if (label) {
if (enum_label) {
// We found a corresponding string for this enum. Otherwise we fall // We found a corresponding string for this enum. Otherwise we fall
// through to the int32 code path. // through to the int32 code path.
CHECK(upb_bytesink_putstr(p->bytesink, enum_label, &p->status)); CHECK(upb_bytesink_writestr(p->bytesink, label, &p->status));
break; break;
} }
} }
@ -134,12 +134,13 @@ static upb_flow_t upb_textprinter_value(void *_p, upb_value fval,
case UPB_TYPE(BOOL): case UPB_TYPE(BOOL):
CASE("%hhu", bool); CASE("%hhu", bool);
case UPB_TYPE(STRING): case UPB_TYPE(STRING):
case UPB_TYPE(BYTES): case UPB_TYPE(BYTES): {
CHECK(upb_bytesink_putstr(p->bytesink, UPB_STRLIT("\""), &p->status)); CHECK(upb_bytesink_writestr(p->bytesink, "\"", &p->status));
CHECK(upb_textprinter_putescaped(p, upb_value_getstr(val), CHECK(upb_textprinter_putescaped(p, upb_value_getstrref(val),
f->type == UPB_TYPE(STRING))); f->type == UPB_TYPE(STRING)));
CHECK(upb_bytesink_putstr(p->bytesink, UPB_STRLIT("\""), &p->status)); CHECK(upb_bytesink_writestr(p->bytesink, "\"", &p->status));
break; break;
}
} }
upb_textprinter_endfield(p); upb_textprinter_endfield(p);
return UPB_CONTINUE; return UPB_CONTINUE;
@ -151,11 +152,10 @@ static upb_sflow_t upb_textprinter_startsubmsg(void *_p, upb_value fval) {
upb_textprinter *p = _p; upb_textprinter *p = _p;
upb_fielddef *f = upb_value_getfielddef(fval); upb_fielddef *f = upb_value_getfielddef(fval);
upb_textprinter_indent(p); upb_textprinter_indent(p);
bool ret = upb_bytesink_printf(p->bytesink, &p->status, bool ret = upb_bytesink_printf(p->bytesink, &p->status, "%s {", f->name);
UPB_STRFMT " {", UPB_STRARG(f->name));
if (!ret) return UPB_SBREAK; if (!ret) return UPB_SBREAK;
if (!p->single_line) if (!p->single_line)
upb_bytesink_putstr(p->bytesink, UPB_STRLIT("\n"), &p->status); upb_bytesink_writestr(p->bytesink, "\n", &p->status);
p->indent_depth++; p->indent_depth++;
return UPB_CONTINUE_WITH(_p); return UPB_CONTINUE_WITH(_p);
} }
@ -165,7 +165,7 @@ static upb_flow_t upb_textprinter_endsubmsg(void *_p, upb_value fval) {
upb_textprinter *p = _p; upb_textprinter *p = _p;
p->indent_depth--; p->indent_depth--;
upb_textprinter_indent(p); upb_textprinter_indent(p);
upb_bytesink_putstr(p->bytesink, UPB_STRLIT("}"), &p->status); upb_bytesink_writestr(p->bytesink, "}", &p->status);
upb_textprinter_endfield(p); upb_textprinter_endfield(p);
return UPB_CONTINUE; return UPB_CONTINUE;
} }

@ -83,16 +83,13 @@ upb_decoderet upb_vdecode_max8_massimino(upb_decoderet r);
// Template for a function that checks the first two bytes with branching
// and dispatches 2-10 bytes with a separate function.
//
// The generated function upb_vdecode_check2_<name>(p) returns a upb_decoderet
// holding the pointer just past the consumed bytes and the decoded value:
//   - one-byte varint: returned directly;
//   - two-byte varint: the low 7 bits of each byte are combined and returned;
//   - longer: the partial result (first two bytes consumed) is handed to
//     decode_max8_function to finish.
// The second byte is only read when the first byte has its continuation bit
// set.
#define UPB_VARINT_DECODER_CHECK2(name, decode_max8_function)                  \
INLINE upb_decoderet upb_vdecode_check2_ ## name(const char *_p) {            \
  const uint8_t *p = (const uint8_t*)_p;                                       \
  if ((*p & 0x80) == 0) { upb_decoderet r = {_p + 1, *p & 0x7f}; return r; }  \
  upb_decoderet r = {_p + 2, (*p & 0x7f) | ((*(p + 1) & 0x7f) << 7)};         \
  if ((*(p + 1) & 0x80) == 0) return r;                                        \
  return decode_max8_function(r);                                              \
}
UPB_VARINT_DECODER_CHECK2(wright, upb_vdecode_max8_wright); UPB_VARINT_DECODER_CHECK2(wright, upb_vdecode_max8_wright);

@ -1,4 +1,5 @@
#include <stdlib.h>
#include "upb_decoder.h" #include "upb_decoder.h"
#include "upb_textprinter.h" #include "upb_textprinter.h"
#include "upb_stdio.h" #include "upb_stdio.h"
@ -11,20 +12,21 @@ int main(int argc, char *argv[]) {
} }
upb_symtab *symtab = upb_symtab_new(); upb_symtab *symtab = upb_symtab_new();
upb_string *desc = upb_strreadfile(argv[1]); size_t desc_len;
const char *desc = upb_readfile(argv[1], &desc_len);
if (!desc) { if (!desc) {
fprintf(stderr, "Couldn't open descriptor file: %s\n", argv[1]); fprintf(stderr, "Couldn't open descriptor file: %s\n", argv[1]);
return 1; return 1;
} }
upb_status status = UPB_STATUS_INIT; upb_status status = UPB_STATUS_INIT;
upb_read_descriptor(symtab, desc, &status); upb_read_descriptor(symtab, desc, desc_len, &status);
if (!upb_ok(&status)) { if (!upb_ok(&status)) {
fprintf(stderr, "Error parsing descriptor: "); fprintf(stderr, "Error parsing descriptor: ");
upb_printerr(&status); upb_printerr(&status);
return 1; return 1;
} }
upb_string_unref(desc); free((void*)desc);
upb_string *name = upb_strdupc(argv[2]); upb_string *name = upb_strdupc(argv[2]);
upb_def *md = upb_symtab_lookup(symtab, name); upb_def *md = upb_symtab_lookup(symtab, name);
@ -40,19 +42,20 @@ int main(int argc, char *argv[]) {
return 1; return 1;
} }
upb_stdio *in = upb_stdio_new(); upb_stdio in, out;
upb_stdio_reset(in, stdin); upb_stdio_init(&in);
upb_stdio *out = upb_stdio_new(); upb_stdio_init(&out);
upb_stdio_reset(out, stdout); upb_stdio_reset(&in, stdin);
upb_stdio_reset(&out, stdout);
upb_handlers *handlers = upb_handlers_new(); upb_handlers *handlers = upb_handlers_new();
upb_textprinter *p = upb_textprinter_new(); upb_textprinter *p = upb_textprinter_new();
upb_textprinter_reset(p, upb_stdio_bytesink(out), false); upb_textprinter_reset(p, upb_stdio_bytesink(&out), false);
upb_textprinter_reghandlers(handlers, m); upb_textprinter_reghandlers(handlers, m);
upb_decoder d; upb_decoder d;
upb_decoder_initforhandlers(&d, handlers); upb_decoder_initforhandlers(&d, handlers);
upb_decoder_reset(&d, upb_stdio_bytesrc(in), p); upb_decoder_reset(&d, upb_stdio_bytesrc(&in), 0, UINT64_MAX, p);
upb_clearerr(&status); upb_clearerr(&status);
upb_decoder_decode(&d, &status); upb_decoder_decode(&d, &status);
@ -63,8 +66,8 @@ int main(int argc, char *argv[]) {
} }
upb_status_uninit(&status); upb_status_uninit(&status);
upb_stdio_free(in); upb_stdio_uninit(&in);
upb_stdio_free(out); upb_stdio_uninit(&out);
upb_decoder_uninit(&d); upb_decoder_uninit(&d);
upb_textprinter_free(p); upb_textprinter_free(p);
upb_def_unref(UPB_UPCAST(m)); upb_def_unref(UPB_UPCAST(m));

@ -1,126 +0,0 @@
#undef NDEBUG /* ensure tests always assert. */
#include "upb_string.h"
char static_str[] = "Static string.";
upb_string static_upbstr = UPB_STATIC_STRING(static_str);
// Exercises the reference-counting API on a statically-initialized string.
static void test_static() {
  // The static string must compare equal to the literal it was built from.
  assert(upb_streql(&static_upbstr, UPB_STRLIT("Static string.")));

  // getref on a static string hands back the very same object, no matter how
  // often it is called.
  for (int i = 0; i < 3; i++) {
    assert(upb_string_getref(&static_upbstr) == &static_upbstr);
  }

  // Dropping references on a static string is a harmless no-op, even when
  // done more often than refs were taken.
  for (int i = 0; i < 5; i++) {
    upb_string_unref(&static_upbstr);
  }

  // Recycling must not hand the static string back; it yields a distinct
  // (modifiable) string that we own and have to release.
  upb_string *recycled = &static_upbstr;
  upb_string_recycle(&recycled);
  assert(recycled != &static_upbstr);
  upb_string_unref(recycled);
}
// Exercises dynamically-allocated upb_strings: creation, recycling, buffer
// reuse, substring aliasing, and the printf-style helpers.  The checks build on
// each other, so their order matters.
static void test_dynamic() {
upb_string *str = upb_string_new();
assert(str != NULL);
upb_string_unref(str);
// Can also create a string by recycle(NULL).
str = NULL;
upb_string_recycle(&str);
assert(str != NULL);
// Take a ref and recycle; should create a new string and release a ref
// on the old one.
upb_string *strcp = upb_string_getref(str);
assert(strcp == str);
assert(upb_atomic_read(&str->refcount) == 2);
upb_string_recycle(&str);
assert(strcp != str);
assert(upb_atomic_read(&str->refcount) == 1);
assert(upb_atomic_read(&strcp->refcount) == 1);
upb_string_unref(strcp);
// Copy a C string in and check the length and contents.
upb_strcpyc(str, static_str);
assert(upb_string_len(str) == (sizeof(static_str) - 1));
// robuf is remembered so later checks can tell whether the buffer was reused.
const char *robuf = upb_string_getrobuf(str);
assert(robuf != NULL);
assert(upb_streqlc(str, static_str));
upb_string_endread(str);
upb_string *str2 = str;
upb_string_recycle(&str2);
// No other referents, so should return the same string.
assert(str2 == str);
// Write a shorter string, the same memory should be reused.
upb_strcpyc(str, "XX");
const char *robuf2 = upb_string_getrobuf(str);
assert(robuf2 == robuf);
assert(upb_streqlc(str, "XX"));
assert(upb_streql(str, UPB_STRLIT("XX")));
// Make string alias part of another string.
str2 = upb_strdupc("WXYZ");
upb_string_recycle(&str);
upb_string_substr(str, str2, 1, 2);
assert(upb_string_len(str) == 2);
assert(upb_string_len(str2) == 4);
// The two string should be aliasing the same data.
const char *robuf3 = upb_string_getrobuf(str);
const char *robuf4 = upb_string_getrobuf(str2);
assert(robuf3 == robuf4 + 1);
// The aliased string should have an extra ref.
assert(upb_atomic_read(&str2->refcount) == 2);
// Recycling str should eliminate the extra ref.
upb_string_recycle(&str);
assert(upb_atomic_read(&str2->refcount) == 1);
// Resetting str should reuse its old data.
upb_strcpyc(str, "XX");
const char *robuf5 = upb_string_getrobuf(str);
assert(robuf5 == robuf);
// Resetting str to something very long should require new data to be
// allocated.
upb_string_recycle(&str);
const char longstring[] = "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX";
upb_strcpyc(str, longstring);
const char *robuf6 = upb_string_getrobuf(str);
assert(robuf6 != robuf);
assert(upb_streqlc(str, longstring));
// Test printf.
upb_string_recycle(&str);
upb_string_printf(str, "Number: %d, String: %s", 5, "YO!");
assert(upb_streqlc(str, "Number: 5, String: YO!"));
// Test asprintf
upb_string *str3 = upb_string_asprintf("Yo %s: " UPB_STRFMT "\n",
"Josh", UPB_STRARG(str));
const char expected[] = "Yo Josh: Number: 5, String: YO!\n";
assert(upb_streqlc(str3, expected));
// Release every string this test still holds.
upb_string_unref(str);
upb_string_unref(str2);
upb_string_unref(str3);
// Unref of NULL is harmless.
upb_string_unref(NULL);
}
// Test driver: runs the static-string checks, then the dynamic-string checks.
// Any failed assert aborts the process.
int main() {
  test_static();
  test_dynamic();
  return 0;
}

@ -1,7 +1,6 @@
#undef NDEBUG /* ensure tests always assert. */ #undef NDEBUG /* ensure tests always assert. */
#include "upb_table.h" #include "upb_table.h"
#include "upb_string.h"
#include "test_util.h" #include "test_util.h"
#include <assert.h> #include <assert.h>
#include <map> #include <map>
@ -23,7 +22,6 @@ typedef struct {
} inttable_entry; } inttable_entry;
typedef struct { typedef struct {
upb_strtable_entry e;
int32_t value; /* ASCII Value of first letter */ int32_t value; /* ASCII Value of first letter */
} strtable_entry; } strtable_entry;
@ -47,34 +45,29 @@ void test_strtable(const vector<string>& keys, uint32_t num_to_insert)
all.insert(key); all.insert(key);
strtable_entry e; strtable_entry e;
e.value = key[0]; e.value = key[0];
upb_string *str = upb_strduplen(key.c_str(), key.size()); upb_strtable_insert(&table, key.c_str(), &e);
e.e.key = str;
upb_strtable_insert(&table, &e.e);
upb_string_unref(str); // The table still owns a ref.
m[key] = key[0]; m[key] = key[0];
} }
/* Test correctness. */ /* Test correctness. */
for(uint32_t i = 0; i < keys.size(); i++) { for(uint32_t i = 0; i < keys.size(); i++) {
const string& key = keys[i]; const string& key = keys[i];
upb_string *str = upb_strduplen(key.c_str(), key.size()); strtable_entry *e = (strtable_entry*)upb_strtable_lookup(&table, key.c_str());
strtable_entry *e = (strtable_entry*)upb_strtable_lookup(&table, str); printf("Looking up %s...\n", key.c_str());
printf("Looking up " UPB_STRFMT "...\n", UPB_STRARG(str));
if(m.find(key) != m.end()) { /* Assume map implementation is correct. */ if(m.find(key) != m.end()) { /* Assume map implementation is correct. */
assert(e); assert(e);
assert(upb_streql(e->e.key, str));
assert(e->value == key[0]); assert(e->value == key[0]);
assert(m[key] == key[0]); assert(m[key] == key[0]);
} else { } else {
assert(e == NULL); assert(e == NULL);
} }
upb_string_unref(str);
} }
strtable_entry *e; upb_strtable_iter iter;
for(e = (strtable_entry*)upb_strtable_begin(&table); e; for(upb_strtable_begin(&iter, &table); !upb_strtable_done(&iter);
e = (strtable_entry*)upb_strtable_next(&table, &e->e)) { upb_strtable_next(&iter)) {
string tmp(upb_string_getrobuf(e->e.key), upb_string_len(e->e.key)); const char *key = upb_strtable_iter_key(&iter);
string tmp(key, strlen(key));
std::set<string>::iterator i = all.find(tmp); std::set<string>::iterator i = all.find(tmp);
assert(i != all.end()); assert(i != all.end());
all.erase(i); all.erase(i);

@ -71,18 +71,17 @@ void compare_arrays(const google::protobuf::Reflection *r,
case UPB_TYPE(STRING): case UPB_TYPE(STRING):
case UPB_TYPE(BYTES): { case UPB_TYPE(BYTES): {
std::string str = r->GetRepeatedString(proto2_msg, proto2_f, i); std::string str = r->GetRepeatedString(proto2_msg, proto2_f, i);
upb_string *upbstr = upb_value_getstr(v); upb_stdarray *upbstr = (upb_stdarray*)upb_value_getptr(v);
std::string str2(upb_string_getrobuf(upbstr), upb_string_len(upbstr)); std::string str2(upbstr->ptr, upbstr->len);
string_size += upb_string_len(upbstr); string_size += upbstr->len;
ASSERT(str == str2); ASSERT(str == str2);
break; break;
} }
case UPB_TYPE(GROUP): case UPB_TYPE(GROUP):
case UPB_TYPE(MESSAGE): case UPB_TYPE(MESSAGE):
// XXX: getstr
ASSERT(upb_dyncast_msgdef(upb_f->def) != NULL); ASSERT(upb_dyncast_msgdef(upb_f->def) != NULL);
compare(r->GetRepeatedMessage(proto2_msg, proto2_f, i), compare(r->GetRepeatedMessage(proto2_msg, proto2_f, i),
upb_value_getstr(v), upb_downcast_msgdef(upb_f->def)); upb_value_getptr(v), upb_downcast_msgdef(upb_f->def));
} }
} }
ASSERT(upb_seq_done(iter)); ASSERT(upb_seq_done(iter));
@ -129,9 +128,9 @@ void compare_values(const google::protobuf::Reflection *r,
case UPB_TYPE(STRING): case UPB_TYPE(STRING):
case UPB_TYPE(BYTES): { case UPB_TYPE(BYTES): {
std::string str = r->GetString(proto2_msg, proto2_f); std::string str = r->GetString(proto2_msg, proto2_f);
upb_string *upbstr = upb_value_getstr(v); upb_stdarray *upbstr = (upb_stdarray*)upb_value_getptr(v);
std::string str2(upb_string_getrobuf(upbstr), upb_string_len(upbstr)); std::string str2(upbstr->ptr, upbstr->len);
string_size += upb_string_len(upbstr); string_size += upbstr->len;
ASSERT(str == str2); ASSERT(str == str2);
break; break;
} }
@ -139,7 +138,7 @@ void compare_values(const google::protobuf::Reflection *r,
case UPB_TYPE(MESSAGE): case UPB_TYPE(MESSAGE):
// XXX: getstr // XXX: getstr
compare(r->GetMessage(proto2_msg, proto2_f), compare(r->GetMessage(proto2_msg, proto2_f),
upb_value_getstr(v), upb_downcast_msgdef(upb_f->def)); upb_value_getptr(v), upb_downcast_msgdef(upb_f->def));
} }
} }
@ -159,9 +158,7 @@ void compare(const google::protobuf::Message& proto2_msg,
ASSERT(upb_f); ASSERT(upb_f);
ASSERT(proto2_f); ASSERT(proto2_f);
ASSERT(upb_f->number == proto2_f->number()); ASSERT(upb_f->number == proto2_f->number());
ASSERT(std::string(upb_string_getrobuf(upb_f->name), ASSERT(std::string(upb_f->name) == proto2_f->name());
upb_string_len(upb_f->name)) ==
proto2_f->name());
ASSERT(upb_f->type == proto2_f->type()); ASSERT(upb_f->type == proto2_f->type());
ASSERT(upb_isseq(upb_f) == proto2_f->is_repeated()); ASSERT(upb_isseq(upb_f) == proto2_f->is_repeated());
@ -183,22 +180,22 @@ void compare(const google::protobuf::Message& proto2_msg,
void parse_and_compare(MESSAGE_CIDENT *proto2_msg, void parse_and_compare(MESSAGE_CIDENT *proto2_msg,
void *upb_msg, upb_msgdef *upb_md, void *upb_msg, upb_msgdef *upb_md,
upb_string *str) const char *str, size_t len)
{ {
// Parse to both proto2 and upb. // Parse to both proto2 and upb.
ASSERT(proto2_msg->ParseFromArray(upb_string_getrobuf(str), upb_string_len(str))); ASSERT(proto2_msg->ParseFromArray(str, len));
upb_status status = UPB_STATUS_INIT; upb_status status = UPB_STATUS_INIT;
upb_msg_clear(upb_msg, upb_md); upb_msg_clear(upb_msg, upb_md);
upb_strtomsg(str, upb_msg, upb_md, &status); upb_strtomsg(str, len, upb_msg, upb_md, &status);
if (!upb_ok(&status)) { if (!upb_ok(&status)) {
fprintf(stderr, "Error parsing test protobuf: "); fprintf(stderr, "Error parsing test protobuf: ");
upb_printerr(&status); upb_status_print(&status, stderr);
exit(1); exit(1);
} }
string_size = 0; string_size = 0;
compare(*proto2_msg, upb_msg, upb_md); compare(*proto2_msg, upb_msg, upb_md);
printf("Total size: %d, string size: %zd (%0.2f%%)\n", upb_string_len(str), printf("Total size: %zd, string size: %zd (%0.2f%%)\n", len,
string_size, (double)string_size / upb_string_len(str) * 100); string_size, (double)string_size / len * 100);
upb_status_uninit(&status); upb_status_uninit(&status);
} }
@ -221,31 +218,30 @@ int main(int argc, char *argv[])
// Initialize upb state, parse descriptor. // Initialize upb state, parse descriptor.
upb_status status = UPB_STATUS_INIT; upb_status status = UPB_STATUS_INIT;
upb_symtab *symtab = upb_symtab_new(); upb_symtab *symtab = upb_symtab_new();
upb_string *fds = upb_strreadfile(MESSAGE_DESCRIPTOR_FILE); size_t fds_len;
const char *fds = upb_readfile(MESSAGE_DESCRIPTOR_FILE, &fds_len);
if(fds == NULL) { if(fds == NULL) {
fprintf(stderr, "Couldn't read " MESSAGE_DESCRIPTOR_FILE ".\n"); fprintf(stderr, "Couldn't read " MESSAGE_DESCRIPTOR_FILE ".\n");
return 1; return 1;
} }
upb_read_descriptor(symtab, fds, &status); upb_read_descriptor(symtab, fds, fds_len, &status);
if(!upb_ok(&status)) { if(!upb_ok(&status)) {
fprintf(stderr, "Error importing " MESSAGE_DESCRIPTOR_FILE ": "); fprintf(stderr, "Error importing " MESSAGE_DESCRIPTOR_FILE ": ");
upb_printerr(&status); upb_status_print(&status, stderr);
return 1; return 1;
} }
upb_string_unref(fds); free((void*)fds);
upb_string *proto_name = upb_strdupc(MESSAGE_NAME); upb_def *def = upb_symtab_lookup(symtab, MESSAGE_NAME);
upb_def *def = upb_symtab_lookup(symtab, proto_name);
upb_msgdef *msgdef; upb_msgdef *msgdef;
if(!def || !(msgdef = upb_dyncast_msgdef(def))) { if(!def || !(msgdef = upb_dyncast_msgdef(def))) {
fprintf(stderr, "Error finding symbol '" UPB_STRFMT "'.\n", fprintf(stderr, "Error finding symbol '%s'.\n", MESSAGE_NAME);
UPB_STRARG(proto_name));
return 1; return 1;
} }
upb_string_unref(proto_name);
// Read the message data itself. // Read the message data itself.
upb_string *str = upb_strreadfile(MESSAGE_FILE); size_t len;
const char *str = upb_readfile(MESSAGE_FILE, &len);
if(str == NULL) { if(str == NULL) {
fprintf(stderr, "Error reading " MESSAGE_FILE "\n"); fprintf(stderr, "Error reading " MESSAGE_FILE "\n");
return 1; return 1;
@ -254,13 +250,13 @@ int main(int argc, char *argv[])
// Run twice to test proper object reuse. // Run twice to test proper object reuse.
MESSAGE_CIDENT proto2_msg; MESSAGE_CIDENT proto2_msg;
void *upb_msg = upb_stdmsg_new(msgdef); void *upb_msg = upb_stdmsg_new(msgdef);
parse_and_compare(&proto2_msg, upb_msg, msgdef, str); parse_and_compare(&proto2_msg, upb_msg, msgdef, str, len);
parse_and_compare(&proto2_msg, upb_msg, msgdef, str); parse_and_compare(&proto2_msg, upb_msg, msgdef, str, len);
printf("All tests passed, %d assertions.\n", num_assertions); printf("All tests passed, %d assertions.\n", num_assertions);
upb_stdmsg_free(upb_msg, msgdef); upb_stdmsg_free(upb_msg, msgdef);
upb_def_unref(UPB_UPCAST(msgdef)); upb_def_unref(UPB_UPCAST(msgdef));
upb_string_unref(str); free((void*)str);
upb_symtab_unref(symtab); upb_symtab_unref(symtab);
upb_status_uninit(&status); upb_status_uninit(&status);
google::protobuf::ShutdownProtobufLibrary(); google::protobuf::ShutdownProtobufLibrary();

@ -11,16 +11,18 @@
static upb_symtab *load_test_proto() { static upb_symtab *load_test_proto() {
upb_symtab *s = upb_symtab_new(); upb_symtab *s = upb_symtab_new();
ASSERT(s); ASSERT(s);
upb_string *descriptor = upb_strreadfile("tests/test.proto.pb"); size_t len;
char *descriptor = upb_readfile("tests/test.proto.pb", &len);
if(!descriptor) { if(!descriptor) {
fprintf(stderr, "Couldn't read input file tests/test.proto.pb\n"); fprintf(stderr, "Couldn't read input file tests/test.proto.pb\n");
exit(1); exit(1);
} }
upb_status status = UPB_STATUS_INIT; upb_status status = UPB_STATUS_INIT;
upb_read_descriptor(s, descriptor, &status); upb_read_descriptor(s, descriptor, len, &status);
upb_status_print(&status, stderr);
ASSERT(upb_ok(&status)); ASSERT(upb_ok(&status));
upb_status_uninit(&status); upb_status_uninit(&status);
upb_string_unref(descriptor); free(descriptor);
return s; return s;
} }
@ -33,9 +35,7 @@ static upb_flow_t upb_test_onvalue(void *closure, upb_value fval, upb_value val)
static void test_upb_jit() { static void test_upb_jit() {
upb_symtab *s = load_test_proto(); upb_symtab *s = load_test_proto();
upb_string *symname = upb_strdupc("SimplePrimitives"); upb_def *def = upb_symtab_lookup(s, "SimplePrimitives");
upb_def *def = upb_symtab_lookup(s, symname);
upb_string_unref(symname);
ASSERT(def); ASSERT(def);
upb_handlers *h = upb_handlers_new(); upb_handlers *h = upb_handlers_new();
@ -54,9 +54,7 @@ static void test_upb_symtab() {
// Test cycle detection by making a cyclic def's main refcount go to zero // Test cycle detection by making a cyclic def's main refcount go to zero
// and then be incremented to one again. // and then be incremented to one again.
upb_string *symname = upb_strdupc("A"); upb_def *def = upb_symtab_lookup(s, "A");
upb_def *def = upb_symtab_lookup(s, symname);
upb_string_unref(symname);
ASSERT(def); ASSERT(def);
upb_symtab_unref(s); upb_symtab_unref(s);
upb_msgdef *m = upb_downcast_msgdef(def); upb_msgdef *m = upb_downcast_msgdef(def);

Loading…
Cancel
Save