Significant memory-management refactoring and Python extension.

pull/13171/head
Joshua Haberman 16 years ago
parent a223f9af30
commit 040f7e6ba2
  1. 38
      Makefile
  2. 79
      descriptor/descriptor.h
  3. 53
      descriptor/descriptor_const.h
  4. 16
      lang_ext/python/cext.c
  5. 48
      lang_ext/python/cext.h
  6. 164
      lang_ext/python/definition.c
  7. 20
      lang_ext/python/definition.h
  8. 919
      lang_ext/python/pb.c
  9. 18
      lang_ext/python/setup.py
  10. 93
      src/upb.h
  11. 89
      src/upb_array.h
  12. 16
      src/upb_context.c
  13. 9
      src/upb_enum.h
  14. 1
      src/upb_inlinedefs.c
  15. 208
      src/upb_mm.c
  16. 168
      src/upb_mm.h
  17. 213
      src/upb_msg.c
  18. 152
      src/upb_msg.h
  19. 2
      src/upb_parse.c
  20. 10
      src/upb_parse.h
  21. 23
      src/upb_string.c
  22. 59
      src/upb_string.h
  23. 119
      src/upb_struct.h
  24. 54
      src/upb_text.c
  25. 124
      tools/upbc.c

@ -1,4 +1,11 @@
#
# This Makefile builds the upb library as well as associated tests, tools, and
# language extensions.
#
# It does not use autoconf/automake/libtool because I can't stomach all the
# cruft. If you're not compiling for gcc, you may have to change some of the
# options.
#
# Summary of compiler flags you may want to use:
#
# * -DNDEBUG: makes binary smaller and faster by removing sanity checks.
@ -25,19 +32,28 @@ CPPFLAGS=-Wall -Wextra -g $(INCLUDE) $(strip $(shell test -f perf-cppflags && ca
LDLIBS=-lpthread
LIBUPB=src/libupb.a
ALL=deps $(OBJ) $(LIBUPB) tests/test_table tests/tests tools/upbc
LIBUPB_PIC=src/libupb_pic.a
LIBUPB_SHARED=src/libupb.so
ALL=deps $(OBJ) $(LIBUPB) $(LIBUPB_PIC) $(LIBUPB_SHARED) tests/test_table tests/tests tools/upbc
all: $(ALL)
clean:
rm -rf $(call rwildcard,,*.o) $(ALL) benchmark/google_messages.proto.pb benchmark/google_messages.pb.* benchmarks/b.* benchmarks/*.pb*
rm -rf $(call rwildcard,,*.o) $(call rwildcard,,*.lo) $(ALL) benchmark/google_messages.proto.pb benchmark/google_messages.pb.* benchmarks/b.* benchmarks/*.pb*
rm -rf descriptor/descriptor.proto.pb
cd lang_ext/python && python setup.py clean --all
# The core library (src/libupb.a)
OBJ=src/upb_parse.o src/upb_table.o src/upb_msg.o src/upb_enum.o src/upb_context.o \
src/upb_string.o src/upb_text.o src/upb_serialize.o descriptor/descriptor.o
SRC=$(call rwildcard,,*.c)
HEADERS=$(call rwildcard,,*.h)
$(LIBUPB): $(OBJ)
ar rcs $(LIBUPB) $(OBJ)
SRC=src/upb_parse.c src/upb_table.c src/upb_msg.c src/upb_mm.c src/upb_enum.c src/upb_context.c \
src/upb_string.c src/upb_text.c src/upb_serialize.c descriptor/descriptor.c
STATICOBJ=$(patsubst %.c,%.o,$(SRC))
SHAREDOBJ=$(patsubst %.c,%.lo,$(SRC))
# Shared (position-independent) objects are compiled exactly like the static
# ones, with -fPIC added; they use the .lo suffix so both kinds can coexist.
%.lo: %.c
	$(CC) -fPIC $(CPPFLAGS) $(CFLAGS) -c -o $@ $<
# Static archive of the core library, built from the non-PIC objects.
# $(AR) (default: ar) is used so the archiver can be overridden, e.g. when
# cross-compiling.
$(LIBUPB): $(STATICOBJ)
	$(AR) rcs $@ $^
# Static archive of the position-independent objects; the "python" target
# depends on it. $(AR) (default: ar) is used so the archiver can be overridden.
$(LIBUPB_PIC): $(SHAREDOBJ)
	$(AR) rcs $@ $^
# Shared library linked from the position-independent objects.
$(LIBUPB_SHARED): $(SHAREDOBJ)
	$(CC) -shared -o $@ $^
# Regenerating the auto-generated files in descriptor/.
descriptor/descriptor.proto.pb: descriptor/descriptor.proto
@ -47,6 +63,10 @@ descriptor/descriptor.proto.pb: descriptor/descriptor.proto
# Regenerates the descriptor/descriptor output files with upbc. This is a
# command, not a file, so it is declared phony to keep a stray file named
# "descriptorgen" from disabling it.
.PHONY: descriptorgen
descriptorgen: descriptor/descriptor.proto.pb tools/upbc
	./tools/upbc -i upb_file_descriptor_set -o descriptor/descriptor descriptor/descriptor.proto.pb
# Language extensions.
# Builds the Python extension via its setup.py once the PIC archive exists.
# Declared phony: the recipe never creates a file named "python", and a file
# or directory with that name must not make the target look up to date.
.PHONY: python
python: $(LIBUPB_PIC)
	cd lang_ext/python && python setup.py build
# Tests
# Runs the test binary. Declared phony so that a file named "test" cannot
# make the target appear up to date and silently skip the run.
.PHONY: test
test: tests/tests
	./tests/tests
@ -136,5 +156,5 @@ benchmarks/b.parsetostruct_googlemessage2.proto2_compiled: \
benchmarks/google_messages.pb.cc -lprotobuf -lpthread
-include deps
deps: $(SRC) $(HEADERS) gen-deps.sh Makefile
deps: gen-deps.sh Makefile $(call rwildcard,,*.c) $(call rwildcard,,*.h)
@./gen-deps.sh $(SRC)

@ -3,9 +3,7 @@
#ifndef DESCRIPTOR_DESCRIPTOR_H
#define DESCRIPTOR_DESCRIPTOR_H
#include <upb_string.h>
#include <upb_array.h>
#include <upb_struct.h>
#ifdef __cplusplus
extern "C" {
@ -14,45 +12,6 @@ extern "C" {
struct google_protobuf_FileDescriptorSet;
extern struct google_protobuf_FileDescriptorSet *upb_file_descriptor_set;
/* Enums. */
typedef enum google_protobuf_FieldOptions_CType {
GOOGLE_PROTOBUF_FIELDOPTIONS_CORD = 1,
GOOGLE_PROTOBUF_FIELDOPTIONS_STRING_PIECE = 2
} google_protobuf_FieldOptions_CType;
typedef enum google_protobuf_FieldDescriptorProto_Type {
GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_DOUBLE = 1,
GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_FLOAT = 2,
GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_INT64 = 3,
GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_UINT64 = 4,
GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_INT32 = 5,
GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_FIXED64 = 6,
GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_FIXED32 = 7,
GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_BOOL = 8,
GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_STRING = 9,
GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_GROUP = 10,
GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_MESSAGE = 11,
GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_BYTES = 12,
GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_UINT32 = 13,
GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_ENUM = 14,
GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_SFIXED32 = 15,
GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_SFIXED64 = 16,
GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_SINT32 = 17,
GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_SINT64 = 18
} google_protobuf_FieldDescriptorProto_Type;
typedef enum google_protobuf_FieldDescriptorProto_Label {
GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_OPTIONAL = 1,
GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_REQUIRED = 2,
GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_REPEATED = 3
} google_protobuf_FieldDescriptorProto_Label;
typedef enum google_protobuf_FileOptions_OptimizeMode {
GOOGLE_PROTOBUF_FILEOPTIONS_SPEED = 1,
GOOGLE_PROTOBUF_FILEOPTIONS_CODE_SIZE = 2
} google_protobuf_FileOptions_OptimizeMode;
/* Forward declarations of all message types.
* So they can refer to each other in possibly-recursive ways. */
@ -131,8 +90,8 @@ typedef struct google_protobuf_MethodOptions
/* The message definitions themselves. */
struct google_protobuf_UninterpretedOption_NamePart {
struct upb_mmhead mmhead;
struct upb_msgdef *def;
void *gptr;
union {
uint8_t bytes[1];
struct {
@ -146,8 +105,8 @@ struct google_protobuf_UninterpretedOption_NamePart {
UPB_DEFINE_MSG_ARRAY(google_protobuf_UninterpretedOption_NamePart)
struct google_protobuf_DescriptorProto {
struct upb_mmhead mmhead;
struct upb_msgdef *def;
void *gptr;
union {
uint8_t bytes[1];
struct {
@ -171,8 +130,8 @@ struct google_protobuf_DescriptorProto {
UPB_DEFINE_MSG_ARRAY(google_protobuf_DescriptorProto)
struct google_protobuf_EnumDescriptorProto {
struct upb_mmhead mmhead;
struct upb_msgdef *def;
void *gptr;
union {
uint8_t bytes[1];
struct {
@ -188,8 +147,8 @@ struct google_protobuf_EnumDescriptorProto {
UPB_DEFINE_MSG_ARRAY(google_protobuf_EnumDescriptorProto)
struct google_protobuf_UninterpretedOption {
struct upb_mmhead mmhead;
struct upb_msgdef *def;
void *gptr;
union {
uint8_t bytes[1];
struct {
@ -211,8 +170,8 @@ struct google_protobuf_UninterpretedOption {
UPB_DEFINE_MSG_ARRAY(google_protobuf_UninterpretedOption)
struct google_protobuf_FileDescriptorProto {
struct upb_mmhead mmhead;
struct upb_msgdef *def;
void *gptr;
union {
uint8_t bytes[1];
struct {
@ -238,8 +197,8 @@ struct google_protobuf_FileDescriptorProto {
UPB_DEFINE_MSG_ARRAY(google_protobuf_FileDescriptorProto)
struct google_protobuf_MethodDescriptorProto {
struct upb_mmhead mmhead;
struct upb_msgdef *def;
void *gptr;
union {
uint8_t bytes[1];
struct {
@ -257,8 +216,8 @@ struct google_protobuf_MethodDescriptorProto {
UPB_DEFINE_MSG_ARRAY(google_protobuf_MethodDescriptorProto)
struct google_protobuf_EnumValueOptions {
struct upb_mmhead mmhead;
struct upb_msgdef *def;
void *gptr;
union {
uint8_t bytes[1];
struct {
@ -270,8 +229,8 @@ struct google_protobuf_EnumValueOptions {
UPB_DEFINE_MSG_ARRAY(google_protobuf_EnumValueOptions)
struct google_protobuf_EnumValueDescriptorProto {
struct upb_mmhead mmhead;
struct upb_msgdef *def;
void *gptr;
union {
uint8_t bytes[1];
struct {
@ -287,8 +246,8 @@ struct google_protobuf_EnumValueDescriptorProto {
UPB_DEFINE_MSG_ARRAY(google_protobuf_EnumValueDescriptorProto)
struct google_protobuf_ServiceDescriptorProto {
struct upb_mmhead mmhead;
struct upb_msgdef *def;
void *gptr;
union {
uint8_t bytes[1];
struct {
@ -304,8 +263,8 @@ struct google_protobuf_ServiceDescriptorProto {
UPB_DEFINE_MSG_ARRAY(google_protobuf_ServiceDescriptorProto)
struct google_protobuf_FileDescriptorSet {
struct upb_mmhead mmhead;
struct upb_msgdef *def;
void *gptr;
union {
uint8_t bytes[1];
struct {
@ -317,8 +276,8 @@ struct google_protobuf_FileDescriptorSet {
UPB_DEFINE_MSG_ARRAY(google_protobuf_FileDescriptorSet)
struct google_protobuf_DescriptorProto_ExtensionRange {
struct upb_mmhead mmhead;
struct upb_msgdef *def;
void *gptr;
union {
uint8_t bytes[1];
struct {
@ -332,8 +291,8 @@ struct google_protobuf_DescriptorProto_ExtensionRange {
UPB_DEFINE_MSG_ARRAY(google_protobuf_DescriptorProto_ExtensionRange)
struct google_protobuf_FieldOptions {
struct upb_mmhead mmhead;
struct upb_msgdef *def;
void *gptr;
union {
uint8_t bytes[1];
struct {
@ -353,8 +312,8 @@ struct google_protobuf_FieldOptions {
UPB_DEFINE_MSG_ARRAY(google_protobuf_FieldOptions)
struct google_protobuf_FileOptions {
struct upb_mmhead mmhead;
struct upb_msgdef *def;
void *gptr;
union {
uint8_t bytes[1];
struct {
@ -374,8 +333,8 @@ struct google_protobuf_FileOptions {
UPB_DEFINE_MSG_ARRAY(google_protobuf_FileOptions)
struct google_protobuf_MessageOptions {
struct upb_mmhead mmhead;
struct upb_msgdef *def;
void *gptr;
union {
uint8_t bytes[1];
struct {
@ -389,8 +348,8 @@ struct google_protobuf_MessageOptions {
UPB_DEFINE_MSG_ARRAY(google_protobuf_MessageOptions)
struct google_protobuf_EnumOptions {
struct upb_mmhead mmhead;
struct upb_msgdef *def;
void *gptr;
union {
uint8_t bytes[1];
struct {
@ -402,8 +361,8 @@ struct google_protobuf_EnumOptions {
UPB_DEFINE_MSG_ARRAY(google_protobuf_EnumOptions)
struct google_protobuf_FieldDescriptorProto {
struct upb_mmhead mmhead;
struct upb_msgdef *def;
void *gptr;
union {
uint8_t bytes[1];
struct {
@ -429,8 +388,8 @@ struct google_protobuf_FieldDescriptorProto {
UPB_DEFINE_MSG_ARRAY(google_protobuf_FieldDescriptorProto)
struct google_protobuf_ServiceOptions {
struct upb_mmhead mmhead;
struct upb_msgdef *def;
void *gptr;
union {
uint8_t bytes[1];
struct {
@ -442,8 +401,8 @@ struct google_protobuf_ServiceOptions {
UPB_DEFINE_MSG_ARRAY(google_protobuf_ServiceOptions)
struct google_protobuf_MethodOptions {
struct upb_mmhead mmhead;
struct upb_msgdef *def;
void *gptr;
union {
uint8_t bytes[1];
struct {

@ -0,0 +1,53 @@
/* This file was generated by upbc (the upb compiler). Do not edit. */
#ifndef DESCRIPTOR_DESCRIPTOR_C
#define DESCRIPTOR_DESCRIPTOR_C
#ifdef __cplusplus
extern "C" {
#endif
/* Enums. */
/* Values of the FieldOptions CType enum, as emitted by upbc. */
typedef enum google_protobuf_FieldOptions_CType {
GOOGLE_PROTOBUF_FIELDOPTIONS_CORD = 1,
GOOGLE_PROTOBUF_FIELDOPTIONS_STRING_PIECE = 2
} google_protobuf_FieldOptions_CType;
/* Field type tags, as emitted by upbc. The Python extension switches on these
 * constants to convert field values to and from Python objects. */
typedef enum google_protobuf_FieldDescriptorProto_Type {
GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_DOUBLE = 1,
GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_FLOAT = 2,
GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_INT64 = 3,
GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_UINT64 = 4,
GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_INT32 = 5,
GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_FIXED64 = 6,
GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_FIXED32 = 7,
GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_BOOL = 8,
GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_STRING = 9,
GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_GROUP = 10,
GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_MESSAGE = 11,
GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_BYTES = 12,
GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_UINT32 = 13,
GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_ENUM = 14,
GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_SFIXED32 = 15,
GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_SFIXED64 = 16,
GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_SINT32 = 17,
GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_SINT64 = 18
} google_protobuf_FieldDescriptorProto_Type;
/* Field label values (optional / required / repeated), as emitted by upbc. */
typedef enum google_protobuf_FieldDescriptorProto_Label {
GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_OPTIONAL = 1,
GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_REQUIRED = 2,
GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_REPEATED = 3
} google_protobuf_FieldDescriptorProto_Label;
/* Values of the FileOptions OptimizeMode enum, as emitted by upbc. */
typedef enum google_protobuf_FileOptions_OptimizeMode {
GOOGLE_PROTOBUF_FILEOPTIONS_SPEED = 1,
GOOGLE_PROTOBUF_FILEOPTIONS_CODE_SIZE = 2
} google_protobuf_FileOptions_OptimizeMode;
#ifdef __cplusplus
} /* extern "C" */
#endif
#endif /* DESCRIPTOR_DESCRIPTOR_C */

@ -0,0 +1,16 @@
/*
* upb - a minimalist implementation of protocol buffers.
*
* Copyright (c) 2009 Joshua Haberman. See LICENSE for details.
*
*/
#include "cext.h"
/* Module initialization entry point for "upb.cext" (Python 2 style init
 * function). Creates the top-level module with no methods of its own, then
 * runs the initializers of the definition and pb parts of the extension.
 *
 * If the module cannot be created, Py_InitModule() has already set a Python
 * exception; we return immediately so the sub-initializers do not run against
 * a half-initialized extension. */
PyMODINIT_FUNC
initcext(void)
{
  if(!Py_InitModule("upb.cext", NULL)) return;
  initdefinition();
  initpb();
}

@ -0,0 +1,48 @@
/*
* upb - a minimalist implementation of protocol buffers.
*
* Copyright (c) 2009 Joshua Haberman. See LICENSE for details.
*
*/
#ifndef UPB_PYTHON_CEXT_H_
#define UPB_PYTHON_CEXT_H_
#include <Python.h>
#ifdef __cplusplus
extern "C" {
#endif
/* Python object that wraps a struct upb_context. */
typedef struct {
PyObject_HEAD
struct upb_context *context;  /* The wrapped upb context. */
PyObject *created_defs;  /* NOTE(review): presumably a cache of definition objects created for this context -- confirm it is still populated and used. */
} PyUpb_Context;
/* Python object that wraps a struct upb_msgdef (a message definition). */
typedef struct {
PyObject_HEAD
struct upb_msgdef *def;  /* The wrapped message definition. */
PyUpb_Context *context;  /* Python wrapper of the context associated with def. */
} PyUpb_MsgDef;
extern PyTypeObject PyUpb_MsgDefType;
/* What format string should be passed to PyArg_ParseTuple to get just a raw
* string of bytes and a length. */
#if PY_MAJOR_VERSION >= 3
#define BYTES_FORMAT "y#"
#else
#define BYTES_FORMAT "s#"
#endif
/* Returns Py_True or Py_False (as a new reference) from the enclosing function
 * depending on val. NOTE: this expands to a bare if/else statement, so it must
 * not be used as the unbraced body of another if/else. */
#define RETURN_BOOL(val) if(val) { Py_RETURN_TRUE; } else { Py_RETURN_FALSE; }
extern PyMODINIT_FUNC initdefinition(void);
extern PyMODINIT_FUNC initpb(void);
#ifdef __cplusplus
} /* extern "C" */
#endif
#endif

@ -24,21 +24,29 @@
#include "upb_context.h"
#include "upb_msg.h"
#if PY_MAJOR_VERSION > 3
const char *bytes_format = "y#";
#else
const char *bytes_format = "s#";
#endif
static PyTypeObject PyUpb_ContextType;
static struct upb_strtable msgdefs;
static struct upb_strtable contexts;
struct msgtab_entry {
struct upb_strtable_entry e;
PyUpb_MsgDef *msgdef;
};
/* upb.def.MessageDefinition **************************************************/
struct contexttab_entry {
struct upb_strtable_entry e;
PyUpb_Context *context;
};
typedef struct {
PyObject_HEAD
struct upb_msgdef *def;
} PyUpb_MsgDef;
#define CheckContext(obj) \
(void*)obj; do { \
if(!PyObject_TypeCheck(obj, &PyUpb_ContextType)) { \
PyErr_SetString(PyExc_TypeError, "Must be a upb.Context"); \
return NULL; \
} \
} while(0)
PyTypeObject PyUpb_MsgDefType; /* forward decl. */
/* upb.def.MessageDefinition **************************************************/
/* Not implemented yet, but these methods will expose information about the
* message definition (the upb_msgdef). */
@ -46,18 +54,10 @@ static PyMethodDef msgdef_methods[] = {
{NULL, NULL}
};
static PyObject *msgdef_new(struct upb_msgdef *m)
{
PyUpb_MsgDef *md_obj = (void*)PyType_GenericAlloc(&PyUpb_MsgDefType, 0);
md_obj->def = m;
upb_msgdef_ref(md_obj->def);
return (void*)md_obj;
}
static void msgdef_dealloc(PyObject *obj)
{
PyUpb_MsgDef *md_obj = (void*)obj;
upb_msgdef_unref(md_obj->def);
Py_DECREF(md_obj->context);
obj->ob_type->tp_free(obj);
}
@ -106,27 +106,11 @@ PyTypeObject PyUpb_MsgDefType = {
/* upb.Context ****************************************************************/
typedef struct {
PyObject_HEAD
struct upb_context *context;
PyObject *created_defs;
} PyUpb_Context;
static PyTypeObject PyUpb_ContextType; /* forward decl. */
#define CheckContext(obj) \
(void*)obj; do { \
if(!PyObject_TypeCheck(obj, &PyUpb_ContextType)) { \
PyErr_SetString(PyExc_TypeError, "Must be a upb.Context"); \
return NULL; \
} \
} while(0)
static PyObject *context_parsefds(PyObject *_context, PyObject *args)
{
PyUpb_Context *context = CheckContext(_context);
struct upb_string str;
if(!PyArg_ParseTuple(args, bytes_format, &str.ptr, &str.byte_len))
if(!PyArg_ParseTuple(args, BYTES_FORMAT, &str.ptr, &str.byte_len))
return NULL;
str.byte_size = 0; /* We don't own that mem. */
@ -138,35 +122,56 @@ static PyObject *context_parsefds(PyObject *_context, PyObject *args)
Py_RETURN_NONE;
}
static PyObject *get_or_create_def(PyUpb_Context *context,
struct upb_symtab_entry *e)
static PyObject *get_or_create_def(struct upb_symtab_entry *e)
{
/* Check out internal dictionary of Python classes we have already created
* (keyed by the address of the obj we are referencing). */
#if PY_MAJOR_VERSION > 3
PyObject *str = PyBytes_FromStringAndSize((char*)&e->ref, sizeof(void*));
#else
PyObject *str = PyString_FromStringAndSize((char*)&e->ref, sizeof(void*));
#endif
/* Would use PyDict_GetItemStringAndSize() if it existed, but only
* PyDict_GetItemString() exists, and pointers could have NULL bytes. */
PyObject *def = PyDict_GetItem(context->created_defs, str);
if(!def) {
switch(e->type) {
case UPB_SYM_MESSAGE:
def = msgdef_new(e->ref.msg);
break;
case UPB_SYM_ENUM:
case UPB_SYM_SERVICE:
case UPB_SYM_EXTENSION:
default:
def = NULL;
break;
}
if(def) PyDict_SetItem(context->created_defs, str, def);
switch(e->type) {
case UPB_SYM_MESSAGE: return (PyObject*)get_or_create_msgdef(e->ref.msg);
case UPB_SYM_ENUM:
case UPB_SYM_SERVICE:
case UPB_SYM_EXTENSION:
default: fprintf(stderr, "upb.pb, not implemented.\n"); abort(); return NULL;
}
Py_DECREF(str);
return def;
}
static PyUpb_Context *get_or_create_context(struct upb_context *context)
{
PyUpb_Context *pycontext = NULL;
struct upb_string str = {.ptr = (char*)&context, .byte_len = sizeof(void*)};
struct contexttab_entry *e = upb_strtable_lookup(&contexts, &str);
if(!e) {
pycontext = (void*)PyUpb_ContextType.tp_alloc(&PyUpb_ContextType, 0);
pycontext->context = context;
struct contexttab_entry new_e = {
.e = {.key = {.ptr = (char*)&pycontext->context, .byte_len = sizeof(void*)}},
.context = pycontext
};
upb_strtable_insert(&contexts, &new_e.e);
} else {
pycontext = e->context;
Py_INCREF(pycontext);
}
return pycontext;
}
PyUpb_MsgDef *get_or_create_msgdef(struct upb_msgdef *def)
{
PyUpb_MsgDef *pydef = NULL;
struct upb_string str = {.ptr = (char*)&def, .byte_len = sizeof(void*)};
struct msgtab_entry *e = upb_strtable_lookup(&msgdefs, &str);
if(!e) {
pydef = (void*)PyUpb_MsgDefType.tp_alloc(&PyUpb_MsgDefType, 0);
pydef->def = def;
pydef->context = get_or_create_context(def->context);
struct msgtab_entry new_e = {
.e = {.key = {.ptr = (char*)&pydef->def, .byte_len = sizeof(void*)}},
.msgdef = pydef
};
upb_strtable_insert(&msgdefs, &new_e.e);
} else {
pydef = e->msgdef;
Py_INCREF(pydef);
}
return pydef;
}
static PyObject *context_lookup(PyObject *self, PyObject *args)
@ -179,7 +184,7 @@ static PyObject *context_lookup(PyObject *self, PyObject *args)
struct upb_symtab_entry e;
if(upb_context_lookup(context->context, &str, &e)) {
return get_or_create_def(context, &e);
return get_or_create_def(&e);
} else {
Py_RETURN_NONE;
}
@ -197,12 +202,13 @@ static PyObject *context_resolve(PyObject *self, PyObject *args)
struct upb_symtab_entry e;
if(upb_context_resolve(context->context, &base, &str, &e)) {
return get_or_create_def(context, &e);
return get_or_create_def(&e);
} else {
Py_RETURN_NONE;
}
}
/* Callback for upb_context_enumerate below. */
static void add_string(void *udata, struct upb_symtab_entry *entry)
{
PyObject *list = udata;
@ -244,7 +250,11 @@ static PyObject *context_new(PyTypeObject *subtype,
{
PyUpb_Context *obj = (void*)subtype->tp_alloc(subtype, 0);
obj->context = upb_context_new();
obj->created_defs = PyDict_New();
struct contexttab_entry e = {
.e = {.key = {.ptr = (char*)&obj->context, .byte_len = sizeof(void*)}},
.context = obj
};
upb_strtable_insert(&contexts, &e.e);
return (void*)obj;
}
@ -252,7 +262,9 @@ static void context_dealloc(PyObject *obj)
{
PyUpb_Context *c = (void*)obj;
upb_context_unref(c->context);
Py_DECREF(c->created_defs);
/* TODO: once strtable supports delete. */
//struct upb_string ptrstr = {.ptr = (char*)&c->context, .byte_len = sizeof(void*)};
//upb_strtable_delete(&contexts, &ptrstr);
obj->ob_type->tp_free(obj);
}
@ -299,17 +311,25 @@ static PyTypeObject PyUpb_ContextType = {
0, /* tp_free */
};
PyMethodDef methods[] = {
static PyMethodDef methods[] = {
{NULL, NULL}
};
PyMODINIT_FUNC
initdefinition(void)
{
if(PyType_Ready(&PyUpb_ContextType) < 0) return;
Py_INCREF(&PyUpb_ContextType); /* TODO: necessary? */
if(PyType_Ready(&PyUpb_MsgDefType) < 0) return;
Py_INCREF(&PyUpb_MsgDefType); /* TODO: necessary? */
PyObject *mod = Py_InitModule("upb.definition", methods);
/* PyModule_AddObject steals a reference. These objects are statically
* allocated and must not be deleted, so we increment their refcount. */
Py_INCREF(&PyUpb_ContextType);
Py_INCREF(&PyUpb_MsgDefType);
PyObject *mod = Py_InitModule("upb.cext.definition", methods);
PyModule_AddObject(mod, "Context", (PyObject*)&PyUpb_ContextType);
PyModule_AddObject(mod, "MessageDefinition", (PyObject*)&PyUpb_MsgDefType);
upb_strtable_init(&contexts, 8, sizeof(struct contexttab_entry));
upb_strtable_init(&msgdefs, 16, sizeof(struct msgtab_entry));
}

@ -16,16 +16,30 @@
extern "C" {
#endif
typedef struct {
PyObject_HEAD
struct upb_context *context;
} PyUpb_Context;
typedef struct {
PyObject_HEAD
struct upb_msgdef *def;
} PyUpb_MessageDefinition;
PyUpb_Context *context;
} PyUpb_MsgDef;
extern PyTypeObject PyUpb_MessageDefinitionType;
extern PyTypeObject PyUpb_MsgDefType;
/* What format string should be passed to PyArg_ParseTuple to get just a raw
* string of bytes and a length. */
extern const char *bytes_format;
#if PY_MAJOR_VERSION >= 3
#define BYTES_FORMAT "y#"
#else
#define BYTES_FORMAT "s#"
#endif
PyUpb_MsgDef *get_or_create_msgdef(struct upb_msgdef *def);
#define RETURN_BOOL(val) if(val) { Py_RETURN_TRUE; } else { Py_RETURN_FALSE; }
#ifdef __cplusplus
} /* extern "C" */

@ -0,0 +1,919 @@
/*
* upb - a minimalist implementation of protocol buffers.
*
* Copyright (c) 2009 Joshua Haberman. See LICENSE for details.
*
* This file implements an interface to Python that is compatible
* (as much as possible) with proto1 (the first implementation of
* protocol buffers, which is only released internally to Google).
*
* The key interface we must support is ProtocolMessage. Each message
* type has its own Python class that supports the ProtocolMessage
* interface (obj.Clear(), obj.IsInitialized(), etc) as well as
* message-specific accessors (obj.foo(), obj.set_foo(),
* obj.clear_foo(), etc).
*
* We represent these message types as upb.pb.MessageType objects. In other
* words, these objects are both instances of upb.pb.MessageType *and*
* classes of type MyProtoType.
*/
#include <Python.h>
#include <stddef.h>
#include "upb_mm.h"
#include "definition.h"
/* Opcodes that describe all of the operations you can perform on a field of a
* protobuf from Python. For example, foo.has_bar() uses opcode OP_HAS. */
/* NOTE: opcode_names[] is indexed by these values, so the two lists must be
 * kept in the same order. */
typedef enum {
/* For non-repeated fields. */
OP_HAS,
/* For non-repeated fields that are not submessages. */
OP_SET,
/* For non-repeated message fields. */
OP_MUTABLE,
/* For repeated fields. */
OP_SIZE, OP_LIST, OP_ADD,
/* For all types of fields. */
OP_GET, OP_CLEAR
} PyUpb_PbBoundFieldOpCode;
/* Printable name of each opcode, indexed by PyUpb_PbBoundFieldOpCode (used
 * when building the repr of a bound field op). Must stay in the same order as
 * the enum. */
const char *opcode_names[] = {
"OP_HAS", "OP_SET", "OP_MUTABLE", "OP_SIZE", "OP_LIST", "OP_ADD", "OP_GET", "OP_CLEAR"
};
/* Structures for the Python objects we define. */
typedef struct {
PyObject_HEAD;
PyUpb_MsgDef *def;
} PyUpb_PbMsgCreator;
/* Python object for one message instance. The upb memory-manager ref is
 * embedded directly in the object, so the Python object can be recovered from
 * a pointer to "ref" with offsetof() (see MMREF_TO_PYOBJ).
 *
 * PyObject_HEAD already expands to complete member declarations, so it takes
 * no trailing semicolon. */
typedef struct {
  PyObject_HEAD
  struct upb_mm_ref ref;  /* Embedded memory-manager reference to the message. */
  PyUpb_MsgDef *def;      /* Python wrapper of the message's definition. */
} PyUpb_PbMsg;
typedef struct {
PyObject_HEAD;
PyUpb_PbMsg *msg;
struct upb_msg_fielddef *f;
PyUpb_PbBoundFieldOpCode code;
} PyUpb_PbBoundFieldOp;
static PyTypeObject PyUpb_PbMsgCreatorType;
static PyTypeObject PyUpb_PbMsgType;
static PyTypeObject PyUpb_PbBoundFieldOpType;
/* Type-checked downcast, meant to be used as the initializer of a local:
 *   T *x = Check_MsgCreator(obj);
 * The macro expands to the cast expression followed by a separate do/while
 * statement: the assignment completes first, then the enclosing function
 * returns NULL with a TypeError set if obj is not a MessageCreator. It is
 * therefore only usable in functions that return a pointer. */
#define Check_MsgCreator(obj) \
(void*)obj; do { \
if(!PyObject_TypeCheck(obj, &PyUpb_PbMsgCreatorType)) { \
PyErr_SetString(PyExc_TypeError, "must be a MessageCreator"); \
return NULL; \
} \
} while(0)
/* Type-checked downcast, meant to be used as the initializer of a local:
 *   T *x = Check_Message(obj);
 * Expands to the cast expression followed by a separate do/while statement
 * that makes the enclosing function return NULL with a TypeError set if obj
 * is not a Message. Only usable in functions that return a pointer. */
#define Check_Message(obj) \
(void*)obj; do { \
if(!PyObject_TypeCheck(obj, &PyUpb_PbMsgType)) { \
PyErr_SetString(PyExc_TypeError, "must be a Message"); \
return NULL; \
} \
} while(0)
/* Type-checked downcast, meant to be used as the initializer of a local:
 *   PyUpb_PbBoundFieldOp *op = Check_BoundFieldOp(obj);
 * Expands to the cast expression followed by a separate do/while statement
 * that makes the enclosing function return NULL with a TypeError set if obj
 * is not a BoundFieldOp. Only usable in functions that return a pointer. */
#define Check_BoundFieldOp(obj) \
(void*)obj; do { \
if(!PyObject_TypeCheck(obj, &PyUpb_PbBoundFieldOpType)) { \
PyErr_SetString(PyExc_TypeError, "must be a BoundFieldOp"); \
return NULL; \
} \
} while(0)
/* Makes the enclosing function return NULL unless the argument tuple "args"
 * (which must be in scope) is empty. The trailing ';' is part of the macro. */
#define EXPECT_NO_ARGS if(!PyArg_ParseTuple(args, "")) return NULL;
/* Recovers the enclosing PyUpb_PbMsg object from a pointer to its embedded
 * "ref" member by subtracting the member's offset. Only valid for refs that
 * are actually embedded in a PyUpb_PbMsg. */
#define MMREF_TO_PYOBJ(mmref) (PyObject*)((char*)(mmref)-offsetof(PyUpb_PbMsg, ref))
/* Ref-creation callback installed in "pymm". For a message pointer it
 * allocates a new PyUpb_PbMsg, attaches the Python wrapper of the message's
 * definition, and returns the ref embedded in that object. String and array
 * refs are not implemented yet: any other pointer type aborts the process. */
static struct upb_mm_ref *NewPyRef(struct upb_mm_ref *fromref,
                                   union upb_mmptr p, upb_mm_ptrtype type)
{
  (void)fromref;  /* The referring object is irrelevant here. */
  switch(type) {
    case UPB_MM_MSG_REF: {
      PyUpb_PbMsg *pymsg =
          (void*)PyUpb_PbMsgType.tp_alloc(&PyUpb_PbMsgType, 0);
      /* get_or_create_msgdef() returns a reference that pymsg now holds. */
      pymsg->def = get_or_create_msgdef(p.msg->def);
      return &pymsg->ref;
    }
    case UPB_MM_STR_REF:  /* Not implemented. */
    case UPB_MM_ARR_REF:  /* Not implemented. */
    default:
      break;
  }
  /* Shouldn't happen. */
  assert(false);
  abort();
  return NULL;
}
/* Memory-manager descriptor handed to upb_mm_getref(); initialized with
 * NewPyRef as its first member, the callback that creates Python-backed refs. */
struct upb_mm pymm = {NewPyRef};
/* upb.pb.BoundFieldOp ********************************************************/
/* Converts the upb value that "p" points to, interpreted according to the
 * field type "type", into a Python object.
 *
 * Returns a new reference, or NULL with a Python exception set. For message
 * and group fields the returned object is the Python message that owns the
 * corresponding memory-manager ref (created on demand through pymm). */
static PyObject *upb_to_py(union upb_value_ptr p, upb_field_type_t type)
{
  switch(type) {
    default:
      PyErr_SetString(PyExc_RuntimeError, "internal: unexpected type");
      return NULL;
    case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_DOUBLE:
      return PyFloat_FromDouble(*p._double);
    case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_FLOAT:
      return PyFloat_FromDouble(*p._float);
    case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_INT64:
    case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_SINT64:
    case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_SFIXED64:
      return PyLong_FromLongLong(*p.int64);
    case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_UINT64:
    case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_FIXED64:
      return PyLong_FromUnsignedLongLong(*p.uint64);
    case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_SFIXED32:
    case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_SINT32:
    case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_INT32:
    case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_ENUM:
#if PY_MAJOR_VERSION >= 3
      return PyLong_FromLong(*p.int32);
#else
      return PyInt_FromLong(*p.int32);
#endif
    case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_FIXED32:
    case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_UINT32:
      /* Use the unsigned conversion: where long is 32 bits wide, values above
       * LONG_MAX would otherwise come out negative. */
      return PyLong_FromUnsignedLong(*p.uint32);
    case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_BOOL:
      RETURN_BOOL(*p._bool);
    case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_STRING:
    case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_BYTES:
      /* Py3k will distinguish between these two. */
      return PyString_FromStringAndSize((*p.str)->ptr, (*p.str)->byte_len);
    case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_GROUP:
    case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_MESSAGE: {
      union upb_mmptr mmptr = upb_mmptr_read(p, UPB_MM_MSG_REF);
      bool created;
      struct upb_mm_ref *ref =
          upb_mm_getref(mmptr, UPB_MM_MSG_REF, &pymm, &created);
      PyObject *obj = MMREF_TO_PYOBJ(ref);
      /* A freshly created object already carries the reference we return;
       * an existing one needs an extra reference for the caller. */
      if(!created) Py_INCREF(obj);
      return obj;
    }
  }
}
/* Converts "val" to a C long and checks that it lies in [lobound, hibound].
 *
 * On success stores true in *ok and returns the value. On failure stores
 * false in *ok, leaves a Python exception set and returns -1. */
static long convert_to_long(PyObject *val, long lobound, long hibound, bool *ok)
{
  *ok = false;
  PyObject *o = PyNumber_Int(val);
  if(!o) {
    PyErr_SetString(PyExc_OverflowError, "could not convert to long");
    return -1;
  }
  /* PyNumber_Int() may return a Python long when the value does not fit in a
   * Python int, so the unchecked PyInt_AS_LONG() macro is not safe here.
   * PyInt_AsLong() accepts both and reports overflow through an exception. */
  long longval = PyInt_AsLong(o);
  Py_DECREF(o);  /* PyNumber_Int() returned a new reference. */
  if(longval == -1 && PyErr_Occurred()) return -1;
  if(longval > hibound || longval < lobound) {
    PyErr_SetString(PyExc_OverflowError, "value outside type bounds");
    return -1;
  }
  *ok = true;
  return longval;
}
/* Stores the Python value "val" into the upb scalar (or string) slot that "p"
 * points to, converting it according to the field type "type".
 *
 * Nothing is returned: on failure a Python exception is left set and the slot
 * is not modified, so callers must check PyErr_Occurred() afterwards.
 * Temporary objects created during conversion are released before returning. */
static void set_upbscalarfield(union upb_value_ptr p, PyObject *val,
                               upb_field_type_t type)
{
  switch(type) {
    default:
      PyErr_SetString(PyExc_RuntimeError, "internal error");
      return;
    case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_DOUBLE: {
      PyObject *o = PyNumber_Float(val);
      if(!o) {
        PyErr_SetString(PyExc_ValueError, "could not convert to double");
        return;
      }
      *p._double = PyFloat_AS_DOUBLE(o);
      Py_DECREF(o);
      return;
    }
    case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_FLOAT: {
      PyObject *o = PyNumber_Float(val);
      if(!o) {
        PyErr_SetString(PyExc_ValueError, "could not convert to float");
        return;
      }
      *p._float = PyFloat_AS_DOUBLE(o);
      Py_DECREF(o);
      return;
    }
    case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_INT64:
    case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_SINT64:
    case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_SFIXED64: {
#if LONG_MAX >= INT64_MAX
      bool ok;
      long longval = convert_to_long(val, INT64_MIN, INT64_MAX, &ok);
      /* Store through the 64-bit member; writing through p.int32 would only
       * set half of the field. */
      if(ok) *p.int64 = longval;
      return;
#else
      PyObject *o = PyNumber_Long(val);
      if(!o) {
        PyErr_SetString(PyExc_ValueError, "could not convert to int64");
        return;
      }
      PY_LONG_LONG v = PyLong_AsLongLong(o);
      Py_DECREF(o);
      if(!PyErr_Occurred()) *p.int64 = v;
      return;
#endif
    }
    case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_UINT64:
    case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_FIXED64: {
      PyObject *o = PyNumber_Long(val);
      if(!o) {
        PyErr_SetString(PyExc_ValueError, "could not convert to uint64");
        return;
      }
      unsigned PY_LONG_LONG v = PyLong_AsUnsignedLongLong(o);
      Py_DECREF(o);
      /* Negative or too-large values raise; leave the field untouched then. */
      if(!PyErr_Occurred()) *p.uint64 = v;
      return;
    }
    case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_SFIXED32:
    case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_SINT32:
    case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_INT32:
    case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_ENUM: {
      bool ok;
      long longval = convert_to_long(val, INT32_MIN, INT32_MAX, &ok);
      if(ok) *p.int32 = longval;
      return;
    }
    case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_FIXED32:
    case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_UINT32: {
#if LONG_MAX >= UINT32_MAX
      bool ok;
      long longval = convert_to_long(val, 0, UINT32_MAX, &ok);
      /* Store through the unsigned member rather than p.int32. */
      if(ok) *p.uint32 = (uint32_t)longval;
      return;
#else
      PyObject *o = PyNumber_Long(val);
      if(!o) {
        PyErr_SetString(PyExc_ValueError, "could not convert to uint32");
        return;
      }
      unsigned long v = PyLong_AsUnsignedLong(o);
      Py_DECREF(o);
      if(!PyErr_Occurred()) *p.uint32 = v;
      return;
#endif
    }
    case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_BOOL:
      if(!PyBool_Check(val)) {
        PyErr_SetString(PyExc_ValueError, "should be true or false");
        return;
      }
      if(val == Py_True) *p._bool = true;
      else if(val == Py_False) *p._bool = false;
      else PyErr_SetString(PyExc_RuntimeError, "not true or false?");
      return;
    case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_STRING:
    case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_BYTES: {
      /* The PyString_GET_SIZE / PyString_AS_STRING macros do no type checking,
       * so reject anything that is not a str before using them. */
      if(!PyString_Check(val)) {
        PyErr_SetString(PyExc_ValueError, "should be a string");
        return;
      }
      size_t len = PyString_GET_SIZE(val);
      upb_string_resize(*p.str, len);
      memcpy((*p.str)->ptr, PyString_AS_STRING(val), len);
      return;
    }
  }
}
/* Placeholder for validating that the Python object "obj" is acceptable for a
 * field of type "type". Not implemented yet: every value is accepted. */
static bool check_py_type(PyObject *obj, upb_field_type_t type)
{
  /* TODO */
  (void)obj;
  (void)type;
  return true;
}
/* tp_call implementation for upb.pb.BoundFieldOp: performs the bound
 * operation (op->code) on the bound field (op->f) of the bound message.
 *
 * "args" holds the positional arguments of the Python call; keyword arguments
 * are ignored. Returns a new reference to the result (None for operations
 * without a result), or NULL with a Python exception set. */
PyObject* fieldop_call(PyObject *callable, PyObject *args, PyObject *kw)
{
  (void)kw;  /* Keyword arguments are not used by any operation. */
  PyUpb_PbBoundFieldOp *op = Check_BoundFieldOp(callable);
  PyUpb_PbMsg *pymsg = op->msg;
  struct upb_mm_ref *msgref = &(pymsg->ref);
  struct upb_msg *msg = pymsg->ref.p.msg;
  struct upb_msg_fielddef *f = op->f;
  union upb_value_ptr p = upb_msg_getptr(msg, f);
  switch(op->code) {
    case OP_HAS:
      /* obj.has_foo() */
      EXPECT_NO_ARGS;
      RETURN_BOOL(upb_msg_isset(msg, f));
    case OP_SET: {
      PyObject *val;
      if(upb_isarray(f)) {
        /* obj.set_repeatedfoo(i, val) */
        int i;
        if(!PyArg_ParseTuple(args, "iO", &i, &val)) return NULL;
        /* Reject negative indexes explicitly instead of relying on the
         * signed/unsigned comparison against the array length. */
        if(i < 0 || !upb_msg_isset(msg, f) || i >= (*p.arr)->len) {
          PyErr_SetString(PyExc_IndexError, "assignment to invalid index");
          return NULL;
        }
        p = upb_array_getelementptr(*p.arr, i, f->type);
      } else {
        /* obj.set_foo(val) */
        if(!PyArg_ParseTuple(args, "O", &val)) return NULL;
      }
      set_upbscalarfield(p, val, f->type);
      if(PyErr_Occurred()) return NULL;
      Py_RETURN_NONE;
    }
    case OP_MUTABLE: {
      /* obj.mutable_scalarmsg() */
      EXPECT_NO_ARGS;
      bool created;
      PyObject *obj = MMREF_TO_PYOBJ(upb_mm_getfieldref(msgref, f, &created));
      if(!created) Py_INCREF(obj);
      return obj;
    }

    /* For repeated fields. */
    case OP_SIZE: {
      /* obj.repeatedfoo_size() */
      EXPECT_NO_ARGS;
      long len =
          upb_msg_isset(msg, f) ? (*upb_msg_getptr(msg, f).arr)->len : 0;
      return PyInt_FromLong(len);
    }
    case OP_LIST:
      /* obj.repeatedfoo_list() -- not implemented yet. Fail loudly instead of
       * falling through into OP_ADD, which would append an element. */
      PyErr_SetString(PyExc_NotImplementedError,
                      "listing repeated fields is not implemented");
      return NULL;
    case OP_ADD: {
      /* Parse/Verify the args. */
      PyObject *val = NULL;
      if(upb_issubmsg(f)) {
        /* obj.add_submsgfoo() # returns the new submsg */
        EXPECT_NO_ARGS;
      } else {
        /* obj.add_scalarfoo(val) */
        if(!PyArg_ParseTuple(args, "O", &val)) return NULL;
        if(!check_py_type(val, f->type)) return NULL;
      }
      /* NOTE(review): this dereferences the array without checking
       * upb_msg_isset(); presumably the array object exists even for an unset
       * field -- confirm. */
      upb_arraylen_t len = (*p.arr)->len;
      /* Grow first and only then take the element pointer: resizing may move
       * the element storage, which would leave an earlier pointer dangling. */
      upb_array_resize(*p.arr, len + 1);
      union upb_value_ptr elem_p = upb_array_getelementptr(*p.arr, len, f->type);
      if(upb_issubmsg(f)) {
        /* string or submsg. */
        bool created;
        upb_mm_ptrtype type = upb_elem_ptrtype(f);
        union upb_mmptr mmptr = upb_mmptr_read(elem_p, type);
        struct upb_mm_ref *valref = upb_mm_getref(mmptr, type, &pymm, &created);
        assert(created);
        PyObject *obj = MMREF_TO_PYOBJ(valref);
        return obj;
      } else {
        set_upbscalarfield(elem_p, val, f->type);
        if(PyErr_Occurred()) return NULL;
        Py_RETURN_NONE;
      }
    }

    /* For all fields. */
    case OP_GET: {
      if(upb_isarray(f)) {
        /* obj.repeatedfoo(i) */
        int i;
        if(!PyArg_ParseTuple(args, "i", &i)) return NULL;
        if(i < 0 || !upb_msg_isset(msg, f) || i >= (*p.arr)->len) {
          PyErr_SetString(PyExc_IndexError, "get from invalid index");
          return NULL;
        }
        p = upb_array_getelementptr(*p.arr, i, f->type);
      } else {
        /* obj.foo() */
        EXPECT_NO_ARGS;
      }
      return upb_to_py(p, f->type);
    }
    case OP_CLEAR:
      /* obj.clear_foo() */
      EXPECT_NO_ARGS;
      upb_mm_msgclear(msgref, f);
      Py_RETURN_NONE;

    default:
      PyErr_SetString(PyExc_RuntimeError, "invalid bound field opcode.");
      return NULL;
  }
}
/* tp_dealloc for upb.pb.BoundFieldOp: gives up the reference this op holds on
 * its bound message object, then returns the op's own storage through its
 * type's tp_free slot. */
static void fieldop_dealloc(PyObject *obj)
{
  PyUpb_PbBoundFieldOp *self = (void*)obj;
  freefunc release_storage = obj->ob_type->tp_free;
  Py_DECREF(self->msg);
  release_storage(obj);
}
/* tp_repr for upb.pb.BoundFieldOp.  Produces
 *   <upb.pb.BoundFieldOp field='F', op=OP, msgtype='M'>
 * from the field descriptor name, the opcode name and the message descriptor
 * name.
 *
 * Returns a new string reference, or NULL with an exception set if any of the
 * intermediate strings could not be allocated. */
static PyObject *fieldop_repr(PyObject *obj)
{
  PyUpb_PbBoundFieldOp *op = Check_BoundFieldOp(obj);
  struct upb_string *name = op->msg->def->def->descriptor->name;
  struct google_protobuf_FieldDescriptorProto *fd =
      upb_msg_field_descriptor(op->f, op->msg->def->def);
  PyObject *ret = NULL;
  /* Need to get NULL-terminated copies of the names since PyString_FromFormat
   * doesn't support ptr+len. */
  PyObject *nameobj = PyString_FromStringAndSize(name->ptr, name->byte_len);
  PyObject *fieldnameobj =
      PyString_FromStringAndSize(fd->name->ptr, fd->name->byte_len);
  /* Either allocation can fail; only format when both copies exist so that
   * PyString_AS_STRING is never applied to NULL. */
  if(nameobj && fieldnameobj) {
    ret = PyString_FromFormat(
        "<upb.pb.BoundFieldOp field='%s', op=%s, msgtype='%s'>",
        PyString_AS_STRING(fieldnameobj),
        opcode_names[op->code], PyString_AS_STRING(nameobj));
  }
  Py_XDECREF(nameobj);
  Py_XDECREF(fieldnameobj);
  return ret;
}
/* Python type object for upb.pb.BoundFieldOp.  The only behavior slots filled
 * in are tp_dealloc (fieldop_dealloc), tp_repr (fieldop_repr) and tp_call
 * (fieldop_call).  tp_new is left empty, so instances are only produced from C
 * (PyUpb_NewPbBoundFieldOp allocates them through tp_alloc). */
static PyTypeObject PyUpb_PbBoundFieldOpType = {
PyObject_HEAD_INIT(NULL)
0, /* ob_size */
"upb.pb.BoundFieldOp", /* tp_name */
sizeof(PyUpb_PbBoundFieldOp), /* tp_basicsize */
0, /* tp_itemsize */
fieldop_dealloc, /* tp_dealloc */
0, /* tp_print */
0, /* tp_getattr */
0, /* tp_setattr */
0, /* tp_compare */
fieldop_repr, /* tp_repr */
0, /* tp_as_number */
0, /* tp_as_sequence */
0, /* tp_as_mapping */
0, /* tp_hash */
fieldop_call, /* tp_call */
0, /* tp_str */
0, /* tp_getattro */
0, /* tp_setattro */
0, /* tp_as_buffer */
Py_TPFLAGS_DEFAULT, /* tp_flags */
0, /* tp_doc */
0, /* tp_traverse */
0, /* tp_clear */
0, /* tp_richcompare */
0, /* tp_weaklistoffset */
0, /* tp_iter */
0, /* tp_iternext */
0, /* tp_methods */
0, /* tp_members */
0, /* tp_getset */
0, /* tp_base */
0, /* tp_dict */
0, /* tp_descr_get */
0, /* tp_descr_set */
0, /* tp_dictoffset */
0, /* tp_init */
0, /* tp_alloc */
0, /* Can't be created from Python. */ /* tp_new */
0, /* tp_free */
};
/* upb.pb.Message *************************************************************/
/* Makes the *enclosing function* raise TypeError and return NULL unless obj1
 * and obj2 (pointers to Python objects) have the same Python type.
 *
 * The macro now tests its own arguments; it previously ignored them and read
 * variables named "self" and "other" from the caller's scope.  There is no
 * trailing semicolon after while(0), so the call site supplies it and the
 * macro behaves like a single statement.
 *
 * NOTE(review): this compares ob_type only.  If every message shares one
 * Python type the test can never fail; comparing the message definitions may
 * be what is intended -- confirm. */
#define Check_SameProtoType(obj1, obj2) \
  do { \
    if(((PyObject*)(obj1))->ob_type != ((PyObject*)(obj2))->ob_type) { \
      PyErr_SetString(PyExc_TypeError, "other must be of the same type"); \
      return NULL; \
    } \
  } while(0)
/* Message.Clear(): passes the message's ref to upb_mm_msgclear_all() and
 * returns None.  "args" is not used. */
static PyObject *msg_clear(PyObject *self, PyObject *args)
{
  PyUpb_PbMsg *pymsg = Check_Message(self);
  (void)args;
  upb_mm_msgclear_all(&pymsg->ref);
  Py_RETURN_NONE;
}
//static PyObject *msg_encode(PyObject *self, PyObject *args)
//{
// (void)args;
// PyUpb_PbMsg *msg = Check_Message(self);
// struct upb_msgsizes *sizes = upb_msgsizes_new();
// struct upb_msg *upb_msg = msg->ref.p.msg;
// upb_msgsizes_read(sizes, upb_msg);
//
// size_t size = upb_msgsizes_totalsize(sizes);
// PyObject *str = PyString_FromStringAndSize(NULL, size);
// if(!str) return NULL;
// char *strbuf = PyString_AS_STRING(str);
//
// bool success = upb_msg_serialize_all(upb_msg, sizes, strbuf);
// upb_msgsizes_free(sizes);
// if(success) {
// return str;
// } else {
// /* TODO: better error than TypeError. */
// PyErr_SetString(PyExc_TypeError, "Error serializing protobuf.");
// return NULL;
// }
//}
/* Message.Equals(other): after the Check_Message and Check_SameProtoType
 * checks, returns the Python boolean for upb_msg_eql() applied to the two
 * underlying upb messages (third argument true). */
static PyObject *msg_equals(PyObject *self, PyObject *other)
{
  PyUpb_PbMsg *lhs = Check_Message(self);
  PyUpb_PbMsg *rhs = Check_Message(other);
  Check_SameProtoType(lhs, rhs);
  RETURN_BOOL(upb_msg_eql(lhs->ref.p.msg, rhs->ref.p.msg, true))
}
/* Message.IsInitialized(): returns the Python boolean for
 * upb_msg_all_required_fields_set() on the underlying upb message.  "args" is
 * not used. */
static PyObject *msg_isinitialized(PyObject *self, PyObject *args)
{
  PyUpb_PbMsg *pymsg = Check_Message(self);
  (void)args;
  RETURN_BOOL(upb_msg_all_required_fields_set(pymsg->ref.p.msg))
}
/* Shared implementation of Message.ParseFromString() and
 * Message.MergeFromString(): extracts a (data, length) pair from "args" using
 * BYTES_FORMAT and feeds it to upb_msg_parsestr() on the underlying message.
 *
 * Returns None on success.  Returns NULL with an exception set if the
 * arguments do not match BYTES_FORMAT or if upb reports a parse error
 * (TypeError, "error parsing protobuf"). */
static PyObject *msg_parsestr_from_args(PyObject *self, PyObject *args)
{
  PyUpb_PbMsg *msg = Check_Message(self);
  char *data;
  /* Assumes BYTES_FORMAT is a "#"-style format (pointer + length) -- confirm
   * against its definition.  For those formats PyArg_ParseTuple stores the
   * length as Py_ssize_t when PY_SSIZE_T_CLEAN is defined and as int
   * otherwise; a size_t receiver is wrong in the int case on LP64 targets
   * (only part of the variable would be written). */
#ifdef PY_SSIZE_T_CLEAN
  Py_ssize_t data_len;
#else
  int data_len;
#endif
  if(!PyArg_ParseTuple(args, BYTES_FORMAT, &data, &data_len))
    return NULL;
  if(upb_msg_parsestr(msg->ref.p.msg, data, (size_t)data_len) != UPB_STATUS_OK) {
    /* TODO: better error than TypeError. */
    PyErr_SetString(PyExc_TypeError, "error parsing protobuf");
    return NULL;
  }
  Py_RETURN_NONE;
}

/* Message.ParseFromString(data).
 *
 * NOTE(review): this does exactly what MergeFromString() does; nothing here
 * clears the message before parsing.  Confirm whether upb_msg_parsestr()
 * resets the message itself or whether a clear is missing. */
static PyObject *msg_parsefromstring(PyObject *self, PyObject *args)
{
  return msg_parsestr_from_args(self, args);
}

/* Message.MergeFromString(data): parses "data" into the existing message. */
static PyObject *msg_mergefromstring(PyObject *self, PyObject *args)
{
  return msg_parsestr_from_args(self, args);
}
/* Method table for upb.pb.Message, installed through the tp_methods slot of
 * PyUpb_PbMsgType.  Each live entry is {Python name, C function, calling
 * convention flags, docstring}.  Commented-out entries are TODO: they name
 * methods whose C functions are not wired into this table yet.  The
 * {NULL, NULL} entry terminates the table. */
static PyMethodDef msg_methods[] = {
{"Clear", msg_clear, METH_NOARGS,
"Erases all data from the ProtocolMessage, reseting fields to their defaults"
},
//{"CopyFrom", msg_copyfrom, METH_O,
// "Copies data from another ProtocolMessage."
//},
//{"Encode", msg_encode, METH_NOARGS,
// "Returns a string representing the ProtocolMessage."
//},
{"Equals", msg_equals, METH_O,
"Returns true if the given ProtocolMessage has the same type and value."
},
{"IsInitialized", msg_isinitialized, METH_NOARGS,
"Returns true iff all required fields have been set."
},
//{"Merge", msg_merge, METH_O,
// "Merges data from the given Decoder."
//},
//{"MergeFrom", msg_mergefrom, METH_O,
// "Merges data from another ProtocolMessage of the same type."
//},
{"MergeFromString", msg_mergefromstring, METH_VARARGS,
"Merges data from the given string. Raises an exception if this does not "
"result in the ProtocolMessage being initialized."
},
//{"Output", msg_output, METH_O,
// "Writes the ProtocolMessage to the given encoder."
//},
//{"OutputUnchecked", msg_output, METH_O,
// "Writes the ProtocolMessage to the given encoder, without checking "
// "initialization"
//},
//{"Parse", msg_parse, METH_O,
// "Parses data from the given Decoder."
//},
//{"ParseASCII", msg_parseascii, METH_VARARGS,
// "Parses a string generated by ToASCII. Raises a ValueError if unknown "
// "fields are encountered."
//},
//{"ParseASCIIIgnoreUnknown", msg_parseascii, METH_VARARGS,
// "Parses a string generated by ToASCII. Ignores unknown fields."
//},
{"ParseFromString", msg_parsefromstring, METH_VARARGS,
"Parses data from the given string. Raises an exception if this does not "
"result in the ProtocolMessage being initialized."
},
//{"ToASCII", msg_toascii, METH_NOARGS,
// "Returns the ProtocolMessage as a human-readable ASCII string."
//},
//{"ToCompactASCII", msg_tocompactascii, METH_NOARGS,
// "Returns the ProtocolMessage as a human-readable ASCII string that uses "
// "tag numbers instead of field names."
//},
//{"ToShortASCII", msg_toshortascii, METH_NOARGS,
// "Returns the ProtocolMessage as a human-readable ASCII string, all on one
// "line."
//},
//{"TryMerge", msg_trymerge, METH_O,
// "Merges data from the given decoder.
//}
{NULL, NULL}
};
/* Tests whether "str" begins with "prefix".  On a match, points "out_str" at
 * the bytes of "str" that follow the prefix (no copy is made) and returns
 * true.  Otherwise returns false and leaves "out_str" untouched. */
static bool starts_with(struct upb_string *str, struct upb_string *prefix,
                        struct upb_string *out_str)
{
  bool matched = str->byte_len >= prefix->byte_len &&
                 memcmp(str->ptr, prefix->ptr, prefix->byte_len) == 0;
  if(!matched) return false;
  out_str->ptr = str->ptr + prefix->byte_len;
  out_str->byte_len = str->byte_len - prefix->byte_len;
  return true;
}
/* Tests whether "str" ends with "suffix".  On a match, points "out_str" at
 * the bytes of "str" that precede the suffix (no copy is made) and returns
 * true.  Otherwise returns false and leaves "out_str" untouched. */
static bool ends_with(struct upb_string *str, struct upb_string *suffix,
                      struct upb_string *out_str)
{
  if(str->byte_len < suffix->byte_len) return false;
  /* Compare the last suffix->byte_len bytes of str against the suffix. */
  char *tail = str->ptr + str->byte_len - suffix->byte_len;
  if(memcmp(tail, suffix->ptr, suffix->byte_len) != 0) return false;
  out_str->ptr = str->ptr;
  out_str->byte_len = str->byte_len - suffix->byte_len;
  return true;
}
/* Creates a upb.pb.BoundFieldOp binding operation "code" to field "f" of the
 * message object "msgobj".
 *
 * Returns a new reference, holding its own reference on msgobj.  Returns NULL
 * *without* setting an exception when the operation makes no sense for this
 * kind of field (callers use that to fall back to ordinary attribute lookup),
 * and NULL *with* an exception set when the object cannot be allocated.
 *
 * NOTE(review): OP_SET is rejected for every repeated field here, although
 * fieldop_call() contains a branch for obj.set_repeatedfoo(i, val).  One of
 * the two is presumably out of date -- confirm which. */
PyObject *PyUpb_NewPbBoundFieldOp(PyUpb_PbMsg *msgobj, struct upb_msg_fielddef *f,
                                  PyUpb_PbBoundFieldOpCode code)
{
  /* Type check that this operation on a field of this type makes sense. */
  if(upb_isarray(f)) {
    switch(code) {
      case OP_HAS:
      case OP_SET:
      case OP_MUTABLE:
        return NULL;
      default: break;
    }
  } else {
    if(upb_issubmsg(f)) {
      switch(code) {
        case OP_SET:
        case OP_SIZE:
        case OP_LIST:
        case OP_ADD:
          return NULL;
        default: break;
      }
    } else {
      switch(code) {
        case OP_MUTABLE:
        case OP_SIZE:
        case OP_LIST:
        case OP_ADD:
          return NULL;
        default: break;
      }
    }
  }

  PyUpb_PbBoundFieldOp *op =
      (void*)PyUpb_PbBoundFieldOpType.tp_alloc(&PyUpb_PbBoundFieldOpType, 0);
  /* tp_alloc returns NULL on allocation failure; do not write through it. */
  if(!op) return NULL;
  op->msg = msgobj;
  op->f = f;
  op->code = code;
  Py_INCREF(op->msg);
  return (PyObject*)op;
}
/* tp_getattro for upb.pb.Message.
 *
 * Lower-case attribute names are first interpreted as field accessors
 * (has_X, set_X, mutable_X, X_size, X_list, add_X, clear_X, or plain X); when
 * the named field exists and the operation is valid for it, a BoundFieldOp is
 * returned.  If the accessor interpretation does not apply, the complete
 * attribute name is tried as a plain field name, so a field whose own name
 * happens to look like an accessor (e.g. a field called "has_x") stays
 * readable.  Everything else is resolved by PyObject_GenericGetAttr.
 *
 * Returns a new reference, or NULL with an exception set. */
PyObject* msg_getattro(PyObject *obj, PyObject *attr_name)
{
  /* Each protobuf field results in a set of four methods for a scalar or five
   * methods for an array.  To avoid putting 4f entries in our type dict, we
   * dynamically scan the method to see if it is of these forms, and if so,
   * look it up in the hash table that upb already keeps.
   *
   * If these repeated comparisons showed up as being a hot spot in a profile,
   * there are several ways this dispatch could be optimized. */
  static struct upb_string set = {.ptr = "set_", .byte_len = 4};
  static struct upb_string has = {.ptr = "has_", .byte_len = 4};
  static struct upb_string clear = {.ptr = "clear_", .byte_len = 6};
  static struct upb_string size = {.ptr = "_size", .byte_len = 5};
  static struct upb_string mutable = {.ptr = "mutable_", .byte_len = 8};
  static struct upb_string add = {.ptr = "add_", .byte_len = 4};
  static struct upb_string list = {.ptr = "_list", .byte_len = 5};

  struct upb_string str;
  Py_ssize_t len;
  /* On failure (e.g. attr_name is not a string) an exception is already set
   * and neither str.ptr nor len has been written, so stop here. */
  if(PyString_AsStringAndSize(attr_name, &str.ptr, &len) < 0) return NULL;
  if(len > UINT32_MAX) {
    PyErr_SetString(PyExc_TypeError,
                    "Wow, that's a long attribute name you've got there.");
    return NULL;
  }
  str.byte_len = (uint32_t)len;

  PyUpb_PbMsg *msgobj = Check_Message(obj);
  struct upb_msgdef *def = msgobj->ref.p.msg->def;

  /* This can be a field reference iff the first letter is lowercase, because
   * generic methods (eg. IsInitialized()) all start with uppercase.  The cast
   * keeps islower() defined for bytes >= 0x80 when char is signed. */
  if(islower((unsigned char)str.ptr[0])) {
    PyUpb_PbBoundFieldOpCode opcode;
    struct upb_string field_name;
    if(starts_with(&str, &has, &field_name))
      opcode = OP_HAS;
    else if(starts_with(&str, &set, &field_name))
      opcode = OP_SET;
    else if(starts_with(&str, &mutable, &field_name))
      opcode = OP_MUTABLE;
    else if(ends_with(&str, &size, &field_name))
      opcode = OP_SIZE;
    else if(ends_with(&str, &list, &field_name))
      opcode = OP_LIST;
    else if(starts_with(&str, &add, &field_name))
      opcode = OP_ADD;
    else if(starts_with(&str, &clear, &field_name))
      opcode = OP_CLEAR;
    else {
      /* Could be a plain field reference (eg. obj.field(i)). */
      opcode = OP_GET;
      field_name = str;
    }
    struct upb_msg_fielddef *f = upb_msg_fieldbyname(def, &field_name);
    if(f) {
      PyObject *op = PyUpb_NewPbBoundFieldOp(msgobj, f, opcode);
      if(op) return op;
      /* NULL without an exception means "operation not applicable". */
      if(PyErr_Occurred()) return NULL;
    }
    if(opcode != OP_GET) {
      /* The accessor reading did not pan out; try the whole name as a field. */
      f = upb_msg_fieldbyname(def, &str);
      if(f) {
        PyObject *op = PyUpb_NewPbBoundFieldOp(msgobj, f, OP_GET);
        if(op) return op;
        if(PyErr_Occurred()) return NULL;
      }
    }
  }

  /* Fall back on regular attribute lookup. */
  return PyObject_GenericGetAttr(obj, attr_name);
}
/* tp_dealloc for upb.pb.Message: releases the listed ref embedded in the
 * object via upb_mm_release(), drops the reference on the message's def
 * object, then returns the object's storage through its type's tp_free slot. */
static void msg_dealloc(PyObject *obj)
{
  PyUpb_PbMsg *self = (void*)obj;
  freefunc release_storage = obj->ob_type->tp_free;
  upb_mm_release(&self->ref);
  Py_DECREF(self->def);
  release_storage(obj);
}
/* Python type object for upb.pb.Message.  Attribute reads go through
 * msg_getattro, the generic methods come from msg_methods, and msg_dealloc
 * tears instances down.  tp_setattro and tp_new are left empty (see the
 * per-slot comments); tp_repr, tp_traverse and tp_clear are marked TODO. */
static PyTypeObject PyUpb_PbMsgType = {
PyObject_HEAD_INIT(NULL)
0, /* ob_size */
"upb.pb.Message", /* tp_name */
sizeof(PyUpb_PbMsg), /* tp_basicsize */
0, /* tp_itemsize */
msg_dealloc, /* tp_dealloc */
0, /* tp_print */
0, /* tp_getattr */
0, /* tp_setattr */
0, /* tp_compare */
0, /* tp_repr (TODO) */
0, /* tp_as_number */
0, /* tp_as_sequence */
0, /* tp_as_mapping */
0, /* tp_hash */
0, /* tp_call */
0, /* tp_str */
msg_getattro, /* tp_getattro */
0, /* Not allowed. */ /* tp_setattro */
0, /* tp_as_buffer */
Py_TPFLAGS_DEFAULT, /* tp_flags */
0, /* tp_doc */
0, /* tp_traverse (TODO) */
0, /* tp_clear (TODO) */
0, /* tp_richcompare */
0, /* tp_weaklistoffset */
0, /* tp_iter */
0, /* tp_iternext */
msg_methods, /* tp_methods */
0, /* tp_members */
0, /* tp_getset */
0, /* tp_base */
0, /* tp_dict */
0, /* tp_descr_get */
0, /* tp_descr_set */
0, /* tp_dictoffset */
0, /* tp_init */
0, /* tp_alloc */
0, /* Can't be created from Python. */ /* tp_new */
0, /* tp_free */
};
/* upb.pb.MessageCreator ******************************************************/
/* tp_call for upb.pb.MessageCreator: creates a new message of the creator's
 * message definition through upb_mm_newmsg_ref() and returns the Python
 * object associated with the resulting ref.
 *
 * Any positional or keyword arguments are ignored. */
static PyObject *creator_call(PyObject *callable, PyObject *args, PyObject *kw)
{
  /* Unused; referenced so -Wextra builds stay warning-free. */
  (void)args;
  (void)kw;
  PyUpb_PbMsgCreator *creator = Check_MsgCreator(callable);
  return MMREF_TO_PYOBJ(upb_mm_newmsg_ref(creator->def->def, &pymm));
}
/* tp_repr for upb.pb.MessageCreator.  Produces
 *   <upb.pb.MessageCreator for 'M'>
 * where M is the name from the message descriptor.
 *
 * Returns a new string reference, or NULL with an exception set if the
 * temporary name string could not be allocated. */
static PyObject *creator_repr(PyObject *obj)
{
  PyUpb_PbMsgCreator *creator = Check_MsgCreator(obj);
  struct upb_string *name = creator->def->def->descriptor->name;
  /* Need to get a NULL-terminated copy of name since PyString_FromFormat
   * doesn't support ptr+len. */
  PyObject *nameobj = PyString_FromStringAndSize(name->ptr, name->byte_len);
  /* Allocation can fail; PyString_AS_STRING must not be applied to NULL. */
  if(!nameobj) return NULL;
  PyObject *ret = PyString_FromFormat("<upb.pb.MessageCreator for '%s'>",
                                      PyString_AS_STRING(nameobj));
  Py_DECREF(nameobj);
  return ret;
}
/* tp_dealloc for upb.pb.MessageCreator: drops the reference held on the
 * message definition object, then returns the creator's storage through its
 * type's tp_free slot. */
static void creator_dealloc(PyObject *obj)
{
  PyUpb_PbMsgCreator *self = (void*)obj;
  freefunc release_storage = obj->ob_type->tp_free;
  Py_DECREF(self->def);
  release_storage(obj);
}
/* tp_new for upb.pb.MessageCreator: MessageCreator(msgdef).
 *
 * args must hold exactly one object of type PyUpb_MsgDefType; kwds is not
 * used.  Returns a new creator holding its own reference on the def, or NULL
 * with an exception set. */
static PyObject *creator_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
{
  (void)kwds;
  PyUpb_MsgDef *def;
  /* Validate the arguments before allocating, so a bad call cannot leave
   * behind an allocated creator whose def was never filled in. */
  if(!PyArg_ParseTuple(args, "O!", &PyUpb_MsgDefType, &def)) return NULL;
  PyUpb_PbMsgCreator *creator = (void*)type->tp_alloc(type, 0);
  if(!creator) return NULL;
  Py_INCREF(def);
  creator->def = def;
  return (PyObject*)creator;
}
/* Python type object for upb.pb.MessageCreator.  Unlike the other two types
 * in this file it has a tp_new (creator_new), so it can be instantiated from
 * Python.  Calling an instance runs creator_call; creator_repr and
 * creator_dealloc fill the tp_repr and tp_dealloc slots. */
static PyTypeObject PyUpb_PbMsgCreatorType = {
PyObject_HEAD_INIT(NULL)
0, /* ob_size */
"upb.pb.MessageCreator", /* tp_name */
sizeof(PyUpb_PbMsgCreator), /* tp_basicsize */
0, /* tp_itemsize */
creator_dealloc, /* tp_dealloc */
0, /* tp_print */
0, /* tp_getattr */
0, /* tp_setattr */
0, /* tp_compare */
creator_repr, /* tp_repr */
0, /* tp_as_number */
0, /* tp_as_sequence */
0, /* tp_as_mapping */
0, /* tp_hash */
creator_call, /* tp_call */
0, /* tp_str */
0, /* tp_getattro */
0, /* tp_setattro */
0, /* tp_as_buffer */
Py_TPFLAGS_DEFAULT, /* tp_flags */
0, /* tp_doc */
0, /* tp_traverse */
0, /* tp_clear */
0, /* tp_richcompare */
0, /* tp_weaklistoffset */
0, /* tp_iter */
0, /* tp_iternext */
0, /* tp_methods */
0, /* tp_members */
0, /* tp_getset */
0, /* tp_base */
0, /* tp_dict */
0, /* tp_descr_get */
0, /* tp_descr_set */
0, /* tp_dictoffset */
0, /* tp_init */
0, /* tp_alloc */
creator_new, /* tp_new */
0, /* tp_free */
};
/* upb.pb module **************************************************************/
/* Module-level function table handed to Py_InitModule() in initpb().  It
 * holds only the terminating entry: the module exposes types, not functions. */
static PyMethodDef methods[] = {
{NULL, NULL}
};
PyMODINIT_FUNC
initpb(void)
{
if(PyType_Ready(&PyUpb_PbBoundFieldOpType) < 0) return;
if(PyType_Ready(&PyUpb_PbMsgType) < 0) return;
if(PyType_Ready(&PyUpb_PbMsgCreatorType) < 0) return;
/* PyModule_AddObject steals a reference. These objects are statically
* allocated and must not be deleted, so we increment their refcount. */
Py_INCREF(&PyUpb_PbBoundFieldOpType);
Py_INCREF(&PyUpb_PbMsgType);
Py_INCREF(&PyUpb_PbMsgCreatorType);
PyObject *mod = Py_InitModule("upb.cext.pb", methods);
PyModule_AddObject(mod, "BoundFieldOp", (PyObject*)&PyUpb_PbBoundFieldOpType);
PyModule_AddObject(mod, "Message", (PyObject*)&PyUpb_PbMsgType);
PyModule_AddObject(mod, "MessageCreator", (PyObject*)&PyUpb_PbMsgCreatorType);
}

@ -1,11 +1,15 @@
from distutils.core import setup, Extension
setup(name='upb',
version='0.1',
ext_modules=[Extension('upb.definition', ['definition.c'],
include_dirs=['../../src', '../../descriptor'],
define_macros=[("UPB_USE_PTHREADS", 1),
("UPB_UNALIGNED_READS_OK", 1)],
library_dirs=['../../src'],
libraries=['upb_pic']
)],
ext_modules=[
Extension('upb.cext', ['definition.c', 'pb.c', 'cext.c'],
include_dirs=['../../src', '../../descriptor'],
define_macros=[("UPB_USE_PTHREADS", 1),
("UPB_UNALIGNED_READS_OK", 1)],
library_dirs=['../../src'],
libraries=['upb_pic'],
),
],
packages=['upb']
)

@ -12,7 +12,7 @@
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h> /* for size_t. */
#include "upb_string.h"
#include "descriptor_const.h"
#ifdef __cplusplus
extern "C" {
@ -23,6 +23,9 @@ extern "C" {
#define INLINE static inline
#endif
/* Larger / smaller of two values.  Each macro expands its selected argument
 * twice, so do not pass expressions with side effects (e.g. i++). */
#define UPB_MAX(x, y) ((x) > (y) ? (x) : (y))
#define UPB_MIN(x, y) ((x) < (y) ? (x) : (y))
/* The maximum that any submessages can be nested. Matches proto2's limit. */
#define UPB_MAX_NESTING 64
@ -55,12 +58,22 @@ typedef uint8_t upb_wire_type_t;
* errors, and we use it to represent exceptional circumstances. */
typedef uint8_t upb_field_type_t;
/* True iff "type" is one of the two submessage field types (GROUP, MESSAGE). */
INLINE bool upb_issubmsgtype(upb_field_type_t type) {
  switch(type) {
    case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_GROUP:
    case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_MESSAGE:
      return true;
    default:
      return false;
  }
}
/* True iff "type" is one of the two string-like field types (STRING, BYTES). */
INLINE bool upb_isstringtype(upb_field_type_t type) {
  switch(type) {
    case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_STRING:
    case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_BYTES:
      return true;
    default:
      return false;
  }
}
/* Information about a given value type (upb_field_type_t). */
struct upb_type_info {
uint8_t align;
uint8_t size;
upb_wire_type_t expected_wire_type;
struct upb_string ctype;
char *ctype;
};
/* Contains information for all .proto types. Indexed by upb_field_type_t. */
@ -90,6 +103,10 @@ struct upb_tag {
/* Polymorphic values of .proto types *****************************************/
struct upb_string;
struct upb_array;
struct upb_msg;
/* A single .proto value. The owner must have an out-of-band way of knowing
* the type, so that it knows which union member to use. */
union upb_value {
@ -121,15 +138,83 @@ union upb_value_ptr {
void *_void;
};
/* Produces a brace initializer for a upb_value_ptr that points at the
 * upb_value "val" (it takes the address of the value's _double member and
 * stores it as a void pointer).
 *
 * Unfortunately there is no way to define this so that it can be used as a
 * generic expression, a la:
 *   foo(UPB_VALUE_ADDROF(bar));
 * ...you have to use it as the initializer of a upb_value_ptr:
 *   union upb_value_ptr p = UPB_VALUE_ADDROF(bar);
 *   foo(p);
 *
 * "val" is parenthesized so that arguments which are themselves expressions
 * (e.g. *valp or vals[i]) select the member of the intended object. */
#define UPB_VALUE_ADDROF(val) {(void*)&(val)._double}
/* Converts upb_value_ptr -> upb_value by "dereferencing" the pointer. We need
* to know the field type to perform this operation, because we need to know
* how much memory to copy. */
INLINE union upb_value upb_deref(union upb_value_ptr ptr, upb_field_type_t t) {
INLINE union upb_value upb_value_read(union upb_value_ptr ptr,
upb_field_type_t ft) {
union upb_value val;
memcpy(&val, ptr._void, upb_type_info[t].size);
#define CASE(t, member_name) \
case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_ ## t: \
val.member_name = *ptr.member_name; \
break;
switch(ft) {
CASE(DOUBLE, _double)
CASE(FLOAT, _float)
CASE(INT32, int32)
CASE(INT64, int64)
CASE(UINT32, uint32)
CASE(UINT64, uint64)
CASE(SINT32, int32)
CASE(SINT64, int64)
CASE(FIXED32, uint32)
CASE(FIXED64, uint64)
CASE(SFIXED32, int32)
CASE(SFIXED64, int64)
CASE(BOOL, _bool)
CASE(ENUM, int32)
CASE(STRING, str)
CASE(BYTES, str)
CASE(MESSAGE, msg)
CASE(GROUP, msg)
default: break;
}
#undef CASE
return val;
}
/* Stores "val" through "ptr" (the counterpart of upb_value_read).  The field
 * type "ft" selects which union member is copied, so the caller must pass the
 * type the pointed-to storage really has.  Types not listed in the switch are
 * ignored: the default case does nothing. */
INLINE void upb_value_write(union upb_value_ptr ptr, union upb_value val,
upb_field_type_t ft) {
#define CASE(t, member_name) \
case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_ ## t: \
*ptr.member_name = val.member_name; \
break;
switch(ft) {
CASE(DOUBLE, _double)
CASE(FLOAT, _float)
CASE(INT32, int32)
CASE(INT64, int64)
CASE(UINT32, uint32)
CASE(UINT64, uint64)
CASE(SINT32, int32)
CASE(SINT64, int64)
CASE(FIXED32, uint32)
CASE(FIXED64, uint64)
CASE(SFIXED32, int32)
CASE(SFIXED64, int64)
CASE(BOOL, _bool)
CASE(ENUM, int32)
CASE(STRING, str)
CASE(BYTES, str)
CASE(MESSAGE, msg)
CASE(GROUP, msg)
default: break;
}
#undef CASE
}
union upb_symbol_ref {
struct upb_msgdef *msg;
struct upb_enum *_enum;

@ -23,7 +23,7 @@
#define UPB_ARRAY_H_
#include <stdlib.h>
#include "upb.h"
#include "upb_msg.h" /* Because we use upb_msg_fielddef */
#ifdef __cplusplus
extern "C" {
@ -31,41 +31,6 @@ extern "C" {
struct upb_string;
/* upb_arrays can be at most 2**32 elements long. */
typedef uint32_t upb_arraylen_t;
/* Represents an array (a repeated field) of any type. The interpretation of
* the data in the array depends on the type. */
struct upb_array {
union upb_value_ptr elements;
upb_arraylen_t len; /* Number of elements in "elements". */
upb_arraylen_t size; /* Memory we own (0 if by reference). */
void *gptr;
};
INLINE void upb_array_init(struct upb_array *arr)
{
arr->elements._void = NULL;
arr->len = 0;
arr->size = 0;
}
INLINE void upb_array_uninit(struct upb_array *arr)
{
if(arr->size) free(arr->elements._void);
}
INLINE struct upb_array *upb_array_new(void) {
struct upb_array *arr = malloc(sizeof(*arr));
upb_array_init(arr);
return arr;
}
INLINE void upb_array_free(struct upb_array *arr) {
upb_array_uninit(arr);
free(arr);
}
/* Returns a pointer to an array element. Does not perform a bounds check! */
INLINE union upb_value_ptr upb_array_getelementptr(
struct upb_array *arr, upb_arraylen_t n, upb_field_type_t type)
@ -75,10 +40,17 @@ INLINE union upb_value_ptr upb_array_getelementptr(
return ptr;
}
INLINE union upb_value upb_array_getelement(
struct upb_array *arr, upb_arraylen_t n, upb_field_type_t type)
/* Allocation/Deallocation/Resizing. ******************************************/
INLINE struct upb_array *upb_array_new(struct upb_msg_fielddef *f)
{
return upb_deref(upb_array_getelementptr(arr, n, type), type);
struct upb_array *arr = malloc(sizeof(*arr));
upb_mmhead_init(&arr->mmhead);
arr->elements._void = NULL;
arr->len = 0;
arr->size = 0;
arr->fielddef = f;
return arr;
}
INLINE uint32_t upb_round_up_to_pow2(uint32_t v)
@ -94,13 +66,10 @@ INLINE uint32_t upb_round_up_to_pow2(uint32_t v)
return v;
}
/* Resizes array to be "len" elements long and ensures we have write access
* to the array (reallocating if necessary). Returns true iff we were
* referencing memory for the array and dropped the reference. */
INLINE bool upb_array_resize(struct upb_array *arr, upb_arraylen_t newlen,
upb_field_type_t type)
/* Resizes array to be "len" elements long (reallocating if necessary). */
INLINE bool upb_array_resize(struct upb_array *arr, upb_arraylen_t newlen)
{
size_t type_size = upb_type_info[type].size;
size_t type_size = upb_type_info[arr->fielddef->type].size;
bool dropped = false;
bool ref = arr->size == 0; /* Ref'ing external memory. */
void *data = arr->elements._void;
@ -114,39 +83,11 @@ INLINE bool upb_array_resize(struct upb_array *arr, upb_arraylen_t newlen,
memcpy(arr->elements._void, data, UPB_MIN(arr->len, newlen) * type_size);
dropped = true;
}
/* TODO: fill with defaults. */
arr->len = newlen;
return dropped;
}
/* These are all overlays on upb_array, pointers between them can be cast. */
#define UPB_DEFINE_ARRAY_TYPE(name, type) \
struct name ## _array { \
struct upb_fielddef *f; \
void *gptr; \
type *elements; \
upb_arraylen_t len; \
upb_arraylen_t size; \
};
UPB_DEFINE_ARRAY_TYPE(upb_double, double)
UPB_DEFINE_ARRAY_TYPE(upb_float, float)
UPB_DEFINE_ARRAY_TYPE(upb_int32, int32_t)
UPB_DEFINE_ARRAY_TYPE(upb_int64, int64_t)
UPB_DEFINE_ARRAY_TYPE(upb_uint32, uint32_t)
UPB_DEFINE_ARRAY_TYPE(upb_uint64, uint64_t)
UPB_DEFINE_ARRAY_TYPE(upb_bool, bool)
UPB_DEFINE_ARRAY_TYPE(upb_string, struct upb_string*)
UPB_DEFINE_ARRAY_TYPE(upb_msg, void*)
/* Defines an array of a specific message type (an overlay of upb_array). */
#define UPB_MSG_ARRAY(msg_type) struct msg_type ## _array
#define UPB_DEFINE_MSG_ARRAY(msg_type) \
UPB_MSG_ARRAY(msg_type) { \
msg_type **elements; \
upb_arraylen_t len; \
upb_arraylen_t size; \
};
#ifdef __cplusplus
} /* extern "C" */
#endif

@ -10,6 +10,7 @@
#include "upb_context.h"
#include "upb_enum.h"
#include "upb_msg.h"
#include "upb_mm.h"
/* Search for a character in a string, in reverse. */
static int my_memrchr(char *data, char c, size_t len)
@ -66,7 +67,7 @@ static void free_context(struct upb_context *c)
{
free_symtab(&c->symtab);
for(size_t i = 0; i < c->fds_len; i++)
upb_msg_free((struct upb_msg*)c->fds[i]);
upb_msg_unref((struct upb_msg*)c->fds[i]);
free_symtab(&c->psymtab);
free(c->fds);
}
@ -77,9 +78,9 @@ void upb_context_unref(struct upb_context *c)
upb_rwlock_wrlock(&c->lock);
free_context(c);
upb_rwlock_unlock(&c->lock);
free(c);
upb_rwlock_destroy(&c->lock);
}
free(c);
upb_rwlock_destroy(&c->lock);
}
bool upb_context_lookup(struct upb_context *c, struct upb_string *symbol,
@ -325,10 +326,9 @@ bool upb_context_addfds(struct upb_context *c,
}
bool upb_context_parsefds(struct upb_context *c, struct upb_string *fds_str) {
google_protobuf_FileDescriptorSet *fds =
(google_protobuf_FileDescriptorSet*)upb_msg_parsenew(c->fds_msg, fds_str);
if(!fds) return false;
if(!upb_context_addfds(c, fds)) return false;
struct upb_msg *fds = upb_msg_new(c->fds_msg);
if(upb_msg_parsestr(fds, fds_str->ptr, fds_str->byte_len) != UPB_STATUS_OK) return false;
if(!upb_context_addfds(c, (google_protobuf_FileDescriptorSet*)fds)) return false;
{
/* We own fds now, need to keep a ref so we can free it later. */
@ -337,7 +337,7 @@ bool upb_context_parsefds(struct upb_context *c, struct upb_string *fds_str) {
c->fds_size *= 2;
c->fds = realloc(c->fds, c->fds_size);
}
c->fds[c->fds_len++] = fds;
c->fds[c->fds_len++] = (google_protobuf_FileDescriptorSet*)fds;
upb_rwlock_unlock(&c->lock);
}
return true;

@ -33,15 +33,6 @@ struct upb_enum_iton_entry {
struct upb_string *string;
};
INLINE void upb_enum_ref(struct upb_enum *e) {
if(upb_atomic_ref(&e->refcount)) upb_context_ref(e->context);
}
INLINE void upb_enum_unref(struct upb_enum *e) {
if(upb_atomic_unref(&e->refcount)) upb_context_unref(e->context);
}
/* Initializes and frees an enum, respectively. Caller retains ownership of
* ed, but it must outlive e. */
void upb_enum_init(struct upb_enum *e,

@ -15,6 +15,7 @@
#include "upb_array.h"
#include "upb_context.h"
#include "upb_enum.h"
#include "upb_mm.h"
#include "upb_msg.h"
#include "upb_parse.h"
#include "upb_serialize.h"

@ -0,0 +1,208 @@
/*
* upb - a minimalist implementation of protocol buffers.
*
* Copyright (c) 2009 Joshua Haberman. See LICENSE for details.
*/
#include "upb_mm.h"
#include "upb_string.h"
#include "upb_array.h"
#include "upb_msg.h"
void upb_msg_destroy(struct upb_msg *msg) {
uint32_t i;
for(i = 0; i < msg->def->num_fields; i++) {
struct upb_msg_fielddef *f = &msg->def->fields[i];
if(!upb_msg_isset(msg, f) || !upb_field_ismm(f)) continue;
upb_mm_ptrtype type = upb_field_ptrtype(f);
union upb_mmptr mmptr = upb_mmptr_read(upb_msg_getptr(msg, f), type);
upb_mm_unref(mmptr, type);
}
free(msg);
}
void upb_array_destroy(struct upb_array *arr)
{
if(upb_elem_ismm(arr->fielddef)) {
upb_arraylen_t i;
/* Unref elements. */
for(i = 0; i < arr->len; i++) {
union upb_value_ptr p = upb_array_getelementptr(arr, i, arr->fielddef->type);
upb_mm_ptrtype type = upb_elem_ptrtype(arr->fielddef);
union upb_mmptr mmptr = upb_mmptr_read(p, type);
upb_mm_unref(mmptr, type);
}
}
if(arr->size != 0) free(arr->elements._void);
free(arr);
}
/* Allocates a new, empty dynamic object of the kind named by "type" for field
 * "f": a message of the field's message definition, a string, or an array for
 * that field.  Returns it in the matching member of the result; any other
 * "type" is a programming error (asserts) and yields an all-NULL pointer. */
static union upb_mmptr upb_mm_newptr(upb_mm_ptrtype type,
                                     struct upb_msg_fielddef *f)
{
  union upb_mmptr p = {NULL};
  /* Every case needs its own break: without them each call fell through all
   * later cases, allocating (and leaking) extra objects before reaching
   * assert(false). */
  switch(type) {
    case UPB_MM_MSG_REF: p.msg = upb_msg_new(f->ref.msg); break;
    case UPB_MM_STR_REF: p.str = upb_string_new(); break;
    case UPB_MM_ARR_REF: p.arr = upb_array_new(f); break;
    default: assert(false); break;
  }
  return p;
}
/* Returns the listed ref that memory manager "mm" holds on the object "p" (of
 * kind "type"), creating one through mm->newref_cb if none exists.
 *
 * fromref: passed through to newref_cb; callers pass the parent ref when the
 *          lookup comes from a field or element access and NULL otherwise.
 * created: set to true iff a new ref was created by this call.
 *
 * The per-object list is kept ordered by the address of the owning mm, which
 * lets the search stop at the first entry whose mm compares greater. */
static struct upb_mm_ref *find_or_create_ref(struct upb_mm_ref *fromref,
                                             struct upb_mm *mm,
                                             union upb_mmptr p, upb_mm_ptrtype type,
                                             bool *created)
{
  struct upb_mmhead *head = upb_mmhead_addr(p, type);
  struct upb_mm_ref **ref = &head->refs;
  while(*ref && (*ref)->mm <= mm) {
    if((*ref)->mm == mm) {
      /* Report the outcome *before* returning; this assignment used to sit
       * after the return statement and therefore never ran. */
      *created = false;
      return *ref;
    }
    ref = &((*ref)->next);
  }
  *created = true;
  struct upb_mm_ref *newref = mm->newref_cb(fromref, p, type);
  newref->p = p;
  newref->type = type;
  newref->mm = mm;
  /* Splice the new ref in at the position where the search stopped. */
  newref->next = *ref;
  *ref = newref;
  return newref;
}
/* Public wrapper around find_or_create_ref() for lookups that do not come
 * from a parent ref: NULL is passed as "fromref". */
struct upb_mm_ref *upb_mm_getref(union upb_mmptr p, upb_mm_ptrtype type,
                                 struct upb_mm *mm, bool *created)
{
  struct upb_mm_ref *found = find_or_create_ref(NULL, mm, p, type, created);
  return found;
}
/* Creates a new message of definition "def" and returns a listed ref to it
 * for memory manager "mm".  The counted reference is dropped once the listed
 * ref exists. */
struct upb_mm_ref *upb_mm_newmsg_ref(struct upb_msgdef *def, struct upb_mm *mm)
{
  bool created;
  union upb_mmptr mmptr = {.msg = upb_msg_new(def)};
  struct upb_mm_ref *ref =
      find_or_create_ref(NULL, mm, mmptr, UPB_MM_MSG_REF, &created);
  upb_mm_unref(mmptr, UPB_MM_MSG_REF);  /* Shouldn't have any counted refs. */
  assert(created);
  return ref;
}
struct upb_mm_ref *upb_mm_getfieldref(struct upb_mm_ref *msgref,
struct upb_msg_fielddef *f,
bool *refcreated)
{
assert(upb_field_ismm(f));
upb_mm_ptrtype ptrtype = upb_field_ptrtype(f);
struct upb_msg *msg = msgref->p.msg;
union upb_mmptr val;
union upb_value_ptr p = upb_msg_getptr(msg, f);
/* Create the upb value if it doesn't already exist. */
if(!upb_msg_isset(msg, f)) {
upb_msg_set(msg, f);
val = upb_mm_newptr(ptrtype, f);
upb_mmptr_write(p, val, ptrtype);
} else {
val = upb_mmptr_read(p, ptrtype);
}
return find_or_create_ref(msgref, msgref->mm, val, ptrtype, refcreated);
}
/* Returns the listed ref (for arrref's memory manager) to element "i" of the
 * array behind "arrref".  The array's elements must be memory-managed and "i"
 * must be below the array length (both asserted).  "refcreated" reports
 * whether a new ref had to be created. */
struct upb_mm_ref *upb_mm_getelemref(struct upb_mm_ref *arrref, upb_arraylen_t i,
                                     bool *refcreated)
{
  struct upb_array *arr = arrref->p.arr;
  struct upb_msg_fielddef *f = arr->fielddef;
  assert(upb_elem_ismm(f));
  assert(i < arr->len);
  upb_mm_ptrtype elem_type = upb_elem_ptrtype(f);
  union upb_value_ptr slot = upb_array_getelementptr(arr, i, f->type);
  union upb_mmptr elem = upb_mmptr_read(slot, elem_type);
  return find_or_create_ref(arrref, arrref->mm, elem, elem_type, refcreated);
}
void upb_mm_release(struct upb_mm_ref *ref)
{
struct upb_mm_ref **ref_head = (void*)ref->p.msg;
struct upb_mm_ref **ref_elem = ref_head;
struct upb_mm *mm = ref->mm;
while(true) {
assert(*ref_elem); /* Client asserts r->mm is in the list. */
if((*ref_elem)->mm == mm) {
*ref_elem = (*ref_elem)->next; /* Remove from the list. */
break;
}
}
if(upb_mmhead_norefs(&ref->p.msg->mmhead)) {
/* Destroy the dynamic object. */
switch(ref->type) {
case UPB_MM_MSG_REF:
upb_msg_destroy(ref->p.msg);
break;
case UPB_MM_ARR_REF:
upb_array_destroy(ref->p.arr);
break;
case UPB_MM_STR_REF:
upb_string_destroy(ref->p.str);
break;
default: assert(false); break;
}
}
}
void upb_mm_msg_set(struct upb_mm_ref *from_msg_ref, struct upb_mm_ref *to_ref,
struct upb_msg_fielddef *f)
{
assert(upb_field_ismm(f));
union upb_mmptr fromval = from_msg_ref->p;
union upb_mmptr toval = to_ref->p;
union upb_value_ptr field_p = upb_msg_getptr(fromval.msg, f);
upb_mm_ptrtype type = upb_field_ptrtype(f);
if(upb_msg_isset(fromval.msg, f)) {
union upb_mmptr existingval = upb_mmptr_read(field_p, type);
if(existingval.msg == toval.msg)
return; /* Setting to its existing value, do nothing. */
upb_mm_unref(existingval, type);
}
upb_msg_set(fromval.msg, f);
upb_mmptr_write(field_p, toval, type);
upb_mm_ref(toval, type);
}
void upb_mm_msgclear(struct upb_mm_ref *from_msg_ref, struct upb_msg_fielddef *f)
{
assert(upb_field_ismm(f));
union upb_mmptr fromval = from_msg_ref->p;
upb_mm_ptrtype type = upb_field_ptrtype(f);
if(upb_msg_isset(fromval.msg, f)) {
union upb_value_ptr field_p = upb_msg_getptr(fromval.msg, f);
union upb_mmptr existingval = upb_mmptr_read(field_p, type);
upb_msg_unset(fromval.msg, f);
upb_mm_unref(existingval, type);
}
}
void upb_mm_msgclear_all(struct upb_mm_ref *from)
{
struct upb_msgdef *def = from->p.msg->def;
for(uint32_t i = 0; i < def->num_fields; i++) {
struct upb_msg_fielddef *f = &def->fields[i];
if(!upb_field_ismm(f)) continue;
upb_mm_msgclear(from, f);
}
}
/* Placeholder: currently does nothing.  Every parameter is only referenced to
 * silence unused-parameter warnings.
 * NOTE(review): presumably meant to be the array-element counterpart of
 * upb_mm_msg_set() -- confirm before relying on it. */
void upb_mm_arr_set(struct upb_mm_ref *from, struct upb_mm_ref *to,
upb_arraylen_t i, upb_field_type_t type)
{
(void)from;
(void)to;
(void)i;
(void)type;
}

@ -0,0 +1,168 @@
/*
* upb - a minimalist implementation of protocol buffers.
*
* Copyright (c) 2009 Joshua Haberman. See LICENSE for details.
*
* A parsed protobuf is represented in memory as a tree. The three kinds of
* nodes in this tree are messages, arrays, and strings. This file defines
* a memory-management scheme for making sure that these nodes are collected
* at the right times.
*
* The basic strategy is reference-counting, but with a twist. Since any
* dynamic language that wishes to reference these nodes will need its own,
* language-specific structure, we provide two different kinds of references:
*
* - counted references. these are references that are tracked with only a
* reference count. They are used for two separate purposes:
* 1. for references within the tree, from one node to another.
* 2. for external references into the tree, where the referer does not need
* a separate message structure.
* - listed references. these are references that have their own separate
* data record. these separate records are kept in a linked list.
*/
#ifndef UPB_MM_H_
#define UPB_MM_H_
#include "upb.h"
#include "upb_string.h"
#include "upb_array.h"
#include "upb_msg.h"
#ifdef __cplusplus
extern "C" {
#endif
/* Structure definitions. *****************************************************/
typedef int16_t upb_mm_id;
struct upb_msg;
struct upb_array;
struct upb_string;
struct upb_msg_fielddef;
struct upb_mm_ref;
/* Info about a mm.  Each listed reference (struct upb_mm_ref) points back at
 * the upb_mm it belongs to. */
struct upb_mm {
/* Callback returning a struct upb_mm_ref for the value p of kind 'type'.
 * fromref is set iff this call is from getfieldref or getelemref. */
struct upb_mm_ref *(*newref_cb)(struct upb_mm_ref *fromref,
union upb_mmptr p, upb_mm_ptrtype type);
};
/* A listed reference: a separate record describing one reference to a msg,
 * array, or string.  Records for the same value are chained through 'next'. */
struct upb_mm_ref {
union upb_mmptr p; /* The referenced value; interpret according to 'type'. */
/* This is slightly wasteful, because the mm-specific ref will probably also
* contain the information about what kind of ref this is, in a different
* form. */
upb_mm_ptrtype type;
struct upb_mm *mm; /* TODO: There are ways to shrink this. */
struct upb_mm_ref *next; /* Linked list for refs to the same value. */
};
/* Functions for working with listed references. *****************************/
/* Create a new top-level message and create a single ref for it. */
struct upb_mm_ref *upb_mm_newmsg_ref(struct upb_msgdef *def, struct upb_mm *mm);
/* Given a pointer to an existing msg, array, or string, find a ref for this
* mm, creating one if necessary. 'created' indicates whether the returned
* reference was just created. */
struct upb_mm_ref *upb_mm_getref(union upb_mmptr p, upb_mm_ptrtype type,
struct upb_mm *mm, bool *created);
/* f must be ismm == true. The msg field may or may not be set (will be
* created if it doesn't exist). If a ref already exists for the given field,
* returns it, otherwise calls the mm's newref_cb callback to create one.
* '*refcreated' indicates whether a new reference was created. */
struct upb_mm_ref *upb_mm_getfieldref(struct upb_mm_ref *msgref,
struct upb_msg_fielddef *f,
bool *refcreated);
/* i must be < the array's len. */
struct upb_mm_ref *upb_mm_getelemref(struct upb_mm_ref *arrref, upb_arraylen_t i,
bool *refcreated);
/* Remove this ref from the list for this msg.
* If that was the last reference, deletes the msg itself. */
void upb_mm_release(struct upb_mm_ref *ref);
/* Points field f of 'msg' at the value referenced by 'to'.
 * NOTE(review): only the tail of the definition is visible in this review; it
 * marks the field set, stores the new pointer and takes a counted reference on
 * it -- confirm the handling of a previously-set value in upb_mm.c. */
void upb_mm_msgset(struct upb_mm_ref *msg, struct upb_mm_ref *to,
struct upb_msg_fielddef *f);
/* If field f of 'from' is set, unsets it and drops the counted reference on
 * the old value.  f must satisfy upb_field_ismm(f). */
void upb_mm_msgclear(struct upb_mm_ref *from, struct upb_msg_fielddef *f);
/* Calls upb_mm_msgclear for every memory-managed field of 'from'. */
void upb_mm_msgclear_all(struct upb_mm_ref *from);
/* NOTE(review): upb_mm.c defines "upb_mm_arr_set(from, to, upb_arraylen_t i,
 * upb_field_type_t type)" (currently an empty stub); this prototype matches it
 * in neither name nor parameters, so nothing visible defines upb_mm_arrset. */
void upb_mm_arrset(struct upb_mm_ref *from, struct upb_mm_ref *to, uint32_t i);
/* Defined iff upb_field_ismm(f). */
INLINE upb_mm_ptrtype upb_field_ptrtype(struct upb_msg_fielddef *f);
/* Defined iff upb_elem_ismm(f). */
INLINE upb_mm_ptrtype upb_elem_ptrtype(struct upb_msg_fielddef *f);
INLINE void upb_mm_unref(union upb_mmptr p, upb_mm_ptrtype type);
/* These methods are all a bit silly, since all branches of the case compile
* to the same thing (which the compiler will recognize), but we do it this way
* for full union correctness. */
/* Loads the msg/str/arr pointer stored at location p, selecting the union
 * members that correspond to t.  An unrecognized t trips an assertion and is
 * otherwise treated as a message pointer. */
INLINE union upb_mmptr upb_mmptr_read(union upb_value_ptr p, upb_mm_ptrtype t)
{
  union upb_mmptr result;
  if(t == UPB_MM_STR_REF) {
    result.str = *p.str;
  } else if(t == UPB_MM_ARR_REF) {
    result.arr = *p.arr;
  } else {
    assert(t == UPB_MM_MSG_REF);  /* Anything else shouldn't happen. */
    result.msg = *p.msg;
  }
  return result;
}
/* Stores the msg/str/arr pointer held in val into location p, selecting the
 * union members that correspond to t.  This is the mirror image of
 * upb_mmptr_read: an unrecognized t trips an assertion and is otherwise
 * treated as a message pointer. */
INLINE void upb_mmptr_write(union upb_value_ptr p, union upb_mmptr val,
                            upb_mm_ptrtype t)
{
  switch(t) {
    case UPB_MM_MSG_REF: *p.msg = val.msg; break;
    case UPB_MM_STR_REF: *p.str = val.str; break;
    case UPB_MM_ARR_REF: *p.arr = val.arr; break;
    /* Shouldn't happen.  The fallback must write through p like every other
     * branch; the previous "val.msg = *p.msg" only modified the local copy
     * of val and so silently did nothing. */
    default: assert(false); *p.msg = val.msg; break;
  }
}
void upb_array_destroy(struct upb_array *arr);
void upb_msg_destroy(struct upb_msg *msg);
/* Drops one counted reference on msg and destroys it once upb_mmhead_unref
 * reports that no references remain. */
INLINE void upb_msg_unref(struct upb_msg *msg) {
  bool unreferenced = upb_mmhead_unref(&msg->mmhead);
  if(unreferenced) upb_msg_destroy(msg);
}
/* Drops one counted reference on arr and destroys it once upb_mmhead_unref
 * reports that no references remain. */
INLINE void upb_array_unref(struct upb_array *arr) {
  bool unreferenced = upb_mmhead_unref(&arr->mmhead);
  if(unreferenced) upb_array_destroy(arr);
}
/* Drops one counted reference on the value p, dispatching on 'type' to the
 * matching msg/string/array unref function (which destroys the value when no
 * references remain). */
INLINE void upb_mm_unref(union upb_mmptr p, upb_mm_ptrtype type)
{
  switch(type) {
    case UPB_MM_MSG_REF: upb_msg_unref(p.msg); break;
    case UPB_MM_STR_REF: upb_string_unref(p.str); break;
    case UPB_MM_ARR_REF: upb_array_unref(p.arr); break;
    /* Shouldn't happen; flagged in debug builds like the other ptrtype
     * switches in this file.  Nothing is released for an unknown type. */
    default: assert(false); break;
  }
}
static struct upb_mmhead *upb_mmhead_addr(union upb_mmptr p, upb_mm_ptrtype t)
{
switch(t) {
case UPB_MM_MSG_REF: return &((*p.msg).mmhead);
case UPB_MM_STR_REF: return &((*p.str).mmhead);
case UPB_MM_ARR_REF: return &((*p.arr).mmhead);
default: assert(false); return &((*p.msg).mmhead); /* Shouldn't happen. */
}
}
/* Takes one additional counted reference on the value p of kind 'type'. */
INLINE void upb_mm_ref(union upb_mmptr p, upb_mm_ptrtype type)
{
  struct upb_mmhead *head = upb_mmhead_addr(p, type);
  upb_mmhead_ref(head);
}
#ifdef __cplusplus
} /* extern "C" */
#endif
#endif /* UPB_MM_H_ */

@ -6,8 +6,9 @@
#include <inttypes.h>
#include <stdlib.h>
#include "descriptor.h"
#include "upb_msg.h"
#include "descriptor.h"
#include "upb_mm.h"
#include "upb_parse.h"
#include "upb_serialize.h"
#include "upb_text.h"
@ -47,7 +48,6 @@ bool upb_msgdef_init(struct upb_msgdef *m, google_protobuf_DescriptorProto *d,
/* TODO: more complete validation. */
if(!d->set_flags.has.field) return false;
upb_atomic_refcount_init(&m->refcount, 0);
upb_inttable_init(&m->fields_by_num, d->field->len,
sizeof(struct upb_fieldsbynum_entry));
upb_strtable_init(&m->fields_by_name, d->field->len,
@ -123,113 +123,43 @@ void upb_msgdef_setref(struct upb_msgdef *m, struct upb_msg_fielddef *f,
str_e->f.ref = ref;
}
/* Simple, one-shot parsing ***************************************************/
static void *upb_msg_new(struct upb_msgdef *md)
{
size_t size = md->size + (sizeof(void*) * 2);
struct upb_msg *msg = malloc(size);
memset(msg, 0, size);
msg->def = md;
return msg;
}
/* Parsing. ******************************************************************/
/* Allocation callbacks. */
struct upb_array *getarray_cb(
void *from_gptr, struct upb_array *existingval, struct upb_msg_fielddef *f)
{
(void)from_gptr;
(void)existingval; /* Don't care -- always zero. */
(void)f;
return upb_array_new();
}
struct upb_msg_parser_frame {
struct upb_msg *msg;
};
static struct upb_string *getstring_cb(
void *from_gptr, struct upb_string *existingval, struct upb_msg_fielddef *f,
bool byref)
{
(void)from_gptr;
(void)existingval; /* Don't care -- always zero. */
(void)f;
(void)byref;
return upb_strnew();
}
struct upb_msg_parser {
struct upb_stream_parser s;
bool merge;
bool byref;
struct upb_msg_parser_frame stack[UPB_MAX_NESTING], *top;
};
static struct upb_msg *getmsg_cb(
void *from_gptr, struct upb_msg *existingval, struct upb_msg_fielddef *f)
{
(void)from_gptr;
(void)existingval; /* Don't care -- always zero. */
return upb_msg_new(f->ref.msg);
}
void upb_msg_parser_reset(struct upb_msg_parser *p,
struct upb_msg *msg, bool byref);
struct upb_msg *upb_msg_parsenew(struct upb_msgdef *md, struct upb_string *s)
{
struct upb_msg_parser mp;
struct upb_msg *msg = upb_msg_new(md);
upb_msg_parser_reset(&mp, msg, false);
mp.getarray_cb = getarray_cb;
mp.getstring_cb = getstring_cb;
mp.getmsg_cb = getmsg_cb;
size_t read;
upb_status_t status = upb_msg_parser_parse(&mp, s->ptr, s->byte_len, &read);
if(status == UPB_STATUS_OK && read == s->byte_len) {
return msg;
} else {
upb_msg_free(msg);
return NULL;
}
}
/* Parses protocol buffer data out of data which has length of len. The data
* need not be a complete protocol buffer. The number of bytes parsed is
* returned in *read, and the next call to upb_msg_parse must supply data that
* is *read bytes past data in the logical stream. */
upb_status_t upb_msg_parser_parse(struct upb_msg_parser *p,
void *data, size_t len, size_t *read);
/* For simple, one-shot parsing we assume that a dynamic field exists (and
* needs to be freed) iff its set bit is set. */
static void free_value(union upb_value_ptr p, struct upb_msg_fielddef *f)
{
if(upb_isstring(f)) {
free((*p.str)->ptr);
free(*p.str);
} else if(upb_issubmsg(f)) {
upb_msg_free(*p.msg);
}
}
void upb_msg_free(struct upb_msg *msg)
{
if(!msg) return; /* A very free-like thing to do. */
struct upb_msgdef *m = msg->def;
for(unsigned int i = 0; i < m->num_fields; i++) {
struct upb_msg_fielddef *f = &m->fields[i];
if(!upb_msg_isset(msg, f)) continue;
union upb_value_ptr p = upb_msg_getptr(msg, f);
if(upb_isarray(f)) {
assert(*p.arr);
for(upb_arraylen_t j = 0; j < (*p.arr)->len; j++)
free_value(upb_array_getelementptr(*p.arr, j, f->type), f);
upb_array_free(*p.arr);
} else {
free_value(p, f);
}
}
free(msg);
}
/* Parsing. ******************************************************************/
/* Helper function that returns a pointer to where the next value for field "f"
* should be stored, taking into account whether f is an array that may need to
* be allocated or resized. */
static union upb_value_ptr get_value_ptr(struct upb_msg *msg,
struct upb_msg_fielddef *f,
void **gptr,
upb_msg_getandref_array_cb_t getarray_cb)
struct upb_msg_fielddef *f)
{
union upb_value_ptr p = upb_msg_getptr(msg, f);
if(upb_isarray(f)) {
bool isset = upb_msg_isset(msg, f);
size_t len = isset ? (*p.arr)->len : 0;
if(!isset) *p.arr = getarray_cb(*gptr, *p.arr, f);
upb_array_resize(*p.arr, len+1, f->type);
*gptr = (*p.arr)->gptr;
if(!isset) *p.arr = upb_array_new(f);
upb_array_resize(*p.arr, len+1);
p = upb_array_getelementptr(*p.arr, len, f->type);
}
return p;
@ -255,8 +185,7 @@ static upb_status_t value_cb(void *udata, uint8_t *buf, uint8_t *end,
struct upb_msg_parser *mp = udata;
struct upb_msg_fielddef *f = user_field_desc;
struct upb_msg *msg = mp->top->msg;
void *gptr = upb_msg_gptr(msg);
union upb_value_ptr p = get_value_ptr(msg, f, &gptr, mp->getarray_cb);
union upb_value_ptr p = get_value_ptr(msg, f);
upb_msg_set(msg, f);
UPB_CHECK(upb_parse_value(buf, end, f->type, p, outbuf));
return UPB_STATUS_OK;
@ -269,21 +198,20 @@ static void str_cb(void *udata, uint8_t *str,
struct upb_msg_parser *mp = udata;
struct upb_msg_fielddef *f = udesc;
struct upb_msg *msg = mp->top->msg;
void *gptr = upb_msg_gptr(msg);
union upb_value_ptr p = get_value_ptr(msg, f, &gptr, mp->getarray_cb);
union upb_value_ptr p = get_value_ptr(msg, f);
upb_msg_set(msg, f);
if(avail_len != total_len) abort(); /* TODO: support streaming. */
bool byref = avail_len == total_len && mp->byref;
*p.str = mp->getstring_cb(gptr, *p.str, f, byref);
if(byref) {
upb_strdrop(*p.str);
(*p.str)->ptr = (char*)str;
(*p.str)->byte_len = avail_len;
} else {
upb_stralloc(*p.str, total_len);
//bool byref = avail_len == total_len && mp->byref;
*p.str = upb_string_new();
//if(byref) {
// upb_strdrop(*p.str);
// (*p.str)->ptr = (char*)str;
// (*p.str)->byte_len = avail_len;
//} else {
upb_string_resize(*p.str, total_len);
memcpy((*p.str)->ptr, str, avail_len);
(*p.str)->byte_len = avail_len;
}
//}
}
static void submsg_start_cb(void *udata, void *user_field_desc)
@ -291,22 +219,39 @@ static void submsg_start_cb(void *udata, void *user_field_desc)
struct upb_msg_parser *mp = udata;
struct upb_msg_fielddef *f = user_field_desc;
struct upb_msg *oldmsg = mp->top->msg;
void *gptr = upb_msg_gptr(oldmsg);
union upb_value_ptr p = get_value_ptr(oldmsg, f, &gptr, mp->getarray_cb);
union upb_value_ptr p = get_value_ptr(oldmsg, f);
struct upb_msg **submsg = p.msg;
//if(*submsg && upb_mmhead_only(&((*submsg)->mmhead))) {
// /* We can reuse the existing submsg. */
//} else {
*submsg = upb_msg_new(f->ref.msg);
//}
upb_msg_clear(*submsg);
upb_msg_set(oldmsg, f);
*p.msg = mp->getmsg_cb(gptr, *p.msg, f);
mp->top++;
mp->top->msg = *p.msg;
mp->top->msg = *submsg;
}
/* Callback for the end of a submessage: pops the top frame off the msg
 * parser's stack.  udata is the struct upb_msg_parser. */
static void submsg_end_cb(void *udata)
{
struct upb_msg_parser *mp = udata;
struct upb_msg *msg = mp->top->msg;
/* TODO: free any remaining dynamic storage that was not reused. */
(void)msg; /* Only referenced to avoid an unused-variable warning. */
mp->top--;
}
/* Externally-visible functions for the msg parser. */
upb_status_t upb_msg_parsestr(struct upb_msg *msg, void *buf, size_t len)
{
struct upb_msg_parser mp;
upb_msg_parser_reset(&mp, msg, false);
size_t read;
upb_status_t ret = upb_msg_parser_parse(&mp, buf, len, &read);
return ret;
}
void upb_msg_parser_reset(struct upb_msg_parser *s, struct upb_msg *msg, bool byref)
{
upb_stream_parser_reset(&s->s, s);
@ -592,51 +537,3 @@ bool upb_msg_eql(struct upb_msg *msg1, struct upb_msg *msg2, bool recursive)
}
return true;
}
static void printval(struct upb_text_printer *printer, union upb_value_ptr p,
struct upb_msg_fielddef *f,
google_protobuf_FieldDescriptorProto *fd,
FILE *stream);
static void printmsg(struct upb_text_printer *printer, struct upb_msg *msg,
FILE *stream)
{
struct upb_msgdef *m = msg->def;
for(uint32_t i = 0; i < m->num_fields; i++) {
struct upb_msg_fielddef *f = &m->fields[i];
google_protobuf_FieldDescriptorProto *fd = upb_msg_field_descriptor(f, m);
if(!upb_msg_isset(msg, f)) continue;
union upb_value_ptr p = upb_msg_getptr(msg, f);
if(upb_isarray(f)) {
struct upb_array *arr = *p.arr;
for(uint32_t j = 0; j < arr->len; j++) {
union upb_value_ptr elem_p = upb_array_getelementptr(arr, j, f->type);
printval(printer, elem_p, f, fd, stream);
}
} else {
printval(printer, p, f, fd, stream);
}
}
}
static void printval(struct upb_text_printer *printer, union upb_value_ptr p,
struct upb_msg_fielddef *f,
google_protobuf_FieldDescriptorProto *fd,
FILE *stream)
{
if(upb_issubmsg(f)) {
upb_text_push(printer, fd->name, stream);
printmsg(printer, *p.msg, stream);
upb_text_pop(printer, stream);
} else {
upb_text_printfield(printer, fd->name, f->type, upb_deref(p, f->type), stream);
}
}
void upb_msg_print(struct upb_msg *msg, bool single_line, FILE *stream)
{
struct upb_text_printer printer;
upb_text_printer_init(&printer, single_line);
printmsg(&printer, msg, stream);
}

@ -52,10 +52,10 @@
#include <stdbool.h>
#include <stdint.h>
#include <stddef.h>
#include "descriptor.h"
#include "upb.h"
#include "upb_atomic.h"
#include "upb_context.h"
#include "upb_parse.h"
#include "upb_table.h"
@ -66,10 +66,11 @@ extern "C" {
/* Message definition. ********************************************************/
struct upb_msg_fielddef;
struct upb_context;
/* Structure that describes a single .proto message type. */
struct upb_msgdef {
upb_atomic_refcount_t refcount;
struct upb_context *context;
struct upb_msg *default_msg; /* Message with all default values set. */
struct google_protobuf_DescriptorProto *descriptor;
struct upb_string fqname; /* Fully qualified. */
size_t size;
@ -82,7 +83,6 @@ struct upb_msgdef {
struct google_protobuf_FieldDescriptorProto **field_descriptors;
};
/* Structure that describes a single field in a message. This structure is very
* consciously designed to fit into 12/16 bytes (32/64 bit, respectively),
* because copies of this struct are in the hash table that is read in the
@ -96,14 +96,6 @@ struct upb_msg_fielddef {
upb_label_t label;
};
INLINE void upb_msgdef_ref(struct upb_msgdef *m) {
if(upb_atomic_ref(&m->refcount)) upb_context_ref(m->context);
}
INLINE void upb_msgdef_unref(struct upb_msgdef *m) {
if(upb_atomic_unref(&m->refcount)) upb_context_unref(m->context);
}
INLINE bool upb_issubmsg(struct upb_msg_fielddef *f) {
return upb_issubmsgtype(f->type);
}
@ -114,6 +106,29 @@ INLINE bool upb_isarray(struct upb_msg_fielddef *f) {
return f->label == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_REPEATED;
}
/* True if field f is memory-managed, i.e. it is an array, a string, or a
 * submessage. */
INLINE bool upb_field_ismm(struct upb_msg_fielddef *f) {
  if(upb_isarray(f)) return true;
  if(upb_isstring(f)) return true;
  return upb_issubmsg(f);
}
/* True if the elements of field f are memory-managed, i.e. f has a string or
 * submessage type.  Unlike upb_field_ismm, being an array is not sufficient. */
INLINE bool upb_elem_ismm(struct upb_msg_fielddef *f) {
  if(upb_isstring(f)) return true;
  return upb_issubmsg(f);
}
/* Returns the kind of pointer stored in field f: array first, then string,
 * then submessage.  Defined iff upb_field_ismm(f); for any other field the
 * result is -1 converted to upb_mm_ptrtype. */
INLINE upb_mm_ptrtype upb_field_ptrtype(struct upb_msg_fielddef *f) {
  if(upb_isarray(f)) return UPB_MM_ARR_REF;
  if(upb_isstring(f)) return UPB_MM_STR_REF;
  if(upb_issubmsg(f)) return UPB_MM_MSG_REF;
  return -1;
}
/* Returns the kind of pointer stored in each element of field f: string or
 * submessage.  Defined iff upb_elem_ismm(f); for any other field the result
 * is -1 converted to upb_mm_ptrtype. */
INLINE upb_mm_ptrtype upb_elem_ptrtype(struct upb_msg_fielddef *f) {
  if(upb_isstring(f)) return UPB_MM_STR_REF;
  if(upb_issubmsg(f)) return UPB_MM_MSG_REF;
  return -1;
}
/* Can be used to retrieve a field descriptor given the upb_msg_fielddef. */
INLINE struct google_protobuf_FieldDescriptorProto *upb_msg_field_descriptor(
struct upb_msg_fielddef *f, struct upb_msgdef *m) {
@ -122,14 +137,15 @@ INLINE struct google_protobuf_FieldDescriptorProto *upb_msg_field_descriptor(
/* Message structure. *********************************************************/
struct upb_msg {
struct upb_msgdef *def;
void *gptr; /* Generic pointer for use by subclasses. */
uint8_t data[1];
};
INLINE void *upb_msg_gptr(struct upb_msg *msg) {
return msg->gptr;
/* Constructs a new msg corresponding to the given msgdef, and having one
* counted reference. */
INLINE struct upb_msg *upb_msg_new(struct upb_msgdef *md) {
size_t size = md->size + offsetof(struct upb_msg, data);
struct upb_msg *msg = malloc(size);
memset(msg, 0, size);
upb_mmhead_init(&msg->mmhead);
msg->def = md;
return msg;
}
/* Field access. **************************************************************/
@ -146,12 +162,6 @@ INLINE union upb_value_ptr upb_msg_getptr(struct upb_msg *msg,
return p;
}
/* Returns a a specific field in a message. */
INLINE union upb_value upb_msg_get(struct upb_msg *msg,
struct upb_msg_fielddef *f) {
return upb_deref(upb_msg_getptr(msg, f), f->type);
}
/* "Set" flag reading and writing. *******************************************/
/* All upb code and code using upb should guarantee that the set flags are
@ -244,85 +254,10 @@ INLINE struct upb_msg_fielddef *upb_msg_fieldbyname(struct upb_msgdef *m,
}
/* Simple, one-shot parsing ***************************************************/
/* A simple interface for parsing into a newly-allocated message. This
* interface should only be used when the message will be read-only with
* respect to memory management (eg. won't add or remove internal references to
* dynamic memory). For more flexible (but also more complicated) interfaces,
* see below and in upb_mm_msg.h. */
/* Parses the protobuf in s (which is expected to be complete) and allocates
* new message data to hold it. If byref is set, strings in the returned
* upb_msg will reference s instead of copying from it, but this requires that
* s will live for as long as the returned message does. */
struct upb_msg *upb_msg_parsenew(struct upb_msgdef *m, struct upb_string *s);
/* This function should be used to free messages that were parsed with
* upb_msg_parsenew. It will free the message appropriately (including all
* submessages). */
void upb_msg_free(struct upb_msg *msg);
/* Parsing with (re)allocation callbacks. *************************************/
/* This interface parses protocol buffers into upb_msgs, but allows the client
* to supply allocation callbacks whenever the parser needs to obtain a string,
* array, or submsg (a "dynamic field"). If the parser sees that a dynamic
* field is already present (its "set bit" is set) it will use that, resizing
* it if necessary in the case of an array. Otherwise it will call the
* allocation callback to obtain one.
*
* This may seem trivial (since nearly all clients will use malloc and free for
* memory management), but the allocation callback can be used for more than
* just allocation. If we are parsing data into an existing upb_msg, the
* allocation callback can examine any existing memory that is allocated for
* the dynamic field and determine whether it can reuse it. It can also
* perform memory management like refing the new field.
*
* This parser is layered on top of the event-based parser in upb_parse.h. The
* parser is upb_mm_msg.h is layered on top of this parser.
*
* This parser is fully streaming-capable. */
/* Should return an initialized array. */
typedef struct upb_array *(*upb_msg_getandref_array_cb_t)(
void *from_gptr, struct upb_array *existingval, struct upb_msg_fielddef *f);
/* Callback to allocate a string. If byref is true, the client should assume
* that the string will be referencing the input data. */
typedef struct upb_string *(*upb_msg_getandref_string_cb_t)(
void *from_gptr, struct upb_string *existingval, struct upb_msg_fielddef *f,
bool byref);
/* Should return a cleared message. */
typedef struct upb_msg *(*upb_msg_getandref_msg_cb_t)(
void *from_gptr, struct upb_msg *existingval, struct upb_msg_fielddef *f);
struct upb_msg_parser_frame {
struct upb_msg *msg;
};
struct upb_msg_parser {
struct upb_stream_parser s;
bool merge;
bool byref;
struct upb_msg_parser_frame stack[UPB_MAX_NESTING], *top;
upb_msg_getandref_array_cb_t getarray_cb;
upb_msg_getandref_string_cb_t getstring_cb;
upb_msg_getandref_msg_cb_t getmsg_cb;
};
void upb_msg_parser_reset(struct upb_msg_parser *p,
struct upb_msg *msg, bool byref);
/* Parses protocol buffer data out of data which has length of len. The data
* need not be a complete protocol buffer. The number of bytes parsed is
* returned in *read, and the next call to upb_msg_parse must supply data that
* is *read bytes past data in the logical stream. */
upb_status_t upb_msg_parser_parse(struct upb_msg_parser *p,
void *data, size_t len, size_t *read);
/* Parsing ********************************************************************/
/* TODO: a stream parser. */
upb_status_t upb_msg_parsestr(struct upb_msg *msg, void *buf, size_t len);
/* Serialization *************************************************************/
@ -336,7 +271,7 @@ upb_status_t upb_msg_parser_parse(struct upb_msg_parser *p,
struct upb_msgsizes;
/* Initialize/free a upb_msgsizes for the given message. */
void upb_msgsizes_init(struct upb_msgsizes *sizes);
struct upb_msgsizes *upb_msgsizes_new(void);
void upb_msgsizes_free(struct upb_msgsizes *sizes);
/* Given a previously initialized sizes, recurse over the message and store its
@ -366,6 +301,10 @@ void upb_msg_serialize_init(struct upb_msg_serialize_state *s,
upb_status_t upb_msg_serialize(struct upb_msg_serialize_state *s,
void *buf, size_t len, size_t *written);
upb_status_t upb_msg_serialize_all(struct upb_msg *msg,
struct upb_msgsizes *sizes,
void *buf);
/* Text dump *****************************************************************/
bool upb_msg_eql(struct upb_msg *msg1, struct upb_msg *msg2, bool recursive);
@ -397,7 +336,8 @@ void upb_msgdef_free(struct upb_msgdef *m);
/* Sort the given field descriptors in-place, according to what we think is an
* optimal ordering of fields. This can change from upb release to upb
* release. */
void upb_msgdef_sortfds(google_protobuf_FieldDescriptorProto **fds, size_t num);
void upb_msgdef_sortfds(struct google_protobuf_FieldDescriptorProto **fds,
size_t num);
/* Clients use this function on a previously initialized upb_msgdef to resolve
* the "ref" field in the upb_msg_fielddef. Since messages can refer to each

@ -11,7 +11,7 @@
/* May want to move this to upb.c if enough other things warrant it. */
#define alignof(t) offsetof(struct { char c; t x; }, x)
#define TYPE_INFO(proto_type, wire_type, ctype) [proto_type] = {alignof(ctype), sizeof(ctype), wire_type, UPB_STRLIT(#ctype)},
#define TYPE_INFO(proto_type, wire_type, ctype) [proto_type] = {alignof(ctype), sizeof(ctype), wire_type, #ctype},
struct upb_type_info upb_type_info[] = {
TYPE_INFO(GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_DOUBLE, UPB_WIRE_TYPE_64BIT, double)
TYPE_INFO(GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_FLOAT, UPB_WIRE_TYPE_32BIT, float)

@ -21,16 +21,6 @@
extern "C" {
#endif
INLINE bool upb_issubmsgtype(upb_field_type_t type) {
return type == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_GROUP ||
type == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_MESSAGE;
}
INLINE bool upb_isstringtype(upb_field_type_t type) {
return type == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_STRING ||
type == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_BYTES;
}
/* High-level parsing interface. **********************************************/
/* The general scheme is that the client registers callbacks that will be

@ -7,19 +7,20 @@
#include <stdio.h>
#include "upb_string.h"
bool upb_strreadfile(const char *filename, struct upb_string *data) {
struct upb_string *upb_strreadfile(const char *filename) {
FILE *f = fopen(filename, "rb");
if(!f) return false;
if(fseek(f, 0, SEEK_END) != 0) return false;
if(fseek(f, 0, SEEK_END) != 0) goto error;
long size = ftell(f);
if(size < 0) return false;
if(fseek(f, 0, SEEK_SET) != 0) return false;
data->byte_len = size;
upb_stralloc(data, data->byte_len);
if(fread(data->ptr, size, 1, f) != 1) {
free(data->ptr);
return false;
}
if(size < 0) goto error;
if(fseek(f, 0, SEEK_SET) != 0) goto error;
struct upb_string *s = upb_string_new();
upb_string_resize(s, size);
if(fread(s->ptr, size, 1, f) != 1) goto error;
fclose(f);
return true;
return s;
error:
fclose(f);
return NULL;
}

@ -32,48 +32,35 @@ extern "C" {
#include <stdlib.h>
#include <string.h>
/* inline if possible, emit standalone code if required. */
#ifndef INLINE
#define INLINE static inline
#endif
#define UPB_MAX(x, y) ((x) > (y) ? (x) : (y))
#define UPB_MIN(x, y) ((x) < (y) ? (x) : (y))
#include "upb_struct.h"
struct upb_string {
/* We expect the data to be 8-bit clean (uint8_t), but char* is such an
* ingrained convention that we follow it. */
char *ptr;
uint32_t byte_len;
uint32_t byte_size; /* How many bytes of ptr we own. */
};
/* Allocation/Deallocation/Resizing. ******************************************/
INLINE void upb_strinit(struct upb_string *str)
INLINE struct upb_string *upb_string_new(void)
{
struct upb_string *str = (struct upb_string*)malloc(sizeof(*str));
upb_mmhead_init(&str->mmhead);
str->ptr = NULL;
str->byte_len = 0;
str->byte_size = 0;
return str;
}
INLINE void upb_struninit(struct upb_string *str)
{
if(str->byte_size) free(str->ptr);
}
INLINE struct upb_string *upb_strnew(void)
/* For internal use only. */
INLINE void upb_string_destroy(struct upb_string *str)
{
struct upb_string *str = (struct upb_string*)malloc(sizeof(*str));
upb_strinit(str);
return str;
if(str->byte_size != 0) free(str->ptr);
free(str);
}
INLINE void upb_strfree(struct upb_string *str)
INLINE void upb_string_unref(struct upb_string *str)
{
upb_struninit(str);
free(str);
if(upb_mmhead_unref(&str->mmhead)) upb_string_destroy(str);
}
INLINE void upb_stralloc(struct upb_string *str, uint32_t size)
/* Resizes the string to size, reallocating if necessary. Does not preserve
* existing data. */
INLINE void upb_string_resize(struct upb_string *str, uint32_t size)
{
if(str->byte_size < size) {
/* Need to resize. */
@ -81,12 +68,10 @@ INLINE void upb_stralloc(struct upb_string *str, uint32_t size)
void *oldptr = str->byte_size == 0 ? NULL : str->ptr;
str->ptr = (char*)realloc(oldptr, str->byte_size);
}
str->byte_len = size;
}
INLINE void upb_strdrop(struct upb_string *str)
{
upb_struninit(str);
}
/* Library functions. *********************************************************/
INLINE bool upb_streql(struct upb_string *s1, struct upb_string *s2) {
return s1->byte_len == s2->byte_len &&
@ -101,26 +86,26 @@ INLINE int upb_strcmp(struct upb_string *s1, struct upb_string *s2) {
INLINE void upb_strcpy(struct upb_string *dest, struct upb_string *src) {
dest->byte_len = src->byte_len;
upb_stralloc(dest, dest->byte_len);
upb_string_resize(dest, dest->byte_len);
memcpy(dest->ptr, src->ptr, src->byte_len);
}
INLINE struct upb_string *upb_strdup(struct upb_string *s) {
struct upb_string *copy = upb_strnew();
struct upb_string *copy = upb_string_new();
upb_strcpy(copy, s);
return copy;
}
INLINE struct upb_string *upb_strdupc(char *s) {
struct upb_string *copy = upb_strnew();
struct upb_string *copy = upb_string_new();
copy->byte_len = strlen(s);
upb_stralloc(copy, copy->byte_len);
upb_string_resize(copy, copy->byte_len);
memcpy(copy->ptr, s, copy->byte_len);
return copy;
}
/* Reads an entire file into a newly-allocated string. */
bool upb_strreadfile(const char *filename, struct upb_string *data);
struct upb_string *upb_strreadfile(const char *filename);
/* Allows defining upb_strings as literals, ie:
* struct upb_string str = UPB_STRLIT("Hello, World!\n");

@ -0,0 +1,119 @@
/*
* upb - a minimalist implementation of protocol buffers.
*
* Copyright (c) 2009 Joshua Haberman. See LICENSE for details.
*
* This file defines the in-memory format for messages, arrays, and strings
* (which are the three dynamically-allocated structures that make up all
* protobufs). */
#ifndef UPB_STRUCT_H
#define UPB_STRUCT_H
#include "upb.h"
/* mmhead -- this is a "base class" for strings, arrays, and messages ********/
struct upb_mm_ref;
/* Memory-management header embedded as the first member of upb_msg, upb_array
 * and upb_string.  A value is considered unreferenced when refcount is 0 and
 * refs is NULL (see upb_mmhead_norefs). */
struct upb_mmhead {
struct upb_mm_ref *refs; /* Head of linked list. */
uint32_t refcount; /* Number of counted references; starts at 1. */
};
/* Puts head into its initial state: one counted reference and an empty list
 * of listed references. */
INLINE void upb_mmhead_init(struct upb_mmhead *head) {
  head->refcount = 1;
  head->refs = NULL;
}
/* True when head has neither counted references (refcount == 0) nor listed
 * references (refs == NULL). */
INLINE bool upb_mmhead_norefs(struct upb_mmhead *head) {
  if(head->refcount != 0) return false;
  return head->refs == NULL;
}
/* Drops one counted reference from head.  Returns true if no references of
 * either kind remain afterwards, i.e. the owner may now be destroyed. */
INLINE bool upb_mmhead_unref(struct upb_mmhead *head) {
  head->refcount -= 1;
  return upb_mmhead_norefs(head);
}
/* Adds one counted reference to head. */
INLINE void upb_mmhead_ref(struct upb_mmhead *head) {
  head->refcount += 1;
}
/* Structures for msg, string, and array. *************************************/
/* These are all self describing. */
struct upb_msgdef;
struct upb_msg_fielddef;
/* In-memory message.  upb_msg_new allocates md->size bytes of field storage
 * starting at 'data', so the struct is larger than its declaration suggests. */
struct upb_msg {
struct upb_mmhead mmhead;
struct upb_msgdef *def; /* Definition of this message's type. */
uint8_t data[1]; /* First byte of the variable-length field storage. */
};
typedef uint32_t upb_arraylen_t; /* can be at most 2**32 elements long. */
/* In-memory array (repeated field).  The type-specific *_array structs
 * declared in this file are overlays with the same member order. */
struct upb_array {
struct upb_mmhead mmhead;
struct upb_msg_fielddef *fielddef; /* Defines the type of the array. */
union upb_value_ptr elements;
upb_arraylen_t len; /* Number of elements in "elements". */
upb_arraylen_t size; /* Memory we own (0 if by reference). */
};
/* In-memory string: a length-delimited byte buffer plus the shared
 * memory-management header. */
struct upb_string {
struct upb_mmhead mmhead;
/* We expect the data to be 8-bit clean (uint8_t), but char* is such an
* ingrained convention that we follow it. */
char *ptr;
uint32_t byte_len; /* Length of the string data, in bytes. */
uint32_t byte_size; /* How many bytes of ptr we own, 0 if we reference. */
};
/* Type-specific overlays on upb_array. ***************************************/
/* Declares "struct <name>_array", an overlay of struct upb_array whose
 * elements are of the given C type.  The members must mirror struct upb_array
 * exactly, so 'elements' is a pointer to the element storage (type *), in the
 * same way UPB_DEFINE_MSG_ARRAY uses msg_type ** for arrays of msg_type *.
 * Declaring it as a bare 'type' would make it a single inline value and, for
 * types narrower than a pointer, shift the offsets of len and size. */
#define UPB_DEFINE_ARRAY_TYPE(name, type) \
  struct name ## _array { \
    struct upb_mmhead mmhead; \
    struct upb_msg_fielddef *fielddef; \
    type *elements; \
    upb_arraylen_t len; \
    upb_arraylen_t size; \
  };
UPB_DEFINE_ARRAY_TYPE(upb_double, double)
UPB_DEFINE_ARRAY_TYPE(upb_float, float)
UPB_DEFINE_ARRAY_TYPE(upb_int32, int32_t)
UPB_DEFINE_ARRAY_TYPE(upb_int64, int64_t)
UPB_DEFINE_ARRAY_TYPE(upb_uint32, uint32_t)
UPB_DEFINE_ARRAY_TYPE(upb_uint64, uint64_t)
UPB_DEFINE_ARRAY_TYPE(upb_bool, bool)
UPB_DEFINE_ARRAY_TYPE(upb_string, struct upb_string*)
UPB_DEFINE_ARRAY_TYPE(upb_msg, void*)
/* Defines an array of a specific message type (an overlay of upb_array). */
/* UPB_MSG_ARRAY(msg_type) names the array struct for msg_type
 * ("struct <msg_type>_array"); UPB_DEFINE_MSG_ARRAY(msg_type) declares it,
 * with the same member order as struct upb_array and elements typed as an
 * array of msg_type pointers. */
#define UPB_MSG_ARRAY(msg_type) struct msg_type ## _array
#define UPB_DEFINE_MSG_ARRAY(msg_type) \
UPB_MSG_ARRAY(msg_type) { \
struct upb_mmhead mmhead; \
struct upb_msg_fielddef *fielddef; \
msg_type **elements; \
upb_arraylen_t len; \
upb_arraylen_t size; \
};
/* mmptr -- a pointer which polymorphically points to one of the above. *******/
/* Pointer to a msg, array, or string.  The union itself does not record which
 * member is valid; callers carry a upb_mm_ptrtype alongside it. */
union upb_mmptr {
struct upb_msg *msg;
struct upb_array *arr;
struct upb_string *str;
};
/* Tags saying which member of a union upb_mmptr is valid. */
enum {
UPB_MM_MSG_REF,
UPB_MM_STR_REF,
UPB_MM_ARR_REF
};
/* Holds one of the UPB_MM_*_REF values above in a single byte. */
typedef uint8_t upb_mm_ptrtype;
#endif

@ -5,8 +5,11 @@
*/
#include <inttypes.h>
#include "upb_text.h"
#include "descriptor.h"
#include "upb_text.h"
#include "upb_string.h"
#include "upb_msg.h"
#include "upb_array.h"
void upb_text_printval(upb_field_type_t type, union upb_value val, FILE *file)
{
@ -78,3 +81,52 @@ void upb_text_pop(struct upb_text_printer *p,
print_indent(p, stream);
fprintf(stream, "}\n");
}
static void printval(struct upb_text_printer *printer, union upb_value_ptr p,
struct upb_msg_fielddef *f,
google_protobuf_FieldDescriptorProto *fd,
FILE *stream);
static void printmsg(struct upb_text_printer *printer, struct upb_msg *msg,
FILE *stream)
{
struct upb_msgdef *m = msg->def;
for(uint32_t i = 0; i < m->num_fields; i++) {
struct upb_msg_fielddef *f = &m->fields[i];
google_protobuf_FieldDescriptorProto *fd = upb_msg_field_descriptor(f, m);
if(!upb_msg_isset(msg, f)) continue;
union upb_value_ptr p = upb_msg_getptr(msg, f);
if(upb_isarray(f)) {
struct upb_array *arr = *p.arr;
for(uint32_t j = 0; j < arr->len; j++) {
union upb_value_ptr elem_p = upb_array_getelementptr(arr, j, f->type);
printval(printer, elem_p, f, fd, stream);
}
} else {
printval(printer, p, f, fd, stream);
}
}
}
/* Prints a single value of field f (described by fd) to stream.  Scalar and
 * string values are printed as one "name: value" field; submessages are
 * printed recursively between upb_text_push and upb_text_pop. */
static void printval(struct upb_text_printer *printer, union upb_value_ptr p,
                     struct upb_msg_fielddef *f,
                     google_protobuf_FieldDescriptorProto *fd,
                     FILE *stream)
{
  if(!upb_issubmsg(f)) {
    upb_text_printfield(printer, fd->name, f->type,
                        upb_value_read(p, f->type), stream);
    return;
  }
  upb_text_push(printer, fd->name, stream);
  printmsg(printer, *p.msg, stream);
  upb_text_pop(printer, stream);
}
void upb_msg_print(struct upb_msg *msg, bool single_line, FILE *stream)
{
struct upb_text_printer printer;
upb_text_printer_init(&printer, single_line);
printmsg(&printer, msg, stream);
}

@ -15,6 +15,8 @@
#include "upb_enum.h"
#include "upb_msg.h"
#include "upb_text.h"
#include "upb_array.h"
#include "upb_mm.h"
/* These are in-place string transformations that do not change the length of
* the string (and thus never need to re-allocate). */
@ -54,32 +56,24 @@ void *strtable_to_array(struct upb_strtable *t, int *size)
return array;
}
/* The .h file defines structs for the types defined in the .proto file. It
* also defines constants for the enum values.
*
* Assumes that d has been validated. */
static void write_h(struct upb_symtab_entry *entries[], int num_entries,
char *outfile_name, char *descriptor_cident, FILE *stream)
/* The _const.h file defines the constants (enums) defined in the .proto
* file. */
static void write_const_h(struct upb_symtab_entry *entries[], int num_entries,
char *outfile_name, FILE *stream)
{
/* Header file prologue. */
struct upb_string *include_guard_name = upb_strdupc(outfile_name);
to_preproc(include_guard_name);
/* A bit cheesy, but will do the job. */
include_guard_name->ptr[include_guard_name->byte_len-1] = 'C';
fputs("/* This file was generated by upbc (the upb compiler). "
"Do not edit. */\n\n", stream),
fprintf(stream, "#ifndef " UPB_STRFMT "\n", UPB_STRARG(include_guard_name));
fprintf(stream, "#define " UPB_STRFMT "\n\n", UPB_STRARG(include_guard_name));
fputs("#include <upb_string.h>\n\n", stream);
fputs("#include <upb_array.h>\n\n", stream);
fputs("#ifdef __cplusplus\n", stream);
fputs("extern \"C\" {\n", stream);
fputs("#endif\n\n", stream);
if(descriptor_cident) {
fputs("struct google_protobuf_FileDescriptorSet;\n", stream);
fprintf(stream, "extern struct google_protobuf_FileDescriptorSet *%s;\n\n",
descriptor_cident);
}
/* Enums. */
fprintf(stream, "/* Enums. */\n\n");
for(int i = 0; i < num_entries; i++) { /* Foreach enum */
@ -109,12 +103,45 @@ static void write_h(struct upb_symtab_entry *entries[], int num_entries,
UPB_STRARG(enum_val_prefix), UPB_STRARG(value_name), v->number);
if(j != ed->value->len-1) fputc(',', stream);
fputc('\n', stream);
upb_strfree(value_name);
upb_string_unref(value_name);
}
}
fprintf(stream, "} " UPB_STRFMT ";\n\n", UPB_STRARG(enum_name));
upb_strfree(enum_name);
upb_strfree(enum_val_prefix);
upb_string_unref(enum_name);
upb_string_unref(enum_val_prefix);
}
/* Epilogue. */
fputs("#ifdef __cplusplus\n", stream);
fputs("} /* extern \"C\" */\n", stream);
fputs("#endif\n\n", stream);
fprintf(stream, "#endif /* " UPB_STRFMT " */\n", UPB_STRARG(include_guard_name));
upb_string_unref(include_guard_name);
}
/* The .h file defines structs for the types defined in the .proto file. It
* also defines constants for the enum values.
*
* Assumes that d has been validated. */
static void write_h(struct upb_symtab_entry *entries[], int num_entries,
char *outfile_name, char *descriptor_cident, FILE *stream)
{
/* Header file prologue. */
struct upb_string *include_guard_name = upb_strdupc(outfile_name);
to_preproc(include_guard_name);
fputs("/* This file was generated by upbc (the upb compiler). "
"Do not edit. */\n\n", stream),
fprintf(stream, "#ifndef " UPB_STRFMT "\n", UPB_STRARG(include_guard_name));
fprintf(stream, "#define " UPB_STRFMT "\n\n", UPB_STRARG(include_guard_name));
fputs("#include <upb_struct.h>\n\n", stream);
fputs("#ifdef __cplusplus\n", stream);
fputs("extern \"C\" {\n", stream);
fputs("#endif\n\n", stream);
if(descriptor_cident) {
fputs("struct google_protobuf_FileDescriptorSet;\n", stream);
fprintf(stream, "extern struct google_protobuf_FileDescriptorSet *%s;\n\n",
descriptor_cident);
}
/* Forward declarations. */
@ -131,7 +158,7 @@ static void write_h(struct upb_symtab_entry *entries[], int num_entries,
fprintf(stream, "struct " UPB_STRFMT ";\n", UPB_STRARG(msg_name));
fprintf(stream, "typedef struct " UPB_STRFMT "\n " UPB_STRFMT ";\n\n",
UPB_STRARG(msg_name), UPB_STRARG(msg_name));
upb_strfree(msg_name);
upb_string_unref(msg_name);
}
/* Message Declarations. */
@ -144,8 +171,8 @@ static void write_h(struct upb_symtab_entry *entries[], int num_entries,
struct upb_string *msg_name = upb_strdup(&entry->e.key);
to_cident(msg_name);
fprintf(stream, "struct " UPB_STRFMT " {\n", UPB_STRARG(msg_name));
fputs(" struct upb_mmhead mmhead;\n", stream);
fputs(" struct upb_msgdef *def;\n", stream);
fputs(" void *gptr;\n", stream);
fputs(" union {\n", stream);
fprintf(stream, " uint8_t bytes[%" PRIu32 "];\n", m->set_flags_bytes);
fputs(" struct {\n", stream);
@ -179,7 +206,7 @@ static void write_h(struct upb_symtab_entry *entries[], int num_entries,
fprintf(stream, " " UPB_STRFMT "* " UPB_STRFMT ";\n",
UPB_STRARG(type_name), UPB_STRARG(fd->name));
}
upb_strfree(type_name);
upb_string_unref(type_name);
} else if(f->label == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_REPEATED) {
static char* c_types[] = {
"", "struct upb_double_array*", "struct upb_float_array*",
@ -208,7 +235,7 @@ static void write_h(struct upb_symtab_entry *entries[], int num_entries,
fputs("};\n", stream);
fprintf(stream, "UPB_DEFINE_MSG_ARRAY(" UPB_STRFMT ")\n\n",
UPB_STRARG(msg_name));
upb_strfree(msg_name);
upb_string_unref(msg_name);
}
/* Epilogue. */
@ -216,7 +243,7 @@ static void write_h(struct upb_symtab_entry *entries[], int num_entries,
fputs("} /* extern \"C\" */\n", stream);
fputs("#endif\n\n", stream);
fprintf(stream, "#endif /* " UPB_STRFMT " */\n", UPB_STRARG(include_guard_name));
upb_strfree(include_guard_name);
upb_string_unref(include_guard_name);
}
/* Format of table entries that we use when analyzing data structures for
@ -300,24 +327,28 @@ static void add_strings_from_msg(void *data, struct upb_msgdef *m,
/* Returns the entry in table t for the type of field f, inserting a new entry
 * first if the table does not have one yet.
 *
 * The lookup key is the fully-qualified name of the referenced message when f
 * is a submessage field, and otherwise the C type name recorded for f->type
 * in upb_type_info.  A newly inserted entry records f as its field, starts
 * with empty "values" and "arrays" lists, and gets a "cident" that is a copy
 * of the key passed through to_cident(). */
struct typetable_entry *get_or_insert_typeentry(struct upb_strtable *t,
struct upb_msg_fielddef *f)
{
struct upb_string type_name = upb_issubmsg(f) ? f->ref.msg->fqname :
upb_type_info[f->type].ctype;
struct typetable_entry *type_e = upb_strtable_lookup(t, &type_name);
struct upb_string *type_name = upb_issubmsg(f) ? upb_strdup(&f->ref.msg->fqname) :
upb_strdupc(upb_type_info[f->type].ctype);
struct typetable_entry *type_e = upb_strtable_lookup(t, type_name);
if(type_e == NULL) {
struct typetable_entry new_type_e = {
.e = {.key = type_name}, .field = f, .cident = upb_strdup(&type_name),
.e = {.key = *type_name}, .field = f, .cident = upb_strdup(type_name),
.values = NULL, .values_size = 0, .values_len = 0,
.arrays = NULL, .arrays_size = 0, .arrays_len = 0
};
to_cident(new_type_e.cident);
assert(upb_strtable_lookup(t, type_name) == NULL);
assert(upb_strtable_lookup(t, &new_type_e.e.key) == NULL);
upb_strtable_insert(t, &new_type_e.e);
type_e = upb_strtable_lookup(t, &type_name);
type_e = upb_strtable_lookup(t, type_name);
assert(type_e);
} else {
upb_string_unref(type_name);
}
return type_e;
}
static void add_value(union upb_value value, struct upb_msg_fielddef *f,
static void add_value(union upb_value_ptr p, struct upb_msg_fielddef *f,
struct upb_strtable *t)
{
struct typetable_entry *type_e = get_or_insert_typeentry(t, f);
@ -325,7 +356,7 @@ static void add_value(union upb_value value, struct upb_msg_fielddef *f,
type_e->values_size = UPB_MAX(type_e->values_size * 2, 4);
type_e->values = realloc(type_e->values, sizeof(*type_e->values) * type_e->values_size);
}
type_e->values[type_e->values_len++] = value;
type_e->values[type_e->values_len++] = upb_value_read(p, f->type);
}
static void add_submsgs(void *data, struct upb_msgdef *m, struct upb_strtable *t)
@ -353,7 +384,7 @@ static void add_submsgs(void *data, struct upb_msgdef *m, struct upb_strtable *t
/* Add the individual values in the array. */
for(uint32_t j = 0; j < arr->len; j++)
add_value(upb_array_getelement(arr, j, f->type), f, t);
add_value(upb_array_getelementptr(arr, j, f->type), f, t);
/* Add submsgs. We must do this separately so that the msgs in this
* array are contiguous (and don't have submsgs of the same type
@ -362,7 +393,7 @@ static void add_submsgs(void *data, struct upb_msgdef *m, struct upb_strtable *t
add_submsgs(*upb_array_getelementptr(arr, j, f->type).msg, f->ref.msg, t);
} else {
if(!upb_issubmsg(f)) continue;
add_value(upb_deref(p, f->type), f, t);
add_value(p, f, t);
add_submsgs(*p.msg, f->ref.msg, t);
}
}
@ -445,7 +476,8 @@ static void write_message_c(void *data, struct upb_msgdef *m,
.type = GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_MESSAGE,
.ref = {.msg = m}
};
add_value(val, &fake_field, &types);
union upb_value_ptr p = UPB_VALUE_ADDROF(val);
add_value(p, &fake_field, &types);
add_submsgs(data, m, &types);
/* Emit forward declarations for all msgs of all types, and define arrays. */
@ -503,7 +535,7 @@ static void write_message_c(void *data, struct upb_msgdef *m,
for(unsigned int j = 0; j < m->num_fields; j++) {
struct upb_msg_fielddef *f = &m->fields[j];
google_protobuf_FieldDescriptorProto *fd = m->field_descriptors[j];
union upb_value val = upb_msg_get(msgdata, f);
union upb_value val = upb_value_read(upb_msg_getptr(msgdata, f), f->type);
fprintf(stream, " ." UPB_STRFMT " = ", UPB_STRARG(fd->name));
if(!upb_msg_isset(msgdata, f)) {
fputs("0, /* Not set. */", stream);
@ -571,7 +603,7 @@ static void write_message_c(void *data, struct upb_msgdef *m,
/* Free tables. */
for(e = upb_strtable_begin(&types); e; e = upb_strtable_next(&types, &e->e)) {
upb_strfree(e->cident);
upb_string_unref(e->cident);
free(e->values);
free(e->arrays);
}
@ -641,16 +673,17 @@ int main(int argc, char *argv[])
if(!outfile_base) outfile_base = input_file;
/* Read input file. */
struct upb_string *descriptor = upb_strnew();
if(!upb_strreadfile(input_file, descriptor))
struct upb_string *descriptor = upb_strreadfile(input_file);
if(!descriptor)
error("Couldn't read input file.");
/* Parse input file. */
struct upb_context *c = upb_context_new();
struct upb_msg *fds_msg = upb_msg_parsenew(c->fds_msg, descriptor);
google_protobuf_FileDescriptorSet *fds = (void*)fds_msg;
if(!fds)
struct upb_msg *fds_msg = upb_msg_new(c->fds_msg);
if(upb_msg_parsestr(fds_msg, descriptor->ptr, descriptor->byte_len) != UPB_STATUS_OK)
error("Failed to parse input file descriptor.");
//upb_msg_print(fds_msg, false, stderr);
google_protobuf_FileDescriptorSet *fds = (void*)fds_msg;
if(!upb_context_addfds(c, fds))
error("Failed to resolve symbols in descriptor.\n");
@ -666,17 +699,21 @@ int main(int argc, char *argv[])
/* Emit output files. */
const int maxsize = 256;
char h_filename[maxsize], c_filename[maxsize];
char h_filename[maxsize], h_const_filename[maxsize], c_filename[maxsize];
if(snprintf(h_filename, maxsize, "%s.h", outfile_base) >= maxsize ||
snprintf(c_filename, maxsize, "%s.c", outfile_base) >= maxsize)
snprintf(c_filename, maxsize, "%s.c", outfile_base) >= maxsize ||
snprintf(h_const_filename, maxsize, "%s_const.h", outfile_base) >= maxsize)
error("File base too long.\n");
FILE *h_file = fopen(h_filename, "w");
if(!h_file) error("Failed to open .h output file");
FILE *h_const_file = fopen(h_const_filename, "w");
if(!h_const_file) error("Failed to open _const.h output file");
int symcount;
struct upb_symtab_entry **entries = strtable_to_array(&c->symtab, &symcount);
write_h(entries, symcount, h_filename, cident, h_file);
write_const_h(entries, symcount, h_filename, h_const_file);
free(entries);
if(cident) {
FILE *c_file = fopen(c_filename, "w");
@ -684,10 +721,11 @@ int main(int argc, char *argv[])
write_message_c(fds, c->fds_msg, cident, h_filename, argc, argv, input_file, c_file);
fclose(c_file);
}
upb_msg_free(fds_msg);
upb_msg_unref(fds_msg);
upb_context_unref(c);
upb_strfree(descriptor);
upb_string_unref(descriptor);
fclose(h_file);
fclose(h_const_file);
return 0;
}

Loading…
Cancel
Save