Beginnings of a prototype Ruby extension.

pull/13171/head
Josh Haberman 10 years ago
parent 43f2455cbd
commit 8405e5e389
  1. 33
      Makefile
  2. 2
      upb/bindings/ruby/README
  3. 7
      upb/bindings/ruby/extconf.rb
  4. 300
      upb/bindings/ruby/upb.c

@ -47,7 +47,7 @@ endif
CC=gcc
CXX=g++
CFLAGS=-std=gnu99
CXXFLAGS=-std=c++11
CXXFLAGS=
INCLUDE=-Itests -I.
CPPFLAGS=$(INCLUDE) -Wall -Wextra -Wno-sign-compare $(USER_CFLAGS)
LDLIBS=-lpthread upb/libupb.a
@ -62,6 +62,9 @@ else
E=@:
endif
install:
test -f upb/bindings/ruby/upb.so && cd upb/bindings/ruby && make install
# Dependency generating. #######################################################
-include deps
@ -85,8 +88,6 @@ CORE= \
upb/def.c \
upb/descriptor/reader.c \
upb/descriptor/descriptor.upb.c \
upb/bindings/googlepb/bridge.cc \
upb/bindings/googlepb/proto2.cc \
upb/handlers.c \
upb/refcounted.c \
upb/shim/shim.c \
@ -94,7 +95,9 @@ CORE= \
upb/table.c \
upb/upb.c \
# TODO: the proto2 bridge should be built as a separate library.
GOOGLEPB= \
upb/bindings/googlepb/bridge.cc \
upb/bindings/googlepb/proto2.cc \
# Library for the protocol buffer format (both text and binary).
PB= \
@ -120,6 +123,8 @@ clean_leave_profile:
rm -rf tools/upbc deps
rm -rf bindings/lua/upb.so
rm -rf bindings/python/build
rm -rf upb/bindings/ruby/Makefile
rm -rf upb/bindings/ruby/upb.so
clean: clean_leave_profile
rm -rf $(call rwildcard,,*.gcno) $(call rwildcard,,*.gcda)
@ -219,14 +224,16 @@ SIMPLE_TESTS= \
tests/test_handlers
SIMPLE_CXX_TESTS= \
tests/test_cpp \
tests/pb/test_decoder
tests/pb/test_decoder \
# tests/test_cpp \
VARIADIC_TESTS= \
tests/t.test_vs_proto2.googlemessage1 \
tests/t.test_vs_proto2.googlemessage2 \
TESTS=$(SIMPLE_TESTS) $(SIMPLE_CXX_TESTS) $(VARIADIC_TESTS) tests/test_table
#TESTS=$(SIMPLE_TESTS) $(SIMPLE_CXX_TESTS) $(VARIADIC_TESTS) tests/test_table
TESTS=$(SIMPLE_TESTS) $(SIMPLE_CXX_TESTS) tests/test_table
tests: $(TESTS) $(INTERACTIVE_TESTS)
@ -501,3 +508,15 @@ $(PYTHONEXT): $(LIBUPB_PIC) bindings/python/upb.c
pythontest: $(PYTHONEXT)
cd bindings/python && cp test.py build/install/lib/python && valgrind $(PYTHON) ./build/install/lib/python/test.py
# Ruby extension ###############################################################
RUBY=ruby
RUBYEXT=upb/bindings/ruby/upb.so
ruby: $(RUBYEXT)
upb/bindings/ruby/Makefile: upb/bindings/ruby/extconf.rb
$(E) RUBY upb/bindings/ruby/extconf.rb
$(Q) cd upb/bindings/ruby && ruby extconf.rb
$(RUBYEXT): $(LIBUPB_PIC) upb/bindings/ruby/upb.c upb/bindings/ruby/Makefile
$(E) CC upb/bindings/ruby/upb.c
$(Q) cd upb/bindings/ruby && make

@ -0,0 +1,2 @@
This is PROTOTYPE code -- all interfaces are experimental
and will almost certainly change.

@ -0,0 +1,7 @@
#!/usr/bin/ruby
# Generates the Makefile that builds the "upb" C extension.

require 'mkmf'

# Both lookups are given an extra search path in addition to the defaults.
raise "Can't find upb headers" unless find_header("upb/upb.h", "../../..")
raise "Can't find upb lib" unless find_library("upb_pic", "upb_msgdef_new", "../..")

$CFLAGS += " -Wall"

create_makefile("upb")

@ -0,0 +1,300 @@
#include "ruby.h"
#include "upb/def.h"
#include "upb/handlers.h"
#include "upb/pb/decoder.h"
#include "upb/pb/glue.h"
#include "upb/shim/shim.h"
#include "upb/symtab.h"
static VALUE cMessageDef;
static VALUE cMessage;
// Wrapper around a upb_msgdef.
//
// Besides the msgdef itself, this carries the memory layout (total size and
// per-field offsets) used for every message of this type.
typedef struct {
// The msgdef for this message, and a DecoderMethod to parse protobufs and
// fill a message.
//
// We own refs on both of these.
const upb_msgdef *md;
const upb_pbdecodermethod *fill_method;
// Number of bytes allocated for each message; computed by assign_offsets().
size_t size;
// Byte offset of every field inside a message, indexed by
// upb_fielddef_index().  Filled in by assign_offsets().
uint32_t *field_offsets;
} rb_msgdef;
// Ruby message object.
// This will be sized according to what fields are actually present.
//
// The struct is only a view onto a larger allocation: msg_new() allocates
// rb_msgdef.size bytes, and fields are reached through DEREF() at byte
// offsets into data.data.
typedef struct {
union u {
// The Ruby MessageDef object this message was created from (set by
// msg_new()).
VALUE rbmsgdef;
// Byte-addressable alias of the whole allocation; indexed well past [0].
char data[1];
} data;
} rb_msg;
// Yields, as an lvalue of type "type", the storage located "ofs" bytes into
// the message pointed to by "msg" (an rb_msg pointer).
//
// The arguments and the whole expansion are parenthesized so that compound
// expressions (e.g. "base + 1" or "a + b") can be passed safely.
#define DEREF(msg, ofs, type) (*(type *)(&(msg)->data.data[(ofs)]))
static void symtab_free(void *md) {
upb_symtab_unref(md, UPB_UNTRACKED_REF);
}
// Raises a Ruby RuntimeError carrying the status message if "s" reports an
// error.  Either way a diagnostic line is written to stderr.
void rupb_checkstatus(upb_status *s) {
  if (upb_ok(s)) {
    fprintf(stderr, "A-OK!");
    return;
  }

  fprintf(stderr, "YO, error! %s", upb_status_errmsg(s));
  rb_raise(rb_eRuntimeError, "%s", upb_status_errmsg(s));
}
/* handlers *******************************************************************/
// These are handlers for populating a Ruby protobuf message when parsing.
// String handler: stores the incoming bytes as a new Ruby string in the
// message.
//
// closure: the rb_msg being filled.
// hd:      points to a size_t holding the field's byte offset in the message.
// Returns "len", i.e. reports the whole buffer as consumed.
static size_t strhandler(void *closure, const void *hd, const char *str,
                         size_t len, const upb_bufhandle *handle) {
  (void)handle;  // Not needed here.
  const size_t field_ofs = *(const size_t *)hd;
  rb_msg *dest = closure;

  DEREF(dest, field_ofs, VALUE) = rb_str_new(str, len);
  return len;
}
// Allocates handler data holding the byte offset "ofs" and registers a
// cleanup function for it on "h".
//
// The block comes from Ruby's ALLOC(), so the cleanup function must be
// Ruby's xfree() rather than the C library's free().
//
// Returns a pointer to the stored size_t offset.
static const void *newhandlerdata(upb_handlers *h, uint32_t ofs) {
  size_t *hd_ofs = ALLOC(size_t);
  *hd_ofs = ofs;
  upb_handlers_addcleanup(h, hd_ofs, xfree);
  return hd_ofs;
}
// Callback that registers, on "h", a handler for every field of the message
// described by the rb_msgdef passed as "closure".
//
// Bool, numeric and enum fields are set up through upb_shim_set() with the
// field's offset; string and bytes fields use strhandler().  Repeated fields
// and submessages raise a Ruby RuntimeError.
static void add_handlers_for_message(const void *closure, upb_handlers *h) {
  // XXX: Doesn't support submessages properly yet.
  const rb_msgdef *def = closure;
  upb_msg_iter it;

  upb_msg_begin(&it, def->md);
  while (!upb_msg_done(&it)) {
    upb_fielddef *field = upb_msg_iter_field(&it);

    if (upb_fielddef_isseq(field)) {
      rb_raise(rb_eRuntimeError, "Doesn't support repeated fields yet.");
    }

    size_t field_ofs = def->field_offsets[upb_fielddef_index(field)];

    switch (upb_fielddef_type(field)) {
      case UPB_TYPE_MESSAGE:
        rb_raise(rb_eRuntimeError, "Doesn't support submessages yet.");
        break;

      case UPB_TYPE_STRING:
      case UPB_TYPE_BYTES: {
        // The handler data carries the field offset to strhandler().
        upb_handlerattr str_attr = UPB_HANDLERATTR_INITIALIZER;
        upb_handlerattr_sethandlerdata(&str_attr, newhandlerdata(h, field_ofs));
        // XXX: doesn't currently handle split buffers.
        upb_handlers_setstring(h, field, strhandler, &str_attr);
        upb_handlerattr_uninit(&str_attr);
        break;
      }

      case UPB_TYPE_BOOL:
      case UPB_TYPE_INT32:
      case UPB_TYPE_UINT32:
      case UPB_TYPE_ENUM:
      case UPB_TYPE_FLOAT:
      case UPB_TYPE_INT64:
      case UPB_TYPE_UINT64:
      case UPB_TYPE_DOUBLE:
        upb_shim_set(h, field, field_ofs, -1);
        break;
    }

    upb_msg_next(&it);
  }
}
// Builds the frozen upb handlers that populate a message of the type
// described by "rmd".  The returned handlers carry a ref owned by "owner".
static const upb_handlers *new_fill_handlers(const rb_msgdef *rmd,
                                             const void *owner) {
  const upb_handlers *frozen =
      upb_handlers_newfrozen(rmd->md, owner, add_handlers_for_message, rmd);
  return frozen;
}
// Rounds "val" up to the nearest multiple of "align".
//
// Each type is stored at an address that is a multiple of its size, so
// callers pass the field size as "align".  "align" must be non-zero.
static size_t align_up(size_t val, size_t align) {
  const size_t remainder = val % align;
  if (remainder == 0) {
    return val;
  }
  return val + align - remainder;
}
// Returns the number of bytes used to store a field of the given upb type
// inside a message.
//
// Strings, bytes and submessages are stored as a Ruby VALUE.  Every known
// type is handled by the switch; an unknown type trips the assertion.
static size_t rupb_sizeof(upb_fieldtype_t type) {
  switch (type) {
    case UPB_TYPE_BOOL:
      return 1;
    case UPB_TYPE_INT32:
    case UPB_TYPE_UINT32:
    case UPB_TYPE_ENUM:
    case UPB_TYPE_FLOAT:
      return 4;
    case UPB_TYPE_INT64:
    case UPB_TYPE_UINT64:
    case UPB_TYPE_DOUBLE:
      return 8;
    case UPB_TYPE_STRING:
    case UPB_TYPE_BYTES:
    case UPB_TYPE_MESSAGE:
      return sizeof(VALUE);
  }
  assert(false);
  // Not reached for valid types.  Returning explicitly avoids falling off the
  // end of a non-void function when assertions are compiled out (NDEBUG).
  return 0;
}
/* msg ************************************************************************/
// Free callback for Message objects.
//
// The message storage is allocated with Ruby's ALLOC_N() in msg_new(), so it
// is released with xfree() rather than the C library's free().
static void msg_free(void *msg) {
  xfree(msg);
}
// Invoked by the Ruby GC whenever it is doing a mark-and-sweep.
//
// Marks every Ruby value the message refers to so that none of them is
// collected while the message is alive.
static void msg_mark(void *p) {
  rb_msg *msg = p;

  // The message refers to its MessageDef, and this function (as well as the
  // field layout) depends on it, so it must be kept alive as well.
  rb_gc_mark(msg->data.rbmsgdef);

  rb_msgdef *rmd;
  Data_Get_Struct(msg->data.rbmsgdef, rb_msgdef, rmd);

  // We need to mark all references to other Ruby values: strings, arrays, and
  // submessages that we point to.  Only strings are implemented so far.
  upb_msg_iter i;
  for (upb_msg_begin(&i, rmd->md); !upb_msg_done(&i); upb_msg_next(&i)) {
    upb_fielddef *f = upb_msg_iter_field(&i);
    if (upb_fielddef_isstring(f)) {
      size_t ofs = rmd->field_offsets[upb_fielddef_index(f)];
      rb_gc_mark(DEREF(msg, ofs, VALUE));
    }
  }
}
// Creates a new Ruby Message for the given MessageDef object.
//
// The storage is sized from the MessageDef's layout, zero-filled, and tagged
// with the MessageDef it belongs to.
VALUE msg_new(VALUE msgdef) {
  const rb_msgdef *def;
  Data_Get_Struct(msgdef, rb_msgdef, def);

  const size_t nbytes = def->size;
  rb_msg *fresh = (rb_msg*)ALLOC_N(char, nbytes);
  memset(fresh, 0, nbytes);
  fresh->data.rbmsgdef = msgdef;

  return Data_Wrap_Struct(cMessage, msg_mark, msg_free, fresh);
}
/* msgdef *********************************************************************/
// Free callback for MessageDef objects: drops the refs we own and releases
// all memory belonging to the wrapper.
//
// The struct is created zero-filled by Data_Make_Struct(), so "md" and
// "fill_method" may still be NULL if construction raised partway through;
// those are skipped.  Both the offsets array and the struct itself come from
// Ruby's allocator and are therefore released with xfree().
static void msgdef_free(void *_rmd) {
  rb_msgdef *rmd = _rmd;
  if (rmd->md) {
    upb_msgdef_unref(rmd->md, &rmd->md);
  }
  if (rmd->fill_method) {
    upb_pbdecodermethod_unref(rmd->fill_method, &rmd->fill_method);
  }
  xfree(rmd->field_offsets);
  // The wrapper struct itself is ours to free as well.
  xfree(rmd);
}
// Creates a decoder method that parses binary protobufs into messages of the
// type described by "rmd".  The returned method carries a ref owned by
// "owner".
const upb_pbdecodermethod *new_fillmsg_decodermethod(const rb_msgdef *rmd,
                                                     const void *owner) {
  // The address of this local is the owner of our temporary handlers ref.
  const upb_handlers *handlers = new_fill_handlers(rmd, &handlers);

  upb_pbdecodermethodopts method_opts;
  upb_pbdecodermethodopts_init(&method_opts, handlers);
  const upb_pbdecodermethod *method =
      upb_pbdecodermethod_new(&method_opts, owner);

  // Drop the temporary ref now that the method has been created.
  upb_handlers_unref(handlers, &handlers);
  return method;
}
// Calculates offsets for each field.
//
// This lets us pack protos like structs instead of storing them like
// dictionaries.  This speeds up parsing a lot and also saves memory
// (unless messages are very sparse).
//
// Fills in rmd->field_offsets (which must already be allocated with one entry
// per field) and sets rmd->size to the total number of bytes per message.
static void assign_offsets(rb_msgdef *rmd) {
  // Every message begins with the VALUE of its MessageDef (the "rbmsgdef"
  // member of rb_msg); field storage starts right after it.
  size_t ofs = sizeof(VALUE);
  upb_msg_iter i;
  for (upb_msg_begin(&i, rmd->md); !upb_msg_done(&i); upb_msg_next(&i)) {
    upb_fielddef *f = upb_msg_iter_field(&i);
    size_t field_size = rupb_sizeof(upb_fielddef_type(f));
    ofs = align_up(ofs, field_size);  // Align field properly.
    rmd->field_offsets[upb_fielddef_index(f)] = ofs;
    ofs += field_size;
  }
  rmd->size = ofs;
}
// Constructs a new Ruby wrapper object around the given msgdef.
//
// Takes its own ref on "md", computes the message layout and builds the
// decoder method used by MessageDef#parse.
static VALUE make_msgdef(const upb_msgdef *md) {
  rb_msgdef *rmd;
  VALUE ret = Data_Make_Struct(cMessageDef, rb_msgdef, NULL, msgdef_free, rmd);

  upb_msgdef_ref(md, &rmd->md);
  rmd->md = md;

  rmd->field_offsets = ALLOC_N(uint32_t, upb_msgdef_numfields(md));

  // The layout must be computed before the decoder method is created:
  // building the handlers reads rmd->field_offsets for every field.
  assign_offsets(rmd);
  rmd->fill_method = new_fillmsg_decodermethod(rmd, &rmd->fill_method);

  return ret;
}
// Loads a descriptor and constructs a MessageDef for the named message.
//
// klass:        receiver of the singleton method (unused).
// descriptor:   String holding the serialized descriptor.
// message_name: String naming the message to look up.
//
// Raises TypeError if an argument is not a String (or convertible to one),
// ArgumentError if the name contains a NUL byte, and RuntimeError if the
// descriptor cannot be loaded or the message is not found.
static VALUE msgdef_load(VALUE klass, VALUE descriptor, VALUE message_name) {
  (void)klass;

  // Validate the arguments before reading their buffers; the RSTRING_*
  // accessors are only valid on Strings.
  StringValue(descriptor);
  const char *name = StringValueCStr(message_name);

  upb_symtab *symtab = upb_symtab_new(UPB_UNTRACKED_REF);

  // Wrap the symtab in a Ruby object so it gets GC'd.
  // In a real wrapper we would wrap this object more fully (ie. expose its
  // methods to Ruby callers).
  VALUE symtab_obj = Data_Wrap_Struct(rb_cObject, NULL, symtab_free, symtab);

  upb_status status = UPB_STATUS_INIT;
  upb_load_descriptor_into_symtab(
      symtab, RSTRING_PTR(descriptor), RSTRING_LEN(descriptor), &status);

  if (!upb_ok(&status)) {
    rb_raise(rb_eRuntimeError,
             "Error loading descriptor: %s", upb_status_errmsg(&status));
  }

  const upb_msgdef *m = upb_symtab_lookupmsg(symtab, name);

  if (!m) {
    rb_raise(rb_eRuntimeError, "Message name '%s' not found", name);
  }

  VALUE ret = make_msgdef(m);

  // Keep the wrapper (and with it the symtab that owns "m") reachable until
  // make_msgdef() has taken its own ref; otherwise a GC run triggered by an
  // allocation above could free the symtab while it is still in use.
  RB_GC_GUARD(symtab_obj);
  return ret;
}
// MessageDef#parse: parses a binary protobuf into a new Message of this type.
//
// self:            the MessageDef object.
// binary_protobuf: String holding the serialized message.
//
// Returns the new Message.  Raises TypeError if the argument is not a String
// (or convertible to one) and RuntimeError if the decoder reports an error.
static VALUE msgdef_parse(VALUE self, VALUE binary_protobuf) {
  // Validate the argument before reading its buffer; the RSTRING_* accessors
  // are only valid on Strings.
  StringValue(binary_protobuf);

  const rb_msgdef *rmd;
  Data_Get_Struct(self, rb_msgdef, rmd);

  VALUE msg = msg_new(self);
  rb_msg *msgp;
  Data_Get_Struct(msg, rb_msg, msgp);

  const upb_handlers *h = upb_pbdecodermethod_desthandlers(rmd->fill_method);
  upb_pbdecoder decoder;
  upb_sink sink;
  upb_status status = UPB_STATUS_INIT;

  upb_pbdecoder_init(&decoder, rmd->fill_method, &status);
  upb_sink_reset(&sink, h, msgp);
  upb_pbdecoder_resetoutput(&decoder, &sink);

  // The payload is binary, so bound the debug print by the string's length
  // instead of relying on a terminating NUL.
  fprintf(stderr, "STR: %.*s\n", (int)RSTRING_LEN(binary_protobuf),
          RSTRING_PTR(binary_protobuf));
  fprintf(stderr, "LEN: %d\n", (int)RSTRING_LEN(binary_protobuf));
  size_t n = upb_bufsrc_putbuf(RSTRING_PTR(binary_protobuf),
                               RSTRING_LEN(binary_protobuf),
                               upb_pbdecoder_input(&decoder));
  fprintf(stderr, "n: %d\n", (int)n);

  // TODO(haberman): make uninit optional if custom allocator for parsing
  // returns GC-rooted memory.  That will make decoding longjmp-safe (required
  // if parsing triggers any VM errors like OOM or errors in user handlers).
  upb_pbdecoder_uninit(&decoder);
  rupb_checkstatus(&status);

  return msg;
}
// Extension entry point: defines the Upb module together with its MessageDef
// and Message classes.
void Init_upb() {
  VALUE upb_module = rb_define_module("Upb");

  // Upb::MessageDef, with MessageDef.load(descriptor, name) and
  // MessageDef#parse(binary).
  cMessageDef = rb_define_class_under(upb_module, "MessageDef", rb_cObject);
  rb_define_singleton_method(cMessageDef, "load", msgdef_load, 2);
  rb_define_method(cMessageDef, "parse", msgdef_parse, 1);

  // Upb::Message, the class of parsed messages.
  cMessage = rb_define_class_under(upb_module, "Message", rb_cObject);
}
Loading…
Cancel
Save