Removed a bunch of obsolete code.

A lot of this code was experimental or temporarily
useful, but is no longer needed.
Josh Haberman 7 years ago
parent e3eae33fb5
commit c0a50de923
  1. 123
  2. 100
  3. 22
  4. 20
  5. 281
  6. 255
  7. 512
  8. 39
  9. 1395
  10. 53
  11. 20
  12. 14
  13. 2
  14. 2
  15. 2
  16. 17
  17. 7
  18. 14
  19. 72
  20. 732
  21. 0
  22. 30
  23. 13
  24. 1173
  25. 64
  26. 38
  27. 18
  28. 172
  29. 64

@ -384,83 +384,6 @@ tests/conformance_upb: tests/conformance_upb.c lib/libupb.a obj/conformance_prot
$(CC) -o tests/conformance_upb tests/conformance_upb.c -Iobj -I. $(CPPFLAGS) $(CFLAGS) obj/conformance.upb.c obj/google/protobuf/*.upb.c lib/libupb.a
# Google protobuf binding ######################################################
upb_bindings_googlepb_SRCS = \
upb/bindings/googlepb/ \
upb/bindings/googlepb/ \
tests/bindings/googlepb/test_vs_proto2.googlemessage1 \
tests/bindings/googlepb/test_vs_proto2.googlemessage2 \
.PHONY: googlepb clean_googlepb googlepbtest
clean: clean_googlepb
@rm -f tests/bindings/googlepb/test_vs_proto2.googlemessage*
@rm -f tests/googlemessage?.h
@rm -f $(GOOGLEPB_LIB)
googlepb: default $(GOOGLEPB_LIB)
googlepbtests: googlepb $(GOOGLEPB_TESTS)
lib/libupb.bindings.googlepb.a: $(upb_bindings_googlepb_SRCS:upb/
$(E) AR $@
$(Q) mkdir -p lib && $(AR) rcs $@ $^
# Generate C++ with Google's protobuf compiler, to test and benchmark against.
tests/google_messages.proto.pb: tests/google_messages.proto
@# TODO: replace with upbc.
protoc tests/google_messages.proto -otests/google_messages.proto.pb
tests/ tests/google_messages.proto
protoc tests/google_messages.proto --cpp_out=.
$(E) XXD tests/google_message1.dat
$(Q) xxd -i < tests/google_message1.dat > tests/google_message1.h
$(E) XXD tests/google_message2.dat
$(Q) xxd -i < tests/google_message2.dat > tests/google_message2.h
lib/libupb.bindings.googlepb.a \
lib/libupb.pb.a \
lib/libupb.descriptor.a \
lib/libupb.a \
tests/bindings/googlepb/ \
tests/google_messages.proto.pb \
tests/ \
tests/testmain.o \
tests/bindings/googlepb/test_vs_proto2.googlemessage1: $(GOOGLEPB_TEST_DEPS) \
tests/google_message1.h \
$(E) CXX $< '(benchmarks::SpeedMessage1)'
$(Q) $(CXX) $(OPT) $(WARNFLAGS_CXX) $(CPPFLAGS) $(CXXFLAGS) -o $@ $< \
-DMESSAGE_CIDENT="benchmarks::SpeedMessage1" \
-DMESSAGE_DATA_IDENT=message1_data \
tests/ tests/testmain.o -lprotobuf -lpthread \
tests/bindings/googlepb/test_vs_proto2.googlemessage2: $(GOOGLEPB_TEST_DEPS) \
tests/google_message1.h \
$(E) CXX $< '(benchmarks::SpeedMessage2)'
$(Q) $(CXX) $(OPT) $(WARNFLAGS_CXX) $(CPPFLAGS) $(CXXFLAGS) -o $@ $< \
-DMESSAGE_CIDENT="benchmarks::SpeedMessage2" \
-DMESSAGE_DATA_IDENT=message2_data \
tests/ tests/testmain.o -lprotobuf -lpthread \
# Lua extension ##################################################################
ifeq ($(shell uname), Darwin)
@ -514,49 +437,3 @@ upb/bindings/lua/upb/ upb/bindings/lua/upb/table.c lib/libupb_pic.a
upb/bindings/lua/upb/ upb/bindings/lua/upb/pb.c $(LUA_LIB_DEPS)
$(E) CC upb/bindings/lua/upb/pb.c
$(Q) $(CC) $(OPT) $(CSTD) $(WARNFLAGS) $(CPPFLAGS) $(CFLAGS) -fpic -shared -o $@ $^ $(LUA_LDFLAGS)
# Python extension #############################################################
python: $(PYTHONEXT)
$(PYTHONEXT): $(LIBUPB_PIC) bindings/python/upb.c
$(E) PYTHON bindings/python/upb.c
$(Q) cd bindings/python && $(PYTHON) build --debug install --home=build/install
pythontest: $(PYTHONEXT)
cd bindings/python && cp build/install/lib/python && valgrind $(PYTHON) ./build/install/lib/python/
# Ruby extension ###############################################################
ruby: $(RUBYEXT)
# We pass our important flags to Ruby, but leave the warning flags out.
# Some uses of the Ruby/C API trigger the warnings we normally use, so
# we let Ruby decide the set of warning options to use.
upb/bindings/ruby/Makefile: upb/bindings/ruby/extconf.rb lib/libupb_pic.a lib/libupb.pb_pic.a lib/libupb.descriptor_pic.a
$(E) RUBY upb/bindings/ruby/extconf.rb
$(Q) cd upb/bindings/ruby && ruby extconf.rb "$(OPT) $(CPPFLAGS) $(CFLAGS)"
$(RUBYEXT): upb/bindings/ruby/upb.c upb/bindings/ruby/Makefile
$(E) CC upb/bindings/ruby/upb.c
$(Q) cd upb/bindings/ruby && make
rubytest: $(RUBYEXT) upb/descriptor/descriptor.pb
RUBYLIB="upb/bindings/ruby" ruby tests/bindings/ruby/upb.rb
# Amalgamated source (upb.c/upb.h) ############################################
AMALGAMATE_SRCS=$(upb_SRCS) $(upb_descriptor_SRCS) $(upb_pb_SRCS) $(upb_json_SRCS)
amalgamate: upb.c upb.h
upb.c upb.h: $(AMALGAMATE_SRCS)
$(Q) ./tools/ "" "" $^
amalgamated: upb.c upb.h
$(E) CC upb.c
$(Q) $(CC) -o upb.o -c upb.c $(WARNFLAGS)

@ -1,32 +1,10 @@
# Unleaded - small, fast parsers for the 21st century
# μpb - a small protobuf implementation in C
[![Build Status](](
[![Coverage Status](](
Unleaded is a library of fast parsers and serializers. These
parsers/serializers are written in C and use every available
avenue (particularly JIT compilation) to achieve the fastest
possible speed. However they are also extremely lightweight
(less than 100k of object code) and low-overhead.
The library started as a Protocol Buffers library (upb originally
meant μpb: Micro Protocol Buffers). It still uses
protobuf-like schemas as a core abstraction, but **it has expanded
beyond just Protocol Buffers** to JSON, and other formats are
The library itself is written in C, but very idiomatic APIs
are provided for C++ and popular dynamic languages such as
Lua. See the rest of this README for more information about
these bindings.
Some parts of Unleaded are mature (most notably parsing of
Protocol Buffers) but others are still immature or nonexistent.
The core library abstractions are rapidly converging (this
is saying a lot; it was a long road of about 5 years to make
this happen), which should make it possible to begin building
out the encoders and decoders in earnest.
μpb is a small protobuf implementation written in C.
API and ABI are both subject to change! Please do not distribute
as a shared library for this reason (for now at least).
@ -47,88 +25,16 @@ Other useful targets:
$ make tests
$ make test
## How the library is organized
Unleaded tries to stay very small, but also aims to support
lots of different formats. We reconcile these goals by
being *aggressively modular*. The source tree and the build
artifacts both reflect this organization:
* **upb**: the core library of handlers and defs (schemas)
* **upb/pb**: encoders/decoders for Protocol Buffers
* **upb/json**: encoders/decoders for JSON
* **upb/descriptor**: building upb defs from protobuf descriptors
(ie. descriptor.proto)
* **upb/bindings/googlepb**: binding to the Google protobuf
* **upb/bindings/lua**: binding to the Lua C API (Lua and LuaJIT).
* more to come!
## C and C++ API
The public C/C++ API is defined by all of the .h files in
`upb/` except `.int.h` files (which are internal-only).
The `.h` files define both C and C++ APIs. Both languages
have 100% complete and first-class APIs. The C++ API is a
wrapper around the C API, but all of the wrapping is done in
inline methods in `.h` files, so there is no overhead to the wrapping.
For a more detailed description of the scheme we use to
provide both C and C++ APIs, see:
All of the code that is under `upb/` but *not* under
`upb/bindings/` forms the namespace of upb's cross-language
public API. For example, the code in upb/descriptor would
be exposed as follows:
* **in C/C++:** `#include "upb/descriptor/X.h"`
* **in Lua:** `require "upb.descriptor"`
* **in Python:** `import upb.descriptor`
* etc.
## Google protobuf bindings
Unleaded supports integration with the
[Google protobuf library](
These bindings let you:
* convert protobuf schema objects (`Descriptor`, `FieldDescriptor`, etc).
to their Unleaded equivalents (`upb::MessageDef`, `upb::FieldDef`).
* use Unleaded parsers to populate protobuf generated classes.
Unleaded's parsers are much faster than protobuf's `DynamicMessage`.
If you are generating C++ with the protobuf compiler, then protobuf's
parsers are the same speed or a little faster than Unleaded in JIT
mode, but Unleaded will have smaller binaries because you don't
have to generate the code ahead of time.
To build the Google protobuf integration you must have the protobuf
libraries already installed. Once they are installed run:
$ make googlepb
To test run:
$ make googlepbtests
$ make test
## Lua bindings
Lua bindings provide Unleaded's functionality to Lua programs.
Lua bindings provide μpb's functionality to Lua programs.
The bindings target Lua 5.1, Lua 5.2, LuaJIT, and (soon) Lua 5.3.
Right now the Lua bindings support:
* Building schema objects manually (eg. you can essentially write
.proto files natively in Lua).
* creating message objects.
* parsing Protocol Buffers into message objects.
Other capabilities (parse/serialize JSON, serialize Protocol Buffers)
are coming.
To build the Lua bindings, the Lua libraries must be installed. Once
they are installed, run:

@ -1,25 +1,5 @@
This directory contains code that interfaces upb with external C/C++
libraries. For example:
* upb/bindings/{stdc,stdc++}
interfaces between upb and the standard libraries of C and C++ (like C's
FILE/stdio, C++'s string/iostream, etc.)
* upb/bindings/googlepb
interfaces between upb and the "protobuf" library distributed by Google.
libraries. Right now this is:
* upb/bindings/lua:
a Lua extension that exposes upb to Lua programs via the Lua C API.
* upb/bindings/linux:
code and build system for building upb as a Linux kernel module.
The two key characteristics that decide whether code belongs in upb/bindings/ are:
* Does the code's public API refer to types from another library?
If so it belongs in upb/bindings/. But this doesn't include code that just
happens to use another library internally, as an implementation detail.
* Would this code be useful to someone who is not using this external library
in some other way? If so, the code probably doesn't belong in upb/bindings/.

@ -1,20 +0,0 @@
This directory contains code to interoperate with Google's official
Protocol Buffers release. Since it doesn't really have a name
besides "protobuf," calling this directory "googlepb" seems like the
least confusing option, since it lives in the google::protobuf namespace.
We support writing into protobuf's generated classes (and hopefully
reading too, before long). We support both the open source protobuf
release and the Google-internal version (which is mostly the same
code, just in a different namespace). A single compile of upb can
support both (there are no conflicts thanks to function overloading).
The internal version supports some features that are not supported in
the open-source release. Also, the internal version includes the
legacy "proto1" classes which we must support; thankfully this is
mostly relegated to its own separate file.
Our functionality requires the full google::protobuf::Message
interface; we rely on reflection so we know what fields to read/write
and where to put them, so we can't support MessageLite.

@ -1,281 +0,0 @@
// IMPORTANT NOTE! Inside Google, This file is compiled TWICE, once with
// UPB_GOOGLE3 defined and once without! This allows us to provide
// functionality against proto2 and protobuf opensource both in a single binary
// without the two conflicting. However we must be careful not to violate the
// ODR.
#include "upb/bindings/googlepb/bridge.h"
#include <stdio.h>
#include <map>
#include <string>
#include "upb/def.h"
#include "upb/bindings/googlepb/"
#include "upb/bindings/googlepb/"
#include "upb/handlers.h"
#define ASSERT_STATUS(status) do { \
if (!upb_ok(status)) { \
fprintf(stderr, "upb status failure: %s\n", upb_status_errmsg(status)); \
UPB_ASSERT(upb_ok(status)); \
} \
} while (0)
#ifdef UPB_GOOGLE3
#include "net/proto2/public/descriptor.h"
#include "net/proto2/public/message.h"
#include "net/proto2/proto/descriptor.pb.h"
namespace goog = ::proto2;
#include "google/protobuf/descriptor.h"
#include "google/protobuf/message.h"
#include "google/protobuf/descriptor.pb.h"
namespace goog = ::google::protobuf;
namespace upb {
namespace googlepb {
const goog::Message* TryGetFieldPrototype(const goog::Message& m,
const goog::FieldDescriptor* f) {
const goog::Message* ret = upb::googlepb::GetProto2FieldPrototype(m, f);
#ifdef UPB_GOOGLE3
if (!ret) ret = upb::googlepb::GetProto1FieldPrototype(m, f);
return ret;
const goog::Message* GetFieldPrototype(const goog::Message& m,
const goog::FieldDescriptor* f) {
const goog::Message* ret = TryGetFieldPrototype(m, f);
return ret;
/* DefBuilder ****************************************************************/
const EnumDef* DefBuilder::GetEnumDef(const goog::EnumDescriptor* ed) {
const EnumDef* cached = FindInCache<EnumDef>(ed);
if (cached) return cached;
EnumDef* e = AddToCache(ed, EnumDef::New());
Status status;
e->set_full_name(ed->full_name(), &status);
for (int i = 0; i < ed->value_count(); i++) {
const goog::EnumValueDescriptor* val = ed->value(i);
bool success = e->AddValue(val->name(), val->number(), &status);
return e;
const MessageDef* DefBuilder::GetMaybeUnfrozenMessageDef(
const goog::Descriptor* d, const goog::Message* m) {
const MessageDef* cached = FindInCache<MessageDef>(d);
if (cached) return cached;
MessageDef* md = AddToCache(d, MessageDef::New());
Status status;
md->set_full_name(d->full_name(), &status);
// Find all regular fields and extensions for this message.
std::vector<const goog::FieldDescriptor*> fields;
d->file()->pool()->FindAllExtensions(d, &fields);
for (int i = 0; i < d->field_count(); i++) {
for (size_t i = 0; i < fields.size(); i++) {
const goog::FieldDescriptor* proto2_f = fields[i];
md->AddField(NewFieldDef(proto2_f, m), &status);
return md;
reffed_ptr<FieldDef> DefBuilder::NewFieldDef(const goog::FieldDescriptor* f,
const goog::Message* m) {
reffed_ptr<FieldDef> upb_f(FieldDef::New());
Status status;
upb_f->set_number(f->number(), &status);
#ifdef UPB_GOOGLE3
if (f->is_extension()) {
upb_f->set_name(f->full_name(), &status);
} else {
upb_f->set_name(f->name(), &status);
const goog::Message* subm = NULL;
if (m) {
subm = TryGetFieldPrototype(*m, f);
if (upb_f->type() == UPB_TYPE_MESSAGE) {
} else if (subm) {
// Weak field: subm will be weak prototype even though the proto2
// descriptor does not indicate a submessage field.
switch (upb_f->type()) {
case UPB_TYPE_INT32:
case UPB_TYPE_INT64:
upb_f->set_default_string(f->default_value_string(), &status);
const goog::Descriptor* subd =
subm ? subm->GetDescriptor() : f->message_type();
upb_f->set_message_subdef(GetMaybeUnfrozenMessageDef(subd, subm),
// We set the enum default numerically.
upb_f->set_enum_subdef(GetEnumDef(f->enum_type()), &status);
return upb_f;
void DefBuilder::Freeze() {
upb::Status status;
upb::Def::Freeze(to_freeze_, &status);
const MessageDef* DefBuilder::GetMessageDef(const goog::Descriptor* d) {
const MessageDef* ret = GetMaybeUnfrozenMessageDef(d, NULL);
return ret;
const MessageDef* DefBuilder::GetMessageDefExpandWeak(
const goog::Message& m) {
const MessageDef* ret = GetMaybeUnfrozenMessageDef(m.GetDescriptor(), &m);
return ret;
/* WriteHandlers *************************************************************/
// static
bool WriteHandlers::AddFieldHandler(const goog::Message& m,
const goog::FieldDescriptor* f,
upb::Handlers* h) {
const FieldDef* upb_f = h->message_def()->FindFieldByNumber(f->number());
if (!upb_f) return false;
if (upb::googlepb::TrySetWriteHandlers(f, m, upb_f, h)) return true;
#ifdef UPB_GOOGLE3
if (upb::googlepb::TrySetProto1WriteHandlers(f, m, upb_f, h)) return true;
// Unsupported reflection class.
// Should we fall back to using the public Reflection interface in this
// case? It's unclear whether it's supported behavior for users to
// create their own Reflection classes.
return false;
// static
upb::reffed_ptr<const upb::Handlers> WriteHandlers::New(
const goog::Message& m) {
CodeCache cache;
return upb::reffed_ptr<const upb::Handlers>(cache.GetWriteHandlers(m));
/* CodeCache *****************************************************************/
const Handlers* CodeCache::GetMaybeUnfrozenWriteHandlers(
const MessageDef* md, const goog::Message& m) {
const Handlers* cached = FindInCache(md);
if (cached) return cached;
Handlers* h = AddToCache(md, upb::Handlers::New(md));
const goog::Descriptor* d = m.GetDescriptor();
for (upb::MessageDef::const_field_iterator i = md->field_begin();
i != md->field_end(); ++i) {
const FieldDef* upb_f = *i;
const goog::FieldDescriptor* proto2_f =
if (!proto2_f) {
proto2_f = d->file()->pool()->FindExtensionByNumber(d, upb_f->number());
bool ok = WriteHandlers::AddFieldHandler(m, proto2_f, h);
if (upb_f->type() == UPB_TYPE_MESSAGE) {
const goog::Message* prototype = GetFieldPrototype(m, proto2_f);
const upb::Handlers* sub_handlers =
GetMaybeUnfrozenWriteHandlers(upb_f->message_subdef(), *prototype);
h->SetSubHandlers(upb_f, sub_handlers);
return h;
const Handlers* CodeCache::GetWriteHandlers(const goog::Message& m) {
const MessageDef* md = def_builder_.GetMessageDefExpandWeak(m);
const Handlers* ret = GetMaybeUnfrozenWriteHandlers(md, m);
upb::Status status;
upb::Handlers::Freeze(to_freeze_, &status);
return ret;
} // namespace googlepb
} // namespace upb

@ -1,255 +0,0 @@
// upb::googlepb::DefBuilder
// upb::googlepb::WriteHandlers
// upb::googlepb::CodeCache
// This file contains functionality for constructing upb Defs and Handlers
// corresponding to proto2 messages. Using this functionality, you can use upb
// to dynamically generate parsing code that can behave exactly like proto2's
// generated parsing code. Alternatively, you can configure things to
// read/write only a subset of the fields for higher performance when only some
// fields are needed.
// Example usage:
// // JIT the parser; should only be done once ahead-of-time.
// upb::reffed_ptr<const upb::Handlers> write_myproto(
// upb::google::NewWriteHandlers(MyProto()));
// upb::reffed_ptr<const upb::Handlers> parse_myproto(
// upb::Decoder::NewDecoderHandlers(write_myproto.get(), true));
// // The actual parsing.
// MyProto proto;
// upb::SeededPipeline<8192> pipeline(upb_realloc, NULL);
// upb::Sink* write_sink = pipeline.NewSink(write_myproto.get());
// upb::Sink* parse_sink = pipeline.NewSink(parse_myproto.get());
// upb::pb::Decoder* decoder = decoder_sink->GetObject<upb::pb::Decoder>();
// upb::pb::ResetDecoderSink(decoder, write_sink);
// write_sink->Reset(&proto);
// Note that there is currently no support for
// CodedInputStream::SetExtensionRegistry(), which allows specifying a separate
// DescriptorPool and MessageFactory for extensions. Since this is a property
// of the input in proto2, it's difficult to build a plan ahead-of-time that
// can properly support this. If it's an important use case, the caller should
// probably build a upb plan explicitly.
#include <map>
#include <vector>
#include "upb/handlers.h"
#include "upb/upb.h"
namespace google {
namespace protobuf {
class FieldDescriptor;
class Descriptor;
class EnumDescriptor;
class Message;
} // namespace protobuf
} // namespace google
namespace proto2 {
class FieldDescriptor;
class Descriptor;
class EnumDescriptor;
class Message;
namespace upb {
namespace googlepb {
// Builds upb::Defs from proto2::Descriptors, and caches all built Defs for
// reuse. CodeCache (below) uses this internally; there is no need to use this
// class directly unless you only want Defs without corresponding Handlers.
// This class is NOT thread-safe.
class DefBuilder {
// Functions to get or create a Def from a corresponding proto2 Descriptor.
// The returned def will be frozen.
// The caller must take a ref on the returned value if it needs it long-term.
// The DefBuilder will retain a ref so it can keep the Def cached, but
// garbage-collection functionality may be added to DefBuilder later that
// could unref the returned pointer.
const EnumDef* GetEnumDef(const proto2::EnumDescriptor* d);
const EnumDef* GetEnumDef(const ::google::protobuf::EnumDescriptor* d);
const MessageDef* GetMessageDef(const proto2::Descriptor* d);
const MessageDef* GetMessageDef(const ::google::protobuf::Descriptor* d);
// Gets or creates a frozen MessageDef, properly expanding weak fields.
// Weak fields are only represented as BYTES fields in the Descriptor (unless
// you construct your descriptors in a somewhat complicated way; see
//, but we can get their true
// definitions relatively easily from the proto Message class.
const MessageDef* GetMessageDefExpandWeak(const proto2::Message& m);
const MessageDef* GetMessageDefExpandWeak(
const ::google::protobuf::Message& m);
// Static methods for converting a def without building a DefBuilder.
static reffed_ptr<const MessageDef> NewMessageDef(
const proto2::Descriptor* d) {
DefBuilder builder;
return reffed_ptr<const MessageDef>(builder.GetMessageDef(d));
// Like GetMessageDef*(), except the returned def might not be frozen.
// We need this function because circular graphs of MessageDefs need to all
// be frozen together, so we have to create the graphs of defs in an unfrozen
// state first.
// If m is non-NULL, expands weak message fields.
const MessageDef* GetMaybeUnfrozenMessageDef(const proto2::Descriptor* d,
const proto2::Message* m);
const MessageDef* GetMaybeUnfrozenMessageDef(
const ::google::protobuf::Descriptor* d,
const ::google::protobuf::Message* m);
// Returns a new-unfrozen FieldDef corresponding to this FieldDescriptor.
// The return value is always newly created (never cached) and the returned
// pointer is the only owner of it.
// If "m" is non-NULL, expands the weak field if it is one, and populates
// *subm_prototype with a prototype of the submessage if this is a weak or
// non-weak MESSAGE or GROUP field.
reffed_ptr<FieldDef> NewFieldDef(const proto2::FieldDescriptor* f,
const proto2::Message* m);
reffed_ptr<FieldDef> NewFieldDef(const ::google::protobuf::FieldDescriptor* f,
const ::google::protobuf::Message* m);
// Freeze all defs that haven't been frozen yet.
void Freeze();
template <class T>
T* AddToCache(const void *proto2_descriptor, reffed_ptr<T> def) {
UPB_ASSERT(def_cache_.find(proto2_descriptor) == def_cache_.end());
def_cache_[proto2_descriptor] = def;
return def.get(); // Continued lifetime is guaranteed by cache.
template <class T>
const T* FindInCache(const void *proto2_descriptor) {
DefCache::iterator iter = def_cache_.find(proto2_descriptor);
return iter == def_cache_.end() ? NULL :
upb::down_cast<const T*>(iter->second.get());
// Maps a proto2 descriptor to the corresponding upb Def we have constructed.
// The proto2 descriptor is void* because the proto2 descriptor types do not
// share a common base.
typedef std::map<const void*, reffed_ptr<upb::Def> > DefCache;
DefCache def_cache_;
// Defs that have not been frozen yet.
std::vector<Def*> to_freeze_;
// Handlers to populate a proto2::Message with incoming data.
class WriteHandlers {
// Returns a upb::Handlers object that can be used to populate a
// proto2::Message object of the same type as "m." For more control over
// handler caching and reuse, instantiate a CodeCache object below.
static upb::reffed_ptr<const upb::Handlers> New(const proto2::Message& m);
static upb::reffed_ptr<const upb::Handlers> New(
const ::google::protobuf::Message& m);
// TODO(haberman): add an interface that takes a list of field paths,
// something like:
// // Returns a Handlers instance that will populate the given field paths
// // only, dropping data for all other field paths on the floor.
// static upb::reffed_ptr<const upb::Handlers> New(
// const proto2::Message& m,
// const std::vector<std::string>& paths);
// A lower-level interface with field granularity.
// Adds a handler to the given upb::Handlers for parsing the given field. If
// you only want to write certain fields into the proto2 message at parse
// time, call these methods ONLY for the fields you want to parse.
// The given field can be either a regular field or an extension, as long as
// its containing_type() matches this message.
static bool AddFieldHandler(const proto2::Message& m,
const proto2::FieldDescriptor* f,
upb::Handlers* h);
static bool AddFieldHandler(const ::google::protobuf::Message& m,
const ::google::protobuf::FieldDescriptor* f,
upb::Handlers* h);
// Builds and caches upb::Handlers for populating proto2 generated classes.
// This class is NOT thread-safe.
class CodeCache {
// Gets or creates handlers for populating messages of the given message type.
// The caller must take a ref on the returned value if it needs it long-term.
// The CodeCache will retain a ref so it can keep the Def cached, but
// garbage-collection functionality may be added to CodeCache later that could
// unref the returned pointer.
const Handlers* GetWriteHandlers(const proto2::Message& m);
const Handlers* GetWriteHandlers(const ::google::protobuf::Message& m);
const Handlers* GetMaybeUnfrozenWriteHandlers(const MessageDef* md,
const proto2::Message& m);
const Handlers* GetMaybeUnfrozenWriteHandlers(
const MessageDef* md, const ::google::protobuf::Message& m);
Handlers* AddToCache(const MessageDef* md, reffed_ptr<Handlers> handlers) {
UPB_ASSERT(handlers_cache_.find(md) == handlers_cache_.end());
handlers_cache_[md] = handlers;
return handlers.get(); // Continue lifetime is guaranteed by the cache.
const Handlers* FindInCache(const MessageDef* md) {
HandlersCache::iterator iter = handlers_cache_.find(md);
return iter == handlers_cache_.end() ? NULL : iter->second.get();
DefBuilder def_builder_;
typedef std::map<const MessageDef*, upb::reffed_ptr<const Handlers> >
HandlersCache handlers_cache_;
std::vector<Handlers*> to_freeze_;
// Functions for getting prototypes; these are only necessary if you are
// building handlers manually, field by field.
// Given a message and a field descriptor for that message, returns a prototype
// for the submessage. Requires that this is a submessage field or a weak
// field.
const proto2::Message* GetFieldPrototype(const proto2::Message& m,
const proto2::FieldDescriptor* f);
const ::google::protobuf::Message* GetFieldPrototype(
const ::google::protobuf::Message& m,
const ::google::protobuf::FieldDescriptor* f);
// Given a message and a field descriptor for that message, returns a prototype
// for the submessage, or NULL if this is not a submessage field or a weak
// field. If this returns non-NULL even though the descriptor's type is not a
// submessage, then this is a weak field. If you don't know what a weak field
// is, you are probably not using one.
const proto2::Message* TryGetFieldPrototype(const proto2::Message& m,
const proto2::FieldDescriptor* f);
const ::google::protobuf::Message* TryGetFieldPrototype(
const ::google::protobuf::Message& m,
const ::google::protobuf::FieldDescriptor* f);
} // namespace googlepb
} // namespace upb

@ -1,512 +0,0 @@
// This set of handlers can write into a proto2::Message whose reflection class
// is _pi::Proto2Reflection (ie. proto1 messages; while slightly confusing, the
// name "Proto2Reflection" indicates that it is a reflection class implementing
// the proto2 reflection interface, but is used for proto1 generated messages).
// Like FieldAccessor this depends on breaking encapsulation, and will need to
// be changed if and when the details of _pi::Proto2Reflection change.
// Note that we have received an exception from c-style-arbiters regarding
// dynamic_cast<> in this file:
#include "upb/bindings/googlepb/"
#include <memory>
// TEMPORARY measure until we update the friend declarations in proto1.
// Can't do in a single CL because of components.
#define private public
#define protected public
#include "net/proto2/public/repeated_field.h"
#include "net/proto/internal_layout.h"
#include "net/proto/proto2_reflection.h"
#undef private
#undef protected
namespace proto2 { class Arena; }
#include "upb/def.h"
#include "upb/handlers.h"
#include "upb/shim/shim.h"
#include "upb/sink.h"
// Unconditionally evaluate, but also assert in debug mode.
#define CHKRET(x) do { bool ok = (x); UPB_ASSERT(ok); } while (0)
template <class T> static T* GetPointer(void* message, size_t offset) {
return reinterpret_cast<T*>(static_cast<char*>(message) + offset);
namespace upb {
namespace googlepb {
class P2R_Handlers {
// Returns true if we were able to set an accessor and any other properties
// of the FieldDef that are necessary to read/write this field to a
// proto2::Message.
static bool TrySet(const proto2::FieldDescriptor* proto2_f,
const proto2::Message& m, const upb::FieldDef* upb_f,
upb::Handlers* h) {
const proto2::Reflection* base_r = m.GetReflection();
// See file comment re: dynamic_cast.
const _pi::Proto2Reflection* r =
dynamic_cast<const _pi::Proto2Reflection*>(base_r);
if (!r) return false;
// Extensions don't exist in proto1.
#define PRIMITIVE(name, type_name) \
case _pi::CREP_REQUIRED_##name: \
case _pi::CREP_OPTIONAL_##name: \
case _pi::CREP_REPEATED_##name: \
SetPrimitiveHandlers<type_name>(proto2_f, r, upb_f, h); \
return true;
switch (r->GetFieldLayout(proto2_f)->crep) {
PRIMITIVE(INT64, int64_t);
PRIMITIVE(UINT64, uint64_t);
PRIMITIVE(INT32, int32_t);
PRIMITIVE(FIXED64, uint64_t);
PRIMITIVE(FIXED32, uint32_t);
SetStringHandlers(proto2_f, r, upb_f, h);
return true;
SetOutOfLineStringHandlers(proto2_f, r, upb_f, h);
return true;
SetCordHandlers(proto2_f, r, upb_f, h);
return true;
SetRequiredMessageHandlers(proto2_f, m, r, upb_f, h);
return true;
SetMessageHandlers(proto2_f, m, r, upb_f, h);
return true;
SetWeakMessageHandlers(proto2_f, m, r, upb_f, h);
return true;
return false;
// If the field "f" in the message "m" is a weak field, returns the prototype
// of the submessage (which may be a specific type or may be OpaqueMessage).
// Otherwise returns NULL.
static const proto2::Message* GetWeakPrototype(
const proto2::Message& m, const proto2::FieldDescriptor* f) {
// See file comment re: dynamic_cast.
const _pi::Proto2Reflection* r =
dynamic_cast<const _pi::Proto2Reflection*>(m.GetReflection());
if (!r) return NULL;
const _pi::Field* field = r->GetFieldLayout(f);
if (field->crep == _pi::CREP_OPTIONAL_FOREIGN_WEAK) {
return static_cast<const proto2::Message*>(
} else if (field->crep == _pi::CREP_OPTIONAL_FOREIGN_WEAK_PROTO2) {
return field->proto2_weak_default_instance();
} else {
return NULL;
// If "m" is a message that uses Proto2Reflection, returns the prototype of
// the submessage (which may be OpaqueMessage for a weak field that is not
// linked in). Otherwise returns NULL.
static const proto2::Message* GetFieldPrototype(
const proto2::Message& m, const proto2::FieldDescriptor* f) {
// See file comment re: dynamic_cast.
const proto2::Message* ret = GetWeakPrototype(m, f);
if (ret) {
return ret;
} else if (dynamic_cast<const _pi::Proto2Reflection*>(m.GetReflection())) {
// Since proto1 has no dynamic message, it must be from the generated
// factory.
UPB_ASSERT(f->cpp_type() == proto2::FieldDescriptor::CPPTYPE_MESSAGE);
ret = proto2::MessageFactory::generated_factory()->GetPrototype(
return ret;
} else {
return NULL;
class FieldOffset {
FieldOffset(const proto2::FieldDescriptor* f,
const _pi::Proto2Reflection* r)
: offset_(GetOffset(f, r)), is_repeated_(f->is_repeated()) {
if (!is_repeated_) {
int64_t hasbit = GetHasbit(f, r);
hasbyte_ = hasbit / 8;
mask_ = 1 << (hasbit % 8);
template <class T> T* GetFieldPointer(proto2::Message* message) const {
return GetPointer<T>(message, offset_);
void SetHasbit(void* message) const {
uint8_t* byte = GetPointer<uint8_t>(message, hasbyte_);
*byte |= mask_;
const size_t offset_;
bool is_repeated_;
// Only for non-repeated fields.
int32_t hasbyte_;
int8_t mask_;
static upb_selector_t GetSelector(const upb::FieldDef* f,
upb::Handlers::Type type) {
upb::Handlers::Selector selector;
bool ok = upb::Handlers::GetSelector(f, type, &selector);
return selector;
static int16_t GetHasbit(const proto2::FieldDescriptor* f,
const _pi::Proto2Reflection* r) {
return (r->layout_->has_bit_offset * 8) + r->GetFieldLayout(f)->has_index;
static uint16_t GetOffset(const proto2::FieldDescriptor* f,
const _pi::Proto2Reflection* r) {
return r->GetFieldLayout(f)->offset;
// StartSequence /////////////////////////////////////////////////////////////
template <class T>
static void SetStartRepeatedField(
const proto2::FieldDescriptor* proto2_f, const _pi::Proto2Reflection* r,
const upb::FieldDef* f, upb::Handlers* h) {
f, UpbBindT(PushOffset<proto2::RepeatedField<T> >,
new FieldOffset(proto2_f, r))));
template <class T>
static void SetStartRepeatedPtrField(
const proto2::FieldDescriptor* proto2_f, const _pi::Proto2Reflection* r,
const upb::FieldDef* f, upb::Handlers* h) {
f, UpbBindT(PushOffset<proto2::RepeatedPtrField<T> >,
new FieldOffset(proto2_f, r))));
static void SetStartRepeatedSubmessageField(
const proto2::FieldDescriptor* proto2_f, const _pi::Proto2Reflection* r,
const upb::FieldDef* f, upb::Handlers* h) {
f, UpbBind(PushOffset<proto2::internal::RepeatedPtrFieldBase>,
new FieldOffset(proto2_f, r))));
template <class T>
static T* PushOffset(proto2::Message* m, const FieldOffset* offset) {
return offset->GetFieldPointer<T>(m);
// Primitive Value (numeric, enum, bool) /////////////////////////////////////
template <typename T>
static void SetPrimitiveHandlers(const proto2::FieldDescriptor* proto2_f,
const _pi::Proto2Reflection* r,
const upb::FieldDef* f, upb::Handlers* h) {
if (f->IsSequence()) {
SetStartRepeatedField<T>(proto2_f, r, f, h);
CHKRET(h->SetValueHandler<T>(f, UpbMakeHandlerT(Append<T>)));
} else {
upb::Shim::Set(h, f, GetOffset(proto2_f, r), GetHasbit(proto2_f, r)));
template <typename T>
static void Append(proto2::RepeatedField<T>* r, T val) {
// Proto1's ProtoArray class derives from proto2::RepeatedField.
// String ////////////////////////////////////////////////////////////////////
static void SetStringHandlers(const proto2::FieldDescriptor* proto2_f,
const _pi::Proto2Reflection* r,
const upb::FieldDef* f, upb::Handlers* h) {
h->SetStringHandler(f, UpbMakeHandler(OnStringBuf));
if (f->IsSequence()) {
SetStartRepeatedPtrField<string>(proto2_f, r, f, h);
CHKRET(h->SetStartStringHandler(f, UpbMakeHandler(StartRepeatedString)));
} else {
f, UpbBind(StartString, new FieldOffset(proto2_f, r))));
static string* StartString(proto2::Message* m, const FieldOffset* info,
size_t size_hint) {
string* str = info->GetFieldPointer<string>(m);
// reserve() here appears to hurt performance rather than help.
return str;
static void OnStringBuf(string* s, const char* buf, size_t n) {
s->append(buf, n);
static string* StartRepeatedString(proto2::RepeatedPtrField<string>* r,
size_t size_hint) {
string* str = r->Add();
// reserve() here appears to hurt performance rather than help.
return str;
// Out-of-line string ////////////////////////////////////////////////////////
static void SetOutOfLineStringHandlers(
const proto2::FieldDescriptor* proto2_f, const _pi::Proto2Reflection* r,
const upb::FieldDef* f, upb::Handlers* h) {
// This type is only used for non-repeated string fields.
f, UpbBind(StartOutOfLineString, new FieldOffset(proto2_f, r))));
CHKRET(h->SetStringHandler(f, UpbMakeHandler(OnStringBuf)));
static string* StartOutOfLineString(proto2::Message* m,
const FieldOffset* info,
size_t size_hint) {
string** str = info->GetFieldPointer<string*>(m);
if (*str == &::proto2::internal::GetEmptyString())
*str = new string();
// reserve() here appears to hurt performance rather than help.
return *str;
// Cord //////////////////////////////////////////////////////////////////////
static void SetCordHandlers(const proto2::FieldDescriptor* proto2_f,
const _pi::Proto2Reflection* r,
const upb::FieldDef* f, upb::Handlers* h) {
if (f->IsSequence()) {
SetStartRepeatedField<Cord>(proto2_f, r, f, h);
CHKRET(h->SetStartStringHandler(f, UpbMakeHandler(StartRepeatedCord)));
} else {
f, UpbBind(StartCord, new FieldOffset(proto2_f, r))));
CHKRET(h->SetStringHandler(f, UpbMakeHandler(OnCordBuf)));
static Cord* StartCord(proto2::Message* m, const FieldOffset* offset,
size_t size_hint) {
Cord* field = offset->GetFieldPointer<Cord>(m);
return field;
static void OnCordBuf(Cord* c, const char* buf, size_t n) {
c->Append(StringPiece(buf, n));
static Cord* StartRepeatedCord(proto2::RepeatedField<Cord>* r,
size_t size_hint) {
return r->Add();
// SubMessage ////////////////////////////////////////////////////////////////
class SubMessageHandlerData : public FieldOffset {
SubMessageHandlerData(const proto2::Message& prototype,
const proto2::FieldDescriptor* f,
const _pi::Proto2Reflection* r)
: FieldOffset(f, r) {
prototype_ = GetWeakPrototype(prototype, f);
if (!prototype_) prototype_ = GetFieldPrototype(prototype, f);
const proto2::Message* prototype() const { return prototype_; }
const proto2::Message* prototype_;
static void SetRequiredMessageHandlers(
const proto2::FieldDescriptor* proto2_f, const proto2::Message& m,
const _pi::Proto2Reflection* r, const upb::FieldDef* f,
upb::Handlers* h) {
if (f->IsSequence()) {
SetStartRepeatedSubmessageField(proto2_f, r, f, h);
f, UpbBind(StartRepeatedSubMessage,
new SubMessageHandlerData(m, proto2_f, r))));
} else {
f, UpbBind(StartRequiredSubMessage, new FieldOffset(proto2_f, r))));
static proto2::Message* StartRequiredSubMessage(proto2::Message* m,
const FieldOffset* offset) {
return offset->GetFieldPointer<proto2::Message>(m);
static void SetMessageHandlers(const proto2::FieldDescriptor* proto2_f,
const proto2::Message& m,
const _pi::Proto2Reflection* r,
const upb::FieldDef* f, upb::Handlers* h) {
std::unique_ptr<SubMessageHandlerData> data(
new SubMessageHandlerData(m, proto2_f, r));
if (f->IsSequence()) {
SetStartRepeatedSubmessageField(proto2_f, r, f, h);
f, UpbBind(StartRepeatedSubMessage, data.release())));
} else {
f, UpbBind(StartSubMessage, data.release())));
static void SetWeakMessageHandlers(const proto2::FieldDescriptor* proto2_f,
const proto2::Message& m,
const _pi::Proto2Reflection* r,
const upb::FieldDef* f, upb::Handlers* h) {
std::unique_ptr<SubMessageHandlerData> data(
new SubMessageHandlerData(m, proto2_f, r));
if (f->IsSequence()) {
SetStartRepeatedSubmessageField(proto2_f, r, f, h);
f, UpbBind(StartRepeatedSubMessage, data.release())));
} else {
f, UpbBind(StartWeakSubMessage, data.release())));
static void* StartSubMessage(proto2::Message* m,
const SubMessageHandlerData* info) {
proto2::Message** subm = info->GetFieldPointer<proto2::Message*>(m);
if (*subm == info->prototype()) *subm = (*subm)->New();
return *subm;
static void* StartWeakSubMessage(proto2::Message* m,
const SubMessageHandlerData* info) {
proto2::Message** subm = info->GetFieldPointer<proto2::Message*>(m);
if (*subm == NULL) {
*subm = info->prototype()->New();
return *subm;
class RepeatedMessageTypeHandler {
typedef proto2::Message Type;
// AddAllocated() calls this, but only if other objects are sitting
// around waiting for reuse, which we will not do.
static void Delete(Type* t) {
static ::proto2::Arena* GetArena(Type* t) {
return t->GetArena();
static void* GetMaybeArenaPointer(Type* t) {
return t->GetMaybeArenaPointer();
static inline Type* NewFromPrototype(
const Type* prototype, ::proto2::Arena* arena = NULL) {
return prototype->New(arena);
// AddAllocated() calls this, but only if other objects are sitting
// around waiting for reuse, which we will not do.
static void Delete(Type* t, ::proto2::Arena* arena) {
static void Merge(const Type& from, Type* to) {
// Closure is a RepeatedPtrField<SubMessageType>*, but we access it through
// its base class RepeatedPtrFieldBase*.
static proto2::Message* StartRepeatedSubMessage(
proto2::internal::RepeatedPtrFieldBase* r,
const SubMessageHandlerData* info) {
proto2::Message* submsg = r->AddFromCleared<RepeatedMessageTypeHandler>();
if (!submsg) {
submsg = info->prototype()->New();
return submsg;
bool TrySetProto1WriteHandlers(const proto2::FieldDescriptor* proto2_f,
const proto2::Message& m,
const upb::FieldDef* upb_f, upb::Handlers* h) {
return googlepb::P2R_Handlers::TrySet(proto2_f, m, upb_f, h);
const proto2::Message* GetProto1FieldPrototype(
const proto2::Message& m, const proto2::FieldDescriptor* f) {
const proto2::Message *weak = googlepb::P2R_Handlers::GetWeakPrototype(m, f);
if (weak) return weak;
if (f->cpp_type() != proto2::FieldDescriptor::CPPTYPE_MESSAGE) {
return NULL;
return googlepb::P2R_Handlers::GetFieldPrototype(m, f);
} // namespace googlepb
} // namespace upb

@ -1,39 +0,0 @@
// Support for registering field handlers that can write into a legacy proto1
// message. This functionality is only needed inside Google.
// This is an internal-only interface.
namespace proto2 {
class FieldDescriptor;
class Message;
namespace upb {
class FieldDef;
class Handlers;
namespace upb {
namespace googlepb {
// Sets field handlers in the given Handlers object for writing to a single
// field (as described by "proto2_f" and "upb_f") into a message constructed
// by the same factory as "prototype." Returns true if this was successful
// (this will fail if "prototype" is not a proto1 message, or if we can't
// handle it for some reason).
bool TrySetProto1WriteHandlers(const proto2::FieldDescriptor* proto2_f,
const proto2::Message& prototype,
const upb::FieldDef* upb_f, upb::Handlers* h);
// Returns a prototype for the given (possibly-weak) field. Returns NULL
// if this is not a submessage field of any kind (weak or not).
const proto2::Message* GetProto1FieldPrototype(
const proto2::Message& m, const proto2::FieldDescriptor* f);
} // namespace googlepb
} // namespace upb
#endif // UPB_GOOGLE_PROTO1_H_

File diff suppressed because it is too large Load Diff

@ -1,53 +0,0 @@
// Support for registering field handlers that can write into a proto2
// message that uses GeneratedMessageReflection (which includes all messages
// generated by the proto2 compiler as well as DynamicMessage).
// This is an internal-only interface.
namespace proto2 {
class FieldDescriptor;
class Message;
namespace google {
namespace protobuf {
class FieldDescriptor;
class Message;
namespace upb {
class FieldDef;
class Handlers;
namespace upb {
namespace googlepb {
// Sets field handlers in the given Handlers object for writing to a single
// field (as described by "proto2_f" and "upb_f") into a message constructed
// by the same factory as "prototype." Returns true if this was successful
// (this will fail if "prototype" does not use GeneratedMessageReflection, or
// if we can't handle it for some reason).
bool TrySetWriteHandlers(const proto2::FieldDescriptor* proto2_f,
const proto2::Message& prototype,
const upb::FieldDef* upb_f, upb::Handlers* h);
bool TrySetWriteHandlers(const ::google::protobuf::FieldDescriptor* proto2_f,
const ::google::protobuf::Message& prototype,
const upb::FieldDef* upb_f, upb::Handlers* h);
// Returns a prototype for the given field in "m", if the given message uses
// GeneratedMessageReflection. Otherwise returns NULL.
const proto2::Message* GetProto2FieldPrototype(
const proto2::Message& m, const proto2::FieldDescriptor* f);
const ::google::protobuf::Message* GetProto2FieldPrototype(
const ::google::protobuf::Message& m,
const ::google::protobuf::FieldDescriptor* f);
} // namespace googlepb
} // namespace upb
#endif // UPB_GOOGLE_PROTO2_H_

@ -1,20 +0,0 @@
# Kbuild makefile that builds upb as an out-of-tree Linux kernel module.
obj-m := upb.o

# Objects linked into the module. The last entry must not end in a
# backslash, or the next assignment is swallowed into this list.
upb-objs := \
  ../../upb/upb.o \
  ../../upb/bytestream.o \
  ../../upb/def.o \
  ../../upb/handlers.o \
  ../../upb/table.o \
  ../../upb/refcount.o \
  ../../upb/msg.o

# Kernel release whose build tree is used; evaluated once.
KVERSION := $(shell uname -r)

# PWD comes from the environment, which "make -C" does not rewrite, so it
# still names this directory when the kernel build re-reads this file.
ccflags-y := -I$(PWD) -I$(PWD)/../.. -Wno-declaration-after-statement -std=gnu99

.PHONY: all clean

# $(MAKE) rather than a literal "make" so -j/-n and the jobserver propagate.
all:
	$(MAKE) -C /lib/modules/$(KVERSION)/build M=$(PWD) modules

clean:
	$(MAKE) -C /lib/modules/$(KVERSION)/build M=$(PWD) clean

@ -1,14 +0,0 @@
#include <linux/kernel.h>
/* Kernel replacement for assert(): a failed assertion calls panic() with the
 * failing expression and its source location.  With NDEBUG defined the macro
 * expands to a no-op expression.  The do/while wrapper makes the macro a
 * single statement, so it is safe inside an unbraced if/else. */
#ifdef NDEBUG
#define assert(x) ((void)0)
#else
#define assert(x) \
  do { \
    if (!(x)) panic("Assertion failed: %s at %s:%d", #x, __FILE__, __LINE__); \
  } while (0)
#endif

@ -1,2 +0,0 @@
#include <linux/errno.h>

@ -1,2 +0,0 @@
#include <linux/types.h>

@ -1,2 +0,0 @@
#include <linux/kernel.h> // For sprintf and friends.

@ -1,17 +0,0 @@
** Linux-kernel implementations of some stdlib.h functions.
#include <linux/slab.h>
/* malloc() implemented with kmalloc(), always requesting GFP_ATOMIC. */
static inline void *malloc(size_t size) { return kmalloc(size, GFP_ATOMIC); }

/* free() implemented with kfree(). */
static inline void free(void *p) { kfree(p); }

/* realloc() implemented with krealloc(), always requesting GFP_ATOMIC. */
static inline void *realloc(void *p, size_t size) {
  return krealloc(p, size, GFP_ATOMIC);
}

@ -1,7 +0,0 @@
#include <linux/string.h>
#endif /* UPB_DEF_H_ */

@ -1,14 +0,0 @@
from distutils.core import setup, Extension
Extension('upb.__init__', ['upb.c'],
define_macros=[("UPB_UNALIGNED_READS_OK", 1)],

@ -1,72 +0,0 @@
import upb
import unittest
class TestFieldDef(unittest.TestCase):
  """Exercises construction and attribute access of upb.FieldDef."""

  def test_construction(self):
    # A default-constructed FieldDef has no number, name or type.
    fielddef1 = upb.FieldDef()
    self.assertTrue(fielddef1.number is None)
    self.assertTrue(fielddef1.name is None)
    self.assertTrue(fielddef1.type is None)
    self.assertEqual(fielddef1.label, upb.LABEL_OPTIONAL)

    # Keyword arguments set the corresponding attributes.
    fielddef2 = upb.FieldDef(number=5, name="field2",
                             label=upb.LABEL_REQUIRED, type=upb.TYPE_INT32,
                             type_name="MyType")
    self.assertTrue(id(fielddef1) != id(fielddef2))
    self.assertEqual(fielddef2.number, 5)
    self.assertEqual(fielddef2.name, "field2")
    self.assertEqual(fielddef2.label, upb.LABEL_REQUIRED)
    self.assertEqual(fielddef2.type, upb.TYPE_INT32)
    self.assertEqual(fielddef2.type_name, "MyType")

    # Attributes can be reassigned after construction.
    fielddef2.number = 8
    self.assertEqual(fielddef2.number, 8)

    fielddef2.name = "xxx"
    self.assertEqual(fielddef2.name, "xxx")

    fielddef2.label = upb.LABEL_REPEATED
    self.assertEqual(fielddef2.label, upb.LABEL_REPEATED)

    fielddef2.type = upb.TYPE_FLOAT
    self.assertEqual(fielddef2.type, upb.TYPE_FLOAT)

  def test_nosubclasses(self):
    # Defining a subclass of upb.FieldDef is expected to raise TypeError.
    def create_subclass():
      class MyClass(upb.FieldDef):
        pass

    self.assertRaises(TypeError, create_subclass)

  # TODO: test that assigning invalid values is properly prevented.
class TestMessageDef(unittest.TestCase):
def test_construction(self):
msgdef1 = upb.MessageDef()
self.assertTrue(msgdef1.fqname is None)
self.assertEqual(msgdef1.fields(), [])
fields = [upb.FieldDef(number=1, name="field1", type=upb.TYPE_INT32)]
msgdef2 = upb.MessageDef(fqname="Message2", fields=fields)
self.assertEqual(set(msgdef2.fields()), set(fields))
f2 = upb.FieldDef(number=2, name="field2", type=upb.TYPE_INT64)
self.assertEqual(set(msgdef2.fields()), set(fields))
class TestSymbolTable(unittest.TestCase):
def test_construction(self):
s = upb.SymbolTable()
self.assertEqual(s.defs(), []);
self.assertTrue(s.lookup("A") is not None)
self.assertTrue(s.lookup("A") is s.lookup("A"))
# Run every test case in this module when executed as a script.
if __name__ == '__main__':
  unittest.main()

@ -1,732 +0,0 @@
** Python extension exposing the core of upb: definitions, handlers,
** and a message type.
#include <stddef.h>
#include <Python.h>
#include "upb/def.h"
#include "upb/msg.h"
static bool streql(const char *a, const char *b) { return strcmp(a, b) == 0; }
PyObject *PyUpb_Error(const char *str) {
PyErr_SetString(PyExc_TypeError, str);
return NULL;
int PyUpb_ErrorInt(const char *str) {
PyErr_SetString(PyExc_TypeError, str);
return -1;
// Returns from the enclosing function with a Python TypeError (through
// PyUpb_Error) when "status" is not ok.  Wrapped in do/while so the macro
// behaves as one statement inside an unbraced if/else.
#define PyUpb_CheckStatus(status) \
  do { \
    if (!upb_ok(status)) return PyUpb_Error((status)->str); \
  } while (0)
static upb_accessor_vtbl *PyUpb_AccessorForField(upb_fielddef *f);
/* Object cache ***************************************************************/
// For objects that are just wrappers around a C object pointer, we keep a
// cache mapping C pointer -> wrapper object. This allows us to consistently
// vend the same Python object given the same C object. This prevents us from
// creating too many Python objects unnecessarily. Just as importantly, it
// provides the expected semantics:
// if field.subdef is field.subdef:
// print "Sanity prevails."
// If we conjured up a new wrapper object every time, the above would not be
// true.
// The cost is having to put all such objects in a table, but since this only
// applies to schema-level objects (defs, handlers, etc) this seems acceptable.
// We do *not* have to put all message objects in this table.
// We use weak refs so that the cache does not prevent the wrapper objects from
// being collected. The table is stored as a static variable; to use
// sub-interpreters this would need to change, but I believe that using
// sub-interpreters is exceedingly rare in practice.
typedef struct {
void *obj;
PyObject *weakreflist;
} PyUpb_ObjWrapper;
static PyObject *obj_cache = NULL;
static PyObject *reverse_cache = NULL;
static PyObject *weakref_callback = NULL;
// Utility functions for manipulating Python dictionaries keyed by pointer.
static PyObject *PyUpb_StringForPointer(const void *ptr) {
PyObject *o = PyString_FromStringAndSize((const char *)&ptr, sizeof(void*));
return o;
// Weakref callback: removes the cache entries for a wrapper that has been
// collected.  "ref" is the weak reference that was registered for it.
static PyObject *PyUpb_ObjCacheDeleteCallback(PyObject *self, PyObject *ref) {
  // Python very unfortunately clears the weakref before running our callback.
  // This prevents us from using the weakref to find the C pointer we need to
  // remove from the cache.  As a result we are forced to keep a second map
  // mapping weakref->C pointer.
  PyObject *ptr_str = PyDict_GetItem(reverse_cache, ref);  // Borrowed.
  if (ptr_str == NULL) {
    // Not tracked (or already removed); nothing to clean up.
    Py_RETURN_NONE;
  }

  // obj_cache goes first: ptr_str is borrowed from reverse_cache, so that
  // entry has to stay alive until the key has been used.
  int err = PyDict_DelItem(obj_cache, ptr_str);
  if (PyDict_DelItem(reverse_cache, ref) < 0) err = -1;
  if (err < 0) return NULL;

  // Callers own the reference we return, so None must be increfed.
  Py_RETURN_NONE;
}
// Returns a new reference to the wrapper object for the C pointer "obj",
// creating (and caching) a wrapper of the given type if none exists yet.
// Returns NULL with a Python exception set on failure.
static PyObject *PyUpb_ObjCacheGet(const void *obj, PyTypeObject *type) {
  PyObject *kv = PyUpb_StringForPointer(obj);  // New reference.
  if (kv == NULL) return NULL;

  PyObject *ret = NULL;
  PyObject *ref = PyDict_GetItem(obj_cache, kv);  // Borrowed.
  if (ref) {
    // Cache hit: PyWeakref_GetObject() returns a borrowed reference, so take
    // our own before handing it to the caller.
    ret = PyWeakref_GetObject(ref);
    UPB_ASSERT(ret != Py_None);
    Py_INCREF(ret);
  } else {
    PyUpb_ObjWrapper *wrapper = (PyUpb_ObjWrapper*)type->tp_alloc(type, 0);
    if (wrapper != NULL) {
      wrapper->obj = (void*)obj;
      wrapper->weakreflist = NULL;
      ret = (PyObject*)wrapper;
      ref = PyWeakref_NewRef(ret, weakref_callback);  // New reference.
      int ok = ref != NULL && PyDict_SetItem(obj_cache, kv, ref) == 0;
      if (ok && PyDict_SetItem(reverse_cache, ref, kv) < 0) {
        // Roll back the half-registered entry, preserving the exception.
        PyObject *etype, *evalue, *etb;
        PyErr_Fetch(&etype, &evalue, &etb);
        PyDict_DelItem(obj_cache, kv);
        PyErr_Restore(etype, evalue, etb);
        ok = 0;
      }
      // Both dicts hold their own references; drop ours.  On failure this
      // destroys the weakref before the wrapper, so no callback fires.
      Py_XDECREF(ref);
      if (!ok) {
        Py_DECREF(ret);
        ret = NULL;
      }
    }
  }

  Py_DECREF(kv);
  return ret;
}
/* PyUpb_Def ******************************************************************/
static PyTypeObject *PyUpb_TypeForDef(const upb_def *def);
static void PyUpb_Def_dealloc(PyObject *obj) {
PyUpb_ObjWrapper *wrapper = (void*)obj;
PyObject *PyUpb_Def_GetOrCreate(const upb_def *def) {
return def ? PyUpb_ObjCacheGet(def, PyUpb_TypeForDef(def)) : Py_None;
// Will need to expand once other kinds of defs are supported.
#define Check_Def(o, badret) Check_MessageDef(o, badret)
/* PyUpb_FieldDef *************************************************************/
static PyTypeObject PyUpb_FieldDefType;
static int PyUpb_FieldDef_setattro(PyObject *o, PyObject *key, PyObject *val);
#define Check_FieldDef(o, badret) \
(void*)(((PyUpb_ObjWrapper*)o)->obj); do { \
if(!PyObject_TypeCheck(o, &PyUpb_FieldDefType)) { \
PyErr_SetString(PyExc_TypeError, "must be a upb.FieldDef"); \
return badret; \
} \
} while(0)
static PyObject *PyUpb_FieldDef_GetOrCreate(const upb_fielddef *f) {
return PyUpb_ObjCacheGet(f, &PyUpb_FieldDefType);
static PyObject *PyUpb_FieldDef_new(PyTypeObject *subtype,
PyObject *args, PyObject *kwds) {
return PyUpb_ObjCacheGet(upb_fielddef_new(), subtype);
// tp_init for upb.FieldDef: applies every keyword argument as an attribute
// assignment.  Returns 0 on success, or -1 (exception set by the setter) as
// soon as one assignment is rejected.
static int PyUpb_FieldDef_init(PyObject *self, PyObject *args, PyObject *kwds) {
  if (!kwds) return 0;
  PyObject *key, *value;
  Py_ssize_t pos = 0;
  while (PyDict_Next(kwds, &pos, &key, &value)) {
    // Propagate the failure instead of returning success with an exception
    // still pending.
    if (PyUpb_FieldDef_setattro(self, key, value) < 0) return -1;
  }
  return 0;
}
static void PyUpb_FieldDef_dealloc(PyObject *obj) {
PyUpb_ObjWrapper *wrapper = (void*)obj;
if (wrapper->weakreflist) PyObject_ClearWeakRefs(obj);
// tp_getattro for upb.FieldDef.  Supports "name", "number", "type", "label"
// and "type_name"; unset values are reported as None.  Returns a new
// reference, or NULL with an exception set.
static PyObject *PyUpb_FieldDef_getattro(PyObject *obj, PyObject *attr_name) {
  upb_fielddef *f = Check_FieldDef(obj, NULL);
  // NOTE(review): reads are refused on immutable fielddefs as well; this
  // mirrors the setter and may be unintentional -- confirm.
  if (!upb_fielddef_ismutable(f)) {
    PyErr_SetString(PyExc_TypeError, "fielddef is not mutable.");
    return NULL;
  }
  const char *name = PyString_AsString(attr_name);
  if (name == NULL) return NULL;  // Not a string; exception already set.

  if (streql(name, "name")) {
    const char *fname = upb_fielddef_name(f);
    if (fname == NULL) { Py_RETURN_NONE; }
    return PyString_FromString(fname);
  } else if (streql(name, "number")) {
    uint32_t num = upb_fielddef_number(f);
    if (num == 0) { Py_RETURN_NONE; }
    return PyInt_FromLong(num);
  } else if (streql(name, "type")) {
    uint8_t type = upb_fielddef_type(f);
    if (type == 0) { Py_RETURN_NONE; }
    return PyInt_FromLong(type);
  } else if (streql(name, "label")) {
    return PyInt_FromLong(upb_fielddef_label(f));
  } else if (streql(name, "type_name")) {
    const char *tname = upb_fielddef_typename(f);
    if (tname == NULL) { Py_RETURN_NONE; }
    return PyString_FromString(tname);
  } else if (streql(name, "subdef") || streql(name, "msgdef")) {
    // NYI.  Returning NULL requires an exception to be set.
    PyErr_SetString(PyExc_NotImplementedError,
                    "fielddef member is not yet implemented.");
    return NULL;
  } else {
    return PyUpb_Error("Invalid fielddef member.");
  }
}
// tp_setattro for upb.FieldDef.  Supports "name", "number", "type", "label"
// and "type_name".  Returns 0 on success, -1 with an exception set otherwise.
static int PyUpb_FieldDef_setattro(PyObject *o, PyObject *key, PyObject *val) {
  upb_fielddef *f = Check_FieldDef(o, -1);
  const char *field = PyString_AsString(key);
  if (field == NULL) return -1;  // Not a string; exception already set.
  if (!upb_fielddef_ismutable(f))
    return PyUpb_ErrorInt("fielddef is not mutable.");
  // A NULL value means "del obj.attr"; the conversions below need an object.
  if (val == NULL)
    return PyUpb_ErrorInt("Cannot delete fielddef members.");

  if (streql(field, "name")) {
    const char *name = PyString_AsString(val);
    if (!name || !upb_fielddef_setname(f, name))
      return PyUpb_ErrorInt("Invalid name");
  } else if (streql(field, "number")) {
    // TODO: should check truncation.  Non-security issue.
    // Non-int will return -1, which is already invalid as a field number.
    if (!upb_fielddef_setnumber(f, PyInt_AsLong(val)))
      return PyUpb_ErrorInt("Invalid number");
  } else if (streql(field, "type")) {
    // TODO: should check truncation.  Non-security issue.
    if (!upb_fielddef_settype(f, PyInt_AsLong(val)))
      return PyUpb_ErrorInt("Invalid type");
  } else if (streql(field, "label")) {
    // TODO: should check truncation.  Non-security issue.
    if (!upb_fielddef_setlabel(f, PyInt_AsLong(val)))
      return PyUpb_ErrorInt("Invalid label");
  } else if (streql(field, "type_name")) {
    const char *name = PyString_AsString(val);
    if (!name || !upb_fielddef_settypename(f, name))
      return PyUpb_ErrorInt("Invalid type_name");
  } else if (streql(field, "default_value")) {
    // NYI.  Returning -1 requires an exception to be set.
    PyErr_SetString(PyExc_NotImplementedError,
                    "default_value is not yet implemented.");
    return -1;
  } else {
    return PyUpb_ErrorInt("Invalid fielddef member.");
  }
  return 0;
}
static PyTypeObject PyUpb_FieldDefType = {
0, /* ob_size */
"upb.FieldDef", /* tp_name */
sizeof(PyUpb_ObjWrapper), /* tp_basicsize */
0, /* tp_itemsize */
&PyUpb_FieldDef_dealloc, /* tp_dealloc */
0, /* tp_print */
0, /* tp_getattr */
0, /* tp_setattr */
0, /* tp_compare */
0, /* TODO */ /* tp_repr */
0, /* tp_as_number */
0, /* tp_as_sequence */
0, /* tp_as_mapping */
0, /* tp_hash */
0, /* tp_call */
0, /* tp_str */
&PyUpb_FieldDef_getattro, /* tp_getattro */
&PyUpb_FieldDef_setattro, /* tp_setattro */
0, /* tp_as_buffer */
Py_TPFLAGS_DEFAULT, /* tp_flags */
0, /* tp_doc */
0, /* tp_traverse */
0, /* tp_clear */
0, /* tp_richcompare */
offsetof(PyUpb_ObjWrapper, weakreflist),/* tp_weaklistoffset */
0, /* tp_iter */
0, /* tp_iternext */
0, /* tp_methods */
0, /* tp_members */
0, /* tp_getset */
0, /* tp_base */
0, /* tp_dict */
0, /* tp_descr_get */
0, /* tp_descr_set */
0, /* tp_dictoffset */
&PyUpb_FieldDef_init, /* tp_init */
0, /* tp_alloc */
&PyUpb_FieldDef_new, /* tp_new */
0, /* tp_free */
/* PyUpb_MessageDef ***********************************************************/
static PyTypeObject PyUpb_MessageDefType;
static int PyUpb_MessageDef_setattro(PyObject *o, PyObject *key, PyObject *val);
#define Check_MessageDef(o, badret) \
(void*)(((PyUpb_ObjWrapper*)o)->obj); do { \
if(!PyObject_TypeCheck(o, &PyUpb_MessageDefType)) { \
PyErr_SetString(PyExc_TypeError, "must be a upb.MessageDef"); \
return badret; \
} \
} while(0)
static PyObject *PyUpb_MessageDef_new(PyTypeObject *subtype,
PyObject *args, PyObject *kwds) {
return PyUpb_ObjCacheGet(upb_msgdef_new(), subtype);
static PyObject *PyUpb_MessageDef_add_fields(PyObject *o, PyObject *args);
static int PyUpb_MessageDef_init(
PyObject *self, PyObject *args, PyObject *kwds) {
if (!kwds) return 0;
PyObject *key, *value;
Py_ssize_t pos = 0;
while (PyDict_Next(kwds, &pos, &key, &value)) {
const char *field = PyString_AsString(key);
if (streql(field, "fields")) {
PyUpb_MessageDef_add_fields(self, value);
} else {
PyUpb_MessageDef_setattro(self, key, value);
return 0;
static PyObject *PyUpb_MessageDef_getattro(PyObject *obj, PyObject *attr_name) {
upb_msgdef *m = Check_MessageDef(obj, NULL);
const char *name = PyString_AsString(attr_name);
if (streql(name, "fqname")) {
const char *fqname = upb_def_fqname(UPB_UPCAST(m));
return fqname == NULL ? Py_None : PyString_FromString(fqname);
return PyObject_GenericGetAttr(obj, attr_name);
static int PyUpb_MessageDef_setattro(
PyObject *o, PyObject *key, PyObject *val) {
upb_msgdef *m = Check_MessageDef(o, -1);
if (!upb_def_ismutable(UPB_UPCAST(m))) {
PyErr_SetString(PyExc_TypeError, "MessageDef is not mutable.");
return -1;
const char *name = PyString_AsString(key);
if (streql(name, "fqname")) {
const char *fqname = PyString_AsString(val);
if (!fqname || !upb_def_setfqname(UPB_UPCAST(m), fqname))
return PyUpb_ErrorInt("Invalid fqname");
} else {
return PyUpb_ErrorInt("Invalid MessageDef member.");
return 0;
// MessageDef.fields(): returns a new list holding the FieldDef wrapper of
// every field in the message.
static PyObject *PyUpb_MessageDef_fields(PyObject *obj, PyObject *args) {
  upb_msgdef *m = Check_MessageDef(obj, NULL);
  PyObject *ret = PyList_New(0);
  if (ret == NULL) return NULL;
  upb_msg_field_iter i;
  for (upb_msg_field_begin(&i, m);
       !upb_msg_field_done(&i);
       upb_msg_field_next(&i)) {
    upb_fielddef *f = upb_msg_iter_field(&i);
    // NOTE(review): PyList_Append() takes its own reference; whether the
    // wrapper returned here must also be released depends on the ownership
    // contract of PyUpb_FieldDef_GetOrCreate() -- confirm.
    PyObject *pyfield = PyUpb_FieldDef_GetOrCreate(f);
    if (pyfield == NULL || PyList_Append(ret, pyfield) < 0) {
      Py_DECREF(ret);
      return NULL;
    }
  }
  return ret;
}
// MessageDef.add_fields(fields): adds every upb.FieldDef in the sequence
// "fields" to the message.  Returns None, or NULL with a TypeError set when
// the argument is not a sequence of FieldDefs or is too long.
static PyObject *PyUpb_MessageDef_add_fields(PyObject *o, PyObject *fields) {
  upb_msgdef *m = Check_MessageDef(o, NULL);
  if (!PySequence_Check(fields)) return PyUpb_Error("Must be a sequence");

  // Materialize as a list/tuple so items can be read as borrowed references
  // that stay alive for as long as "seq" does.
  PyObject *seq = PySequence_Fast(fields, "Must be a sequence");
  if (seq == NULL) return NULL;
  Py_ssize_t len = PySequence_Fast_GET_SIZE(seq);
  if (len > UPB_MAX_FIELDS) {
    Py_DECREF(seq);
    return PyUpb_Error("Too many fields.");
  }

  // A variable-length array must have at least one element.
  upb_fielddef *f[len > 0 ? len : 1];
  Py_ssize_t i;
  for (i = 0; i < len; i++) {
    PyObject *field = PySequence_Fast_GET_ITEM(seq, i);  // Borrowed.
    if (!PyObject_TypeCheck(field, &PyUpb_FieldDefType)) {
      Py_DECREF(seq);
      return PyUpb_Error("must be a upb.FieldDef");
    }
    f[i] = (void*)(((PyUpb_ObjWrapper*)field)->obj);
  }
  upb_msgdef_addfields(m, f, len);
  Py_DECREF(seq);
  Py_RETURN_NONE;
}
// MessageDef.add_field(field): adds a single upb.FieldDef to the message.
// Returns None, or NULL with a TypeError set on a wrong argument type.
static PyObject *PyUpb_MessageDef_add_field(PyObject *o, PyObject *field) {
  upb_msgdef *m = Check_MessageDef(o, NULL);
  upb_fielddef *f = Check_FieldDef(field, NULL);
  upb_msgdef_addfield(m, f);
  // The caller owns the returned reference, so None must be increfed.
  Py_RETURN_NONE;
}
static PyMethodDef PyUpb_MessageDef_methods[] = {
{"add_field", &PyUpb_MessageDef_add_field, METH_O,
"Adds a list of fields."},
{"add_fields", &PyUpb_MessageDef_add_fields, METH_O,
"Adds a list of fields."},
{"fields", &PyUpb_MessageDef_fields, METH_NOARGS,
"Returns list of fields."},
static PyTypeObject PyUpb_MessageDefType = {
0, /* ob_size */
"upb.MessageDef", /* tp_name */
sizeof(PyUpb_ObjWrapper), /* tp_basicsize */
0, /* tp_itemsize */
&PyUpb_Def_dealloc, /* tp_dealloc */
0, /* tp_print */
0, /* tp_getattr */
0, /* tp_setattr */
0, /* tp_compare */
0, /* TODO */ /* tp_repr */
0, /* tp_as_number */
0, /* tp_as_sequence */
0, /* tp_as_mapping */
0, /* tp_hash */
0, /* tp_call */
0, /* tp_str */
&PyUpb_MessageDef_getattro, /* tp_getattro */
&PyUpb_MessageDef_setattro, /* tp_setattro */
0, /* tp_as_buffer */
Py_TPFLAGS_DEFAULT, /* tp_flags */
0, /* tp_doc */
0, /* tp_traverse */
0, /* tp_clear */
0, /* tp_richcompare */
offsetof(PyUpb_ObjWrapper, weakreflist),/* tp_weaklistoffset */
0, /* tp_iter */
0, /* tp_iternext */
PyUpb_MessageDef_methods, /* tp_methods */
0, /* tp_members */
0, /* tp_getset */
0, /* tp_base */
0, /* tp_dict */
0, /* tp_descr_get */
0, /* tp_descr_set */
0, /* tp_dictoffset */
&PyUpb_MessageDef_init, /* tp_init */
0, /* tp_alloc */
&PyUpb_MessageDef_new, /* tp_new */
0, /* tp_free */
static PyTypeObject *PyUpb_TypeForDef(const upb_def *def) {
switch(def->type) {
case UPB_DEF_MSG: return &PyUpb_MessageDefType;
default: return NULL;
/* PyUpb_SymbolTable **********************************************************/
static PyTypeObject PyUpb_SymbolTableType;
// Used as "upb_symtab *s = Check_SymbolTable(o, badret);".  Yields the wrapped
// C pointer and then returns "badret" from the enclosing function, with a
// TypeError set, when "o" is not a upb.SymbolTable.
// NOTE(review): the pointer is read before the type is checked.
#define Check_SymbolTable(o, badret) \
  (void*)(((PyUpb_ObjWrapper*)o)->obj); do { \
    if(!PyObject_TypeCheck(o, &PyUpb_SymbolTableType)) { \
      PyErr_SetString(PyExc_TypeError, "must be a upb.SymbolTable"); \
      return badret; \
    } \
  } while(0)
static PyObject *PyUpb_SymbolTable_new(PyTypeObject *subtype,
PyObject *args, PyObject *kwds) {
return PyUpb_ObjCacheGet(upb_symtab_new(), subtype);
static int PyUpb_SymbolTable_init(
PyObject *self, PyObject *args, PyObject *kwds) {
return 0;
static void PyUpb_SymbolTable_dealloc(PyObject *obj) {
PyUpb_ObjWrapper *wrapper = (void*)obj;
// SymbolTable.add_defs(defs): "defs" is a Python sequence of upb.MessageDef
// objects.  Installs an accessor on every field of each message, then adds
// all of the defs to the symbol table.  Returns None, or NULL with a
// TypeError set.
static PyObject *PyUpb_SymbolTable_add_defs(PyObject *o, PyObject *defs) {
  upb_symtab *s = Check_SymbolTable(o, NULL);
  if (!PySequence_Check(defs)) return PyUpb_Error("Must be a sequence");

  // Materialize as a list/tuple so items can be read as borrowed references
  // that stay alive for as long as "seq" does.
  PyObject *seq = PySequence_Fast(defs, "Must be a sequence");
  if (seq == NULL) return NULL;
  Py_ssize_t n = PySequence_Fast_GET_SIZE(seq);
  // Prevent stack overflow.
  if (n > 2048) {
    Py_DECREF(seq);
    return PyUpb_Error("Too many defs");
  }

  // A variable-length array must have at least one element.
  upb_def *cdefs[n > 0 ? n : 1];
  Py_ssize_t i;
  for (i = 0; i < n; i++) {
    PyObject *pydef = PySequence_Fast_GET_ITEM(seq, i);  // Borrowed.
    if (!PyObject_TypeCheck(pydef, &PyUpb_MessageDefType)) {
      Py_DECREF(seq);
      return PyUpb_Error("must be a upb.MessageDef");
    }
    upb_def *def = (void*)(((PyUpb_ObjWrapper*)pydef)->obj);
    // The loop header already advances i; a second increment here would skip
    // every other slot and leave the rest of cdefs uninitialized.
    cdefs[i] = def;
    upb_msgdef *md = upb_dyncast_msgdef(def);
    if (!md) continue;
    upb_msg_field_iter j;
    for (upb_msg_field_begin(&j, md);
         !upb_msg_field_done(&j);
         upb_msg_field_next(&j)) {
      upb_fielddef *f = upb_msg_iter_field(&j);
      upb_fielddef_setaccessor(f, PyUpb_AccessorForField(f));
    }
  }

  upb_status status = UPB_STATUS_INIT;
  upb_symtab_add(s, cdefs, n, &status);
  Py_DECREF(seq);
  // Surface a failed add as a Python exception instead of returning None.
  if (!upb_ok(&status)) return PyUpb_Error(status.str);
  Py_RETURN_NONE;
}
// SymbolTable.add_def(def): convenience wrapper that adds a single def by
// passing a one-element list to add_defs().
static PyObject *PyUpb_SymbolTable_add_def(PyObject *o, PyObject *def) {
  PyObject *defs = PyList_New(1);
  if (defs == NULL) return NULL;
  // PyList_SetItem() steals a reference, but "def" is only borrowed from the
  // caller, so take one for the list to own.
  Py_INCREF(def);
  PyList_SetItem(defs, 0, def);
  PyObject *ret = PyUpb_SymbolTable_add_defs(o, defs);
  Py_DECREF(defs);  // The temporary list is no longer needed.
  return ret;
}
// TODO: update to allow user to choose type of defs.
static PyObject *PyUpb_SymbolTable_defs(PyObject *o, PyObject *none) {
upb_symtab *s = Check_SymbolTable(o, NULL);
int count;
const upb_def **defs = upb_symtab_getdefs(s, &count, UPB_DEF_ANY);
PyObject *ret = PyList_New(count);
int i;
for(i = 0; i < count; i++)
PyList_SetItem(ret, i, PyUpb_Def_GetOrCreate(defs[i]));
return ret;
// SymbolTable.lookup(name): returns the wrapper for the def found under
// "name", as produced by PyUpb_Def_GetOrCreate().
static PyObject *PyUpb_SymbolTable_lookup(PyObject *o, PyObject *arg) {
  upb_symtab *s = Check_SymbolTable(o, NULL);
  const char *name = PyString_AsString(arg);
  if (name == NULL) return NULL;  // Not a string; exception already set.
  const upb_def *def = upb_symtab_lookup(s, name);
  return PyUpb_Def_GetOrCreate(def);
}
static PyMethodDef PyUpb_SymbolTable_methods[] = {
{"add_def", &PyUpb_SymbolTable_add_def, METH_O, NULL},
{"add_defs", &PyUpb_SymbolTable_add_defs, METH_O, NULL},
{"defs", &PyUpb_SymbolTable_defs, METH_NOARGS, NULL},
{"lookup", &PyUpb_SymbolTable_lookup, METH_O, NULL},
static PyTypeObject PyUpb_SymbolTableType = {
0, /* ob_size */
"upb.SymbolTable", /* tp_name */
sizeof(PyUpb_ObjWrapper), /* tp_basicsize */
0, /* tp_itemsize */
&PyUpb_SymbolTable_dealloc, /* tp_dealloc */
0, /* tp_print */
0, /* tp_getattr */
0, /* tp_setattr */
0, /* tp_compare */
0, /* TODO */ /* tp_repr */
0, /* tp_as_number */
0, /* tp_as_sequence */
0, /* tp_as_mapping */
0, /* tp_hash */
0, /* tp_call */
0, /* tp_str */
0, /* tp_getattro */
0, /* tp_setattro */
0, /* tp_as_buffer */
Py_TPFLAGS_DEFAULT, /* tp_flags */
0, /* tp_doc */
0, /* tp_traverse */
0, /* tp_clear */
0, /* tp_richcompare */
offsetof(PyUpb_ObjWrapper, weakreflist),/* tp_weaklistoffset */
0, /* tp_iter */
0, /* tp_iternext */
PyUpb_SymbolTable_methods, /* tp_methods */
0, /* tp_members */
0, /* tp_getset */
0, /* tp_base */
0, /* tp_dict */
0, /* tp_descr_get */
0, /* tp_descr_set */
0, /* tp_dictoffset */
&PyUpb_SymbolTable_init, /* tp_init */
0, /* tp_alloc */
&PyUpb_SymbolTable_new, /* tp_new */
0, /* tp_free */
/* Accessor and PyUpb_Message *************************************************/
typedef struct {
PyTypeObject type;
PyTypeObject *alt_type;
} PyUpb_MessageType;
typedef struct {
PyObject *msgdef;
char data[1];
} PyUpb_Message;
PyObject **PyUpb_Accessor_GetPtr(PyObject *_m, upb_value fval) {
PyUpb_Message *m = (PyUpb_Message*)_m;
const upb_fielddef *f = upb_value_getfielddef(fval);
return (PyObject**)&m->data[f->offset];
static upb_sflow_t PyUpb_Message_StartSequence(void *m, upb_value fval) {
PyObject **seq = PyUpb_Accessor_GetPtr(m, fval);
PyTypeObject *type = ((PyUpb_MessageType*)Py_TYPE(m))->alt_type;
if (!*seq) *seq = type->tp_alloc(type, 0);
upb_stdmsg_sethas(m, fval);
return UPB_CONTINUE_WITH(*seq);
static upb_sflow_t PyUpb_Message_StartSubmessage(void *m, upb_value fval) {
PyObject **submsg = PyUpb_Accessor_GetPtr(m, fval);
PyTypeObject *type = Py_TYPE(m);
if (!*submsg) *submsg = type->tp_alloc(type, 0);
upb_stdmsg_sethas(m, fval);
return UPB_CONTINUE_WITH(*submsg);
static upb_sflow_t PyUpb_Message_StartRepeatedSubmessage(
void *a, upb_value fval) {
PyObject **elem = upb_stdarray_append(a, sizeof(void*));
PyTypeObject *type = ((PyUpb_MessageType*)Py_TYPE(a))->alt_type;
if (!*elem) *elem = type->tp_alloc(type, 0);
return UPB_CONTINUE_WITH(*elem);
static upb_flow_t PyUpb_Message_StringValue(
void *m, upb_value fval, upb_value val) {
PyObject **str = PyUpb_Accessor_GetPtr(m, fval);
if (*str) { Py_DECREF(*str); }
*str = PyString_FromStringAndSize(NULL, upb_value_getstrref(val)->len);
upb_strref_read(upb_value_getstrref(val), PyString_AsString(*str));
upb_stdmsg_sethas(m, fval);
// String-value handler for repeated fields: appends one pointer-sized element
// to the array `a` and stores in it a new Python string holding the bytes of
// val's strref. fval is not used.
// NOTE(review): the return statement is not visible in this copy.
static upb_flow_t PyUpb_Message_AppendStringValue(
void *a, upb_value fval, upb_value val) {
PyObject **elem = upb_stdarray_append(a, sizeof(void*));
// Create a string of the right length with no initial contents, then fill
// its buffer in place.
*elem = PyString_FromStringAndSize(NULL, upb_value_getstrref(val)->len);
// NOTE(review): *elem is not checked for NULL before its buffer is written.
upb_strref_read(upb_value_getstrref(val), PyString_AsString(*elem));
// STDMSG(type, size) declares a static `upb_accessor_vtbl vtbl` in the current
// scope, initialised positionally. `type` is token-pasted to select the
// upb_stdmsg_set<type>, set<type>_r, get<type> and seqget<type> functions;
// `size` selects the upb_stdmsg_<size>byte_seqnext iterator. The submessage
// and sequence start slots always use the PyUpb_Message_* handlers.
#define STDMSG(type, size) static upb_accessor_vtbl vtbl = { \
&PyUpb_Message_StartSubmessage, \
&upb_stdmsg_set ## type, \
&PyUpb_Message_StartSequence, \
&PyUpb_Message_StartRepeatedSubmessage, \
&upb_stdmsg_set ## type ## _r, \
&upb_stdmsg_has, \
&upb_stdmsg_getptr, \
&upb_stdmsg_get ## type, \
&upb_stdmsg_seqbegin, \
&upb_stdmsg_ ## size ## byte_seqnext, \
&upb_stdmsg_seqget ## type};
// RETURN_STDMSG wraps STDMSG in its own brace scope and returns the address of
// the vtable it declares, so it can be used directly as a case body.
#define RETURN_STDMSG(type, size) { STDMSG(type, size); return &vtbl; }
// Returns the accessor vtable to use for field f, selected by f->type. Each
// branch expands STDMSG, so each branch owns a separate static vtable.
// The trailing `return NULL` presumably covers types that have no branch.
static upb_accessor_vtbl *PyUpb_AccessorForField(upb_fielddef *f) {
switch (f->type) {
case UPB_TYPE(UINT64):
case UPB_TYPE(FIXED64): RETURN_STDMSG(uint64, 8)
case UPB_TYPE(INT64):
case UPB_TYPE(SINT64): RETURN_STDMSG(int64, 8)
// NOTE(review): the signed 32-bit types share the uint32 accessors with the
// unsigned ones here -- confirm this is intended.
case UPB_TYPE(INT32):
case UPB_TYPE(SINT32):
case UPB_TYPE(UINT32):
case UPB_TYPE(FIXED32): RETURN_STDMSG(uint32, 4)
case UPB_TYPE(BOOL): { STDMSG(bool, 1); return &vtbl; }
case UPB_TYPE(MESSAGE): RETURN_STDMSG(ptr, 8) // TODO: 32-bit
// NOTE(review): the case label(s) for the branch below are not visible in
// this copy. It starts from the pointer accessors and then overrides the
// `set` and `append` slots with the Python string handlers.
STDMSG(ptr, 8);
vtbl.set = &PyUpb_Message_StringValue;
vtbl.append = &PyUpb_Message_AppendStringValue;
return &vtbl;
return NULL;
/* Toplevel *******************************************************************/
// Module-level function table passed to Py_InitModule in initupb.
// NOTE(review): the initializer contents are not visible in this copy.
static PyMethodDef methods[] = {
// Registers a statically allocated type object on module `mod` under `name`.
//
// PyModule_AddObject steals a ref, but our object is statically allocated
// and must not be deleted, so an extra reference is taken first. If the
// registration fails, that extra reference is released again.
//
// Returns from the enclosing (void) function when PyType_Ready or
// PyModule_AddObject fails. The body is wrapped in do/while(0) so that the
// macro expands to a single statement and is safe in an unbraced if/else.
#define PyUpb_AddType(mod, name, type) \
  do { \
    if (PyType_Ready(type) < 0) return; \
    Py_INCREF(type); \
    if (PyModule_AddObject((mod), (name), (PyObject*)(type)) < 0) { \
      Py_DECREF(type); \
      return; \
    } \
  } while (0)
// Module initialiser for the `upb` extension module (Py_InitModule-style
// initialisation). Registers the wrapper types, exports the label/type enum
// values as integer constants, and sets up the object caches and the weakref
// callback used by the rest of this file.
PyMODINIT_FUNC initupb(void) {
// NOTE(review): `mod` is not checked for NULL before use.
PyObject *mod = Py_InitModule("upb", methods);
// Expose the wrapper types; each PyUpb_AddType returns from this function if
// PyType_Ready fails.
PyUpb_AddType(mod, "FieldDef", &PyUpb_FieldDefType);
PyUpb_AddType(mod, "MessageDef", &PyUpb_MessageDefType);
PyUpb_AddType(mod, "SymbolTable", &PyUpb_SymbolTableType);
// Field label constants.
PyModule_AddIntConstant(mod, "LABEL_OPTIONAL", UPB_LABEL(OPTIONAL));
PyModule_AddIntConstant(mod, "LABEL_REQUIRED", UPB_LABEL(REQUIRED));
PyModule_AddIntConstant(mod, "LABEL_REPEATED", UPB_LABEL(REPEATED));
// Field type constants.
PyModule_AddIntConstant(mod, "TYPE_DOUBLE", UPB_TYPE(DOUBLE));
PyModule_AddIntConstant(mod, "TYPE_FLOAT", UPB_TYPE(FLOAT));
PyModule_AddIntConstant(mod, "TYPE_INT64", UPB_TYPE(INT64));
PyModule_AddIntConstant(mod, "TYPE_UINT64", UPB_TYPE(UINT64));
PyModule_AddIntConstant(mod, "TYPE_INT32", UPB_TYPE(INT32));
PyModule_AddIntConstant(mod, "TYPE_FIXED64", UPB_TYPE(FIXED64));
PyModule_AddIntConstant(mod, "TYPE_FIXED32", UPB_TYPE(FIXED32));
PyModule_AddIntConstant(mod, "TYPE_BOOL", UPB_TYPE(BOOL));
PyModule_AddIntConstant(mod, "TYPE_STRING", UPB_TYPE(STRING));
PyModule_AddIntConstant(mod, "TYPE_GROUP", UPB_TYPE(GROUP));
PyModule_AddIntConstant(mod, "TYPE_MESSAGE", UPB_TYPE(MESSAGE));
PyModule_AddIntConstant(mod, "TYPE_BYTES", UPB_TYPE(BYTES));
PyModule_AddIntConstant(mod, "TYPE_UINT32", UPB_TYPE(UINT32));
PyModule_AddIntConstant(mod, "TYPE_ENUM", UPB_TYPE(ENUM));
PyModule_AddIntConstant(mod, "TYPE_SFIXED32", UPB_TYPE(SFIXED32));
PyModule_AddIntConstant(mod, "TYPE_SFIXED64", UPB_TYPE(SFIXED64));
PyModule_AddIntConstant(mod, "TYPE_SINT32", UPB_TYPE(SINT32));
PyModule_AddIntConstant(mod, "TYPE_SINT64", UPB_TYPE(SINT64));
// Create the two cache dictionaries (the variables are declared elsewhere in
// this file). NOTE(review): the PyDict_New results are not checked.
obj_cache = PyDict_New();
reverse_cache = PyDict_New();
// Build a Python-callable wrapper around PyUpb_ObjCacheDeleteCallback. The
// PyMethodDef is static because the function object keeps a pointer to it.
static PyMethodDef method = {
"WeakRefCallback", &PyUpb_ObjCacheDeleteCallback, METH_O, NULL};
PyObject *pyname = PyString_FromString(method.ml_name);
weakref_callback = PyCFunction_NewEx(&method, NULL, pyname);

@ -1,30 +0,0 @@
# Ruby extension
To build, run (from the top upb directory):
$ make ruby
$ sudo make install
To test, run:
$ make rubytest
The binding currently supports:
- loading message types from descriptors.
- constructing message instances
- reading and writing their members
- parsing and serializing the messages
- all data types (including nested and repeated)
The binding does *not* currently support:
- defining message types directly in Ruby code.
- generating Ruby code for a .proto file.
- type-checking for setters
- homogeneous / type-checked arrays
- default values
Because code generation is not currently implemented, the interface to import
a specific message type is kind of clunky for the moment.

@ -1,13 +0,0 @@
# mkmf configuration for the upb Ruby extension: adds caller-supplied compiler
# flags and verifies that the upb headers and libraries can be found.
require 'mkmf'
# Extra args are passed on the command-line. Only the first argument is used;
# when none is given nothing is appended (concatenating nil would raise
# TypeError).
$CFLAGS += " #{ARGV[0]}" if ARGV[0]
# Abort with a clear message as soon as a header or library is missing.
find_header("upb/upb.h", "../../..") or raise "Can't find upb headers"
find_library("upb_pic", "upb_msgdef_new", "../../../lib") or raise "Can't find upb lib"
find_library("upb.descriptor_pic", "upb_descreader_init", "../../../lib") or raise "Can't find upb.descriptor lib"
find_library("upb.pb_pic", "upb_pbdecoder_init", "../../../lib") or raise "Can't find upb.pb lib"

File diff suppressed because it is too large Load Diff

@ -1,64 +0,0 @@
#ifndef UPB_STDCPP_H_
#define UPB_STDCPP_H_
#include "upb/sink.h"
namespace upb {
// Byte-handler callbacks that append incoming data to a string-like object of
// type T (T must provide append(const char*, size_t)).
// NOTE(review): access specifiers, closing braces and part of the first
// setter call are not visible in this copy.
template <class T>
class FillStringHandler {
// Installs StartString and StringBuf as the callbacks of `handler`.
static void SetHandler(BytesHandler* handler) {
upb_byteshandler_setstartstr(handler, &FillStringHandler::StartString,
upb_byteshandler_setstring(handler, &FillStringHandler::StringBuf, NULL);
// TODO(haberman): add UpbBind/UpbMakeHandler support to BytesHandler so these
// can be prettier callbacks.
// Start-of-string callback; returns the closure pointer unchanged.
// NOTE(review): `str` is unused in the lines visible here and `size` looks
// like a length hint -- confirm against the full source.
static void* StartString(void *c, const void *hd, size_t size) {
T* str = static_cast<T*>(c);
return c;
// Data callback: appends n bytes from buf to the target and reports n bytes
// consumed, or 0 if append() throws a std::exception. hd and h are not used.
static size_t StringBuf(void* c, const void* hd, const char* buf, size_t n,
const BufferHandle* h) {
T* str = static_cast<T*>(c);
try {
str->append(buf, n);
return n;
} catch (const std::exception&) {
return 0;
// Owns a BytesHandler and a BytesSink bound to a caller-supplied target
// object; input() exposes the sink.
// NOTE(review): access specifiers and closing braces are not visible in this
// copy, and the line that configures handler_ (presumably through
// FillStringHandler<T>::SetHandler) appears to be missing -- confirm.
class StringSink {
template <class T>
explicit StringSink(T* target) {
// TODO(haberman): we need to avoid rebuilding a new handler every time,
// but with class globals disallowed for google3 C++ this is tricky.
input_.Reset(&handler_, target);
// Returns the sink that data should be written to.
BytesSink* input() { return &input_; }
BytesHandler handler_;
BytesSink input_;
} // namespace upb
#endif // UPB_STDCPP_H_

@ -1,38 +0,0 @@
** Handling of errno.
#include "upb/stdc/error.h"
#include <string.h>
// Records an errno value on `status`. A zero code, and any code for which
// upb_errno_is_wouldblock() returns true, leaves `status` untouched; any other
// code marks the status as an error in the upb_stdc_errorspace error space.
void upb_status_fromerrno(upb_status *status, int code) {
if (code != 0 && !upb_errno_is_wouldblock(code)) {
status->error = true;
upb_status_setcode(status, &upb_stdc_errorspace, code);
// Reports whether `code` is one of the "would block" errno values (EAGAIN,
// EWOULDBLOCK).
// NOTE(review): the `return`, the closing preprocessor guards and the end of
// the expression are not visible in this copy.
bool upb_errno_is_wouldblock(int code) {
#ifdef EAGAIN
code == EAGAIN ||
code == EWOULDBLOCK ||
// Writes the strerror() text for `code` into buf (capacity len, including the
// terminating NUL). Returns false, leaving buf untouched, when the message
// does not fit; returns true otherwise.
bool upb_stdc_codetostr(int code, char *buf, size_t len) {
// strerror() may use static buffers and is not guaranteed to be thread-safe,
// but it appears that it is not subject to buffer overflows in practice, and
// it is used by other portable and high-quality software like Lua. For more
// discussion see: (the reference link is missing from this copy)
char *err = strerror(code);
// ">=" leaves room for the NUL terminator that strcpy writes.
if (strlen(err) >= len) return false;
strcpy(buf, err);
return true;
// Error space named "stdc"; its code-to-string hook is upb_stdc_codetostr.
upb_errorspace upb_stdc_errorspace = {"stdc", &upb_stdc_codetostr};

@ -1,18 +0,0 @@
** Handling of errno.
#include "upb/upb.h"
extern upb_errorspace upb_stdc_errorspace;
void upb_status_fromerrno(upb_status *status, int code);
bool upb_errno_is_wouldblock(int code);
#endif /* UPB_STDC_ERROR_H_ */

@ -1,172 +0,0 @@
#include "upb/stdc/io.h"
#include <errno.h>
#include <stdlib.h>
#include <string.h>
#include "upb/stdc/error.h"
// We can make this configurable if necessary.
#define BUF_SIZE 32768
/* upb_stdio ******************************************************************/
int upb_stdio_cmpbuf(const void *_key, const void *_elem) {
const uint64_t *ofs = _key;
const upb_stdio_buf *buf = _elem;
return (*ofs / BUF_SIZE) - (buf->ofs / BUF_SIZE);
static upb_stdio_buf *upb_stdio_findbuf(const upb_stdio *s, uint64_t ofs) {
// TODO: it is probably faster to linear search short lists, and to
// special-case the last one or two bufs.
return bsearch(&ofs, s->bufs, s->nbuf, sizeof(*s->bufs), &upb_stdio_cmpbuf);
// Compacts s->bufs so that buffers with refcount > 0 come first and returns a
// buffer that may be filled next: either an unreferenced one or, when every
// buffer is in use, a freshly malloc'ed one.
// NOTE(review): this function is unfinished (see the XXX below) and several
// closing braces are not visible in this copy.
static upb_stdio_buf *upb_stdio_rotatebufs(upb_stdio *s) {
// NOTE(review): `reuse` is never pointed at real storage, yet it is written
// through in the loop and read by the memcpy below.
upb_stdio_buf **reuse = NULL; // XXX
int num_reused = 0, num_inuse = 0;
// Could sweep only a subset of bufs if this was a hotspot.
for (int i = 0; i < s->nbuf; i++) {
upb_stdio_buf *buf = s->bufs[i];
if (buf->refcount > 0) {
// Still referenced: keep it, packed towards the front of the array.
s->bufs[num_inuse++] = buf;
} else {
reuse[num_reused++] = buf;
UPB_ASSERT(num_reused + num_inuse == s->nbuf);
// Put the unreferenced buffers after the in-use ones.
memcpy(s->bufs + num_inuse, reuse, num_reused * sizeof(upb_stdio_buf*));
if (num_reused == 0) {
// NOTE(review): s->nbuf is not incremented in the visible lines, so this
// realloc does not grow the array; the realloc/malloc results are also
// unchecked.
s->bufs = realloc(s->bufs, s->nbuf * sizeof(*s->bufs));
s->bufs[s->nbuf-1] = malloc(sizeof(upb_stdio_buf) + BUF_SIZE);
return s->bufs[s->nbuf-1];
// First of the unreferenced buffers.
return s->bufs[s->nbuf-num_reused];
// NOTE(review): the body of this function is not visible in this copy.
void upb_stdio_discard(void *src, uint64_t ofs) {
// Reads up to BUF_SIZE bytes from the underlying FILE into a buffer obtained
// from upb_stdio_rotatebufs and stores the byte count in *bytes_read.
// Returns UPB_BYTE_OK after a read, or UPB_BYTE_EOF when a short read hits
// end-of-file; on a stream error the errno is recorded on the source's status.
// NOTE(review): the `retry` label, several closing braces and the tail of the
// error-path return expression are not visible in this copy; `ofs` is unused
// in the visible lines and buf->ofs is never assigned here.
upb_bytesuccess_t upb_stdio_fetch(void *src, uint64_t ofs, size_t *bytes_read) {
upb_stdio *stdio = (upb_stdio*)src;
upb_stdio_buf *buf = upb_stdio_rotatebufs(stdio);
*bytes_read = fread(&buf->data, 1, BUF_SIZE, stdio->file);
buf->len = *bytes_read;
if (*bytes_read < (size_t)BUF_SIZE) {
// Error or EOF.
// NOTE(review): EOF is reported even when this short read returned some
// bytes -- confirm callers still consume *bytes_read in that case.
if (feof(stdio->file)) {
return UPB_BYTE_EOF;
if (ferror(stdio->file)) {
#ifdef EINTR
// If we encounter a client who doesn't want to retry EINTR, we can easily
// add a boolean property of the stdio that controls this behavior.
if (errno == EINTR) {
goto retry;
upb_status_fromerrno(&stdio->src.status, errno);
return upb_errno_is_wouldblock(errno) ?
return UPB_BYTE_OK;
// Copies `len` bytes starting at stream offset `ofs` into dst, beginning with
// the tail of the buffer that contains `ofs`.
// NOTE(review): as visible here the first memcpy always copies to the end of
// the buffer even when len is smaller, `len` (a size_t) would then wrap, and
// the loop never advances `buf` to the following buffer. The result of
// upb_stdio_findbuf is also used without a NULL check. Confirm against the
// full source before relying on this function.
void upb_stdio_copy(const void *src, uint64_t ofs, size_t len, char *dst) {
upb_stdio_buf *buf = upb_stdio_findbuf(src, ofs);
// Convert the stream offset into an offset within this buffer.
ofs -= buf->ofs;
memcpy(dst, buf->data + ofs, BUF_SIZE - ofs);
len -= (BUF_SIZE - ofs);
dst += (BUF_SIZE - ofs);
while (len > 0) {
size_t bytes = UPB_MIN(len, BUF_SIZE);
memcpy(dst, buf->data, bytes);
len -= bytes;
dst += bytes;
// Returns a pointer to the byte at stream offset `ofs` inside its buffer and
// stores in *len the distance from there to the end of a full BUF_SIZE buffer.
// NOTE(review): *len is derived from BUF_SIZE rather than buf->len, so it may
// overstate the valid bytes of a partially filled buffer; the result of
// upb_stdio_findbuf is used without a NULL check.
const char *upb_stdio_getptr(const void *src, uint64_t ofs, size_t *len) {
upb_stdio_buf *buf = upb_stdio_findbuf(src, ofs);
// Convert the stream offset into an offset within this buffer.
ofs -= buf->ofs;
*len = BUF_SIZE - ofs;
return &buf->data[ofs];
#if 0
// (Compiled out by the preceding #if 0.) Writes the contents of `str` to the
// FILE of the upb_stdio that embeds `sink`. Returns the number of bytes
// written, or -1 after setting an error on `status` when the write is short.
upb_strlen_t upb_stdio_putstr(upb_bytesink *sink, upb_string *str, upb_status *status) {
// Recover the enclosing upb_stdio from the address of its `sink` member.
upb_stdio *stdio = (upb_stdio*)((char*)sink - offsetof(upb_stdio, sink));
upb_strlen_t len = upb_string_len(str);
upb_strlen_t written = fwrite(upb_string_getrobuf(str), 1, len, stdio->file);
if (written < len) {
upb_status_setf(status, UPB_ERROR, "Error writing to stdio stream.");
return -1;
return written;
// (Compiled out by the preceding #if 0.) printf-style write to the FILE of the
// upb_stdio that embeds `sink`. Returns vfprintf's character count, or -1
// after setting an error on `status` when vfprintf fails.
// NOTE(review): the return type is uint32_t, so the -1 error value reaches the
// caller as a large unsigned number.
uint32_t upb_stdio_vprintf(upb_bytesink *sink, upb_status *status,
const char *fmt, va_list args) {
// Recover the enclosing upb_stdio from the address of its `sink` member.
upb_stdio *stdio = (upb_stdio*)((char*)sink - offsetof(upb_stdio, sink));
int written = vfprintf(stdio->file, fmt, args);
if (written < 0) {
upb_status_seterrf(status, "Error writing to stdio stream.");
return -1;
return written;
// Initialises the byte-source half of `stdio` with a static vtable. The
// byte-sink half is commented out below and is therefore not initialised.
// NOTE(review): the vtable's initializer entries are not visible in this copy.
void upb_stdio_init(upb_stdio *stdio) {
static upb_bytesrc_vtbl bytesrc_vtbl = {
upb_bytesrc_init(&stdio->src, &bytesrc_vtbl);
//static upb_bytesink_vtbl bytesink_vtbl = {
// upb_stdio_putstr,
// upb_stdio_vprintf
//upb_bytesink_init(&stdio->bytesink, &bytesink_vtbl);
// Points `stdio` at `file` and clears should_close, so upb_stdio_uninit will
// not fclose() a handle supplied this way.
void upb_stdio_reset(upb_stdio* stdio, FILE *file) {
stdio->file = file;
stdio->should_close = false;
// Opens `filename` with fopen() mode `mode` and attaches the resulting stream
// to `stdio`, which takes ownership (upb_stdio_uninit will close it).
// On failure the errno from fopen() is recorded on `s` and `stdio` is left
// unchanged.
void upb_stdio_open(upb_stdio *stdio, const char *filename, const char *mode,
                    upb_status *s) {
  FILE *f = fopen(filename, mode);
  if (!f) {
    upb_status_fromerrno(s, errno);
    return;
  }
  // Disable buffering; we do our own. This must act on the freshly opened
  // stream `f`: stdio->file is only assigned by upb_stdio_reset() below and
  // still holds its previous (possibly uninitialised) value at this point.
  setvbuf(f, NULL, _IONBF, 0);
  upb_stdio_reset(stdio, f);
  // upb_stdio_reset() clears should_close, so set it afterwards.
  stdio->should_close = true;
}
// Releases the stream: closes it only if this object opened it itself
// (should_close), then clears the file pointer. The fclose() result is
// discarded.
void upb_stdio_uninit(upb_stdio *stdio) {
// Can't report status; caller should flush() to ensure data is written.
if (stdio->should_close) fclose(stdio->file);
stdio->file = NULL;
// Returns the byte source embedded in `stdio`.
upb_bytesrc* upb_stdio_bytesrc(upb_stdio *stdio) {
  upb_bytesrc *source = &stdio->src;
  return source;
}
// Returns the byte sink embedded in `stdio`.
upb_bytesink* upb_stdio_bytesink(upb_stdio *stdio) {
  upb_bytesink *sink = &stdio->sink;
  return sink;
}

@ -1,64 +0,0 @@
** ANSI C file I/O.
#ifndef UPB_STDC_IO_H_
#define UPB_STDC_IO_H_
#include <stdio.h>
#include "upb/bytestream.h"
/* upb_stdio ******************************************************************/
// bytesrc/bytesink for ANSI C stdio, which is less efficient than posixfd, but
// more portable.
// Specifically, stdio functions acquire locks on every operation (unless you
// use the f{read,write,...}_unlocked variants, which are not standard) and
// perform redundant buffering (unless you disable it with setvbuf(), but we
// can only do this on newly-opened filehandles).
// One read buffer: a small header followed by the data bytes in the same
// allocation (C99 flexible array member).
typedef struct {
uint64_t ofs;       // Offset associated with this buffer; presumably the stream
                    // offset of data[0] -- confirm.
size_t len;         // Number of bytes stored in data.
uint32_t refcount;  // Buffers with refcount > 0 are treated as in use.
char data[];
} upb_stdio_buf;
// We use a single object for both bytesrc and bytesink for simplicity.
// The object is still not thread-safe, and may only be used by one reader
// and one writer at a time.
// Combined bytesrc/bytesink object backed by a stdio FILE.
typedef struct {
upb_bytesrc src;
upb_bytesink sink;
FILE *file;
bool should_close;     // True when this object opened `file` and must fclose() it.
upb_stdio_buf **bufs;  // Array of pointers to the read buffers.
int nbuf;              // Number of entries in bufs.
uint32_t szbuf;        // NOTE(review): not referenced by the code visible here.
} upb_stdio;
void upb_stdio_init(upb_stdio *stdio);
// Caller should call upb_stdio_flush prior to calling this to ensure that
// all data is flushed, otherwise data can be silently dropped if an error
// occurs flushing the remaining buffers.
void upb_stdio_uninit(upb_stdio *stdio);
// Resets the object to read/write to the given "file." The caller is
// responsible for closing the file, which must outlive this object.
void upb_stdio_reset(upb_stdio *stdio, FILE *file);
// As an alternative to upb_stdio_reset(), initializes the object by opening a
// file, and will handle closing it. This may result in more efficient I/O
// than the previous since we can call setvbuf() to disable buffering.
void upb_stdio_open(upb_stdio *stdio, const char *filename, const char *mode,
upb_status *s);
upb_bytesrc *upb_stdio_bytesrc(upb_stdio *stdio);
upb_bytesink *upb_stdio_bytesink(upb_stdio *stdio);
#endif /* UPB_STDC_IO_H_ */