Merge pull request #8850 from haberman/rubybuilder

Move DSL implementation from C to pure Ruby
pull/8854/head
Joshua Haberman 3 years ago committed by GitHub
commit 88e6a36562
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 1
      .gitignore
  2. 1
      Makefile.am
  3. 1
      ruby/Rakefile
  4. 1352
      ruby/ext/google/protobuf_c/defs.c
  5. 70
      ruby/lib/google/protobuf.rb
  6. 439
      ruby/lib/google/protobuf/descriptor_dsl.rb
  7. 2
      ruby/travis-test.sh
  8. 72
      src/google/protobuf/compiler/ruby/ruby_generator.cc
  9. 2
      tests.sh

1
.gitignore vendored

@ -183,7 +183,6 @@ js/testproto_libs2.js
/bazel-*
# ruby test output
ruby/lib/
ruby/tests/basic_test_pb.rb
ruby/tests/basic_test_proto2_pb.rb
ruby/tests/generated_code_pb.rb

@ -1138,6 +1138,7 @@ ruby_EXTRA_DIST= \
ruby/ext/google/protobuf_c/ruby-upb.h \
ruby/ext/google/protobuf_c/wrap_memcpy.c \
ruby/google-protobuf.gemspec \
ruby/lib/google/protobuf/descriptor_dsl.rb \
ruby/lib/google/protobuf/message_exts.rb \
ruby/lib/google/protobuf/repeated_field.rb \
ruby/lib/google/protobuf/well_known_types.rb \

@ -8,6 +8,7 @@ spec = Gem::Specification.load("google-protobuf.gemspec")
well_known_protos = %w[
google/protobuf/any.proto
google/protobuf/api.proto
google/protobuf/descriptor.proto
google/protobuf/duration.proto
google/protobuf/empty.proto
google/protobuf/field_mask.proto

File diff suppressed because it is too large Load Diff

@ -51,75 +51,7 @@ else
require 'google/protobuf_c'
end
module Google
module Protobuf
module Internal
# Returns the package implied by +names+: the longest prefix common to all of
# them that ends just before a '.', or nil when there is no such prefix.
def self.infer_package(names)
  return nil if names.empty?

  # Comparing only the smallest and largest names is enough; the loop stops at
  # the first position where the two differ.
  lowest, highest = names.minmax
  dot_at = nil
  lowest.size.times do |pos|
    break unless lowest[pos] == highest[pos]
    dot_at = pos if lowest[pos] == "."
  end

  dot_at ? lowest.slice(0, dot_at) : nil
end
class NestingBuilder
def initialize(msg_names, enum_names)
@to_pos = {nil=>nil}
@msg_children = Hash.new { |hash, key| hash[key] = [] }
@enum_children = Hash.new { |hash, key| hash[key] = [] }
msg_names.each_with_index { |name, idx| @to_pos[name] = idx }
enum_names.each_with_index { |name, idx| @to_pos[name] = idx }
msg_names.each { |name| @msg_children[parent(name)] << name }
enum_names.each { |name| @enum_children[parent(name)] << name }
end
def build(package)
return build_msg(package)
end
private
def build_msg(msg)
return {
:pos => @to_pos[msg],
:msgs => @msg_children[msg].map { |child| build_msg(child) },
:enums => @enum_children[msg].map { |child| @to_pos[child] },
}
end
private
def parent(name)
idx = name.rindex(?.)
if idx
return name.slice(0, idx)
else
return nil
end
end
end
# Returns [package, nesting]: the package (inferred from the names when the
# caller passes nil) and the nesting tree built by NestingBuilder for it.
def self.fixup_descriptor(package, msg_names, enum_names)
  package = self.infer_package(msg_names + enum_names) if package.nil?
  tree = NestingBuilder.new(msg_names, enum_names).build(package)
  [package, tree]
end
end
end
end
require 'google/protobuf/descriptor_dsl'
end
require 'google/protobuf/repeated_field'

@ -0,0 +1,439 @@
#!/usr/bin/ruby
#
# Code that implements the DSL for defining proto messages.
require 'google/protobuf/descriptor_pb'
module Google
module Protobuf
module Internal
class Builder
def initialize(pool)
@pool = pool
@default_file = nil # Constructed lazily
end
def add_file(name, options={}, &block)
builder = FileBuilder.new(@pool, name, options)
builder.instance_eval(&block)
internal_add_file(builder)
end
def add_message(name, &block)
internal_default_file.add_message(name, &block)
end
def add_enum(name, &block)
internal_default_file.add_enum(name, &block)
end
# ---- Internal methods, not part of the DSL ----
def build
if @default_file
internal_add_file(@default_file)
end
end
private def internal_add_file(file_builder)
proto = file_builder.build
serialized = Google::Protobuf::FileDescriptorProto.encode(proto)
@pool.add_serialized_file(serialized)
end
private def internal_default_file
@default_file ||= FileBuilder.new(@pool, "ruby_default_file.proto")
end
end
# Collects the messages and enums declared for a single file through the DSL
# and produces the corresponding FileDescriptorProto.
class FileBuilder
  # pool    - pool queried (via #lookup) for enums defined in earlier files.
  # name    - file name recorded in the FileDescriptorProto.
  # options - Hash of file options; only :syntax is read (default "proto3").
  def initialize(pool, name, options={})
    @pool = pool
    @file_proto = Google::Protobuf::FileDescriptorProto.new(
      name: name,
      syntax: options.fetch(:syntax, "proto3")
    )
  end

  # Declares a message; the block is evaluated against a MessageBuilder.
  def add_message(name, &block)
    builder = MessageBuilder.new(name, self, @file_proto)
    builder.instance_eval(&block)
    builder.internal_add_synthetic_oneofs
  end

  # Declares an enum; the block is evaluated against an EnumBuilder.
  def add_enum(name, &block)
    EnumBuilder.new(name, @file_proto).instance_eval(&block)
  end

  # ---- Internal methods, not part of the DSL ----

  # These methods fix up the file descriptor to account for differences
  # between the DSL and FileDescriptorProto.

  # The DSL can omit a package name; here we infer what the package is if
  # it was not specified.
  #
  # Returns the inferred package String, or nil if none can be inferred.
  def infer_package(names)
    # Package is longest common prefix ending in '.', if any.
    return nil if names.empty?

    min, max = names.minmax
    last_common_dot = nil
    min.size.times do |i|
      break if min[i] != max[i]
      last_common_dot = i if min[i] == "."
    end

    last_common_dot ? min.slice(0, last_common_dot) : nil
  end

  # Rewrites a numeric enum default on +field+ to the label with that number.
  # The field is left untouched unless it is an enum field with a default
  # that starts with a digit and an absolute (dot-prefixed) type name, and a
  # matching enum value can be found.
  def rewrite_enum_default(field)
    if field.type != :TYPE_ENUM || !field.has_default_value? || !field.has_type_name?
      return
    end

    value = field.default_value
    type_name = field.type_name

    # Only defaults that begin with an ASCII digit are treated as numeric.
    if value.empty? || value[0].ord < "0".ord || value[0].ord > "9".ord
      return
    end

    if type_name.empty? || type_name[0] != "."
      return
    end

    # Strip the leading dot to get the name used for lookups.
    type_name = type_name[1..-1]

    as_int = begin
      Integer(value)
    rescue StandardError
      # Not parseable as an integer after all; leave the default alone.
      return
    end

    enum_desc = @pool.lookup(type_name)
    if enum_desc.is_a?(Google::Protobuf::EnumDescriptor)
      # Enum was defined in a previous file.
      name = enum_desc.lookup_value(as_int)
      if name
        # Update the default value in the proto.
        field.default_value = name
      end
    else
      # See if enum was defined in this file.
      @file_proto.enum_type.each do |enum_proto|
        next unless enum_proto.name == type_name

        enum_proto.value.each do |enum_value_proto|
          if enum_value_proto.number == as_int
            # Update the default value in the proto.
            field.default_value = enum_value_proto.name
            return
          end
        end

        # We found the right enum, but no value matched.
        return
      end
    end
  end

  # Historically we allowed enum defaults to be specified as a number.
  # In retrospect this was a mistake as descriptors require defaults to
  # be specified as a label. This can make a difference if multiple
  # labels have the same number.
  #
  # Here we do a pass over all enum defaults and rewrite numeric defaults
  # by looking up their labels. This is complicated by the fact that the
  # enum definition can live in either the symtab or the file_proto.
  #
  # We take advantage of the fact that this is called *before* enums or
  # messages are nested in other messages, so we only have to iterate
  # one level deep.
  def rewrite_enum_defaults
    @file_proto.message_type.each do |msg|
      msg.field.each do |field|
        rewrite_enum_default(field)
      end
    end
  end

  # We have to do some relatively complicated logic here for backward
  # compatibility.
  #
  # In descriptor.proto, messages are nested inside other messages if that is
  # what the original .proto file looks like. For example, suppose we have this
  # foo.proto:
  #
  # package foo;
  # message Bar {
  #   message Baz {}
  # }
  #
  # The descriptor for this must look like this:
  #
  # file {
  #   name: "test.proto"
  #   package: "foo"
  #   message_type {
  #     name: "Bar"
  #     nested_type {
  #       name: "Baz"
  #     }
  #   }
  # }
  #
  # However, the Ruby generated code has always generated messages in a flat,
  # non-nested way:
  #
  # Google::Protobuf::DescriptorPool.generated_pool.build do
  #   add_message "foo.Bar" do
  #   end
  #   add_message "foo.Bar.Baz" do
  #   end
  # end
  #
  # Here we need to do a translation where we turn this generated code into the
  # above descriptor. We need to infer that "foo" is the package name, and not
  # a message itself.

  # Splits the proto's dotted name at its last '.'.
  #
  # Returns [parent_name, short_name]; parent_name is nil if there is no '.'.
  def split_parent_name(msg_or_enum)
    name = msg_or_enum.name
    idx = name.rindex(?.)
    if idx
      return name[0...idx], name[idx+1..-1]
    else
      return nil, name
    end
  end

  # Looks up the message that must enclose +name+.
  #
  # Raises RuntimeError if no message called +parent_name+ was declared.
  def get_parent_msg(msgs_by_name, name, parent_name)
    parent_msg = msgs_by_name[parent_name]
    if parent_msg.nil?
      raise "To define name #{name}, there must be a message named #{parent_name} to enclose it"
    end
    return parent_msg
  end

  # Infers the package, shortens every message/enum name to its last
  # component, and moves nested definitions into their enclosing messages.
  def fix_nesting
    # Calculate and update package.
    msgs_by_name = @file_proto.message_type.map { |msg| [msg.name, msg] }.to_h
    enum_names = @file_proto.enum_type.map { |enum_proto| enum_proto.name }

    package = infer_package(msgs_by_name.keys + enum_names)
    if package
      @file_proto.package = package
    end

    # Update nesting based on package.
    final_msgs = Google::Protobuf::RepeatedField.new(:message, Google::Protobuf::DescriptorProto)
    final_enums = Google::Protobuf::RepeatedField.new(:message, Google::Protobuf::EnumDescriptorProto)

    # Note: We don't iterate over msgs_by_name.values because we want to
    # preserve order as listed in the DSL.
    @file_proto.message_type.each do |msg|
      # Keep the name as written in the DSL: msg.name is about to be
      # shortened, and error messages should show what the user wrote.
      full_name = msg.name
      parent_name, msg.name = split_parent_name(msg)
      if parent_name == package
        final_msgs << msg
      else
        get_parent_msg(msgs_by_name, full_name, parent_name).nested_type << msg
      end
    end

    @file_proto.enum_type.each do |enum|
      full_name = enum.name
      parent_name, enum.name = split_parent_name(enum)
      if parent_name == package
        final_enums << enum
      else
        get_parent_msg(msgs_by_name, full_name, parent_name).enum_type << enum
      end
    end

    @file_proto.message_type = final_msgs
    @file_proto.enum_type = final_enums
  end

  # The FileDescriptorProto being assembled.
  def internal_file_proto
    @file_proto
  end

  # Applies the fix-ups and returns the finished FileDescriptorProto.
  def build
    rewrite_enum_defaults
    fix_nesting
    return @file_proto
  end
end
# Receiver for the block passed to add_message. Each DSL call appends a
# field (or oneof) to the DescriptorProto created for the message.
class MessageBuilder
  # name         - message name as written in the DSL.
  # file_builder - FileBuilder that owns this message (used for map entries).
  # file_proto   - FileDescriptorProto the new message is appended to.
  def initialize(name, file_builder, file_proto)
    @file_builder = file_builder
    @msg_proto = Google::Protobuf::DescriptorProto.new(
      :name => name
    )
    file_proto.message_type << @msg_proto
  end

  def optional(name, type, number, type_class=nil, options=nil)
    internal_add_field(:LABEL_OPTIONAL, name, type, number, type_class, options)
  end

  def proto3_optional(name, type, number, type_class=nil, options=nil)
    internal_add_field(:LABEL_OPTIONAL, name, type, number, type_class, options,
                       proto3_optional: true)
  end

  def required(name, type, number, type_class=nil, options=nil)
    internal_add_field(:LABEL_REQUIRED, name, type, number, type_class, options)
  end

  def repeated(name, type, number, type_class = nil)
    internal_add_field(:LABEL_REPEATED, name, type, number, type_class, nil)
  end

  # Declares a oneof; the block is evaluated against a OneofBuilder.
  def oneof(name, &block)
    OneofBuilder.new(name, self).instance_eval(&block)
  end

  # Defines a new map field on this message type with the given key and
  # value types, tag number, and type class (for message and enum value
  # types). The key type must be :int32/:uint32/:int64/:uint64, :bool, or
  # :string. The value type must be a Ruby symbol (as accepted by
  # FieldDescriptor#type=) and the type_class must be a string, if
  # present (as accepted by FieldDescriptor#submsg_name=).
  #
  # Raises ArgumentError if key_type is :float, :double, :enum or :message.
  def map(name, key_type, value_type, number, value_type_class = nil)
    if key_type == :float || key_type == :double || key_type == :enum ||
       key_type == :message
      # Interpolate rather than concatenate: key_type is normally a Symbol.
      raise ArgumentError, "Not an acceptable key type: #{key_type}"
    end

    # The map is represented as a repeated field of a synthesized entry
    # message with a key (1) and a value (2) field.
    entry_name = "#{@msg_proto.name}_MapEntry_#{name}"

    @file_builder.add_message entry_name do
      optional :key, key_type, 1
      optional :value, value_type, 2, value_type_class
    end

    # The entry message was just appended, so it is the last one in the file.
    options = @file_builder.internal_file_proto.message_type.last.options ||= Google::Protobuf::MessageOptions.new
    options.map_entry = true
    repeated name, :message, number, entry_name
  end

  # ---- Internal methods, not part of the DSL ----

  # Adds one single-field oneof declaration for every proto3_optional field.
  def internal_add_synthetic_oneofs
    # We have to build a set of all names, to ensure that synthetic oneofs
    # are not creating conflicts
    names = {}
    @msg_proto.field.each { |field| names[field.name] = true }
    @msg_proto.oneof_decl.each { |oneof| names[oneof.name] = true }

    @msg_proto.field.each do |field|
      next unless field.proto3_optional

      # Prepend '_' until we are no longer conflicting.
      oneof_name = field.name
      while names[oneof_name]
        oneof_name = "_" + oneof_name
      end
      names[oneof_name] = true
      field.oneof_index = @msg_proto.oneof_decl.size
      @msg_proto.oneof_decl << Google::Protobuf::OneofDescriptorProto.new(
        name: oneof_name
      )
    end
  end

  # Builds a FieldDescriptorProto from the DSL arguments and appends it to
  # the message.
  #
  # type_class - type name for message/enum fields; a leading '.' is added.
  # options    - Hash; :default and :json_name are read.
  def internal_add_field(label, name, type, number, type_class, options,
                         oneof_index: nil, proto3_optional: false)
    # Allow passing either:
    # - (name, type, number, options) or
    # - (name, type, number, type_class, options)
    if options.nil? && type_class.instance_of?(Hash)
      options = type_class
      type_class = nil
    end

    field_proto = Google::Protobuf::FieldDescriptorProto.new(
      :label => label,
      :name => name,
      :type => ("TYPE_" + type.to_s.upcase).to_sym,
      :number => number
    )

    if type_class
      # Make it an absolute type name by prepending a dot.
      field_proto.type_name = "." + type_class
    end

    if oneof_index
      field_proto.oneof_index = oneof_index
    end

    if proto3_optional
      field_proto.proto3_optional = true
    end

    if options
      if options.key?(:default)
        default = options[:default]
        if !default.instance_of?(String)
          # Call #to_s since all defaults are strings in the descriptor.
          default = default.to_s
        end
        # XXX: we should be C-escaping bytes defaults.
        field_proto.default_value = default.dup.force_encoding("UTF-8")
      end
      if options.key?(:json_name)
        field_proto.json_name = options[:json_name]
      end
    end

    @msg_proto.field << field_proto
  end

  # The DescriptorProto being assembled.
  def internal_msg_proto
    @msg_proto
  end
end
# Receiver for the block passed to MessageBuilder#oneof. Creating one appends
# a oneof declaration to the message; fields declared in the block are tagged
# with that declaration's index.
class OneofBuilder
  def initialize(name, msg_builder)
    @msg_builder = msg_builder
    declaration = Google::Protobuf::OneofDescriptorProto.new(name: name)

    # The new declaration goes at the end, so its index is the current size.
    owner_proto = msg_builder.internal_msg_proto
    @oneof_index = owner_proto.oneof_decl.size
    owner_proto.oneof_decl << declaration
  end

  # Declares a field that belongs to this oneof.
  def optional(name, type, number, type_class=nil, options=nil)
    @msg_builder.internal_add_field(
      :LABEL_OPTIONAL,
      name,
      type,
      number,
      type_class,
      options,
      oneof_index: @oneof_index
    )
  end
end
# Receiver for the block passed to add_enum. Creating one appends a new
# EnumDescriptorProto to the file; each #value call appends one enum value.
class EnumBuilder
  def initialize(name, file_proto)
    @enum_proto = Google::Protobuf::EnumDescriptorProto.new(name: name)
    file_proto.enum_type << @enum_proto
  end

  # Declares the enum value +name+ with the given +number+.
  def value(name, number)
    entry = Google::Protobuf::EnumValueDescriptorProto.new(
      :name => name,
      :number => number
    )
    @enum_proto.value << entry
  end
end
# Re-open the class (the rest of the class is implemented in C)
class DescriptorPool
  # Evaluates the DSL block against a fresh Internal::Builder bound to this
  # pool, then returns the result of the builder's #build.
  def build(&block)
    Internal::Builder.new(self).tap { |dsl| dsl.instance_eval(&block) }.build
  end
end
end
end

@ -18,7 +18,7 @@ test_version() {
rake gc_test &&
cd ../conformance && make test_jruby &&
cd ../ruby/compatibility_tests/v3.0.0 && ./test.sh"
elif [ "$version" == "ruby-2.6.0" -o "$version" == "ruby-2.7.0" -o "$version" == "ruby-3.0.0" ] ; then
elif [ "$version" == "ruby-2.6.0" -o "$version" == "ruby-2.7.0" -o "$version" == "ruby-3.0.2" ] ; then
bash --login -c \
"rvm install $version && rvm use $version && \
which ruby && \

@ -527,6 +527,42 @@ bool MaybeEmitDependency(const FileDescriptor* import,
}
}
bool GenerateDslDescriptor(const FileDescriptor* file, io::Printer* printer,
std::string* error) {
printer->Print(
"require 'google/protobuf'\n\n");
printer->Print("Google::Protobuf::DescriptorPool.generated_pool.build do\n");
printer->Indent();
printer->Print("add_file(\"$filename$\", :syntax => :$syntax$) do\n",
"filename", file->name(), "syntax",
StringifySyntax(file->syntax()));
printer->Indent();
for (int i = 0; i < file->message_type_count(); i++) {
if (!GenerateMessage(file->message_type(i), printer, error)) {
return false;
}
}
for (int i = 0; i < file->enum_type_count(); i++) {
GenerateEnum(file->enum_type(i), printer);
}
printer->Outdent();
printer->Print("end\n");
printer->Outdent();
printer->Print(
"end\n\n");
return true;
}
// Emits Ruby code that reads everything following the "\n__END__\n" marker of
// the generated file and passes it to generated_pool.add_serialized_file.
// `file` and `error` are not used; always returns true.
bool GenerateBinaryDescriptor(const FileDescriptor* file, io::Printer* printer,
                              std::string* error) {
  // Raw literal: the backslash-n sequences must appear verbatim in the
  // emitted Ruby source rather than as newlines.
  static const char kReadTrailer[] =
      R"(descriptor_data = File.binread(__FILE__).split("\n__END__\n", 2)[1])";
  static const char kRegisterFile[] =
      "\nGoogle::Protobuf::DescriptorPool.generated_pool.add_serialized_file("
      "descriptor_data)\n\n";

  printer->Print(kReadTrailer);
  printer->Print(kRegisterFile);
  return true;
}
bool GenerateFile(const FileDescriptor* file, io::Printer* printer,
std::string* error) {
printer->Print(
@ -535,9 +571,6 @@ bool GenerateFile(const FileDescriptor* file, io::Printer* printer,
"\n",
"filename", file->name());
printer->Print(
"require 'google/protobuf'\n\n");
for (int i = 0; i < file->dependency_count(); i++) {
if (!MaybeEmitDependency(file->dependency(i), file, printer, error)) {
return false;
@ -550,25 +583,13 @@ bool GenerateFile(const FileDescriptor* file, io::Printer* printer,
GOOGLE_LOG(WARNING) << "Extensions are not yet supported for proto2 .proto files.";
}
printer->Print("Google::Protobuf::DescriptorPool.generated_pool.build do\n");
printer->Indent();
printer->Print("add_file(\"$filename$\", :syntax => :$syntax$) do\n",
"filename", file->name(), "syntax",
StringifySyntax(file->syntax()));
printer->Indent();
for (int i = 0; i < file->message_type_count(); i++) {
if (!GenerateMessage(file->message_type(i), printer, error)) {
return false;
}
}
for (int i = 0; i < file->enum_type_count(); i++) {
GenerateEnum(file->enum_type(i), printer);
bool use_raw_descriptor = file->name() == "google/protobuf/descriptor.proto";
if (use_raw_descriptor) {
GenerateBinaryDescriptor(file, printer, error);
} else {
GenerateDslDescriptor(file, printer, error);
}
printer->Outdent();
printer->Print("end\n");
printer->Outdent();
printer->Print(
"end\n\n");
int levels = GeneratePackageModules(file, printer);
for (int i = 0; i < file->message_type_count(); i++) {
@ -578,6 +599,15 @@ bool GenerateFile(const FileDescriptor* file, io::Printer* printer,
GenerateEnumAssignment("", file->enum_type(i), printer);
}
EndPackageModules(levels, printer);
if (use_raw_descriptor) {
printer->Print("\n__END__\n");
FileDescriptorProto file_proto;
file->CopyTo(&file_proto);
std::string file_data;
file_proto.SerializeToString(&file_data);
printer->Print("$raw_descriptor$", "raw_descriptor", file_data);
}
return true;
}

@ -438,7 +438,7 @@ build_ruby27() {
}
build_ruby30() {
internal_build_cpp # For conformance tests.
cd ruby && bash travis-test.sh ruby-3.0.0 && cd ..
cd ruby && bash travis-test.sh ruby-3.0.2 && cd ..
}
build_jruby() {

Loading…
Cancel
Save