parent
ed4321d1cb
commit
c703061d49
9 changed files with 685 additions and 122 deletions
@ -1,12 +1,12 @@ |
||||
package main |
||||
|
||||
import ( |
||||
benchmarkWrapper "./tmp" |
||||
googleMessage1Proto2 "./tmp/datasets/google_message1/proto2" |
||||
googleMessage1Proto3 "./tmp/datasets/google_message1/proto3" |
||||
googleMessage2 "./tmp/datasets/google_message2" |
||||
googleMessage3 "./tmp/datasets/google_message3" |
||||
googleMessage4 "./tmp/datasets/google_message4" |
||||
benchmarkWrapper "../tmp" |
||||
googleMessage1Proto2 "../tmp/datasets/google_message1/proto2" |
||||
googleMessage1Proto3 "../tmp/datasets/google_message1/proto3" |
||||
googleMessage2 "../tmp/datasets/google_message2" |
||||
googleMessage3 "../tmp/datasets/google_message3" |
||||
googleMessage4 "../tmp/datasets/google_message4" |
||||
"flag" |
||||
"github.com/golang/protobuf/proto" |
||||
"io/ioutil" |
@ -1,8 +1,8 @@ |
||||
#include <Python.h> |
||||
|
||||
#include "benchmarks.pb.h" |
||||
#include "datasets/google_message1/benchmark_message1_proto2.pb.h" |
||||
#include "datasets/google_message1/benchmark_message1_proto3.pb.h" |
||||
#include "datasets/google_message1/proto2/benchmark_message1_proto2.pb.h" |
||||
#include "datasets/google_message1/proto3/benchmark_message1_proto3.pb.h" |
||||
#include "datasets/google_message2/benchmark_message2.pb.h" |
||||
#include "datasets/google_message3/benchmark_message3.pb.h" |
||||
#include "datasets/google_message4/benchmark_message4.pb.h" |
@ -0,0 +1,105 @@ |
||||
#include "benchmarks.pb.h"
#include "datasets/google_message1/proto2/benchmark_message1_proto2.pb.h"
#include "datasets/google_message1/proto3/benchmark_message1_proto3.pb.h"
#include "datasets/google_message2/benchmark_message2.pb.h"
#include "datasets/google_message3/benchmark_message3.pb.h"
#include "datasets/google_message4/benchmark_message4.pb.h"

#include "google/protobuf/message.h"
#include "google/protobuf/descriptor.h"

#include <fstream>
#include <iostream>
#include <memory>
#include <string>
#include <vector>
||||
|
||||
using google::protobuf::FieldDescriptor; |
||||
using google::protobuf::Message; |
||||
using google::protobuf::Reflection; |
||||
|
||||
|
||||
class DataGroupStripper { |
||||
public: |
||||
static void StripMessage(Message *message) { |
||||
std::vector<const FieldDescriptor*> set_fields; |
||||
const Reflection* reflection = message->GetReflection(); |
||||
reflection->ListFields(*message, &set_fields); |
||||
|
||||
for (size_t i = 0; i < set_fields.size(); i++) { |
||||
const FieldDescriptor* field = set_fields[i]; |
||||
if (field->type() == FieldDescriptor::TYPE_GROUP) { |
||||
reflection->ClearField(message, field); |
||||
} |
||||
if (field->type() == FieldDescriptor::TYPE_MESSAGE) { |
||||
if (field->is_repeated()) { |
||||
for (int j = 0; j < reflection->FieldSize(*message, field); j++) { |
||||
StripMessage(reflection->MutableRepeatedMessage(message, field, j)); |
||||
} |
||||
} else { |
||||
StripMessage(reflection->MutableMessage(message, field)); |
||||
} |
||||
} |
||||
} |
||||
|
||||
reflection->MutableUnknownFields(message)->Clear(); |
||||
} |
||||
}; |
||||
|
||||
// Returns the complete contents of the file `name`.
//
// The file is opened in binary mode: the callers feed the result straight
// into a protobuf parser, so no newline translation may be applied to it.
// GOOGLE_CHECK-fails with a hint about the working directory when the file
// cannot be opened.
std::string ReadFile(const std::string& name) {
  std::ifstream file(name.c_str(), std::ios::in | std::ios::binary);
  GOOGLE_CHECK(file.is_open()) << "Couldn't find file '"
      << name
      << "', please make sure you are running this command from the benchmarks"
      << " directory.\n";
  // The extra parentheses around the first iterator avoid the most vexing
  // parse.
  return std::string((std::istreambuf_iterator<char>(file)),
                     std::istreambuf_iterator<char>());
}
||||
|
||||
int main(int argc, char *argv[]) { |
||||
if (argc % 2 == 0 || argc == 1) { |
||||
std::cerr << "Usage: [input_files] [output_file_names] where " << |
||||
"input_files are one to one mapping to output_file_names." << |
||||
std::endl; |
||||
return 1; |
||||
} |
||||
|
||||
for (int i = argc / 2; i > 0; i--) { |
||||
const std::string &input_file = argv[i]; |
||||
const std::string &output_file = argv[i + argc / 2]; |
||||
|
||||
std::cerr << "Generating " << input_file |
||||
<< " to " << output_file << std::endl; |
||||
benchmarks::BenchmarkDataset dataset; |
||||
Message* message; |
||||
std::string dataset_payload = ReadFile(input_file); |
||||
GOOGLE_CHECK(dataset.ParseFromString(dataset_payload)) |
||||
<< "Can' t parse data file " << input_file; |
||||
|
||||
if (dataset.message_name() == "benchmarks.proto3.GoogleMessage1") { |
||||
message = new benchmarks::proto3::GoogleMessage1; |
||||
} else if (dataset.message_name() == "benchmarks.proto2.GoogleMessage1") { |
||||
message = new benchmarks::proto2::GoogleMessage1; |
||||
} else if (dataset.message_name() == "benchmarks.proto2.GoogleMessage2") { |
||||
message = new benchmarks::proto2::GoogleMessage2; |
||||
} else if (dataset.message_name() == |
||||
"benchmarks.google_message3.GoogleMessage3") { |
||||
message = new benchmarks::google_message3::GoogleMessage3; |
||||
} else if (dataset.message_name() == |
||||
"benchmarks.google_message4.GoogleMessage4") { |
||||
message = new benchmarks::google_message4::GoogleMessage4; |
||||
} else { |
||||
std::cerr << "Unknown message type: " << dataset.message_name(); |
||||
exit(1); |
||||
} |
||||
|
||||
for (int i = 0; i < dataset.payload_size(); i++) { |
||||
message->ParseFromString(dataset.payload(i)); |
||||
DataGroupStripper::StripMessage(message); |
||||
dataset.set_payload(i, message->SerializeAsString()); |
||||
} |
||||
|
||||
std::ofstream ofs(output_file); |
||||
ofs << dataset.SerializeAsString(); |
||||
ofs.close(); |
||||
} |
||||
|
||||
|
||||
return 0; |
||||
} |
@ -0,0 +1,103 @@ |
||||
#include "google/protobuf/compiler/code_generator.h" |
||||
#include "google/protobuf/io/zero_copy_stream.h" |
||||
#include "google/protobuf/io/printer.h" |
||||
#include "google/protobuf/descriptor.h" |
||||
#include "google/protobuf/descriptor.pb.h" |
||||
#include "schema_proto2_to_proto3_util.h" |
||||
|
||||
#include "google/protobuf/compiler/plugin.h" |
||||
|
||||
using google::protobuf::FileDescriptorProto; |
||||
using google::protobuf::FileDescriptor; |
||||
using google::protobuf::DescriptorPool; |
||||
using google::protobuf::io::Printer; |
||||
using google::protobuf::util::SchemaGroupStripper; |
||||
using google::protobuf::util::SchemaAddZeroEnumValue; |
||||
|
||||
namespace google { |
||||
namespace protobuf { |
||||
namespace compiler { |
||||
|
||||
namespace { |
||||
|
||||
// Returns `filename` without its trailing ".protodevel" or ".proto" suffix.
// A name that ends in neither suffix (including one shorter than the
// suffixes, or the empty string) is returned unchanged.
std::string StripProto(std::string filename) {
  // ".protodevel" is tested first purely for clarity; neither suffix is a
  // suffix of the other, so the order does not affect the result.
  const std::string kSuffixes[] = {".protodevel", ".proto"};
  for (const std::string& suffix : kSuffixes) {
    // The size guard keeps `size() - suffix.size()` from wrapping around.
    if (filename.size() >= suffix.size() &&
        filename.compare(filename.size() - suffix.size(), suffix.size(),
                         suffix) == 0) {
      filename.resize(filename.size() - suffix.size());
      break;
    }
  }
  return filename;
}
||||
|
||||
// Pool shared by the generator below: Generate() builds each rewritten file
// into it, and GenerateAll() looks files up in it by name to decide which
// inputs are already built and which still have unbuilt dependencies.
DescriptorPool new_pool_; |
||||
|
||||
} // namespace
|
||||
|
||||
class GoGoProtoGenerator : public CodeGenerator { |
||||
public: |
||||
virtual bool GenerateAll(const std::vector<const FileDescriptor*>& files, |
||||
const string& parameter, |
||||
GeneratorContext* context, |
||||
string* error) const { |
||||
for (int i = 0; i < files.size(); i++) { |
||||
for (auto file : files) { |
||||
bool can_generate = |
||||
(new_pool_.FindFileByName(file->name()) == nullptr); |
||||
for (int j = 0; j < file->dependency_count(); j++) { |
||||
can_generate &= (new_pool_.FindFileByName( |
||||
file->dependency(j)->name()) != nullptr); |
||||
} |
||||
for (int j = 0; j < file->public_dependency_count(); j++) { |
||||
can_generate &= (new_pool_.FindFileByName( |
||||
file->public_dependency(j)->name()) != nullptr); |
||||
} |
||||
for (int j = 0; j < file->weak_dependency_count(); j++) { |
||||
can_generate &= (new_pool_.FindFileByName( |
||||
file->weak_dependency(j)->name()) != nullptr); |
||||
} |
||||
if (can_generate) { |
||||
Generate(file, parameter, context, error); |
||||
break; |
||||
} |
||||
} |
||||
} |
||||
|
||||
return true; |
||||
} |
||||
|
||||
virtual bool Generate(const FileDescriptor* file, |
||||
const string& parameter, |
||||
GeneratorContext* context, |
||||
string* error) const { |
||||
FileDescriptorProto new_file; |
||||
file->CopyTo(&new_file); |
||||
SchemaGroupStripper::StripFile(file, &new_file); |
||||
|
||||
SchemaAddZeroEnumValue enum_scrubber; |
||||
enum_scrubber.ScrubFile(&new_file); |
||||
|
||||
string filename = file->name(); |
||||
string basename = StripProto(filename); |
||||
|
||||
std::vector<std::pair<string,string>> option_pairs; |
||||
ParseGeneratorParameter(parameter, &option_pairs); |
||||
|
||||
std::unique_ptr<google::protobuf::io::ZeroCopyOutputStream> output( |
||||
context->Open(basename + ".proto")); |
||||
string content = new_pool_.BuildFile(new_file)->DebugString(); |
||||
Printer printer(output.get(), '$'); |
||||
printer.WriteRaw(content.c_str(), content.size()); |
||||
|
||||
return true; |
||||
} |
||||
}; |
||||
|
||||
} // namespace compiler
|
||||
} // namespace protobuf
|
||||
} // namespace google
|
||||
|
||||
int main(int argc, char* argv[]) { |
||||
google::protobuf::compiler::GoGoProtoGenerator generator; |
||||
return google::protobuf::compiler::PluginMain(argc, argv, &generator); |
||||
} |
@ -0,0 +1,137 @@ |
||||
#ifndef PROTOBUF_BENCHMARKS_UTIL_SCHEMA_PROTO2_TO_PROTO3_UTIL_H_ |
||||
#define PROTOBUF_BENCHMARKS_UTIL_SCHEMA_PROTO2_TO_PROTO3_UTIL_H_ |
||||
|
||||
#include "google/protobuf/message.h" |
||||
#include "google/protobuf/descriptor.h" |
||||
#include "google/protobuf/descriptor.pb.h" |
||||
|
||||
#include <sstream> |
||||
#include <algorithm> |
||||
|
||||
using google::protobuf::Descriptor; |
||||
using google::protobuf::DescriptorProto; |
||||
using google::protobuf::FileDescriptorProto; |
||||
using google::protobuf::FieldDescriptorProto; |
||||
using google::protobuf::Message; |
||||
using google::protobuf::EnumValueDescriptorProto; |
||||
|
||||
namespace google { |
||||
namespace protobuf { |
||||
namespace util { |
||||
|
||||
class SchemaGroupStripper { |
||||
|
||||
public: |
||||
static void StripFile(const FileDescriptor* old_file, |
||||
FileDescriptorProto *file) { |
||||
for (int i = file->mutable_message_type()->size() - 1; i >= 0; i--) { |
||||
if (IsMessageSet(old_file->message_type(i))) { |
||||
file->mutable_message_type()->DeleteSubrange(i, 1); |
||||
continue; |
||||
} |
||||
StripMessage(old_file->message_type(i), file->mutable_message_type(i)); |
||||
} |
||||
for (int i = file->mutable_extension()->size() - 1; i >= 0; i--) { |
||||
auto field = old_file->extension(i); |
||||
if (field->type() == FieldDescriptor::TYPE_GROUP || |
||||
IsMessageSet(field->message_type()) || |
||||
IsMessageSet(field->containing_type())) { |
||||
file->mutable_extension()->DeleteSubrange(i, 1); |
||||
} |
||||
} |
||||
} |
||||
|
||||
private: |
||||
static bool IsMessageSet(const Descriptor *descriptor) { |
||||
if (descriptor != nullptr |
||||
&& descriptor->options().message_set_wire_format()) { |
||||
return true; |
||||
} |
||||
return false; |
||||
} |
||||
|
||||
static void StripMessage(const Descriptor *old_message, |
||||
DescriptorProto *new_message) { |
||||
for (int i = new_message->mutable_field()->size() - 1; i >= 0; i--) { |
||||
if (old_message->field(i)->type() == FieldDescriptor::TYPE_GROUP || |
||||
IsMessageSet(old_message->field(i)->message_type())) { |
||||
new_message->mutable_field()->DeleteSubrange(i, 1); |
||||
} |
||||
} |
||||
for (int i = new_message->mutable_extension()->size() - 1; i >= 0; i--) { |
||||
auto field_type_name = new_message->mutable_extension(i)->type_name(); |
||||
if (old_message->extension(i)->type() == FieldDescriptor::TYPE_GROUP || |
||||
IsMessageSet(old_message->extension(i)->containing_type()) || |
||||
IsMessageSet(old_message->extension(i)->message_type())) { |
||||
new_message->mutable_extension()->DeleteSubrange(i, 1); |
||||
} |
||||
} |
||||
for (int i = 0; i < new_message->mutable_nested_type()->size(); i++) { |
||||
StripMessage(old_message->nested_type(i), |
||||
new_message->mutable_nested_type(i)); |
||||
} |
||||
} |
||||
|
||||
}; |
||||
|
||||
class SchemaAddZeroEnumValue { |
||||
|
||||
public: |
||||
SchemaAddZeroEnumValue() |
||||
: total_added_(0) { |
||||
} |
||||
|
||||
void ScrubFile(FileDescriptorProto *file) { |
||||
for (int i = 0; i < file->enum_type_size(); i++) { |
||||
ScrubEnum(file->mutable_enum_type(i)); |
||||
} |
||||
for (int i = 0; i < file->mutable_message_type()->size(); i++) { |
||||
ScrubMessage(file->mutable_message_type(i)); |
||||
} |
||||
} |
||||
|
||||
private: |
||||
void ScrubEnum(EnumDescriptorProto *enum_type) { |
||||
if (enum_type->value(0).number() != 0) { |
||||
bool has_zero = false; |
||||
for (int j = 0; j < enum_type->value().size(); j++) { |
||||
if (enum_type->value(j).number() == 0) { |
||||
EnumValueDescriptorProto temp_enum_value; |
||||
temp_enum_value.CopyFrom(enum_type->value(j)); |
||||
enum_type->mutable_value(j)->CopyFrom(enum_type->value(0)); |
||||
enum_type->mutable_value(0)->CopyFrom(temp_enum_value); |
||||
has_zero = true; |
||||
break; |
||||
} |
||||
} |
||||
if (!has_zero) { |
||||
enum_type->mutable_value()->Add(); |
||||
for (int i = enum_type->mutable_value()->size() - 1; i > 0; i--) { |
||||
enum_type->mutable_value(i)->CopyFrom( |
||||
*enum_type->mutable_value(i - 1)); |
||||
} |
||||
enum_type->mutable_value(0)->set_number(0); |
||||
enum_type->mutable_value(0)->set_name("ADDED_ZERO_VALUE_" + |
||||
std::to_string(total_added_++)); |
||||
} |
||||
} |
||||
|
||||
} |
||||
|
||||
void ScrubMessage(DescriptorProto *message_type) { |
||||
for (int i = 0; i < message_type->mutable_enum_type()->size(); i++) { |
||||
ScrubEnum(message_type->mutable_enum_type(i)); |
||||
} |
||||
for (int i = 0; i < message_type->mutable_nested_type()->size(); i++) { |
||||
ScrubMessage(message_type->mutable_nested_type(i)); |
||||
} |
||||
} |
||||
|
||||
int total_added_; |
||||
}; |
||||
|
||||
} // namespace util
|
||||
} // namespace protobuf
|
||||
} // namespace google
|
||||
|
||||
#endif // PROTOBUF_BENCHMARKS_UTIL_SCHEMA_PROTO2_TO_PROTO3_UTIL_H_
|
Loading…
Reference in new issue