Aggregate Metadata Files (#7155) (#7194)

Instead of calling initOnce of dependencies, initialize metadata of dependencies in the same file.

To enable this behavior, pass the aggregate_metadata option to protoc, e.g.:
--php_out=aggregate_metadata=foo#bar:generated_dir
For each input file, its transitive dependencies (including the file itself) whose package name starts with foo or bar are aggregated: their metadata is serialized together and registered in a single internalAddGeneratedFile call. For all other dependencies, initOnce is called as before.

This feature is EXPERIMENTAL. DO NOT USE!!!
pull/7201/head
Paul Yang 5 years ago committed by GitHub
parent fedb2beee3
commit 537c5aa9e0
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 3
      php/composer.json
  2. 134
      php/ext/google/protobuf/def.c
  3. 14
      php/src/Google/Protobuf/Internal/DescriptorPool.php
  4. 1
      php/src/Google/Protobuf/Internal/Message.php
  5. 2
      php/tests/proto/test_import_descriptor_proto.proto
  6. 1
      php/tests/well_known_test.php
  7. 237
      src/google/protobuf/compiler/php/php_generator.cc
  8. 14
      src/google/protobuf/compiler/php/php_generator.h

@ -23,6 +23,7 @@
}
},
"scripts": {
"test": "(cd tests && rm -rf generated && mkdir -p generated && ../../src/protoc --php_out=generated -I../../src -I. proto/empty/echo.proto proto/test.proto proto/test_include.proto proto/test_no_namespace.proto proto/test_prefix.proto proto/test_php_namespace.proto proto/test_empty_php_namespace.proto proto/test_reserved_enum_lower.proto proto/test_reserved_enum_upper.proto proto/test_reserved_enum_value_lower.proto proto/test_reserved_enum_value_upper.proto proto/test_reserved_message_lower.proto proto/test_reserved_message_upper.proto proto/test_service.proto proto/test_service_namespace.proto proto/test_wrapper_type_setters.proto proto/test_descriptors.proto) && (cd ../src && ./protoc --php_out=../php/tests/generated -I../php/tests -I. ../php/tests/proto/test_import_descriptor_proto.proto) && vendor/bin/phpunit"
"test": "(cd tests && rm -rf generated && mkdir -p generated && ../../src/protoc --php_out=generated -I../../src -I. proto/empty/echo.proto proto/test.proto proto/test_include.proto proto/test_no_namespace.proto proto/test_prefix.proto proto/test_php_namespace.proto proto/test_empty_php_namespace.proto proto/test_reserved_enum_lower.proto proto/test_reserved_enum_upper.proto proto/test_reserved_enum_value_lower.proto proto/test_reserved_enum_value_upper.proto proto/test_reserved_message_lower.proto proto/test_reserved_message_upper.proto proto/test_service.proto proto/test_service_namespace.proto proto/test_wrapper_type_setters.proto proto/test_descriptors.proto) && (cd ../src && ./protoc --php_out=../php/tests/generated -I../php/tests -I. ../php/tests/proto/test_import_descriptor_proto.proto) && vendor/bin/phpunit",
"aggregate_metadata_test": "(cd tests && rm -rf generated && mkdir -p generated && ../../src/protoc --php_out=aggregate_metadata=foo#bar:generated -I../../src -I. proto/test.proto proto/test_include.proto && ../../src/protoc --php_out=generated -I../../src -I. proto/empty/echo.proto proto/test_no_namespace.proto proto/test_empty_php_namespace.proto proto/test_prefix.proto proto/test_php_namespace.proto proto/test_reserved_enum_lower.proto proto/test_reserved_enum_upper.proto proto/test_reserved_enum_value_lower.proto proto/test_reserved_enum_value_upper.proto proto/test_reserved_message_lower.proto proto/test_reserved_message_upper.proto proto/test_service.proto proto/test_service_namespace.proto proto/test_wrapper_type_setters.proto proto/test_descriptors.proto) && (cd ../src && ./protoc --php_out=aggregate_metadata=foo:../php/tests/generated -I../php/tests -I. ../php/tests/proto/test_import_descriptor_proto.proto) && vendor/bin/phpunit"
}
}

@ -891,74 +891,15 @@ bool depends_on_descriptor(const google_protobuf_FileDescriptorProto* file) {
return false;
}
// Parses `data` as a serialized FileDescriptorSet that must contain exactly
// one file and adds that file to pool->symtab.
//
// Returns the value produced by upb_symtab_addfile for the new file. Returns
// NULL when the bytes cannot be parsed, when the set does not hold exactly one
// file, when a file with the same name is already in the symbol table, or
// when descriptor.proto had to be loaded first and that load returned NULL.
const upb_filedef *parse_and_add_descriptor(const char *data,
                                            PHP_PROTO_SIZE data_len,
                                            InternalDescriptorPoolImpl *pool,
                                            upb_arena *arena) {
  size_t file_count;
  upb_status status;

  google_protobuf_FileDescriptorSet *descriptor_set =
      google_protobuf_FileDescriptorSet_parse(data, data_len, arena);
  if (descriptor_set == NULL) {
    zend_error(E_ERROR, "Failed to parse binary descriptor\n");
    return NULL;
  }

  const google_protobuf_FileDescriptorProto *const *protos =
      google_protobuf_FileDescriptorSet_file(descriptor_set, &file_count);
  if (file_count != 1) {
    zend_error(E_ERROR, "Serialized descriptors should have exactly one file");
    return NULL;
  }
  const google_protobuf_FileDescriptorProto *proto = protos[0];

  // Nothing to do when a file with this name is already registered.
  // TODO(teboring): Needs another look up method which takes data and length.
  upb_strview filename = google_protobuf_FileDescriptorProto_name(proto);
  if (upb_symtab_lookupfile2(pool->symtab, filename.data, filename.size) !=
      NULL) {
    return NULL;
  }

  // The PHP code generator currently special-cases descriptor.proto: it does
  // not emit it as a dependency even if the proto file really depends on it,
  // so it is loaded here on demand before the dependent file is added.
  if (depends_on_descriptor(proto) &&
      upb_symtab_lookupfile(pool->symtab,
                            "google/protobuf/descriptor.proto") == NULL &&
      !parse_and_add_descriptor((char *)descriptor_proto, descriptor_proto_len,
                                pool, arena)) {
    return NULL;
  }

  upb_status_clear(&status);
  const upb_filedef *added = upb_symtab_addfile(pool->symtab, proto, &status);
  check_upb_status(&status, "Unable to load descriptor");
  return added;
}
void internal_add_generated_file(const char *data, PHP_PROTO_SIZE data_len,
InternalDescriptorPoolImpl *pool,
static void internal_add_single_generated_file(
const upb_filedef* file,
InternalDescriptorPoolImpl* pool,
bool use_nested_submsg TSRMLS_DC) {
int i;
upb_arena *arena;
const upb_filedef* file;
arena = upb_arena_new();
file = parse_and_add_descriptor(data, data_len, pool, arena);
upb_arena_free(arena);
if (!file) return;
size_t i;
// For each enum/message, we need its PHP class, upb descriptor and its PHP
// wrapper. These information are needed later for encoding, decoding and type
// checking. However, sometimes we just have one of them. In order to find
// them quickly, here, we store the mapping for them.
for (i = 0; i < upb_filedef_msgcount(file); i++) {
const upb_msgdef *msgdef = upb_filedef_msg(file, i);
CREATE_HASHTABLE_VALUE(desc, desc_php, Descriptor, descriptor_type);
@ -1000,6 +941,73 @@ void internal_add_generated_file(const char *data, PHP_PROTO_SIZE data_len,
}
}
// Parses `data` as a serialized FileDescriptorSet and registers every file in
// it that is not yet present in pool->symtab.
//
// For each newly added file, internal_add_single_generated_file() is invoked
// so that its messages/enums get their PHP-side mappings. Files whose name is
// already known to the symbol table are skipped. If a file depends on
// descriptor.proto and that file has not been loaded yet, the embedded
// descriptor_proto blob is loaded first through a recursive call.
//
// Parameters:
//   data, data_len     - serialized FileDescriptorSet bytes.
//   pool               - descriptor pool whose symtab receives the files.
//   arena              - arena used for parsing; owned by the caller.
//   use_nested_submsg  - forwarded unchanged to
//                        internal_add_single_generated_file().
//
// Returns true when all files were processed, false when parsing failed,
// when loading descriptor.proto failed, or when a file could not be added.
const bool parse_and_add_descriptor(const char *data,
                                    PHP_PROTO_SIZE data_len,
                                    InternalDescriptorPoolImpl *pool,
                                    upb_arena *arena,
                                    bool use_nested_submsg TSRMLS_DC) {
  size_t i, n;
  google_protobuf_FileDescriptorSet *set;
  const google_protobuf_FileDescriptorProto* const* files;
  const upb_filedef* file;
  upb_status status;

  set = google_protobuf_FileDescriptorSet_parse(
      data, data_len, arena);

  if (!set) {
    zend_error(E_ERROR, "Failed to parse binary descriptor\n");
    return false;
  }

  files = google_protobuf_FileDescriptorSet_file(set, &n);

  for (i = 0; i < n; i++) {
    // Check whether file has already been added.
    upb_strview name = google_protobuf_FileDescriptorProto_name(files[i]);
    // TODO(teboring): Needs another look up method which takes data and length.
    file = upb_symtab_lookupfile2(pool->symtab, name.data, name.size);
    if (file != NULL) {
      continue;
    }

    // The PHP code generator currently special-cases descriptor.proto.  It
    // doesn't add it as a dependency even if the proto file actually does
    // depend on it.
    if (depends_on_descriptor(files[i]) &&
        upb_symtab_lookupfile(
            pool->symtab, "google/protobuf/descriptor.proto") ==
            NULL) {
      if (!parse_and_add_descriptor((char *)descriptor_proto,
                                    descriptor_proto_len, pool, arena,
                                    use_nested_submsg TSRMLS_CC)) {
        return false;
      }
    }

    upb_status_clear(&status);
    file = upb_symtab_addfile(pool->symtab, files[i], &status);
    check_upb_status(&status, "Unable to load descriptor");

    // check_upb_status() is presumably fatal on error, but do not rely on
    // that: never hand a NULL file definition to the registration step.
    if (file == NULL) {
      return false;
    }

    internal_add_single_generated_file(file, pool, use_nested_submsg TSRMLS_CC);
  }

  return true;
}
// Adds the file(s) contained in a serialized FileDescriptorSet to `pool`.
//
// A temporary arena is created for the duration of the parse, handed to
// parse_and_add_descriptor(), and freed again before returning. The boolean
// result of parse_and_add_descriptor() is not propagated to the caller.
void internal_add_generated_file(const char *data, PHP_PROTO_SIZE data_len,
                                 InternalDescriptorPoolImpl *pool,
                                 bool use_nested_submsg TSRMLS_DC) {
  upb_arena *arena = upb_arena_new();

  parse_and_add_descriptor(data, data_len, pool, arena,
                           use_nested_submsg TSRMLS_CC);

  upb_arena_free(arena);
}
PHP_METHOD(InternalDescriptorPool, internalAddGeneratedFile) {
char *data = NULL;
PHP_PROTO_SIZE data_len;

@ -59,7 +59,9 @@ class DescriptorPool
{
$files = new FileDescriptorSet();
$files->mergeFromString($data);
$file = FileDescriptor::buildFromProto($files->getFile()[0]);
foreach($files->getFile() as $file_proto) {
$file = FileDescriptor::buildFromProto($file_proto);
foreach ($file->getMessageType() as $desc) {
$this->addDescriptor($desc);
@ -76,6 +78,7 @@ class DescriptorPool
}
unset($desc);
}
}
public function addMessage($name, $klass)
{
@ -149,8 +152,13 @@ class DescriptorPool
switch ($field->getType()) {
case GPBType::MESSAGE:
$proto = $field->getMessageType();
$field->setMessageType(
$this->getDescriptorByProtoName($proto));
$subdesc = $this->getDescriptorByProtoName($proto);
if (is_null($subdesc)) {
trigger_error(
'proto not added: ' . $proto
. " for " . $desc->getFullName(), E_ERROR);
}
$field->setMessageType($subdesc);
break;
case GPBType::ENUM:
$proto = $field->getEnumType();

@ -94,6 +94,7 @@ class Message
$this->desc = $pool->getDescriptorByClassName(get_class($this));
if (is_null($this->desc)) {
user_error(get_class($this) . " is not found in descriptor pool.");
return;
}
foreach ($this->desc->getField() as $field) {
$setter = $field->getSetter();

@ -1,5 +1,7 @@
syntax = "proto3";
package foo;
import "google/protobuf/descriptor.proto";
message TestImportDescriptorProto {

@ -4,6 +4,7 @@ require_once('test_base.php');
require_once('test_util.php');
use Foo\TestMessage;
use Foo\TestImportDescriptorProto;
use Google\Protobuf\Any;
use Google\Protobuf\Api;
use Google\Protobuf\BoolValue;

@ -37,6 +37,16 @@
#include <google/protobuf/io/printer.h>
#include <google/protobuf/io/zero_copy_stream.h>
#include <google/protobuf/stubs/strutil.h>
#include <google/protobuf/any.pb.h>
#include <google/protobuf/api.pb.h>
#include <google/protobuf/duration.pb.h>
#include <google/protobuf/empty.pb.h>
#include <google/protobuf/field_mask.pb.h>
#include <google/protobuf/source_context.pb.h>
#include <google/protobuf/struct.pb.h>
#include <google/protobuf/timestamp.pb.h>
#include <google/protobuf/type.pb.h>
#include <google/protobuf/wrappers.pb.h>
#include <sstream>
@ -91,6 +101,9 @@ std::string UnderscoresToCamelCase(const string& name, bool cap_first_letter);
std::string BinaryToHex(const string& binary);
void Indent(io::Printer* printer);
void Outdent(io::Printer* printer);
void GenerateAddFilesToPool(const FileDescriptor* file,
const std::set<string>& aggregate_metadata_prefixes,
io::Printer* printer);
void GenerateMessageDocComment(io::Printer* printer, const Descriptor* message,
int is_descriptor);
void GenerateMessageConstructorDocComment(io::Printer* printer,
@ -111,7 +124,6 @@ void GenerateServiceDocComment(io::Printer* printer,
void GenerateServiceMethodDocComment(io::Printer* printer,
const MethodDescriptor* method);
std::string ReservedNamePrefix(const string& classname,
const FileDescriptor* file) {
bool is_reserved = false;
@ -924,13 +936,20 @@ void GenerateMessageToPool(const string& name_prefix, const Descriptor* message,
}
}
void GenerateAddFileToPool(const FileDescriptor* file, bool is_descriptor,
void GenerateAddFileToPool(
const FileDescriptor* file,
bool is_descriptor,
bool aggregate_metadata,
const std::set<string>& aggregate_metadata_prefixes,
io::Printer* printer) {
printer->Print(
"public static $is_initialized = false;\n\n"
"public static function initOnce() {\n");
Indent(printer);
if (aggregate_metadata) {
GenerateAddFilesToPool(file, aggregate_metadata_prefixes, printer);
} else {
printer->Print(
"$pool = \\Google\\Protobuf\\Internal\\"
"DescriptorPool::getGeneratedPool();\n\n"
@ -994,14 +1013,9 @@ void GenerateAddFileToPool(const FileDescriptor* file, bool is_descriptor,
printer->Print("$pool->internalAddGeneratedFile(hex2bin(\n");
Indent(printer);
// Only write 30 bytes per line.
static const int kBytesPerLine = 30;
for (int i = 0; i < files_data.size(); i += kBytesPerLine) {
printer->Print(
"\"^data^\"^dot^\n",
"data", BinaryToHex(files_data.substr(i, kBytesPerLine)),
"dot", i + kBytesPerLine < files_data.size() ? " ." : "");
}
"\"^data^\"\n",
"data", BinaryToHex(files_data));
Outdent(printer);
printer->Print(
@ -1009,10 +1023,146 @@ void GenerateAddFileToPool(const FileDescriptor* file, bool is_descriptor,
}
printer->Print(
"static::$is_initialized = true;\n");
}
Outdent(printer);
printer->Print("}\n");
}
// Records the import graph reachable from `file` in a form suitable for
// ordering files by dependency.
//
//   nodes_without_dependency - receives every visited file that has no
//                              imports other than kDescriptorFile.
//   deps                     - maps a file to the set of visited files that
//                              import it (reverse edges).
//   dependency_count         - maps a file to the number of its imports,
//                              not counting kDescriptorFile.
//
// kDescriptorFile is ignored throughout: it is neither counted, recorded as
// an edge, nor recursed into.
static void AnalyzeDependencyForFile(
    const FileDescriptor* file,
    std::set<const FileDescriptor*>* nodes_without_dependency,
    std::map<const FileDescriptor*, std::set<const FileDescriptor*>>* deps,
    std::map<const FileDescriptor*, int>* dependency_count) {
  // A file reachable through several import paths only needs to be analyzed
  // once; re-walking it would produce the same entries again and makes the
  // traversal exponential on diamond-shaped import graphs.
  if (nodes_without_dependency->count(file) > 0 ||
      dependency_count->count(file) > 0) {
    return;
  }

  // Number of imports that take part in the ordering.
  int count = 0;
  for (int i = 0; i < file->dependency_count(); i++) {
    if (file->dependency(i)->name() != kDescriptorFile) {
      count++;
    }
  }

  if (count == 0) {
    nodes_without_dependency->insert(file);
    return;
  }

  (*dependency_count)[file] = count;
  for (int i = 0; i < file->dependency_count(); i++) {
    const FileDescriptor* dependency = file->dependency(i);
    if (dependency->name() == kDescriptorFile) {
      continue;
    }
    // operator[] creates the empty set on first use.
    (*deps)[dependency].insert(file);
    AnalyzeDependencyForFile(
        dependency, nodes_without_dependency, deps, dependency_count);
  }
}
static bool NeedsUnwrapping(
const FileDescriptor* file,
const std::set<string>& aggregate_metadata_prefixes) {
bool has_aggregate_metadata_prefix = false;
if (aggregate_metadata_prefixes.empty()) {
has_aggregate_metadata_prefix = true;
} else {
for (const auto& prefix : aggregate_metadata_prefixes) {
if (HasPrefixString(file->package(), prefix)) {
has_aggregate_metadata_prefix = true;
break;
}
}
}
return has_aggregate_metadata_prefix;
}
// Prints the PHP body of initOnce() for aggregate-metadata mode.
//
// The emitted PHP fetches the generated descriptor pool and returns early if
// the class is already initialized. `file` and its transitive imports are
// then walked in dependency order. Files for which NeedsUnwrapping() is true
// are copied into one FileDescriptorSet; for every other file a call to the
// initOnce() of its own metadata class is printed. Finally a single
// internalAddGeneratedFile(hex2bin("..."), true) call carrying the serialized
// set is printed, followed by setting static::$is_initialized.
//
//   file                         - the file whose metadata class is generated.
//   aggregate_metadata_prefixes  - package prefixes passed to NeedsUnwrapping().
//   printer                      - destination for the generated PHP.
void GenerateAddFilesToPool(
const FileDescriptor* file,
const std::set<string>& aggregate_metadata_prefixes,
io::Printer* printer) {
printer->Print(
"$pool = \\Google\\Protobuf\\Internal\\"
"DescriptorPool::getGeneratedPool();\n"
"if (static::$is_initialized == true) {\n"
" return;\n"
"}\n");
// Sort files according to dependency
// deps: file -> files importing it; dependency_count: file -> number of
// imports still unprocessed; nodes_without_dependency: files ready to emit.
std::map<const FileDescriptor*, std::set<const FileDescriptor*>> deps;
std::map<const FileDescriptor*, int> dependency_count;
std::set<const FileDescriptor*> nodes_without_dependency;
FileDescriptorSet sorted_file_set;
AnalyzeDependencyForFile(
file, &nodes_without_dependency, &deps, &dependency_count);
// Repeatedly take a ready file, then release the files that import it.
// The ready set is a std::set of pointers, so the order among files that are
// ready at the same time follows pointer ordering.
while (!nodes_without_dependency.empty()) {
// Note: this local `file` shadows the function parameter of the same name.
auto file = *nodes_without_dependency.begin();
nodes_without_dependency.erase(file);
for (auto dependent : deps[file]) {
// A dependent whose last outstanding import was just handled becomes ready.
if (dependency_count[dependent] == 1) {
dependency_count.erase(dependent);
nodes_without_dependency.insert(dependent);
} else {
dependency_count[dependent] -= 1;
}
}
bool needs_aggregate = NeedsUnwrapping(file, aggregate_metadata_prefixes);
if (needs_aggregate) {
// Aggregated file: append a copy of its descriptor proto to the set.
auto file_proto = sorted_file_set.add_file();
file->CopyTo(file_proto);
// Filter out descriptor.proto as it cannot be depended on for now.
// NOTE(review): the condition below is `!=`, so this loop erases the first
// dependency that is NOT kDescriptorFile and then stops, which looks like the
// opposite of what the comment above describes. Confirm the intent before
// changing it: the C extension's loader appears to look for descriptor.proto
// in the dependency list (depends_on_descriptor) -- TODO verify.
RepeatedPtrField<string>* dependency = file_proto->mutable_dependency();
for (RepeatedPtrField<string>::iterator it = dependency->begin();
it != dependency->end(); ++it) {
if (*it != kDescriptorFile) {
dependency->erase(it);
break;
}
}
// Filter out all extensions, since we do not support extension yet.
// Only file-level extensions and those of top-level messages are cleared;
// nested message types are not visited by this loop.
file_proto->clear_extension();
RepeatedPtrField<DescriptorProto>* message_type =
file_proto->mutable_message_type();
for (RepeatedPtrField<DescriptorProto>::iterator it = message_type->begin();
it != message_type->end(); ++it) {
it->clear_extension();
}
} else {
// Non-aggregated file: defer to the initOnce() of its own metadata class.
std::string dependency_filename =
GeneratedMetadataFileName(file, false);
printer->Print(
"\\^name^::initOnce();\n",
"name", FilenameToClassname(dependency_filename));
}
}
// Emit the whole aggregated set as one hex string in a single call.
string files_data;
sorted_file_set.SerializeToString(&files_data);
printer->Print("$pool->internalAddGeneratedFile(hex2bin(\n");
Indent(printer);
printer->Print(
"\"^data^\"\n",
"data", BinaryToHex(files_data));
Outdent(printer);
printer->Print(
"), true);\n");
printer->Print(
"static::$is_initialized = true;\n");
}
void GenerateUseDeclaration(bool is_descriptor, io::Printer* printer) {
if (!is_descriptor) {
printer->Print(
@ -1051,6 +1201,8 @@ std::string FilenameToClassname(const string& filename) {
void GenerateMetadataFile(const FileDescriptor* file,
bool is_descriptor,
bool aggregate_metadata,
const std::set<string>& aggregate_metadata_prefixes,
GeneratorContext* generator_context) {
std::string filename = GeneratedMetadataFileName(file, is_descriptor);
std::unique_ptr<io::ZeroCopyOutputStream> output(
@ -1079,7 +1231,8 @@ void GenerateMetadataFile(const FileDescriptor* file,
}
Indent(&printer);
GenerateAddFileToPool(file, is_descriptor, &printer);
GenerateAddFileToPool(file, is_descriptor, aggregate_metadata,
aggregate_metadata_prefixes, &printer);
Outdent(&printer);
printer.Print("}\n\n");
@ -1229,6 +1382,7 @@ void GenerateEnumFile(const FileDescriptor* file, const EnumDescriptor* en,
void GenerateMessageFile(const FileDescriptor* file, const Descriptor* message,
bool is_descriptor,
bool aggregate_metadata,
GeneratorContext* generator_context) {
// Don't generate MapEntry messages -- we use the PHP extension's native
// support for map fields instead.
@ -1285,10 +1439,12 @@ void GenerateMessageFile(const FileDescriptor* file, const Descriptor* message,
GeneratedMetadataFileName(file, is_descriptor);
std::string metadata_fullname = FilenameToClassname(metadata_filename);
printer.Print(
"\\^fullname^::initOnce();\n"
"parent::__construct($data);\n",
"\\^fullname^::initOnce();\n",
"fullname", metadata_fullname);
printer.Print(
"parent::__construct($data);\n");
Outdent(&printer);
printer.Print("}\n\n");
@ -1328,6 +1484,7 @@ void GenerateMessageFile(const FileDescriptor* file, const Descriptor* message,
// Nested messages and enums.
for (int i = 0; i < message->nested_type_count(); i++) {
GenerateMessageFile(file, message->nested_type(i), is_descriptor,
aggregate_metadata,
generator_context);
}
for (int i = 0; i < message->enum_type_count(); i++) {
@ -1384,10 +1541,15 @@ void GenerateServiceFile(const FileDescriptor* file,
}
void GenerateFile(const FileDescriptor* file, bool is_descriptor,
bool aggregate_metadata,
const std::set<string>& aggregate_metadata_prefixes,
GeneratorContext* generator_context) {
GenerateMetadataFile(file, is_descriptor, generator_context);
GenerateMetadataFile(file, is_descriptor, aggregate_metadata,
aggregate_metadata_prefixes, generator_context);
for (int i = 0; i < file->message_type_count(); i++) {
GenerateMessageFile(file, file->message_type(i), is_descriptor,
aggregate_metadata,
generator_context);
}
for (int i = 0; i < file->enum_type_count(); i++) {
@ -1653,8 +1815,17 @@ void GenerateServiceMethodDocComment(io::Printer* printer,
bool Generator::Generate(const FileDescriptor* file, const string& parameter,
GeneratorContext* generator_context,
string* error) const {
bool is_descriptor = parameter == "internal";
return Generate(file, false, false, std::set<string>(),
generator_context, error);
}
bool Generator::Generate(
const FileDescriptor* file,
bool is_descriptor,
bool aggregate_metadata,
const std::set<string>& aggregate_metadata_prefixes,
GeneratorContext* generator_context,
string* error) const {
if (is_descriptor && file->name() != kDescriptorFile) {
*error =
"Can only generate PHP code for google/protobuf/descriptor.proto.\n";
@ -1668,11 +1839,47 @@ bool Generator::Generate(const FileDescriptor* file, const string& parameter,
return false;
}
GenerateFile(file, is_descriptor, generator_context);
GenerateFile(file, is_descriptor, aggregate_metadata,
aggregate_metadata_prefixes, generator_context);
return true;
}
// Generates PHP code for every file in `files`.
//
// `parameter` is a comma-separated option list. Recognized options:
//   internal                       - sets is_descriptor for all files.
//   aggregate_metadata[=p1#p2...]  - turns on metadata aggregation; each
//                                    '#'-separated value is recorded as a
//                                    package prefix. Without a value the
//                                    prefix set stays empty.
// Any option whose name merely starts with "aggregate_metadata" is treated
// as the aggregate_metadata option. Unrecognized options are ignored.
//
// Returns false as soon as generation fails for one file (with `error`
// filled in by the per-file Generate()), true otherwise.
bool Generator::GenerateAll(const std::vector<const FileDescriptor*>& files,
                            const std::string& parameter,
                            GeneratorContext* generator_context,
                            std::string* error) const {
  bool is_descriptor = false;
  bool aggregate_metadata = false;
  std::set<string> aggregate_metadata_prefixes;

  for (const auto& option : Split(parameter, ",")) {
    const auto option_pair = Split(option, "=");
    // Guard against an option that yields no pieces (e.g. "="); indexing
    // into an empty vector would be undefined behavior.
    if (option_pair.empty()) {
      continue;
    }

    if (HasPrefixString(option_pair[0], "aggregate_metadata")) {
      aggregate_metadata = true;
      // The value part is optional; only read it when it is present.
      if (option_pair.size() > 1) {
        for (const auto& prefix : Split(option_pair[1], "#", false)) {
          aggregate_metadata_prefixes.insert(prefix);
          GOOGLE_LOG(INFO) << prefix;
        }
      }
    }
    if (option_pair[0] == "internal") {
      is_descriptor = true;
    }
  }

  for (const FileDescriptor* file : files) {
    if (!Generate(
            file, is_descriptor, aggregate_metadata,
            aggregate_metadata_prefixes,
            generator_context, error)) {
      return false;
    }
  }
  return true;
}
} // namespace php
} // namespace compiler
} // namespace protobuf

@ -44,10 +44,24 @@ namespace compiler {
namespace php {
// protoc code generator producing PHP sources.
class PROTOC_EXPORT Generator : public CodeGenerator {
 public:
  // CodeGenerator entry point for a single file.
  bool Generate(const FileDescriptor* file,
                const string& parameter,
                GeneratorContext* generator_context,
                string* error) const override;

  // CodeGenerator entry point for a batch of files; `parameter` carries the
  // comma-separated generator options.
  bool GenerateAll(const std::vector<const FileDescriptor*>& files,
                   const std::string& parameter,
                   GeneratorContext* generator_context,
                   std::string* error) const override;

 private:
  // Worker taking the already-parsed options instead of the raw parameter
  // string.
  bool Generate(const FileDescriptor* file,
                bool is_descriptor,
                bool aggregate_metadata,
                const std::set<string>& aggregate_metadata_prefixes,
                GeneratorContext* generator_context,
                string* error) const;
};

Loading…
Cancel
Save