Refactored the C name mangling to not depend on C++ or upb reflection.

This yields two benefits:

1. The code no longer needs to be bootstrapped (since it no longer depends on upb reflection).
2. The upb code generator no longer depends on libprotobuf at all (except for `code_generator_lite.{h,cc}`, which is just one .cc file and has no deps).

PiperOrigin-RevId: 672280579
pull/18175/head
Joshua Haberman 6 months ago committed by Copybara-Service
parent da825e80b1
commit 5ee5891f7d
  1. 2
      cmake/upb_generators.cmake
  2. 15
      hpb_generator/gen_accessors.cc
  3. 6
      pkg/BUILD.bazel
  4. 14
      upb_generator/BUILD
  5. 2
      upb_generator/c/BUILD
  6. 76
      upb_generator/c/generator.cc
  7. 2
      upb_generator/minitable/BUILD
  8. 118
      upb_generator/names.cc
  9. 83
      upb_generator/names.h

@ -16,8 +16,6 @@ foreach(generator upb upbdefs upb_minitable)
)
target_include_directories(protoc-gen-${generator} PRIVATE ${bootstrap_cmake_dir})
target_link_libraries(protoc-gen-${generator}
${protobuf_LIB_PROTOBUF}
${protobuf_LIB_PROTOC}
${protobuf_LIB_UPB}
${protobuf_ABSL_USED_TARGETS}
)

@ -59,6 +59,11 @@ void WriteMapAccessorDefinitions(const protobuf::Descriptor* message,
std::string ResolveFieldName(const protobuf::FieldDescriptor* field,
const NameToFieldDescriptorMap& field_names);
// Builds the upb C name mangler for `message`, using the field-class table
// that GetCppFields() derives from the C++ descriptor.
upb::generator::NameMangler CreateNameMangler(
    const protobuf::Descriptor* message) {
  auto field_classes = upb::generator::GetCppFields(message);
  return upb::generator::NameMangler(field_classes);
}
NameToFieldDescriptorMap CreateFieldNameMap(
const protobuf::Descriptor* message) {
NameToFieldDescriptorMap field_names;
@ -75,12 +80,11 @@ void WriteFieldAccessorsInHeader(const protobuf::Descriptor* desc,
OutputIndenter i(output);
auto field_names = CreateFieldNameMap(desc);
auto upbc_field_names = upb::generator::CreateFieldNameMap(desc);
auto mangler = CreateNameMangler(desc);
for (const auto* field : FieldNumberOrder(desc)) {
std::string resolved_field_name = ResolveFieldName(field, field_names);
std::string resolved_upbc_name =
upb::generator::ResolveFieldName(field, upbc_field_names);
std::string resolved_upbc_name = mangler.ResolveFieldName(field->name());
WriteFieldAccessorHazzer(desc, field, resolved_field_name,
resolved_upbc_name, output);
WriteFieldAccessorClear(desc, field, resolved_field_name,
@ -191,14 +195,13 @@ void WriteAccessorsInSource(const protobuf::Descriptor* desc, Output& output) {
output("namespace internal {\n");
const char arena_expression[] = "arena_";
auto field_names = CreateFieldNameMap(desc);
auto upbc_field_names = upb::generator::CreateFieldNameMap(desc);
auto mangler = CreateNameMangler(desc);
// Generate const methods.
OutputIndenter i(output);
for (const auto* field : FieldNumberOrder(desc)) {
std::string resolved_field_name = ResolveFieldName(field, field_names);
std::string resolved_upbc_name =
upb::generator::ResolveFieldName(field, upbc_field_names);
std::string resolved_upbc_name = mangler.ResolveFieldName(field->name());
if (field->is_map()) {
WriteMapAccessorDefinitions(desc, field, resolved_field_name, class_name,
output);

@ -225,8 +225,6 @@ cc_dist_library(
cc_dist_library(
name = "protoc-gen-upb",
dist_deps = [
":protobuf",
":protoc",
":upb",
],
tags = ["manual"],
@ -238,8 +236,6 @@ cc_dist_library(
cc_dist_library(
name = "protoc-gen-upbdefs",
dist_deps = [
":protobuf",
":protoc",
":upb",
],
tags = ["manual"],
@ -251,8 +247,6 @@ cc_dist_library(
cc_dist_library(
name = "protoc-gen-upb_minitable",
dist_deps = [
":protobuf",
":protoc",
":upb",
],
tags = ["manual"],

@ -121,21 +121,13 @@ bootstrap_cc_library(
],
)
bootstrap_cc_library(
cc_library(
name = "names",
srcs = [
"names.cc",
],
hdrs = [
"names.h",
],
bootstrap_deps = [
"//upb/reflection:reflection",
],
srcs = ["names.cc"],
hdrs = ["names.h"],
copts = UPB_DEFAULT_CPPOPTS,
visibility = ["//upb:friend_generators"],
deps = [
"//:protobuf",
"@com_google_absl//absl/base:core_headers",
"@com_google_absl//absl/container:flat_hash_map",
"@com_google_absl//absl/strings",

@ -31,7 +31,6 @@ bootstrap_cc_library(
bootstrap_deps = [
"//upb_generator:common",
"//upb_generator:file_layout",
"//upb_generator:names",
"//upb_generator:plugin",
"//upb_generator:plugin_upb_proto",
"//upb/reflection:descriptor_upb_proto",
@ -45,6 +44,7 @@ bootstrap_cc_library(
"//upb:mini_table",
"//upb:port",
"//upb:wire_reader",
"//upb_generator:names",
"@com_google_absl//absl/base:core_headers",
"@com_google_absl//absl/container:flat_hash_map",
"@com_google_absl//absl/container:flat_hash_set",

@ -412,10 +412,9 @@ void GenerateOneofInHeader(upb::OneofDefPtr oneof, const DefPoolPair& pools,
}
void GenerateHazzer(upb::FieldDefPtr field, const DefPoolPair& pools,
absl::string_view msg_name,
const NameToFieldDefMap& field_names,
absl::string_view msg_name, const NameMangler& mangler,
const Options& options, Output& output) {
std::string resolved_name = ResolveFieldName(field, field_names);
std::string resolved_name = mangler.ResolveFieldName(field.name());
if (field.has_presence()) {
output(
R"cc(
@ -429,15 +428,14 @@ void GenerateHazzer(upb::FieldDefPtr field, const DefPoolPair& pools,
}
void GenerateClear(upb::FieldDefPtr field, const DefPoolPair& pools,
absl::string_view msg_name,
const NameToFieldDefMap& field_names, const Options& options,
Output& output) {
absl::string_view msg_name, const NameMangler& mangler,
const Options& options, Output& output) {
if (field == field.containing_type().map_key() ||
field == field.containing_type().map_value()) {
// Cannot be cleared.
return;
}
std::string resolved_name = ResolveFieldName(field, field_names);
std::string resolved_name = mangler.ResolveFieldName(field.name());
output(
R"cc(
UPB_INLINE void $0_clear_$1($0* msg) {
@ -449,10 +447,9 @@ void GenerateClear(upb::FieldDefPtr field, const DefPoolPair& pools,
}
void GenerateMapGetters(upb::FieldDefPtr field, const DefPoolPair& pools,
absl::string_view msg_name,
const NameToFieldDefMap& field_names,
absl::string_view msg_name, const NameMangler& mangler,
const Options& options, Output& output) {
std::string resolved_name = ResolveFieldName(field, field_names);
std::string resolved_name = mangler.ResolveFieldName(field.name());
output(
R"cc(
UPB_INLINE size_t $0_$1_size(const $0* msg) {
@ -524,8 +521,8 @@ void GenerateMapEntryGetters(upb::FieldDefPtr field, absl::string_view msg_name,
void GenerateRepeatedGetters(upb::FieldDefPtr field, const DefPoolPair& pools,
absl::string_view msg_name,
const NameToFieldDefMap& field_names,
const Options& options, Output& output) {
const NameMangler& mangler, const Options& options,
Output& output) {
// Generate getter returning first item and size.
//
// Example:
@ -546,7 +543,7 @@ void GenerateRepeatedGetters(upb::FieldDefPtr field, const DefPoolPair& pools,
)cc",
CTypeConst(field), // $0
msg_name, // $1
ResolveFieldName(field, field_names), // $2
mangler.ResolveFieldName(field.name()), // $2
FieldInitializerStrong(pools, field, options) // $3
);
// Generate private getter returning array or NULL for immutable and upb_Array
@ -577,7 +574,7 @@ void GenerateRepeatedGetters(upb::FieldDefPtr field, const DefPoolPair& pools,
)cc",
CTypeConst(field), // $0
msg_name, // $1
ResolveFieldName(field, field_names), // $2
mangler.ResolveFieldName(field.name()), // $2
FieldInitializerStrong(pools, field, options), // $3
kRepeatedFieldArrayGetterPostfix, // $4
kRepeatedFieldMutableArrayGetterPostfix // $5
@ -586,9 +583,9 @@ void GenerateRepeatedGetters(upb::FieldDefPtr field, const DefPoolPair& pools,
void GenerateScalarGetters(upb::FieldDefPtr field, const DefPoolPair& pools,
absl::string_view msg_name,
const NameToFieldDefMap& field_names,
const Options& Options, Output& output) {
std::string field_name = ResolveFieldName(field, field_names);
const NameMangler& mangler, const Options& Options,
Output& output) {
std::string field_name = mangler.ResolveFieldName(field.name());
output(
R"cc(
UPB_INLINE $0 $1_$2(const $1* msg) {
@ -605,26 +602,23 @@ void GenerateScalarGetters(upb::FieldDefPtr field, const DefPoolPair& pools,
}
void GenerateGetters(upb::FieldDefPtr field, const DefPoolPair& pools,
absl::string_view msg_name,
const NameToFieldDefMap& field_names,
absl::string_view msg_name, const NameMangler& mangler,
const Options& options, Output& output) {
if (field.IsMap()) {
GenerateMapGetters(field, pools, msg_name, field_names, options, output);
GenerateMapGetters(field, pools, msg_name, mangler, options, output);
} else if (field.containing_type().mapentry()) {
GenerateMapEntryGetters(field, msg_name, output);
} else if (field.IsSequence()) {
GenerateRepeatedGetters(field, pools, msg_name, field_names, options,
output);
GenerateRepeatedGetters(field, pools, msg_name, mangler, options, output);
} else {
GenerateScalarGetters(field, pools, msg_name, field_names, options, output);
GenerateScalarGetters(field, pools, msg_name, mangler, options, output);
}
}
void GenerateMapSetters(upb::FieldDefPtr field, const DefPoolPair& pools,
absl::string_view msg_name,
const NameToFieldDefMap& field_names,
absl::string_view msg_name, const NameMangler& mangler,
const Options& options, Output& output) {
std::string resolved_name = ResolveFieldName(field, field_names);
std::string resolved_name = mangler.ResolveFieldName(field.name());
output(
R"cc(
UPB_INLINE void $0_$1_clear($0* msg) {
@ -674,9 +668,9 @@ void GenerateMapSetters(upb::FieldDefPtr field, const DefPoolPair& pools,
void GenerateRepeatedSetters(upb::FieldDefPtr field, const DefPoolPair& pools,
absl::string_view msg_name,
const NameToFieldDefMap& field_names,
const Options& options, Output& output) {
std::string resolved_name = ResolveFieldName(field, field_names);
const NameMangler& mangler, const Options& options,
Output& output) {
std::string resolved_name = mangler.ResolveFieldName(field.name());
output(
R"cc(
UPB_INLINE $0* $1_mutable_$2($1* msg, size_t* size) {
@ -748,14 +742,14 @@ void GenerateRepeatedSetters(upb::FieldDefPtr field, const DefPoolPair& pools,
void GenerateNonRepeatedSetters(upb::FieldDefPtr field,
const DefPoolPair& pools,
absl::string_view msg_name,
const NameToFieldDefMap& field_names,
const NameMangler& mangler,
const Options& options, Output& output) {
if (field == field.containing_type().map_key()) {
// Key cannot be mutated.
return;
}
std::string field_name = ResolveFieldName(field, field_names);
std::string field_name = mangler.ResolveFieldName(field.name());
if (field == field.containing_type().map_value()) {
output(R"cc(
@ -797,16 +791,14 @@ void GenerateNonRepeatedSetters(upb::FieldDefPtr field,
}
void GenerateSetters(upb::FieldDefPtr field, const DefPoolPair& pools,
absl::string_view msg_name,
const NameToFieldDefMap& field_names,
absl::string_view msg_name, const NameMangler& mangler,
const Options& options, Output& output) {
if (field.IsMap()) {
GenerateMapSetters(field, pools, msg_name, field_names, options, output);
GenerateMapSetters(field, pools, msg_name, mangler, options, output);
} else if (field.IsSequence()) {
GenerateRepeatedSetters(field, pools, msg_name, field_names, options,
output);
GenerateRepeatedSetters(field, pools, msg_name, mangler, options, output);
} else {
GenerateNonRepeatedSetters(field, pools, msg_name, field_names, options,
GenerateNonRepeatedSetters(field, pools, msg_name, mangler, options,
output);
}
}
@ -824,17 +816,17 @@ void GenerateMessageInHeader(upb::MessageDefPtr message,
GenerateOneofInHeader(message.oneof(i), pools, msg_name, options, output);
}
auto field_names = CreateFieldNameMap(message);
NameMangler mangler(GetUpbFields(message));
for (auto field : FieldNumberOrder(message)) {
GenerateClear(field, pools, msg_name, field_names, options, output);
GenerateGetters(field, pools, msg_name, field_names, options, output);
GenerateHazzer(field, pools, msg_name, field_names, options, output);
GenerateClear(field, pools, msg_name, mangler, options, output);
GenerateGetters(field, pools, msg_name, mangler, options, output);
GenerateHazzer(field, pools, msg_name, mangler, options, output);
}
output("\n");
for (auto field : FieldNumberOrder(message)) {
GenerateSetters(field, pools, msg_name, field_names, options, output);
GenerateSetters(field, pools, msg_name, mangler, options, output);
}
output("\n");

@ -27,7 +27,6 @@ bootstrap_cc_library(
bootstrap_deps = [
"//upb_generator:common",
"//upb_generator:file_layout",
"//upb_generator:names",
"//upb_generator:plugin",
"//upb_generator:plugin_upb_proto",
"//upb/reflection:descriptor_upb_proto",
@ -42,6 +41,7 @@ bootstrap_cc_library(
"//upb:port",
"//upb:wire_reader",
"//upb/mini_table:internal",
"//upb_generator:names",
"@com_google_absl//absl/container:flat_hash_map",
"@com_google_absl//absl/container:flat_hash_set",
"@com_google_absl//absl/log:absl_check",

@ -7,108 +7,58 @@
#include "upb_generator/names.h"
#include <array>
#include <cstdint>
#include <string>
#include "absl/container/flat_hash_map.h"
#include "absl/strings/match.h"
#include "absl/strings/str_cat.h"
#include "absl/strings/string_view.h"
#include "google/protobuf/descriptor.h"
#include "upb/reflection/def.hpp"
namespace upb {
namespace generator {
namespace protobuf = ::google::protobuf;
namespace {
// Prefixes used by C code generator for field access.
static constexpr absl::string_view kClearMethodPrefix = "clear_";
static constexpr absl::string_view kSetMethodPrefix = "set_";
static constexpr absl::string_view kHasMethodPrefix = "has_";
static constexpr absl::string_view kDeleteMethodPrefix = "delete_";
static constexpr absl::string_view kAddToRepeatedMethodPrefix = "add_";
static constexpr absl::string_view kResizeArrayMethodPrefix = "resize_";
ABSL_CONST_INIT const absl::string_view kRepeatedFieldArrayGetterPostfix =
"upb_array";
ABSL_CONST_INIT const absl::string_view
kRepeatedFieldMutableArrayGetterPostfix = "mutable_upb_array";
ABSL_CONST_INIT const absl::string_view kMapGetterPostfix = "upb_map";
ABSL_CONST_INIT const absl::string_view kMutableMapGetterPostfix =
"mutable_upb_map";
// One generated-accessor prefix together with the classes of field it can
// collide with.
struct Prefix {
// Prefix text that generated accessors start with, e.g. "clear_".
absl::string_view name;
// Bitmask of FieldClass values. HasConflict() ANDs this against the class of
// the field whose name equals the remainder after the prefix.
uint32_t conflict_set;
};
// List of generated accessor prefixes to check against.
// Example:
// optional repeated string phase = 236;
// optional bool clear_phase = 237;
static constexpr absl::string_view kAccessorPrefixes[] = {
kClearMethodPrefix, kDeleteMethodPrefix, kAddToRepeatedMethodPrefix,
kResizeArrayMethodPrefix, kSetMethodPrefix, kHasMethodPrefix};
// Conflict set with every bit set, so it overlaps every FieldClass value.
constexpr uint32_t kAnyField = UINT32_MAX;
std::string ResolveFieldName(const protobuf::FieldDescriptor* field,
const NameToFieldDescriptorMap& field_names) {
absl::string_view field_name = field->name();
for (const auto prefix : kAccessorPrefixes) {
// If field name starts with a prefix such as clear_ and the proto
// contains a field name with trailing end, depending on type of field
// (repeated, map, message) we have a conflict to resolve.
if (absl::StartsWith(field_name, prefix)) {
auto match = field_names.find(field_name.substr(prefix.size()));
if (match != field_names.end()) {
const auto* candidate = match->second;
if (candidate->is_repeated() || candidate->is_map() ||
(candidate->cpp_type() ==
protobuf::FieldDescriptor::CPPTYPE_STRING &&
prefix == kClearMethodPrefix) ||
prefix == kSetMethodPrefix || prefix == kHasMethodPrefix) {
return absl::StrCat(field_name, "_");
}
}
}
}
return std::string(field_name);
}
// Prefixes used by C code generator for field access, each paired with the
// set of field classes it conflicts with. A field named <prefix><rest> is
// mangled when the same message also has a field named <rest> whose class is
// in the prefix's conflict set. For example, "clear_foo" conflicts only with
// a container or string field "foo", whereas "set_foo" and "has_foo" conflict
// with any field "foo".
static constexpr std::array<Prefix, 6> kPrefixes{
Prefix{"clear_", kContainerField | kStringField},
Prefix{"delete_", kContainerField},
Prefix{"add_", kContainerField},
Prefix{"resize_", kContainerField},
Prefix{"set_", kAnyField},
Prefix{"has_", kAnyField},
};
// Returns field map by name to use for conflict checks.
NameToFieldDescriptorMap CreateFieldNameMap(
const protobuf::Descriptor* message) {
NameToFieldDescriptorMap field_names;
for (int i = 0; i < message->field_count(); i++) {
const protobuf::FieldDescriptor* field = message->field(i);
field_names.emplace(field->name(), field);
bool HasConflict(absl::string_view name,
const absl::flat_hash_map<std::string, FieldClass>& fields) {
for (const auto& prefix : kPrefixes) {
if (!absl::StartsWith(name, prefix.name)) continue;
auto match = fields.find(name.substr(prefix.name.size()));
if (match == fields.end()) continue;
if (prefix.conflict_set & match->second) return true;
}
return field_names;
return false;
}
NameToFieldDefMap CreateFieldNameMap(upb::MessageDefPtr message) {
NameToFieldDefMap field_names;
field_names.reserve(message.field_count());
for (const auto& field : message.fields()) {
field_names.emplace(field.name(), field);
}
return field_names;
}
} // namespace
std::string ResolveFieldName(upb::FieldDefPtr field,
const NameToFieldDefMap& field_names) {
absl::string_view field_name(field.name());
for (absl::string_view prefix : kAccessorPrefixes) {
// If field name starts with a prefix such as clear_ and the proto
// contains a field name with trailing end, depending on type of field
// (repeated, map, message) we have a conflict to resolve.
if (absl::StartsWith(field_name, prefix)) {
auto match = field_names.find(field_name.substr(prefix.size()));
if (match != field_names.end()) {
const auto candidate = match->second;
if (candidate.IsSequence() || candidate.IsMap() ||
(candidate.ctype() == kUpb_CType_String &&
prefix == kClearMethodPrefix) ||
prefix == kSetMethodPrefix || prefix == kHasMethodPrefix) {
return absl::StrCat(field_name, "_");
}
}
NameMangler::NameMangler(
const absl::flat_hash_map<std::string, FieldClass>& fields) {
for (const auto& pair : fields) {
const std::string& field_name = pair.first;
if (HasConflict(field_name, fields)) {
names_.emplace(field_name, absl::StrCat(field_name, "_"));
}
}
return std::string(field_name);
}
} // namespace generator

@ -13,42 +13,75 @@
#include "absl/base/attributes.h"
#include "absl/container/flat_hash_map.h"
#include "absl/strings/string_view.h"
#include "google/protobuf/descriptor.h"
#include "upb/reflection/def.hpp"
namespace upb {
namespace generator {
using NameToFieldDescriptorMap =
absl::flat_hash_map<absl::string_view, const google::protobuf::FieldDescriptor*>;
// Coarse classification of a message field, used when checking whether a
// generated accessor name would collide with another field's accessors.
// Each value is a distinct bit so that several classes can be OR'ed together
// into a single conflict mask.
enum FieldClass {
// Non-container field whose type is string (see GetCppFields/GetUpbFields).
kStringField = 1 << 0,
// Repeated or map field.
kContainerField = 1 << 1,
// Any field that is neither of the above.
kOtherField = 1 << 2,
};
// Returns field name by resolving naming conflicts across
// proto field names (such as clear_ prefixes).
std::string ResolveFieldName(const google::protobuf::FieldDescriptor* field,
const NameToFieldDescriptorMap& field_names);
class NameMangler {
public:
explicit NameMangler(
const absl::flat_hash_map<std::string, FieldClass>& fields);
// Returns field map by name to use for conflict checks.
NameToFieldDescriptorMap CreateFieldNameMap(const google::protobuf::Descriptor* message);
// Returns the name to use for `name` in generated code: the mangled form if
// one was recorded for this field, otherwise `name` unchanged.
std::string ResolveFieldName(absl::string_view name) const {
  const auto found = names_.find(name);
  if (found != names_.end()) {
    return found->second;
  }
  return std::string(name);
}
using NameToFieldDefMap =
absl::flat_hash_map<absl::string_view, upb::FieldDefPtr>;
private:
// Maps field_name -> mangled_name. If a field name is not in the map, it
// is not mangled.
absl::flat_hash_map<std::string, std::string> names_;
};
// Returns field name by resolving naming conflicts across
// proto field names (such as clear_ prefixes).
std::string ResolveFieldName(upb::FieldDefPtr field,
const NameToFieldDefMap& field_names);
// Here we provide functions for building field lists from both C++ and upb
// reflection. They are templated so as to not actually introduce dependencies
// on either C++ or upb.
// Returns field map by name to use for conflict checks.
NameToFieldDefMap CreateFieldNameMap(upb::MessageDefPtr message);
// Builds the field-name -> FieldClass table for a message described through
// C++ reflection. `T` only needs the members used below (field_count(),
// field(i), and on each field: is_repeated(), is_map(), cpp_type(),
// CPPTYPE_STRING, name()), so this header does not have to include the C++
// descriptor headers.
template <class T>
absl::flat_hash_map<std::string, FieldClass> GetCppFields(const T* descriptor) {
  absl::flat_hash_map<std::string, FieldClass> result;
  for (int index = 0; index < descriptor->field_count(); ++index) {
    const auto* f = descriptor->field(index);
    // Classify first, then insert once.
    FieldClass klass = kOtherField;
    if (f->is_repeated() || f->is_map()) {
      klass = kContainerField;
    } else if (f->cpp_type() == f->CPPTYPE_STRING) {
      klass = kStringField;
    }
    result.emplace(f->name(), klass);
  }
  return result;
}
// Private array getter name postfix for repeated fields.
ABSL_CONST_INIT extern const absl::string_view kRepeatedFieldArrayGetterPostfix;
ABSL_CONST_INIT extern const absl::string_view
kRepeatedFieldMutableArrayGetterPostfix;
// Builds the field-name -> FieldClass table for a message described through
// upb reflection. `T` only needs a fields() range whose elements provide
// IsSequence(), IsMap(), ctype(), name() and a nested CType, so this header
// does not have to include the upb reflection headers.
template <class T>
absl::flat_hash_map<std::string, FieldClass> GetUpbFields(const T& msg_def) {
  absl::flat_hash_map<std::string, FieldClass> result;
  // The loop variable is taken by value on purpose: decltype(f) must be a
  // non-reference type for the nested CType lookup below to be well-formed.
  for (const auto f : msg_def.fields()) {
    FieldClass klass = kOtherField;
    if (f.IsSequence() || f.IsMap()) {
      klass = kContainerField;
    } else if (f.ctype() == decltype(f)::CType::kUpb_CType_String) {
      klass = kStringField;
    }
    result.emplace(f.name(), klass);
  }
  return result;
}
// Private getter name postfix for map fields.
ABSL_CONST_INIT extern const absl::string_view kMapGetterPostfix;
ABSL_CONST_INIT extern const absl::string_view kMutableMapGetterPostfix;
// Private array getter name postfixes for repeated fields (const and mutable
// variants).
ABSL_CONST_INIT const absl::string_view kRepeatedFieldArrayGetterPostfix =
"upb_array";
ABSL_CONST_INIT const absl::string_view
kRepeatedFieldMutableArrayGetterPostfix = "mutable_upb_array";
// Private getter name postfixes for map fields (const and mutable variants).
ABSL_CONST_INIT const absl::string_view kMapGetterPostfix = "upb_map";
ABSL_CONST_INIT const absl::string_view kMutableMapGetterPostfix =
"mutable_upb_map";
} // namespace generator
} // namespace upb

Loading…
Cancel
Save