Optimize TDP table generation:

- For enums whose values are already defined sequentially, use the existing info from the descriptor.
- For other enums, use a bit set to find the unique values instead of a vector + sort + unique. This is much less work, and most of the time the bit set can live on the stack instead of the heap.

PiperOrigin-RevId: 623613446
pull/16462/head
Protobuf Team Bot 8 months ago committed by Copybara-Service
parent 8433cdc1e3
commit 778cae4102
  1. 1
      src/google/protobuf/BUILD.bazel
  2. 7
      src/google/protobuf/descriptor.h
  3. 83
      src/google/protobuf/generated_message_tctable_gen.cc

@ -626,6 +626,7 @@ cc_library(
"@com_google_absl//absl/base:core_headers",
"@com_google_absl//absl/base:dynamic_annotations",
"@com_google_absl//absl/container:btree",
"@com_google_absl//absl/container:fixed_array",
"@com_google_absl//absl/container:flat_hash_map",
"@com_google_absl//absl/container:flat_hash_set",
"@com_google_absl//absl/functional:any_invocable",

@ -284,6 +284,8 @@ class PROTOBUF_EXPORT InternalFeatureHelper {
PROTOBUF_EXPORT absl::string_view ShortEditionName(Edition edition);
bool IsEnumFullySequential(const EnumDescriptor* enum_desc);
} // namespace internal
// Provide an Abseil formatter for edition names.
@ -1387,6 +1389,7 @@ class PROTOBUF_EXPORT EnumDescriptor : private internal::SymbolBase {
private:
friend class Symbol;
friend bool internal::IsEnumFullySequential(const EnumDescriptor* enum_desc);
typedef EnumOptions OptionsType;
// Allows access to GetLocationPath for annotations.
@ -2797,6 +2800,10 @@ inline const FileDescriptor* FileDescriptor::weak_dependency(int index) const {
namespace internal {
// Returns true when the descriptor's `sequential_value_limit_` equals the index
// of the enum's last declared value (`value_count() - 1`).
// NOTE(review): presumably `sequential_value_limit_` is the last index of the
// leading run of sequentially numbered values, so equality means every value
// is sequential in declaration order -- confirm against EnumDescriptor.
inline bool IsEnumFullySequential(const EnumDescriptor* enum_desc) {
  const int last_index = enum_desc->value_count() - 1;
  return enum_desc->sequential_value_limit_ == last_index;
}
// FieldRange(desc) provides an iterable range for the fields of a
// descriptor type, appropriate for range-for loops.

@ -15,6 +15,7 @@
#include <utility>
#include <vector>
#include "absl/container/fixed_array.h"
#include "absl/log/absl_check.h"
#include "absl/numeric/bits.h"
#include "absl/strings/str_cat.h"
@ -44,31 +45,71 @@ bool TreatEnumAsInt(const FieldDescriptor* field) {
field->containing_type()->map_value() == field);
}
// Stores an enum validation range into the compact output fields.
//
// `start_value` is the first number of the range and `size_value` is how many
// consecutive numbers it covers. The range is accepted only if `start_value`
// survives a round trip through int16_t and `size_value` survives a round trip
// through uint16_t (so negative or oversized sizes are rejected).
//
// Returns true and writes `start` / `size` on success. Returns false and
// leaves both outputs untouched when either value is not representable.
bool SetEnumValidationRange(int start_value, int64_t size_value, int16_t& start,
                            uint16_t& size) {
  if (static_cast<int16_t>(start_value) != start_value) {
    return false;
  }
  if (static_cast<uint16_t>(size_value) != size_value) {
    return false;
  }
  // Both conversions were verified to be lossless above.
  start = static_cast<int16_t>(start_value);
  size = static_cast<uint16_t>(size_value);
  return true;
}
bool GetEnumValidationRangeSlow(const EnumDescriptor* enum_type, int16_t& start,
uint16_t& size) {
const auto val = [&](int index) { return enum_type->value(index)->number(); };
int min = val(0);
int max = min;
for (int i = 1, N = static_cast<int>(enum_type->value_count()); i < N; ++i) {
min = std::min(min, val(i));
max = std::max(max, val(i));
}
// int64 because max-min can overflow int.
int64_t range = static_cast<int64_t>(max) - static_cast<int64_t>(min) + 1;
if (enum_type->value_count() < range) {
// There are not enough values to fill the range. Exit early.
return false;
}
if (!SetEnumValidationRange(min, range, start, size)) {
// Don't even bother on checking for a dense range if we can't represent the
// min/max in the output.
return false;
}
absl::FixedArray<uint64_t> array((range + 63) / 64);
array.fill(0);
int unique_count = 0;
for (int i = 0, N = static_cast<int>(enum_type->value_count()); i < N; ++i) {
size_t index = val(i) - min;
uint64_t& v = array[index / 64];
size_t bit_pos = index % 64;
unique_count += (v & (uint64_t{1} << bit_pos)) == 0;
v |= uint64_t{1} << bit_pos;
}
return unique_count == range;
}
// Computes the validation range for `enum_type`, if it has one.
//
// When IsEnumFullySequential() reports true, the range is taken directly from
// the first value's number and the value count. Otherwise the values may still
// cover a dense range in some other declaration order, so the slower scan in
// GetEnumValidationRangeSlow() decides.
//
// Returns true when `start` / `size` describe the range.
bool GetEnumValidationRange(const EnumDescriptor* enum_type, int16_t& start,
                            uint16_t& size) {
  if (IsEnumFullySequential(enum_type)) {
    const int first_number = enum_type->value(0)->number();
    return SetEnumValidationRange(first_number, enum_type->value_count(), start,
                                  size);
  }
  // Labels are not sequential in declaration order; try the slower approach.
  return GetEnumValidationRangeSlow(enum_type, start, size);
}
enum class EnumRangeInfo {

Loading…
Cancel
Save