Automated rollback of commit 1e67de4ed5.

PiperOrigin-RevId: 526143586
pull/12504/head
Protobuf Team Bot 2 years ago committed by Copybara-Service
parent 81106b10dc
commit 908de8d74a
  1. 131
      src/google/protobuf/generated_enum_util.cc
  2. 58
      src/google/protobuf/generated_enum_util.h
  3. 134
      src/google/protobuf/generated_enum_util_test.cc

@ -31,16 +31,9 @@
#include "google/protobuf/generated_enum_util.h" #include "google/protobuf/generated_enum_util.h"
#include <algorithm> #include <algorithm>
#include <utility>
#include <vector>
#include "absl/log/absl_check.h"
#include "absl/types/optional.h"
#include "google/protobuf/generated_message_util.h" #include "google/protobuf/generated_message_util.h"
// Must be included last.
#include "google/protobuf/port_def.inc"
namespace google { namespace google {
namespace protobuf { namespace protobuf {
namespace internal { namespace internal {
@ -97,130 +90,6 @@ bool InitializeEnumStrings(
return true; return true;
} }
// Non-inline wrapper around ValidateEnumInlined: returns that function's
// result for `value` and the table `data` unchanged.
bool ValidateEnum(int value, const uint16_t* data) {
  const bool is_valid = ValidateEnumInlined(value, data);
  return is_valid;
}
// Copies `input` into `output` arranged as an implicit binary tree: the node
// with 1-based index k occupies output slots [2*(k-1), 2*(k-1)+1] and its
// children are the nodes 2k and 2k+1. The tree is filled by an in-order walk
// that consumes `input` front to back, so `input` is expected to be sorted
// ascending for the result to be searchable as a binary search tree.
//
// Each int32_t is stored as two uint16_t: low 16 bits first, then the high 16
// bits. `output` must provide at least 2 * input.size() elements.
struct EytzingerLayoutSorter {
  // Values to place.
  absl::Span<const int32_t> input;
  // Destination buffer (two slots per input value).
  absl::Span<uint16_t> output;
  // Index of the next element of `input` to place. Explicitly initialized so
  // the sorter starts at the first element however the struct is created.
  int i = 0;

  // Fills the subtree rooted at the 1-based node `output_index`.
  void Sort(size_t output_index = 1) {
    if (output_index > input.size()) return;

    // Left subtree receives the smaller values first.
    Sort(2 * output_index);

    // Split through an unsigned type so negative values do not rely on the
    // behavior of right-shifting a negative signed integer.
    const uint32_t bits = static_cast<uint32_t>(input[i]);
    output[output_index * 2 - 2] = static_cast<uint16_t>(bits);
    output[output_index * 2 - 1] = static_cast<uint16_t>(bits >> 16);
    ++i;

    Sort(2 * output_index + 1);
  }
};
std::vector<uint16_t> GenerateEnumData(const std::vector<int32_t>& values) {
const auto sorted_and_unique = [&] {
for (size_t i = 0; i + 1 < values.size(); ++i) {
if (values[i] >= values[i + 1]) return false;
}
return true;
};
ABSL_DCHECK(sorted_and_unique());
std::vector<int32_t> fallback_values_too_large, fallback_values_after_bitmap;
std::vector<uint16_t> bitmap_values;
absl::optional<int16_t> start_sequence;
uint16_t sequence_length = 0;
for (int32_t v : values) {
if (static_cast<int16_t>(v) != v) {
fallback_values_too_large.push_back(v);
continue;
}
// If we don't yet have a sequence, start it.
if (!start_sequence.has_value()) {
start_sequence = v;
sequence_length = 1;
continue;
}
// If we can extend the sequence, do so.
if (v == *start_sequence + sequence_length && sequence_length < 0xFFFF) {
++sequence_length;
continue;
}
// We adjust the bitmap values to be relative to the end of the sequence.
const auto adjust = [&](int32_t v) -> uint32_t {
// Cast to int64_t first to avoid overflow. The result is guaranteed to be
// positive and fit in uint32_t.
return static_cast<int64_t>(v) - *start_sequence - sequence_length;
};
const uint32_t adjusted = adjust(v);
const auto add_bit = [&](uint32_t bit) {
bitmap_values[bit / 16] |= 1 << (bit % 16);
};
// If we can fit it on the already allocated bitmap, do so.
if (adjusted < 16 * bitmap_values.size()) {
// We can fit it in the existing bitmap.
ABSL_DCHECK_EQ(fallback_values_after_bitmap.size(), 0);
add_bit(adjusted);
continue;
}
// We can't fit in the sequence and we can't fit in the current bitmap.
// Evaluate if it is better to add to fallback, or to collapse all the
// fallback values after the bitmap into the bitmap.
const size_t cost_if_fallback =
sizeof(uint16_t) * bitmap_values.size() +
sizeof(int32_t) * (1 + fallback_values_after_bitmap.size());
const size_t rounded_bitmap_size = (adjusted + 1 + 15) / 16;
const size_t cost_if_collapse = sizeof(uint16_t) * rounded_bitmap_size;
if (cost_if_collapse <= cost_if_fallback) {
// Collapse the existing values, and add the new one.
ABSL_DCHECK_GT(rounded_bitmap_size, bitmap_values.size());
bitmap_values.resize(rounded_bitmap_size);
for (int32_t to_collapse : fallback_values_after_bitmap) {
add_bit(adjust(to_collapse));
}
fallback_values_after_bitmap.clear();
add_bit(adjusted);
} else {
fallback_values_after_bitmap.push_back(v);
}
}
std::vector<int32_t> fallback_values;
if (fallback_values_after_bitmap.empty()) {
fallback_values = std::move(fallback_values_too_large);
} else if (fallback_values_too_large.empty()) {
fallback_values = std::move(fallback_values_after_bitmap);
} else {
fallback_values.resize(fallback_values_too_large.size() +
fallback_values_after_bitmap.size());
std::merge(fallback_values_too_large.begin(),
fallback_values_too_large.end(),
fallback_values_after_bitmap.begin(),
fallback_values_after_bitmap.end(), &fallback_values[0]);
}
std::vector<uint16_t> output(
4 /* seq start + seq len + bitmap len + ordered len */ +
bitmap_values.size() + 2 * fallback_values.size());
output[0] = start_sequence.value_or(0);
output[1] = sequence_length;
output[2] = 16 * bitmap_values.size();
output[3] = fallback_values.size();
auto sorted_start =
std::copy(bitmap_values.begin(), bitmap_values.end(), output.data() + 4);
EytzingerLayoutSorter{
fallback_values, absl::MakeSpan(sorted_start, 2 * fallback_values.size())}
.Sort();
return output;
}
} // namespace internal } // namespace internal
} // namespace protobuf } // namespace protobuf
} // namespace google } // namespace google

@ -31,10 +31,7 @@
#ifndef GOOGLE_PROTOBUF_GENERATED_ENUM_UTIL_H__ #ifndef GOOGLE_PROTOBUF_GENERATED_ENUM_UTIL_H__
#define GOOGLE_PROTOBUF_GENERATED_ENUM_UTIL_H__ #define GOOGLE_PROTOBUF_GENERATED_ENUM_UTIL_H__
#include <cstdint>
#include <string>
#include <type_traits> #include <type_traits>
#include <vector>
#include "absl/strings/string_view.h" #include "absl/strings/string_view.h"
#include "google/protobuf/message_lite.h" #include "google/protobuf/message_lite.h"
@ -78,61 +75,6 @@ PROTOBUF_EXPORT bool InitializeEnumStrings(
const EnumEntry* enums, const int* sorted_indices, size_t size, const EnumEntry* enums, const int* sorted_indices, size_t size,
internal::ExplicitlyConstructed<std::string>* enum_strings); internal::ExplicitlyConstructed<std::string>* enum_strings);
// The enum validation data is an array of uint16_t split in 3 parts that
// follow a 4-element header:
// - A dense sequence of consecutive values, described by start + length.
// - A variable size presence bitmap. Bit `i` (counting from the least
//   significant bit of the first bitmap word) stands for the value
//   `sequence start + sequence length + i`, i.e. the bitmap starts right
//   after the end of the sequence.
// - A variable size set of int32_t for everything else. Each int32_t is
//   stored as 2 uint16_t, low half first. The entries are arranged as an
//   implicit binary search tree (the children of entry `p` are entries
//   `2p + 1` and `2p + 2`), not as a plain ascending list.
//
// The values are as follows:
//
// 0 - [ sequence start (int16_t) ]
// 1 - [ sequence length (uint16_t) ]
// 2 - [ bitmap length in bits (uint16_t) ]
// 3 - [ number of int32_t entries (uint16_t) ]
// x - [ variable length bitmap ]
// y - [ variable length of int32_t values ]
//
// Returns true if `value` is present in the table `data`.
PROTOBUF_EXPORT bool ValidateEnum(int value, const uint16_t* data);
// Builds a table in the format above from `values`.
PROTOBUF_EXPORT std::vector<uint16_t> GenerateEnumData(
const std::vector<int32_t>& values);
// Returns true if `value` is present in the validation table `data`.
//
// `data` holds a 4-element header (sequence start, sequence length, bitmap
// length in bits, number of int32_t entries) followed by the bitmap words and
// then the int32_t entries. The three sections are probed in that order.
inline PROTOBUF_ALWAYS_INLINE bool ValidateEnumInlined(int value,
const uint16_t* data) {
// Header slot 0 carries the sequence start as an int16_t bit pattern.
const int16_t min_seq = static_cast<int16_t>(data[0]);
const uint16_t length_seq = data[1];
// Offset of `value` from the sequence start, computed in unsigned 64-bit
// arithmetic: values below the start wrap around to very large numbers and
// therefore fail both range checks below.
uint64_t adjusted =
static_cast<uint64_t>(static_cast<int64_t>(value)) - min_seq;
if (PROTOBUF_PREDICT_TRUE(adjusted < length_seq)) {
return true;
}
const uint16_t length_bitmap = data[2];
// Re-base the offset so that 0 is the first value after the sequence.
adjusted -= length_seq;
if (PROTOBUF_PREDICT_TRUE(adjusted < length_bitmap)) {
// Skip the header; `data` now points at the first bitmap word.
data += 4;
#if defined(__x86_64__) && defined(__GNUC__)
// Bit test via the x86 `bt` instruction; the answer is read back from the
// carry flag through the "=@ccc" flag output operand.
bool result;
asm("bt %1, %2" : "=@ccc"(result) : "r"(adjusted), "m"(*data));
return result;
#else
// Portable form: select the 16-bit word, then the bit within it.
return data[adjusted / 16] & (1 << (adjusted % 16));
#endif
}
const uint16_t num_sorted = data[3];
// Skip the header and the bitmap words to reach the int32_t entries.
data += 4 + length_bitmap / 16;
// Descend the implicit binary tree: entry `pos` has its children at
// 2 * pos + 1 (smaller values) and 2 * pos + 2 (larger values).
size_t pos = 0;
while (pos < num_sorted) {
// Each entry occupies two uint16_t: low half in it[0], high half in it[1].
auto it = data + pos * 2;
auto sample =
(static_cast<int32_t>(it[1]) << 16) | static_cast<int32_t>(it[0]);
if (sample == value) return true;
pos = 2 * pos + (sample > value ? 1 : 2);
}
// Walked off the tree without a match.
return false;
}
} // namespace internal } // namespace internal
} // namespace protobuf } // namespace protobuf
} // namespace google } // namespace google

@ -1,134 +0,0 @@
// Protocol Buffers - Google's data interchange format
// Copyright 2023 Google Inc. All rights reserved.
// https://developers.google.com/protocol-buffers/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "google/protobuf/generated_enum_util.h"
#include <algorithm>
#include <cstdint>
#include <limits>
#include <utility>
#include <vector>
#include <gmock/gmock.h>
#include <gtest/gtest.h>
#include "absl/container/flat_hash_set.h"
#include "absl/types/span.h"
// Must be included last.
#include "google/protobuf/port_def.inc"
using testing::ElementsAre;
using testing::ElementsAreArray;
using testing::UnorderedElementsAreArray;
namespace google {
namespace protobuf {
namespace internal {
namespace {
// Adds to `out` every integer in the closed range [min, max] that
// ValidateEnum accepts for the table `data`.
void ValidValues(absl::Span<const uint16_t> data, int min, int max,
                 absl::flat_hash_set<int>& out) {
  // The counter is 64 bits wide so the loop also terminates when `max` is the
  // largest int.
  int64_t candidate = min;
  while (candidate <= max) {
    if (ValidateEnum(candidate, data.begin())) {
      out.insert(candidate);
    }
    ++candidate;
  }
}
// Returns, in ascending order, every integer in [min, max] that ValidateEnum
// accepts for the table `data`.
std::vector<int32_t> ValidValues(absl::Span<const uint16_t> data, int min,
                                 int max) {
  absl::flat_hash_set<int> accepted;
  ValidValues(data, min, max, accepted);

  std::vector<int32_t> ordered;
  ordered.assign(accepted.begin(), accepted.end());
  std::sort(ordered.begin(), ordered.end());
  return ordered;
}
// Hand-written tables that use only the dense sequence section. Each table is
// just the header {sequence start, sequence length, bitmap bits, entry count}.
TEST(ValidateEnumTest, SequentialRangeTest) {
// Zero-length sequence: nothing is valid.
EXPECT_THAT(ValidValues({0, 0, 0, 0}, -100, 100), ElementsAre());
EXPECT_THAT(ValidValues({5, 3, 0, 0}, -100, 100), ElementsAre(5, 6, 7));
// A negative start is stored as its uint16_t bit pattern.
EXPECT_THAT(ValidValues({static_cast<uint16_t>(-2), 10, 0, 0}, -100, 100),
ElementsAre(-2, -1, 0, 1, 2, 3, 4, 5, 6, 7));
}
// Hand-written tables with an empty sequence and a bitmap: the header is
// followed by the bitmap words, and bit `i` stands for the value `i` here.
TEST(ValidateEnumTest, BitmapRangeTest) {
// One bitmap word with bits 0, 2, 4, 6, 7 and 10 set.
EXPECT_THAT(ValidValues({0, 0, 16, 0, 0b10011010101}, -100, 100),
ElementsAre(0, 2, 4, 6, 7, 10));
// Three bitmap words; each later word covers the next 16 values.
EXPECT_THAT(ValidValues({0, 0, 48, 0, 1 << 4, 1 << 5, 1 << 6}, -100, 100),
ElementsAre(4, 16 + 5, 32 + 6));
}
// Consecutive values must be encoded as a dense sequence only: the expected
// output is the bare header {start, length, 0 bitmap bits, 0 entries}.
TEST(ValidateEnumTest, GenerateEnumDataSequential) {
EXPECT_THAT(GenerateEnumData({0, 1, 2, 3}), ElementsAre(0, 4, 0, 0));
EXPECT_THAT(GenerateEnumData({-2, -1, 0, 1, 2, 3}),
ElementsAre(static_cast<uint16_t>(-2), 6, 0, 0));
}
void TestRoundTrip(const std::vector<int32_t>& values) {
auto encoded = GenerateEnumData(values);
absl::flat_hash_set<int> s;
// Add a few test values in case `values` is empty.
ValidValues(encoded, -100, 100, s);
// We look at a few values around the expected ones.
// We could in theory test the whole int32_t domain, but that takes too long
// to run.
for (int32_t v : values) {
int32_t min =
std::max(static_cast<int64_t>(v) - 100,
static_cast<int64_t>(std::numeric_limits<int32_t>::min()));
int32_t max =
std::min(static_cast<int64_t>(v) + 100,
static_cast<int64_t>(std::numeric_limits<int32_t>::max()));
ValidValues(encoded, min, max, s);
}
EXPECT_THAT(s, UnorderedElementsAreArray(values))
<< testing::PrintToString(encoded);
}
// Checks one exact encoding that needs a bitmap, then round-trips several
// value sets through GenerateEnumData and ValidateEnum.
TEST(ValidateEnumTest, GenerateEnumDataBitmap) {
// 0, 1, 2 form the sequence (start 0, length 3); 4, 8 and 16 become bits
// 1, 5 and 13 of a single bitmap word, counted from the value 3.
EXPECT_THAT(GenerateEnumData({0, 1, 2, 4, 8, 16}),
ElementsAre(0, 3, 16, 0, 0b10000000100010));
TestRoundTrip({});
TestRoundTrip({0, 1, 2, 4, 8, 16});
TestRoundTrip({0, 1, 2, 4, 8, 16, 32, 64, 128, 256});
TestRoundTrip({10000, 10001, 10002, 10004, 10006, 10008, 10010});
// Includes values outside the int16_t range, which can only be stored in the
// int32_t entry section.
TestRoundTrip({std::numeric_limits<int32_t>::min(), -123123, -123, 213,
213213, std::numeric_limits<int32_t>::max()});
}
} // namespace
} // namespace internal
} // namespace protobuf
} // namespace google
#include "google/protobuf/port_undef.inc"
Loading…
Cancel
Save