Reduce stack usage on recursion by making all error generation lazy on

out-of-line functions. This avoids local std::string temporaries and alike.

It also speeds up code because we delay constructing the errors via StrCat
until an error has actually occurred.

PiperOrigin-RevId: 579269516
pull/14638/head
Protobuf Team Bot 1 year ago committed by Copybara-Service
parent af75cb31f4
commit 70cc55aeb2
  1. 107
      src/google/protobuf/compiler/parser.cc
  2. 62
      src/google/protobuf/compiler/parser.h

@ -201,7 +201,7 @@ bool Parser::TryConsume(absl::string_view text) {
}
}
bool Parser::Consume(absl::string_view text, absl::string_view error) {
bool Parser::Consume(absl::string_view text, ErrorMaker error) {
if (TryConsume(text)) {
return true;
} else {
@ -211,10 +211,11 @@ bool Parser::Consume(absl::string_view text, absl::string_view error) {
}
bool Parser::Consume(absl::string_view text) {
return Consume(text, absl::StrCat("Expected \"", text, "\"."));
return Consume(text,
[&] { return absl::StrCat("Expected \"", text, "\"."); });
}
bool Parser::ConsumeIdentifier(std::string* output, absl::string_view error) {
bool Parser::ConsumeIdentifier(std::string* output, ErrorMaker error) {
if (LookingAtType(io::Tokenizer::TYPE_IDENTIFIER)) {
*output = input_->current().text;
input_->Next();
@ -225,7 +226,7 @@ bool Parser::ConsumeIdentifier(std::string* output, absl::string_view error) {
}
}
bool Parser::ConsumeInteger(int* output, absl::string_view error) {
bool Parser::ConsumeInteger(int* output, ErrorMaker error) {
if (LookingAtType(io::Tokenizer::TYPE_INTEGER)) {
uint64_t value = 0;
if (!io::Tokenizer::ParseInteger(input_->current().text,
@ -243,7 +244,7 @@ bool Parser::ConsumeInteger(int* output, absl::string_view error) {
}
}
bool Parser::ConsumeSignedInteger(int* output, absl::string_view error) {
bool Parser::ConsumeSignedInteger(int* output, ErrorMaker error) {
bool is_negative = false;
uint64_t max_value = std::numeric_limits<int32_t>::max();
if (TryConsume("-")) {
@ -258,7 +259,7 @@ bool Parser::ConsumeSignedInteger(int* output, absl::string_view error) {
}
bool Parser::ConsumeInteger64(uint64_t max_value, uint64_t* output,
absl::string_view error) {
ErrorMaker error) {
if (LookingAtType(io::Tokenizer::TYPE_INTEGER)) {
if (!io::Tokenizer::ParseInteger(input_->current().text, max_value,
output)) {
@ -283,7 +284,7 @@ bool Parser::TryConsumeInteger64(uint64_t max_value, uint64_t* output) {
return false;
}
bool Parser::ConsumeNumber(double* output, absl::string_view error) {
bool Parser::ConsumeNumber(double* output, ErrorMaker error) {
if (LookingAtType(io::Tokenizer::TYPE_FLOAT)) {
*output = io::Tokenizer::ParseFloat(input_->current().text);
input_->Next();
@ -320,7 +321,7 @@ bool Parser::ConsumeNumber(double* output, absl::string_view error) {
}
}
bool Parser::ConsumeString(std::string* output, absl::string_view error) {
bool Parser::ConsumeString(std::string* output, ErrorMaker error) {
if (LookingAtType(io::Tokenizer::TYPE_STRING)) {
io::Tokenizer::ParseString(input_->current().text, output);
input_->Next();
@ -372,32 +373,34 @@ bool Parser::ConsumeEndOfDeclaration(absl::string_view text,
if (TryConsumeEndOfDeclaration(text, location)) {
return true;
} else {
RecordError(absl::StrCat("Expected \"", text, "\"."));
RecordError([&] { return absl::StrCat("Expected \"", text, "\"."); });
return false;
}
}
// -------------------------------------------------------------------
void Parser::RecordError(int line, int column, absl::string_view error) {
void Parser::RecordError(int line, int column, ErrorMaker error) {
if (error_collector_ != nullptr) {
error_collector_->RecordError(line, column, error);
error_collector_->RecordError(line, column, error.get());
}
had_errors_ = true;
}
void Parser::RecordError(absl::string_view error) {
void Parser::RecordError(ErrorMaker error) {
RecordError(input_->current().line, input_->current().column, error);
}
void Parser::RecordWarning(int line, int column, absl::string_view warning) {
void Parser::RecordWarning(int line, int column, ErrorMaker error) {
if (error_collector_ != nullptr) {
error_collector_->RecordWarning(line, column, warning);
error_collector_->RecordWarning(line, column, error.get());
}
}
void Parser::RecordWarning(absl::string_view warning) {
RecordWarning(input_->current().line, input_->current().column, warning);
// Invokes error_collector_->RecordWarning() with the line and column number
// of the current token.
void Parser::RecordWarning(ErrorMaker error) {
RecordWarning(input_->current().line, input_->current().column, error);
}
// -------------------------------------------------------------------
@ -590,12 +593,13 @@ bool Parser::ValidateEnum(const EnumDescriptorProto* proto) {
}
if (has_allow_alias && !allow_alias) {
std::string error = absl::StrCat(
// This needlessly clutters declarations with nops.
RecordError([=] {
return absl::StrCat(
"\"", proto->name(),
"\" declares 'option allow_alias = false;' which has no effect. "
"Please remove the declaration.");
// This needlessly clutters declarations with nops.
RecordError(error);
});
return false;
}
@ -611,14 +615,15 @@ bool Parser::ValidateEnum(const EnumDescriptorProto* proto) {
}
}
if (allow_alias && !has_duplicates) {
std::string error = absl::StrCat(
// Generate an error if an enum declares support for duplicate enum values
// and does not use it protect future authors.
RecordError([=] {
return absl::StrCat(
"\"", proto->name(),
"\" declares support for enum aliases but no enum values share field "
"numbers. Please remove the unnecessary 'option allow_alias = true;' "
"declaration.");
// Generate an error if an enum declares support for duplicate enum values
// and does not use it protect future authors.
RecordError(error);
});
return false;
}
@ -627,9 +632,13 @@ bool Parser::ValidateEnum(const EnumDescriptorProto* proto) {
if (!allow_alias) {
for (const auto& enum_value : proto->value()) {
if (!IsUpperUnderscore(enum_value.name())) {
RecordWarning(absl::StrCat(
"Enum constant should be in UPPER_CASE. Found: ", enum_value.name(),
". See https://developers.google.com/protocol-buffers/docs/style"));
RecordWarning([&] {
return absl::StrCat(
"Enum constant should be in UPPER_CASE. Found: ",
enum_value.name(),
". See "
"https://developers.google.com/protocol-buffers/docs/style");
});
}
}
}
@ -732,8 +741,9 @@ bool Parser::ParseSyntaxIdentifier(const FileDescriptorProto* file,
if (has_edition) {
if (!Edition_Parse(absl::StrCat("EDITION_", syntax), &edition_) ||
edition_ < Edition::EDITION_2023) {
RecordError(syntax_token.line, syntax_token.column,
absl::StrCat("Unknown edition \"", syntax, "\"."));
RecordError(syntax_token.line, syntax_token.column, [&] {
return absl::StrCat("Unknown edition \"", syntax, "\".");
});
return false;
}
syntax_identifier_ = "editions";
@ -743,10 +753,11 @@ bool Parser::ParseSyntaxIdentifier(const FileDescriptorProto* file,
syntax_identifier_ = syntax;
if (syntax != "proto2" && syntax != "proto3" &&
!stop_after_syntax_identifier_) {
RecordError(syntax_token.line, syntax_token.column,
absl::StrCat("Unrecognized syntax identifier \"", syntax,
RecordError(syntax_token.line, syntax_token.column, [&] {
return absl::StrCat("Unrecognized syntax identifier \"", syntax,
"\". This parser "
"only recognizes \"proto2\" and \"proto3\"."));
"only recognizes \"proto2\" and \"proto3\".");
});
return false;
}
@ -847,9 +858,12 @@ bool Parser::ParseMessageDefinition(
DescriptorPool::ErrorCollector::NAME);
DO(ConsumeIdentifier(message->mutable_name(), "Expected message name."));
if (!IsUpperCamelCase(message->name())) {
RecordWarning(absl::StrCat(
"Message name should be in UpperCamelCase. Found: ", message->name(),
". See https://developers.google.com/protocol-buffers/docs/style"));
RecordWarning([=] {
return absl::StrCat(
"Message name should be in UpperCamelCase. Found: ",
message->name(),
". See https://developers.google.com/protocol-buffers/docs/style");
});
}
}
DO(ParseMessageBlock(message, message_location, containing_file));
@ -1080,15 +1094,19 @@ bool Parser::ParseMessageFieldNoLabel(
DO(ConsumeIdentifier(field->mutable_name(), "Expected field name."));
if (!IsLowerUnderscore(field->name())) {
RecordWarning(absl::StrCat(
RecordWarning([=] {
return absl::StrCat(
"Field name should be lowercase. Found: ", field->name(),
". See: https://developers.google.com/protocol-buffers/docs/style"));
". See: https://developers.google.com/protocol-buffers/docs/style");
});
}
if (IsNumberFollowUnderscore(field->name())) {
RecordWarning(absl::StrCat(
RecordWarning([=] {
return absl::StrCat(
"Number should not come right after an underscore. Found: ",
field->name(),
". See: https://developers.google.com/protocol-buffers/docs/style"));
". See: https://developers.google.com/protocol-buffers/docs/style");
});
}
}
DO(Consume("=", "Missing field number."));
@ -1819,18 +1837,17 @@ bool Parser::ParseReserved(DescriptorProto* message,
}
}
bool Parser::ParseReservedName(std::string* name,
absl::string_view error_message) {
bool Parser::ParseReservedName(std::string* name, ErrorMaker error_message) {
// Capture the position of the token, in case we have to report an
// error after it is consumed.
int line = input_->current().line;
int col = input_->current().column;
DO(ConsumeString(name, error_message));
if (!io::Tokenizer::IsIdentifier(*name)) {
RecordWarning(
line, col,
absl::StrFormat("Reserved name \"%s\" is not a valid identifier.",
*name));
RecordWarning(line, col, [=] {
return absl::StrFormat("Reserved name \"%s\" is not a valid identifier.",
*name);
});
}
return true;
}
@ -1847,7 +1864,7 @@ bool Parser::ParseReservedNames(DescriptorProto* message,
}
bool Parser::ParseReservedIdentifier(std::string* name,
absl::string_view error_message) {
ErrorMaker error_message) {
DO(ConsumeIdentifier(name, error_message));
return true;
}

@ -16,6 +16,7 @@
#include <cstdint>
#include <string>
#include <type_traits>
#include <utility>
#include "absl/container/flat_hash_map.h"
@ -139,33 +140,65 @@ class PROTOBUF_EXPORT Parser {
// true. Otherwise, return false without logging an error.
bool TryConsume(absl::string_view text);
// In the following functions the error is passed as a lazily evaluated
// callable to reduce stack usage and delay the actual execution of the error
// statement.
// Super simple type erasure interface. Similar to absl::FunctionRef but takes
// the callable by value. Optimized for lambdas with at most a single pointer
// as payload.
class ErrorMaker {
using StorageT = void*;
public:
template <typename F,
typename = std::enable_if_t<std::is_same<
std::string, decltype(std::declval<F>()())>::value>>
ErrorMaker(F f) {
static_assert(sizeof(F) <= sizeof(StorageT), "");
static_assert(alignof(F) <= alignof(StorageT), "");
static_assert(std::is_trivially_destructible<F>::value, "");
::new (static_cast<void*>(storage_)) F(f);
func_ = [](const void* p) { return (*reinterpret_cast<const F*>(p))(); };
}
// This overload helps callers that just want to pass a literal string.
ErrorMaker(const char* error) : error_(error), func_(nullptr) {}
std::string get() const { return func_ ? func_(storage_) : error_; }
private:
union {
alignas(StorageT) char storage_[sizeof(StorageT)];
const char* error_;
};
std::string (*func_)(const void*);
};
// These attempt to read some kind of token from the input. If successful,
// they return true. Otherwise they return false and add the given error
// to the error list.
// Consume a token with the exact text given.
bool Consume(absl::string_view text, absl::string_view error);
bool Consume(absl::string_view text, ErrorMaker error);
// Same as above, but automatically generates the error "Expected \"text\".",
// where "text" is the expected token text.
bool Consume(absl::string_view text);
// Consume a token of type IDENTIFIER and store its text in "output".
bool ConsumeIdentifier(std::string* output, absl::string_view error);
bool ConsumeIdentifier(std::string* output, ErrorMaker error);
// Consume an integer and store its value in "output".
bool ConsumeInteger(int* output, absl::string_view error);
bool ConsumeInteger(int* output, ErrorMaker error);
// Consume a signed integer and store its value in "output".
bool ConsumeSignedInteger(int* output, absl::string_view error);
bool ConsumeSignedInteger(int* output, ErrorMaker error);
// Consume a 64-bit integer and store its value in "output". If the value
// is greater than max_value, an error will be reported.
bool ConsumeInteger64(uint64_t max_value, uint64_t* output,
absl::string_view error);
bool ConsumeInteger64(uint64_t max_value, uint64_t* output, ErrorMaker error);
// Try to consume a 64-bit integer and store its value in "output". No
// error is reported on failure, allowing caller to consume token another way.
bool TryConsumeInteger64(uint64_t max_value, uint64_t* output);
// Consume a number and store its value in "output". This will accept
// tokens of either INTEGER or FLOAT type.
bool ConsumeNumber(double* output, absl::string_view error);
bool ConsumeNumber(double* output, ErrorMaker error);
// Consume a string literal and store its (unescaped) value in "output".
bool ConsumeString(std::string* output, absl::string_view error);
bool ConsumeString(std::string* output, ErrorMaker error);
// Consume a token representing the end of the statement. Comments between
// this token and the next will be harvested for documentation. The given
@ -188,18 +221,18 @@ class PROTOBUF_EXPORT Parser {
// Error logging helpers
// Invokes error_collector_->RecordError(), if error_collector_ is not NULL.
void RecordError(int line, int column, absl::string_view error);
PROTOBUF_NOINLINE void RecordError(int line, int column, ErrorMaker error);
// Invokes error_collector_->RecordError() with the line and column number
// of the current token.
void RecordError(absl::string_view error);
PROTOBUF_NOINLINE void RecordError(ErrorMaker error);
// Invokes error_collector_->RecordWarning(), if error_collector_ is not NULL.
void RecordWarning(int line, int column, absl::string_view warning);
PROTOBUF_NOINLINE void RecordWarning(int line, int column, ErrorMaker error);
// Invokes error_collector_->RecordWarning() with the line and column number
// of the current token.
void RecordWarning(absl::string_view warning);
PROTOBUF_NOINLINE void RecordWarning(ErrorMaker error);
// Records a location in the SourceCodeInfo.location table (see
// descriptor.proto). We use RAII to ensure that the start and end locations
@ -379,11 +412,10 @@ class PROTOBUF_EXPORT Parser {
const LocationRecorder& message_location);
bool ParseReservedNames(DescriptorProto* message,
const LocationRecorder& parent_location);
bool ParseReservedName(std::string* name, absl::string_view error_message);
bool ParseReservedName(std::string* name, ErrorMaker error_message);
bool ParseReservedIdentifiers(DescriptorProto* message,
const LocationRecorder& parent_location);
bool ParseReservedIdentifier(std::string* name,
absl::string_view error_message);
bool ParseReservedIdentifier(std::string* name, ErrorMaker error_message);
bool ParseReservedNumbers(DescriptorProto* message,
const LocationRecorder& parent_location);
bool ParseReserved(EnumDescriptorProto* message,

Loading…
Cancel
Save