Add a tokenizer inside of io::Printer.

This change extracts the existing tokenization logic into one function and
tweaks it slightly, making it easier to understand how format strings are tokenized
at the cost of a few allocations.

Most of the changes in generated code are due to slight changes in how indentation is handled, which harmlessly confuse some odd uses of cpp::Formatter; Emit() is not impacted.

PiperOrigin-RevId: 494193350
pull/11223/head
Protobuf Team Bot 2 years ago committed by Copybara-Service
parent cdc56ab1b7
commit c5c5332d80
  1. 1
      src/google/protobuf/compiler/plugin.pb.h
  2. 1
      src/google/protobuf/descriptor.pb.h
  3. 708
      src/google/protobuf/io/printer.cc
  4. 30
      src/google/protobuf/io/printer.h
  5. 28
      src/google/protobuf/io/printer_unittest.cc

@ -966,6 +966,7 @@ class PROTOC_EXPORT CodeGeneratorResponse final :
union { Impl_ _impl_; }; union { Impl_ _impl_; };
friend struct ::TableStruct_google_2fprotobuf_2fcompiler_2fplugin_2eproto; friend struct ::TableStruct_google_2fprotobuf_2fcompiler_2fplugin_2eproto;
}; };
// =================================================================== // ===================================================================

@ -8291,6 +8291,7 @@ class PROTOBUF_EXPORT GeneratedCodeInfo final :
union { Impl_ _impl_; }; union { Impl_ _impl_; };
friend struct ::TableStruct_google_2fprotobuf_2fdescriptor_2eproto; friend struct ::TableStruct_google_2fprotobuf_2fdescriptor_2eproto;
}; };
// =================================================================== // ===================================================================

@ -52,6 +52,7 @@
#include "absl/strings/match.h" #include "absl/strings/match.h"
#include "absl/strings/str_cat.h" #include "absl/strings/str_cat.h"
#include "absl/strings/str_format.h" #include "absl/strings/str_format.h"
#include "absl/strings/str_split.h"
#include "absl/strings/string_view.h" #include "absl/strings/string_view.h"
#include "absl/strings/strip.h" #include "absl/strings/strip.h"
#include "absl/types/optional.h" #include "absl/types/optional.h"
@ -62,54 +63,6 @@ namespace google {
namespace protobuf { namespace protobuf {
namespace io { namespace io {
namespace { namespace {
// Measures the indentation of a raw-string template: skips every leading
// newline, then reports how many consecutive spaces follow.
size_t RawStringIndentLen(absl::string_view format) {
  // Typical inputs come from calls shaped like
  //
  //   p->Emit(R"cc(
  //     class Foo {
  //       int x, y, z;
  //     };
  //   )cc");
  //
  // possibly with additional blank lines right after the opening delimiter.
  // The run of spaces found after those leading newlines is the amount that
  // gets stripped from the start of every line of the template.
  const size_t size = format.size();
  size_t cursor = 0;

  // Step over all leading newlines.
  for (; cursor < size && format[cursor] == '\n'; ++cursor) {
  }

  // Count the spaces that come immediately afterwards.
  const size_t indent_begin = cursor;
  for (; cursor < size && format[cursor] == ' '; ++cursor) {
  }
  return cursor - indent_begin;
}
// Strips the leading spaces from `format` (in place) and returns how many of
// them lie beyond `raw_string_indent_len`; the result is zero when the line
// has no more leading spaces than `raw_string_indent_len`.
size_t ConsumeIndentForLine(size_t raw_string_indent_len,
                            absl::string_view& format) {
  size_t leading_spaces = 0;
  while (leading_spaces < format.size() && format[leading_spaces] == ' ') {
    ++leading_spaces;
  }
  // The caller observes the consumed prefix through the reference parameter.
  format.remove_prefix(leading_spaces);

  return leading_spaces > raw_string_indent_len
             ? leading_spaces - raw_string_indent_len
             : 0;
}
template <typename T> template <typename T>
absl::optional<T> LookupInFrameStack( absl::optional<T> LookupInFrameStack(
absl::string_view var, absl::string_view var,
@ -124,6 +77,170 @@ absl::optional<T> LookupInFrameStack(
} }
} // namespace } // namespace
// Tokenizes `format_string` into a Format: a list of lines, each carrying an
// indent and a sequence of chunks obtained by splitting the line along
// options_.variable_delimiter. Chunks alternate between literal text and
// variable names; empty literal chunks are dropped.
//
// When `options.strip_raw_string_indentation` is set and the string starts
// with a newline, the string is treated as a multi-line raw string: leading
// blank lines are skipped, the indentation of the first non-blank line is
// subtracted from the indent of every later line, and trailing lines without
// chunks are discarded.
//
// A line whose variable delimiters do not pair up is reported through
// Validate() using `options`.
//
// NOTE(review): chunk text is stored as absl::string_view pieces of
// `format_string`, so presumably the caller must keep that buffer alive for as
// long as the returned Format is used -- confirm.
Printer::Format Printer::TokenizeFormat(absl::string_view format_string,
const PrintOptions& options) {
Format format;
size_t raw_string_indent = 0;
if (options.strip_raw_string_indentation) {
// We are processing a call that looks like
//
// p->Emit(R"cc(
// class Foo {
// int x, y, z;
// };
// )cc");
//
// or
//
// p->Emit(R"cc(
//
// class Foo {
// int x, y, z;
// };
// )cc");
//
// To compute the indent, we need to:
// 1. Iterate over each line.
// 2. Find the first line that contains non-whitespace characters.
// 3. Count the number of leading spaces on that line.
//
// The following pair of nested loops assumes the current line is the first
// line with non-whitespace characters; if we consume all the spaces and
// then immediately hit a newline, this means this wasn't the right line and
// we should start over.
//
// Note that the very first character *must* be a newline; that is how we
// detect that this is a multi-line raw string template, and as such this is
// a while loop, not a do/while loop.
absl::string_view orig = format_string;
while (absl::ConsumePrefix(&format_string, "\n")) {
raw_string_indent = 0;
format.is_raw_string = true;
while (absl::ConsumePrefix(&format_string, " ")) {
++raw_string_indent;
}
}
// If we consume the entire string, this probably wasn't a raw string and
// was probably something like a couple of explicit newlines. Restore the
// original input and undo the raw-string detection.
if (format_string.empty()) {
format_string = orig;
format.is_raw_string = false;
raw_string_indent = 0;
}
}
// We now split the remaining format string into lines and, for each line:
// 1. Discard all leading spaces to compute that line's indent.
// We do not do this for the first line, so that Emit(" ") works
// correctly. We do this *regardless* of whether we are processing
// a raw string, because existing non-raw-string calls to cpp::Formatter
// rely on this. There is a test that validates this behavior.
//
// 2. Set the indent for that line to max(0, line_indent -
// raw_string_indent). raw_string_indent is only non-zero when a raw
// string was detected above, so other strings keep their full indent.
//
// 3. Split the line into chunks along the variable delimiters, e.g.
// $...$.
//
// After the loop, trailing empty lines are discarded if we know this is a
// raw string.
bool is_first = true;
for (absl::string_view line_text : absl::StrSplit(format_string, '\n')) {
size_t line_indent = 0;
while (!is_first && absl::ConsumePrefix(&line_text, " ")) {
++line_indent;
}
is_first = false;
format.lines.emplace_back();
auto& line = format.lines.back();
line.indent =
line_indent > raw_string_indent ? line_indent - raw_string_indent : 0;
// Pieces alternate literal, variable, literal, ...; `is_var` tracks which
// kind the current piece is. `total_len` reconstructs the length of the line
// from the pieces so that unbalanced delimiters can be detected below.
bool is_var = false;
size_t total_len = 0;
for (absl::string_view chunk :
absl::StrSplit(line_text, options_.variable_delimiter)) {
// The special _start and _end variables should actually glom the next
// chunk into themselves, so as to be of the form _start$foo and _end$foo.
if (!line.chunks.empty() && !is_var) {
auto& prev = line.chunks.back();
if (prev.text == "_start" || prev.text == "_end") {
// The +1 below is to account for the $ in between them.
// This string is safe, because prev.text and chunk are contiguous
// by construction.
prev.text = absl::string_view(prev.text.data(),
prev.text.size() + 1 + chunk.size());
// Account for the foo$ part of $_start$foo$.
total_len += chunk.size() + 1;
// `is_var` is deliberately left unchanged here: the glommed piece
// counts as part of the preceding variable chunk.
continue;
}
}
// Empty variable chunks (an empty $...$ pair) are kept; empty literal
// chunks are dropped.
if (is_var || !chunk.empty()) {
line.chunks.push_back(Format::Chunk{chunk, is_var});
}
total_len += chunk.size();
if (is_var) {
// This accounts for the $s around a variable.
total_len += 2;
}
is_var = !is_var;
}
// To ensure there are no unclosed $...$, we check that the computed length
// above equals the actual length of the string. If it's off, that means
// that there are missing or extra $ characters.
Validate(total_len == line_text.size(), options, [&line] {
if (line.chunks.empty()) {
return std::string("wrong number of variable delimiters");
}
return absl::StrFormat("unclosed variable name: `%s`",
absl::CHexEscape(line.chunks.back().text));
});
// Trim any trailing empty, non-variable chunks.
while (!line.chunks.empty()) {
auto& last = line.chunks.back();
if (last.is_var || !last.text.empty()) {
break;
}
line.chunks.pop_back();
}
}
// Discard any trailing newlines (i.e., lines which contain no chunks.)
if (format.is_raw_string) {
while (!format.lines.empty() && format.lines.back().chunks.empty()) {
format.lines.pop_back();
}
}
#if 0 // Use this to aid debugging tokenization.
GOOGLE_LOG(INFO) << "--- " << format.lines.size() << " lines";
for (size_t i = 0; i < format.lines.size(); ++i) {
const auto& line = format.lines[i];
auto log_line = absl::StrFormat("[\" \" x %d]", line.indent);
for (const auto& chunk : line.chunks) {
absl::StrAppendFormat(&log_line, " %s\"%s\"", chunk.is_var ? "$" : "",
absl::CHexEscape(chunk.text));
}
GOOGLE_LOG(INFO) << log_line;
}
GOOGLE_LOG(INFO) << "---";
#endif
return format;
}
constexpr absl::string_view Printer::kProtocCodegenTrace; constexpr absl::string_view Printer::kProtocCodegenTrace;
Printer::Printer(ZeroCopyOutputStream* output, Options options) Printer::Printer(ZeroCopyOutputStream* output, Options options)
@ -241,12 +358,7 @@ void Printer::WriteRaw(const char* data, size_t size) {
} }
if (at_start_of_line_ && data[0] != '\n') { if (at_start_of_line_ && data[0] != '\n') {
// Insert an indent. IndentIfAtStart();
at_start_of_line_ = false;
for (size_t i = 0; i < indent_; ++i) {
sink_.Write(" ");
}
if (failed_) { if (failed_) {
return; return;
} }
@ -321,10 +433,10 @@ bool Printer::ValidateIndexLookupInBounds(size_t index,
void Printer::PrintImpl(absl::string_view format, void Printer::PrintImpl(absl::string_view format,
absl::Span<const std::string> args, PrintOptions opts) { absl::Span<const std::string> args, PrintOptions opts) {
// Inside of this function, we set indentation as we print new lines from the // Inside of this function, we set indentation as we print new lines from
// format string. No matter how we exit this function, we should fix up the // the format string. No matter how we exit this function, we should fix up
// indent to what it was before we entered; a cleanup makes it easy to avoid // the indent to what it was before we entered; a cleanup makes it easy to
// this mistake. // avoid this mistake.
size_t original_indent = indent_; size_t original_indent = indent_;
auto unindent = auto unindent =
absl::MakeCleanup([this, original_indent] { indent_ = original_indent; }); absl::MakeCleanup([this, original_indent] { indent_ = original_indent; });
@ -337,312 +449,276 @@ void Printer::PrintImpl(absl::string_view format,
substitutions_.clear(); substitutions_.clear();
} }
size_t raw_string_indent_len = auto fmt = TokenizeFormat(format, opts);
opts.strip_raw_string_indentation ? RawStringIndentLen(format) : 0;
if (opts.strip_raw_string_indentation) {
// We only want to remove a single newline from the input string to allow
// extra newlines at the start to go into the generated code.
absl::ConsumePrefix(&format, "\n");
while (absl::ConsumePrefix(&format, " ")) {
}
}
PrintCodegenTrace(opts.loc); PrintCodegenTrace(opts.loc);
size_t arg_index = 0; size_t arg_index = 0;
bool skip_next_newline = false;
std::vector<AnnotationCollector::Annotation> annot_stack; std::vector<AnnotationCollector::Annotation> annot_stack;
std::vector<std::pair<absl::string_view, size_t>> annot_records; std::vector<std::pair<absl::string_view, size_t>> annot_records;
while (!format.empty()) { for (size_t line_idx = 0; line_idx < fmt.lines.size(); ++line_idx) {
// Skip to the next special character. We do this so that we can delay const auto& line = fmt.lines[line_idx];
// printing "normal" text until we know what kind of variable substitution
// we're doing, since that may require trimming whitespace. // We only print a newline for lines that follow the first; a loop iteration
size_t next_special = 0; // can also hint that we should not emit another newline through the
for (; next_special < format.size(); ++next_special) { // `skip_next_newline` variable.
if (format[next_special] == options_.variable_delimiter || //
format[next_special] == '\n') { // We also assume that double newlines are undesirable, so we
break; // do not emit a newline if we are at the beginning of a line, *unless* the
// previous format line is actually empty. This behavior is specific to
// raw strings.
if (line_idx > 0) {
bool prev_was_empty = fmt.lines[line_idx - 1].chunks.empty();
bool should_skip_newline =
skip_next_newline ||
(fmt.is_raw_string && (at_start_of_line_ && !prev_was_empty));
if (!should_skip_newline) {
line_start_variables_.clear();
sink_.Write("\n");
at_start_of_line_ = true;
} }
} }
skip_next_newline = false;
absl::string_view next_chunk = format.substr(0, next_special); indent_ = original_indent + line.indent;
format = format.substr(next_special);
if (format.empty()) {
PrintRaw(next_chunk);
break;
}
char c = format.front();
format = format.substr(1);
if (c == '\n') {
PrintRaw(next_chunk);
at_start_of_line_ = true;
line_start_variables_.clear();
sink_.Write("\n");
indent_ =
original_indent + ConsumeIndentForLine(raw_string_indent_len, format);
continue;
} else if (c != options_.variable_delimiter) {
PrintRaw(next_chunk);
continue;
}
size_t end = format.find(options_.variable_delimiter); for (size_t chunk_idx = 0; chunk_idx < line.chunks.size(); ++chunk_idx) {
if (!Validate(end != absl::string_view::npos, opts, [format] { auto chunk = line.chunks[chunk_idx];
return absl::StrCat("unclosed variable name: \"",
absl::CHexEscape(format), "\"");
})) {
PrintRaw(next_chunk);
WriteRaw(&options_.variable_delimiter, 1);
PrintRaw(format);
break;
}
absl::string_view match = format.substr(0, end); if (!chunk.is_var) {
absl::string_view var = match; PrintRaw(chunk.text);
format = format.substr(end + 1);
if (var.empty()) {
// `$$` is an escape for just `$`.
PrintRaw(next_chunk);
WriteRaw(&options_.variable_delimiter, 1);
continue;
}
if (opts.use_curly_brace_substitutions && absl::ConsumePrefix(&var, "{")) {
PrintRaw(next_chunk);
if (!Validate(var.size() == 1u, opts, "expected single-digit variable")) {
continue;
}
if (!Validate(absl::ascii_isdigit(var[0]), opts,
"expected digit after {")) {
continue;
}
size_t idx = var[0] - '1';
if (!ValidateIndexLookupInBounds(idx, arg_index, args.size(), opts)) {
continue; continue;
} }
if (idx == arg_index) { if (chunk.text.empty()) {
++arg_index; // `$$` is an escape for just `$`.
} WriteRaw(&options_.variable_delimiter, 1);
IndentIfAtStart();
annot_stack.push_back({{sink_.bytes_written(), 0}, args[idx]});
continue;
} else if (opts.use_curly_brace_substitutions &&
absl::ConsumePrefix(&var, "}")) {
PrintRaw(next_chunk);
// The rest of var is actually ignored, and this is apparently
// public API now. Oops?
if (!Validate(!annot_stack.empty(), opts,
"unexpected end of annotation")) {
continue; continue;
} }
annot_stack.back().first.second = sink_.bytes_written(); // If we get this far, we can conclude the chunk is a substitution
if (options_.annotation_collector != nullptr) { // variable; we rename the `chunk` variable to make this clear below.
options_.annotation_collector->AddAnnotationNew(annot_stack.back()); absl::string_view var = chunk.text;
} if (opts.use_curly_brace_substitutions &&
IndentIfAtStart(); absl::ConsumePrefix(&var, "{")) {
annot_stack.pop_back(); if (!Validate(var.size() == 1u, opts,
continue; "expected single-digit variable")) {
} continue;
}
absl::string_view prefix, suffix; if (!Validate(absl::ascii_isdigit(var[0]), opts,
if (opts.strip_spaces_around_vars) { "expected digit after {")) {
var = absl::StripLeadingAsciiWhitespace(var); continue;
prefix = match.substr(0, match.size() - var.size()); }
var = absl::StripTrailingAsciiWhitespace(var);
suffix = match.substr(prefix.size() + var.size());
}
if (!Validate(!var.empty(), opts, "unexpected empty variable")) { size_t idx = var[0] - '1';
PrintRaw(next_chunk); if (!ValidateIndexLookupInBounds(idx, arg_index, args.size(), opts)) {
continue; continue;
} }
LookupResult sub; if (idx == arg_index) {
absl::optional<AnnotationRecord> same_name_record; ++arg_index;
if (opts.allow_digit_substitutions && absl::ascii_isdigit(var[0])) { }
PrintRaw(next_chunk);
if (!Validate(var.size() == 1u, opts, "expected single-digit variable")) { IndentIfAtStart();
annot_stack.push_back({{sink_.bytes_written(), 0}, args[idx]});
continue; continue;
} }
size_t idx = var[0] - '1'; if (opts.use_curly_brace_substitutions &&
if (!ValidateIndexLookupInBounds(idx, arg_index, args.size(), opts)) { absl::ConsumePrefix(&var, "}")) {
// The rest of var is actually ignored, and this is apparently
// public API now. Oops?
if (!Validate(!annot_stack.empty(), opts,
"unexpected end of annotation")) {
continue;
}
annot_stack.back().first.second = sink_.bytes_written();
if (options_.annotation_collector != nullptr) {
options_.annotation_collector->AddAnnotationNew(annot_stack.back());
}
annot_stack.pop_back();
continue; continue;
} }
if (idx == arg_index) {
++arg_index; absl::string_view prefix, suffix;
if (opts.strip_spaces_around_vars) {
var = absl::StripLeadingAsciiWhitespace(var);
prefix = chunk.text.substr(0, chunk.text.size() - var.size());
var = absl::StripTrailingAsciiWhitespace(var);
suffix = chunk.text.substr(prefix.size() + var.size());
} }
sub = args[idx];
} else if (opts.use_annotation_frames && if (!Validate(!var.empty(), opts, "unexpected empty variable")) {
(var == "_start" || var == "_end")) {
bool is_start = var == "_start";
size_t next_delim = format.find('$');
if (!Validate(next_delim != absl::string_view::npos, opts,
"$_start$ must be followed by a name and another $")) {
PrintRaw(next_chunk);
continue; continue;
} }
auto var = format.substr(0, next_delim); bool is_start = absl::ConsumePrefix(&var, "_start$");
format = format.substr(next_delim + 1); bool is_end = absl::ConsumePrefix(&var, "_end$");
if (opts.use_annotation_frames && (is_start || is_end)) {
if (is_start) { if (is_start) {
PrintRaw(next_chunk); IndentIfAtStart();
IndentIfAtStart(); annot_records.push_back({var, sink_.bytes_written()});
annot_records.push_back({var, sink_.bytes_written()});
// Skip all whitespace immediately after a _start. // Skip all whitespace immediately after a _start.
while (!format.empty() && absl::ascii_isspace(format.front())) { ++chunk_idx;
format = format.substr(1); if (chunk_idx < line.chunks.size()) {
} absl::string_view text = line.chunks[chunk_idx].text;
} else { while (absl::ConsumePrefix(&text, " ")) {
// Skip all whitespace immediately *before* an _end. }
while (!next_chunk.empty() && absl::ascii_isspace(next_chunk.back())) { PrintRaw(text);
next_chunk = next_chunk.substr(0, next_chunk.size() - 1); }
} } else {
PrintRaw(next_chunk); // If a line consisted *only* of an _end, this will likely result in
// a blank line if we do not zap the newline after it, so we do that
// If a line consisted *only* of an _end, this will likely result in // here.
// a blank line if we do not zap the newline after it, and any if (line.chunks.size() == 1) {
// indentation beyond that. skip_next_newline = true;
if (at_start_of_line_) { }
absl::ConsumePrefix(&format, "\n");
indent_ = original_indent + auto record_var = annot_records.back();
ConsumeIndentForLine(raw_string_indent_len, format); annot_records.pop_back();
if (!Validate(record_var.first == var, opts, [record_var, var] {
return absl::StrFormat(
"_start and _end variables must match, but got %s and %s, "
"respectively",
record_var.first, var);
})) {
continue;
}
absl::optional<AnnotationRecord> record =
LookupInFrameStack(var, absl::MakeSpan(annotation_lookups_));
if (!Validate(record.has_value(), opts, [var] {
return absl::StrCat("undefined annotation variable: \"",
absl::CHexEscape(var), "\"");
})) {
continue;
}
if (options_.annotation_collector != nullptr) {
options_.annotation_collector->AddAnnotation(
record_var.second, sink_.bytes_written(), record->file_path,
record->path);
}
} }
auto record_var = annot_records.back(); continue;
annot_records.pop_back(); }
if (!Validate(record_var.first == var, opts, [record_var, var] { LookupResult sub;
return absl::StrFormat( absl::optional<AnnotationRecord> same_name_record;
"_start and _end variables must match, but got %s and %s, " if (opts.allow_digit_substitutions && absl::ascii_isdigit(var[0])) {
"respectively", if (!Validate(var.size() == 1u, opts,
record_var.first, var); "expected single-digit variable")) {
})) {
continue; continue;
} }
absl::optional<AnnotationRecord> record = size_t idx = var[0] - '1';
LookupInFrameStack(var, absl::MakeSpan(annotation_lookups_)); if (!ValidateIndexLookupInBounds(idx, arg_index, args.size(), opts)) {
if (!Validate(record.has_value(), opts, [var] {
return absl::StrCat("undefined variable: \"",
absl::CHexEscape(var), "\"");
})) {
continue; continue;
} }
if (idx == arg_index) {
++arg_index;
}
sub = args[idx];
} else {
sub = LookupInFrameStack(var, absl::MakeSpan(var_lookups_));
if (options_.annotation_collector != nullptr) { if (opts.use_annotation_frames) {
options_.annotation_collector->AddAnnotation( same_name_record =
record_var.second, sink_.bytes_written(), record->file_path, LookupInFrameStack(var, absl::MakeSpan(annotation_lookups_));
record->path);
} }
} }
continue; // By returning here in case of empty we also skip possible spaces inside
} else { // the $...$, i.e. "void$ dllexpor$ f();" -> "void f();" in the empty
PrintRaw(next_chunk); // case.
sub = LookupInFrameStack(var, absl::MakeSpan(var_lookups_)); if (!Validate(sub.has_value(), opts, [var] {
return absl::StrCat("undefined variable: \"", absl::CHexEscape(var),
if (opts.use_annotation_frames) { "\"");
same_name_record = })) {
LookupInFrameStack(var, absl::MakeSpan(annotation_lookups_)); continue;
} }
}
// By returning here in case of empty we also skip possible spaces inside size_t range_start = sink_.bytes_written();
// the $...$, i.e. "void$ dllexpor$ f();" -> "void f();" in the empty case. size_t range_end = sink_.bytes_written();
if (!Validate(sub.has_value(), opts, [var] {
return absl::StrCat("undefined variable: \"", absl::CHexEscape(var),
"\"");
})) {
continue;
}
size_t range_start = sink_.bytes_written(); if (auto* str = absl::get_if<absl::string_view>(&*sub)) {
size_t range_end = sink_.bytes_written(); if (at_start_of_line_ && str->empty()) {
line_start_variables_.emplace_back(var);
}
if (auto* str = absl::get_if<absl::string_view>(&*sub)) { if (!str->empty()) {
if (at_start_of_line_ && str->empty()) { // If `sub` is empty, we do not print the spaces around it.
line_start_variables_.emplace_back(var); PrintRaw(prefix);
} PrintRaw(*str);
range_end = sink_.bytes_written();
range_start = range_end - str->size();
PrintRaw(suffix);
}
} else {
auto* fnc = absl::get_if<std::function<void()>>(&*sub);
GOOGLE_ABSL_CHECK(fnc != nullptr);
if (!str->empty()) { Validate(
// If `sub` is empty, we do not print the spaces around it. prefix.empty() && suffix.empty(), opts,
PrintRaw(prefix); "substitution that resolves to callback cannot contain whitespace");
PrintRaw(*str);
range_start = sink_.bytes_written();
(*fnc)();
range_end = sink_.bytes_written(); range_end = sink_.bytes_written();
range_start = range_end - str->size();
PrintRaw(suffix); // If we just evaluated a closure, and we are at the start of a line,
} // that means it finished with a newline. If a newline follows
} else { // immediately after, we drop it. This helps callback formatting "work
auto* fnc = absl::get_if<std::function<void()>>(&*sub); // as expected" with respect to forms like
GOOGLE_ABSL_CHECK(fnc != nullptr); //
// class Foo {
Validate( // $methods$;
prefix.empty() && suffix.empty(), opts, // };
"substitution that resolves to callback cannot contain whitespace"); //
// Without this line, this would turn into something like
range_start = sink_.bytes_written(); //
(*fnc)(); // class Foo {
range_end = sink_.bytes_written(); // void Bar() {}
//
// If we just evaluated a closure, and we are at the start of a line, that // };
// means it finished with a newline. If a newline follows immediately //
// after, we drop it. This helps callback formatting "work as expected" // in many cases. We *also* do this if a ; or , follows the
// with respect to forms like // substitution, because this helps clang-format keep its head on in
// // many cases. Users that need to keep the semi can write $foo$/**/;
// class Foo { ++chunk_idx;
// $methods$; if (chunk_idx < line.chunks.size()) {
// }; absl::string_view text = line.chunks[chunk_idx].text;
// if (!absl::ConsumePrefix(&text, ";")) {
// Without this line, this would turn into something like absl::ConsumePrefix(&text, ",");
// }
// class Foo { PrintRaw(text);
// void Bar() {} }
//
// };
//
// in many cases. We *also* do this if a ; or , follows the substitution,
// because this helps clang-format keep its head on in many cases.
// Users that need to keep the semi can write $foo$/**/;
if (!absl::ConsumePrefix(&format, ";")) {
absl::ConsumePrefix(&format, ",");
} }
absl::ConsumePrefix(&format, "\n");
indent_ =
original_indent + ConsumeIndentForLine(raw_string_indent_len, format);
}
if (same_name_record.has_value() && if (same_name_record.has_value() &&
options_.annotation_collector != nullptr) { options_.annotation_collector != nullptr) {
options_.annotation_collector->AddAnnotation(range_start, range_end, options_.annotation_collector->AddAnnotation(
same_name_record->file_path, range_start, range_end, same_name_record->file_path,
same_name_record->path); same_name_record->path);
} }
if (opts.use_substitution_map) { if (opts.use_substitution_map) {
auto insertion = substitutions_.emplace( auto insertion = substitutions_.emplace(
std::string(var), std::make_pair(range_start, range_end)); std::string(var), std::make_pair(range_start, range_end));
if (!insertion.second) { if (!insertion.second) {
// This variable was used multiple times. // This variable was used multiple times.
// Make its span have negative length so // Make its span have negative length so
// we can detect it if it gets used in an // we can detect it if it gets used in an
// annotation. // annotation.
insertion.first->second = {1, 0}; insertion.first->second = {1, 0};
}
} }
} }
} }
@ -654,6 +730,12 @@ void Printer::PrintImpl(absl::string_view format,
"annotation range was not closed; expected %c}%c: %s", "annotation range was not closed; expected %c}%c: %s",
options_.variable_delimiter, options_.variable_delimiter, original); options_.variable_delimiter, options_.variable_delimiter, original);
}); });
// For multiline raw strings, we always make sure to end on a newline.
if (fmt.is_raw_string && !at_start_of_line_) {
PrintRaw("\n");
at_start_of_line_ = true;
}
} }
} // namespace io } // namespace io
} // namespace protobuf } // namespace protobuf

@ -63,6 +63,7 @@
namespace google { namespace google {
namespace protobuf { namespace protobuf {
namespace io { namespace io {
// Records annotations about a Printer's output. // Records annotations about a Printer's output.
class PROTOBUF_EXPORT AnnotationCollector { class PROTOBUF_EXPORT AnnotationCollector {
public: public:
@ -803,6 +804,35 @@ class PROTOBUF_EXPORT Printer {
bool use_annotation_frames = true; bool use_annotation_frames = true;
}; };
friend class FormatIterator;
// The tokenized form of a format string, as produced by TokenizeFormat(): a
// sequence of lines, each made up of literal and variable chunks.
struct Format {
// One piece of a line: either literal text or a variable name.
struct Chunk {
// The chunk's text; if this is a variable, it does not include the $...$.
absl::string_view text;
// Whether or not this is a variable name, i.e., a $...$.
bool is_var;
};
// One line of the format string.
struct Line {
// Chunks to emit, split along $ and annotated as to whether each one is a
// variable name.
std::vector<Chunk> chunks;
// The indentation for this line.
size_t indent;
};
// The lines of the format string, in order.
std::vector<Line> lines;
// Whether this is a multiline raw string, according to internal heuristics.
bool is_raw_string = false;
};
Format TokenizeFormat(absl::string_view format_string,
const PrintOptions& options);
// Emit an annotation for the range defined by the given substitution // Emit an annotation for the range defined by the given substitution
// variables, as set by the most recent call to PrintImpl() that set // variables, as set by the most recent call to PrintImpl() that set
// `use_substitution_map` to true. // `use_substitution_map` to true.

@ -547,34 +547,6 @@ TEST_F(PrinterTest, Emit) {
"}\n"); "}\n");
} }
// Checks that Emit() keeps an extra blank line at the start of a raw string:
// each template below begins with an empty line, and the expected output has a
// "\n" ahead of each emitted class.
TEST_F(PrinterTest, EmitKeepsExtraLine) {
// NOTE(review): the printer lives in its own scope, presumably so that it is
// destroyed (and its output flushed) before written() is read -- confirm.
{
Printer printer(output());
printer.Emit(R"cc(

class Foo {
int x, y, z;
};
)cc");
printer.Emit(R"java(

public final class Bar {
Bar() {}
}
)java");
}
EXPECT_EQ(written(),
"\n"
"class Foo {\n"
" int x, y, z;\n"
"};\n"
"\n"
"public final class Bar {\n"
" Bar() {}\n"
"}\n");
}
TEST_F(PrinterTest, EmitWithSubs) { TEST_F(PrinterTest, EmitWithSubs) {
{ {
Printer printer(output()); Printer printer(output());

Loading…
Cancel
Save