Add a upb text encoder for Rust that does not use reflection (used for a message's Debug trait). It prints field-number-to-value entries instead of the field-name-to-value entries that the usual reflection-based text format produces.

The general test for it is written in Rust; extensions are tested in upb because they are not currently supported in Rust-upb.

PiperOrigin-RevId: 651113583
pull/17217/head
Protobuf Team Bot 9 months ago committed by Copybara-Service
parent 32bcf0bead
commit f9dd9ce66e
  1. 17
      rust/test/BUILD
  2. 22
      rust/test/upb/BUILD
  3. 88
      rust/test/upb/debug_string_test.rs
  4. 2
      rust/upb/BUILD
  5. 3
      rust/upb/lib.rs
  6. 66
      rust/upb/text.rs
  7. 1
      rust/upb/upb_api.c
  8. 13
      src/google/protobuf/compiler/rust/message.cc
  9. 3
      upb/BUILD
  10. 19
      upb/message/BUILD
  11. 67
      upb/message/compare.c
  12. 78
      upb/message/internal/iterator.c
  13. 35
      upb/message/internal/iterator.h
  14. 76
      upb/text/BUILD
  15. 235
      upb/text/debug_string.c
  16. 42
      upb/text/debug_string.h
  17. 438
      upb/text/encode.c
  18. 12
      upb/text/encode.h
  19. 63
      upb/text/encode_debug_test.cc
  20. 180
      upb/text/internal/encode.c
  21. 240
      upb/text/internal/encode.h
  22. 22
      upb/text/options.h

@ -1,3 +1,9 @@
# Protocol Buffers - Google's data interchange format
# Copyright 2023 Google LLC. All rights reserved.
# Use of this source code is governed by a BSD-style
# license that can be found in the LICENSE file or at
# https://developers.google.com/open-source/licenses/bsd
load(
"//rust:defs.bzl",
"rust_cc_proto_library",
@ -15,7 +21,10 @@ UNITTEST_EDITION_TARGET = "//src/google/protobuf:test_protos"
rust_upb_proto_library(
name = "unittest_upb_rust_proto",
testonly = True,
visibility = ["//rust/test/shared:__subpackages__"],
visibility = [
"//rust/test/shared:__subpackages__",
"//rust/test/upb:__subpackages__",
],
deps = [UNITTEST_PROTO_TARGET],
)
@ -70,7 +79,10 @@ rust_cc_proto_library(
rust_upb_proto_library(
name = "unittest_edition_upb_rust_proto",
testonly = True,
visibility = ["//rust/test/shared:__subpackages__"],
visibility = [
"//rust/test/shared:__subpackages__",
"//rust/test/upb:__subpackages__",
],
deps = [UNITTEST_EDITION_TARGET],
)
@ -379,6 +391,7 @@ rust_upb_proto_library(
testonly = True,
visibility = [
"//rust/test/shared:__subpackages__",
"//rust/test/upb:__subpackages__",
],
deps = ["//src/google/protobuf:map_unittest_proto"],
)

@ -1,3 +1,9 @@
# Protocol Buffers - Google's data interchange format
# Copyright 2023 Google LLC. All rights reserved.
# Use of this source code is governed by a BSD-style
# license that can be found in the LICENSE file or at
# https://developers.google.com/open-source/licenses/bsd
# Tests specific to upb kernel.
#
# Only add tests that are cpp kernel specific and it is not possible to make them work for upb (
@ -13,6 +19,8 @@
load("@rules_rust//rust:defs.bzl", "rust_test")
licenses(["notice"])
# TODO: Enable this for the cpp kernel and move these tests to shared.
rust_test(
name = "string_ctypes_test_upb_test",
@ -26,3 +34,17 @@ rust_test(
"@crate_index//:googletest",
],
)
# Test for the Debug output of upb-kernel messages (see debug_string_test.rs).
# To see the debug strings in the test output logs, run:
#   blaze test //rust/test/upb:debug_string_test --test_arg=--nocapture -c dbg --test_output=all
rust_test(
name = "debug_string_test",
srcs = ["debug_string_test.rs"],
deps = [
"//rust:protobuf_upb",
"//rust/test:map_unittest_upb_rust_proto",
"//rust/test:unittest_edition_upb_rust_proto",
"//rust/test:unittest_upb_rust_proto",
"@crate_index//:googletest",
],
)

@ -0,0 +1,88 @@
// Protocol Buffers - Google's data interchange format
// Copyright 2024 Google LLC. All rights reserved.
//
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file or at
// https://developers.google.com/open-source/licenses/bsd
use googletest::prelude::*;
use map_unittest_rust_proto::TestMapWithMessages;
use protobuf_upb::proto;
use unittest_rust_proto::{
test_all_types::NestedEnum as NestedEnumProto2,
test_all_types::NestedMessage as NestedMessageProto2, TestAllTypes as TestAllTypesProto2,
};
/// Checks the `Debug` output of a upb-kernel message against a golden string.
///
/// The message under test is a `TestMapWithMessages` whose
/// `map_string_all_types` field holds three copies of the same populated
/// `TestAllTypes` message. The expected output identifies every field by its
/// field number rather than by its name.
#[test]
fn test_debug_string() {
// Build one TestAllTypes value with a scalar, a string, an enum, a oneof
// member and a nested message set.
let mut msg = proto!(TestAllTypesProto2 {
optional_int32: 42,
optional_string: "Hello World",
optional_nested_enum: NestedEnumProto2::Bar,
oneof_uint32: 452235,
optional_nested_message: proto!(NestedMessageProto2 { bb: 100 }),
});
// Add three repeated-string elements; the golden shows each element printed
// as its own `44: ...` line.
let mut repeated_string = msg.repeated_string_mut();
repeated_string.push("Hello World");
repeated_string.push("Hello World");
repeated_string.push("Hello World");
let mut msg_map = TestMapWithMessages::new();
println!("EMPTY MSG: {:?}", msg_map); // Make sure that we can print an empty message.
// Insert the same message under three different keys.
msg_map.map_string_all_types_mut().insert("hello", msg.as_view());
msg_map.map_string_all_types_mut().insert("fizz", msg.as_view());
msg_map.map_string_all_types_mut().insert("boo", msg.as_view());
println!("{:?}", msg_map);
println!("{:?}", msg_map.as_view()); // Make sure that we can print as_view
println!("{:?}", msg_map.as_mut()); // Make sure that we can print as_mut
// Expected output: one `12 { key: ... value { ... } }` entry per map element,
// with the nested message printed as a `18 { ... }` block and the enum printed
// as a number.
let golden = r#"12 {
key: "hello"
value {
1: 42
14: "Hello World"
18 {
1: 100
}
21: 2
44: "Hello World"
44: "Hello World"
44: "Hello World"
111: 452235
}
}
12 {
key: "fizz"
value {
1: 42
14: "Hello World"
18 {
1: 100
}
21: 2
44: "Hello World"
44: "Hello World"
44: "Hello World"
111: 452235
}
}
12 {
key: "boo"
value {
1: 42
14: "Hello World"
18 {
1: 100
}
21: 2
44: "Hello World"
44: "Hello World"
44: "Hello World"
111: 452235
}
}
"#;
// The formatted message currently ends with the C string's NUL terminator,
// while the Rust literal above does not, so append one to the expectation
// before comparing.
let null_terminated_str = format!("{}\0", golden);
assert_that!(format!("{:?}", msg_map), eq(null_terminated_str.as_str()));
}

@ -25,6 +25,7 @@ rust_library(
"opaque_pointee.rs",
"owned_arena_box.rs",
"string_view.rs",
"text.rs",
"wire.rs",
],
visibility = [
@ -48,5 +49,6 @@ cc_library(
"//upb:message_compare",
"//upb:message_copy",
"//upb/mini_table",
"//upb/text:debug",
],
)

@ -50,5 +50,8 @@ pub use owned_arena_box::OwnedArenaBox;
mod string_view;
pub use string_view::StringView;
mod text;
pub use text::debug_string;
pub mod wire;
pub use wire::{upb_Decode, DecodeStatus, EncodeStatus};

@ -0,0 +1,66 @@
// Protocol Buffers - Google's data interchange format
// Copyright 2024 Google LLC. All rights reserved.
//
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file or at
// https://developers.google.com/open-source/licenses/bsd
use crate::{upb_MiniTable, RawMessage};
extern "C" {
/// Writes the debug string of `msg` (field number to value entries) into
/// `buf` and returns the length of the full output, excluding the
/// trailing NUL.
///
/// The return value is the minimum length (excluding NUL) that `buf` must
/// have to hold the whole debug string, so calling with `size == 0` can be
/// used to measure the output first. Per the C header, a return value
/// `>= size` means the output was truncated (snprintf semantics).
///
/// `options` is a bitwise OR of the flag values declared in [`Options`].
///
/// SAFETY:
/// - `msg` is pointing at a valid upb_Message with associated minitable
/// `mt`
/// - `buf` is legally writable for `size` bytes (`buf` may be nullptr if
/// `size` is 0)
fn upb_DebugString(
msg: RawMessage,
mt: *const upb_MiniTable,
options: i32,
buf: *mut u8,
size: usize,
) -> usize;
}
// Bit flags passed as the `options` argument of `upb_DebugString`.
// NOTE(review): these presumably mirror the UPB_TXTENC_* flags declared in
// upb/text/options.h; that header is not visible here, so confirm the values.
//
// Only `NoSortMaps` is used in this file, so the other variants would trigger
// dead_code warnings without the allowance below.
#[allow(dead_code)]
#[repr(i32)]
enum Options {
// When set, prints everything on a single line.
SingleLine = 1,
// When set, unknown fields are not printed.
SkipUnknown = 2,
// When set, maps are *not* sorted (this avoids allocating tmp mem).
NoSortMaps = 4,
}
/// Formats `msg` as text made of field number to value entries.
///
/// The encoder is invoked twice: once with an empty buffer to learn the
/// required length, and once more to fill a buffer of exactly that length
/// plus one byte for the C NUL terminator. The whole buffer is converted, so
/// the returned `String` ends with that NUL character. Invalid UTF-8 in the
/// encoder output is replaced lossily.
///
/// # Panics
/// Panics if the reported length is not below `isize::MAX`, or if the two
/// encoder passes disagree on the output length.
///
/// # Safety
/// - `mt` must correspond to the `msg`s minitable.
pub unsafe fn debug_string(msg: RawMessage, mt: *const upb_MiniTable) -> String {
    // Maps are printed unsorted in both passes.
    let flags = Options::NoSortMaps as i32;

    // Measuring pass: nothing is written, only the needed length is returned.
    // SAFETY:
    // - `msg` is a legally dereferencable upb_Message whose associated
    //   minitable is `mt`
    // - the buffer pointer is null and the buffer size is 0
    let needed = unsafe { upb_DebugString(msg, mt, flags, std::ptr::null_mut(), 0) };
    assert!(needed < isize::MAX as usize);

    // One extra byte for the trailing NUL written by the encoder.
    let mut bytes = vec![0u8; needed + 1];
    let capacity = bytes.len();

    // Writing pass.
    // SAFETY:
    // - `msg` is a legally dereferencable upb_Message whose associated
    //   minitable is `mt`
    // - `bytes` is legally writable for `capacity` bytes
    let produced = unsafe { upb_DebugString(msg, mt, flags, bytes.as_mut_ptr(), capacity) };
    assert_eq!(needed, produced);

    String::from_utf8_lossy(&bytes).into_owned()
}

@ -19,6 +19,7 @@
#include "upb/message/map.h" // IWYU pragma: keep
#include "upb/message/merge.h" // IWYU pragma: keep
#include "upb/mini_table/message.h" // IWYU pragma: keep
#include "upb/text/debug_string.h" // IWYU pragma: keep
// go/keep-sorted end
const size_t __rust_proto_kUpb_Map_Begin = kUpb_Map_Begin;

@ -156,11 +156,16 @@ void MessageDebug(Context& ctx, const Descriptor& msg) {
return;
case Kernel::kUpb:
ctx.Emit({},
ctx.Emit({{"minitable", UpbMinitableName(msg)}},
R"rs(
f.debug_struct(std::any::type_name::<Self>())
.field("raw_msg", &self.raw_msg())
.finish()
let mini_table = unsafe { $std$::ptr::addr_of!($minitable$) };
let string = unsafe {
$pbr$::debug_string(
self.raw_msg(),
mini_table,
)
};
write!(f, "{}", string)
)rs");
return;
}

@ -281,6 +281,7 @@ upb_amalgamation(
"//upb/lex:lex",
"//upb/mem:internal",
"//upb/message:internal",
"//upb/message:iterator",
"//upb/message:types",
"//upb/mini_descriptor:internal",
"//upb/mini_table:internal",
@ -327,6 +328,7 @@ upb_amalgamation(
"//upb/lex:lex",
"//upb/mem:internal",
"//upb/message:internal",
"//upb/message:iterator",
"//upb/message:types",
"//upb/mini_descriptor:internal",
"//upb/mini_table:internal",
@ -374,6 +376,7 @@ upb_amalgamation(
"//upb/lex:lex",
"//upb/mem:internal",
"//upb/message:internal",
"//upb/message:iterator",
"//upb/message:types",
"//upb/mini_descriptor:internal",
"//upb/mini_table:internal",

@ -85,6 +85,24 @@ cc_library(
],
)
# Internal helpers for walking a message's populated base fields and its
# extensions (internal/iterator.{c,h}).
cc_library(
name = "iterator",
srcs = [
"internal/iterator.c",
],
hdrs = [
"internal/iterator.h",
],
copts = UPB_DEFAULT_COPTS,
visibility = ["//visibility:public"],
deps = [
":internal",
":message",
"//upb:mini_table",
"//upb:port",
],
)
cc_library(
name = "compare",
srcs = [
@ -97,6 +115,7 @@ cc_library(
visibility = ["//visibility:public"],
deps = [
":internal",
":iterator",
":message",
"//upb:base",
"//upb:mini_table",

@ -15,6 +15,7 @@
#include "upb/message/internal/accessors.h"
#include "upb/message/internal/compare_unknown.h"
#include "upb/message/internal/extension.h"
#include "upb/message/internal/iterator.h"
#include "upb/message/map.h"
#include "upb/message/message.h"
#include "upb/mini_table/extension.h"
@ -25,74 +26,18 @@
// Must be last.
#include "upb/port/def.inc"
#define kUpb_BaseField_Begin ((size_t)-1)
#define kUpb_Extension_Begin ((size_t)-1)
#ifdef __cplusplus
extern "C" {
#endif
static bool _upb_Message_NextBaseField(const upb_Message* msg,
const upb_MiniTable* m,
const upb_MiniTableField** out_f,
upb_MessageValue* out_v, size_t* iter) {
const size_t count = upb_MiniTable_FieldCount(m);
size_t i = *iter;
while (++i < count) {
const upb_MiniTableField* f = upb_MiniTable_GetFieldByIndex(m, i);
const void* src = UPB_PRIVATE(_upb_Message_DataPtr)(msg, f);
upb_MessageValue val;
UPB_PRIVATE(_upb_MiniTableField_DataCopy)(f, &val, src);
// Skip field if unset or empty.
if (upb_MiniTableField_HasPresence(f)) {
if (!upb_Message_HasBaseField(msg, f)) continue;
} else {
if (UPB_PRIVATE(_upb_MiniTableField_DataIsZero)(f, src)) continue;
if (upb_MiniTableField_IsArray(f)) {
if (upb_Array_Size(val.array_val) == 0) continue;
} else if (upb_MiniTableField_IsMap(f)) {
if (upb_Map_Size(val.map_val) == 0) continue;
}
}
*out_f = f;
*out_v = val;
*iter = i;
return true;
}
return false;
}
static bool _upb_Message_NextExtension(const upb_Message* msg,
const upb_MiniTable* m,
const upb_MiniTableExtension** out_e,
upb_MessageValue* out_v, size_t* iter) {
size_t count;
const upb_Extension* exts = UPB_PRIVATE(_upb_Message_Getexts)(msg, &count);
size_t i = *iter;
if (++i < count) {
*out_e = exts[i].ext;
*out_v = exts[i].data;
*iter = i;
return true;
}
return false;
}
bool upb_Message_IsEmpty(const upb_Message* msg, const upb_MiniTable* m) {
if (upb_Message_ExtensionCount(msg)) return false;
const upb_MiniTableField* f;
upb_MessageValue v;
size_t iter = kUpb_BaseField_Begin;
return !_upb_Message_NextBaseField(msg, m, &f, &v, &iter);
return !UPB_PRIVATE(_upb_Message_NextBaseField)(msg, m, &f, &v, &iter);
}
static bool _upb_Array_IsEqual(const upb_Array* arr1, const upb_Array* arr2,
@ -154,8 +99,10 @@ static bool _upb_Message_BaseFieldsAreEqual(const upb_Message* msg1,
const upb_MiniTableField *f1, *f2;
upb_MessageValue val1, val2;
const bool got1 = _upb_Message_NextBaseField(msg1, m, &f1, &val1, &iter1);
const bool got2 = _upb_Message_NextBaseField(msg2, m, &f2, &val2, &iter2);
const bool got1 =
UPB_PRIVATE(_upb_Message_NextBaseField)(msg1, m, &f1, &val1, &iter1);
const bool got2 =
UPB_PRIVATE(_upb_Message_NextBaseField)(msg2, m, &f2, &val2, &iter2);
if (got1 != got2) return false; // Must have identical field counts.
if (!got1) return true; // Loop termination condition.
@ -195,7 +142,7 @@ static bool _upb_Message_ExtensionsAreEqual(const upb_Message* msg1,
// Iterate over all extensions for msg1, and search msg2 for each extension.
size_t iter1 = kUpb_Extension_Begin;
while (_upb_Message_NextExtension(msg1, m, &e, &val1, &iter1)) {
while (UPB_PRIVATE(_upb_Message_NextExtension)(msg1, m, &e, &val1, &iter1)) {
const upb_Extension* ext2 = UPB_PRIVATE(_upb_Message_Getext)(msg2, e);
if (!ext2) return false;

@ -0,0 +1,78 @@
// Protocol Buffers - Google's data interchange format
// Copyright 2024 Google LLC. All rights reserved.
//
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file or at
// https://developers.google.com/open-source/licenses/bsd
#include "upb/message/internal/iterator.h" // IWYU pragma: keep
#include <stddef.h>
#include "upb/message/accessors.h"
#include "upb/message/array.h"
#include "upb/message/internal/accessors.h"
#include "upb/message/internal/extension.h"
#include "upb/message/map.h"
#include "upb/message/message.h"
#include "upb/mini_table/extension.h"
#include "upb/mini_table/field.h"
#include "upb/mini_table/message.h"
// Must be last.
#include "upb/port/def.inc"
// Finds the next populated base field of |msg| after position |*iter|.
//
// Start an iteration with |*iter| set to kUpb_BaseField_Begin. On success the
// field and a copy of its value are stored in |*out_f| / |*out_v|, |*iter| is
// advanced to that field's index, and true is returned. Returns false, leaving
// the outputs untouched, once no populated field remains.
//
// A field counts as populated when:
//   - it has presence and upb_Message_HasBaseField() reports it set, or
//   - it has no presence, its stored data is not all zero, and (for arrays
//     and maps) the container is non-empty.
bool UPB_PRIVATE(_upb_Message_NextBaseField)(const upb_Message* msg,
                                             const upb_MiniTable* m,
                                             const upb_MiniTableField** out_f,
                                             upb_MessageValue* out_v,
                                             size_t* iter) {
  const size_t field_count = upb_MiniTable_FieldCount(m);

  // The begin sentinel is (size_t)-1, so adding one wraps around to index 0.
  for (size_t idx = *iter + 1; idx < field_count; idx++) {
    const upb_MiniTableField* field = upb_MiniTable_GetFieldByIndex(m, idx);
    const void* data = UPB_PRIVATE(_upb_Message_DataPtr)(msg, field);

    upb_MessageValue value;
    UPB_PRIVATE(_upb_MiniTableField_DataCopy)(field, &value, data);

    // Decide whether this field is set/non-empty.
    bool populated;
    if (upb_MiniTableField_HasPresence(field)) {
      populated = upb_Message_HasBaseField(msg, field);
    } else if (UPB_PRIVATE(_upb_MiniTableField_DataIsZero)(field, data)) {
      populated = false;
    } else if (upb_MiniTableField_IsArray(field)) {
      populated = upb_Array_Size(value.array_val) != 0;
    } else if (upb_MiniTableField_IsMap(field)) {
      populated = upb_Map_Size(value.map_val) != 0;
    } else {
      populated = true;
    }
    if (!populated) continue;

    *out_f = field;
    *out_v = value;
    *iter = idx;
    return true;
  }

  return false;
}
// Yields the extension of |msg| that follows position |*iter|.
//
// Start an iteration with |*iter| set to kUpb_Extension_Begin. On success the
// extension's minitable and value are stored in |*out_e| / |*out_v|, |*iter|
// is advanced, and true is returned. Returns false, leaving the outputs
// untouched, when there are no more extensions. |m| is not used by the body.
bool UPB_PRIVATE(_upb_Message_NextExtension)(
    const upb_Message* msg, const upb_MiniTable* m,
    const upb_MiniTableExtension** out_e, upb_MessageValue* out_v,
    size_t* iter) {
  size_t ext_count;
  const upb_Extension* ext_list =
      UPB_PRIVATE(_upb_Message_Getexts)(msg, &ext_count);

  // The begin sentinel is (size_t)-1, so adding one wraps around to index 0.
  const size_t next = *iter + 1;
  if (next >= ext_count) return false;

  *out_e = ext_list[next].ext;
  *out_v = ext_list[next].data;
  *iter = next;
  return true;
}

@ -0,0 +1,35 @@
// Protocol Buffers - Google's data interchange format
// Copyright 2024 Google LLC. All rights reserved.
//
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file or at
// https://developers.google.com/open-source/licenses/bsd
#ifndef THIRD_PARTY_UPB_UPB_MESSAGE_INTERNAL_ITERATOR_H_
#define THIRD_PARTY_UPB_UPB_MESSAGE_INTERNAL_ITERATOR_H_

#include <stddef.h>

#include "upb/message/message.h"
#include "upb/message/value.h"
#include "upb/mini_table/extension.h"
#include "upb/mini_table/field.h"
#include "upb/mini_table/message.h"

// Must be last.
#include "upb/port/def.inc"

// Initial iterator values for the two functions below. Both are (size_t)-1 so
// that the first increment wraps around to index 0.
#define kUpb_BaseField_Begin ((size_t)-1)
#define kUpb_Extension_Begin ((size_t)-1)

#ifdef __cplusplus
extern "C" {
#endif

// Advances |*iter| to the next populated base field of |msg| and stores the
// field and its value in |*out_f| / |*out_v|. Returns false when no populated
// field remains. Start with |*iter| == kUpb_BaseField_Begin.
bool UPB_PRIVATE(_upb_Message_NextBaseField)(const upb_Message* msg,
                                             const upb_MiniTable* m,
                                             const upb_MiniTableField** out_f,
                                             upb_MessageValue* out_v,
                                             size_t* iter);

// Advances |*iter| to the next extension of |msg| and stores its minitable and
// value in |*out_e| / |*out_v|. Returns false when no extension remains.
// Start with |*iter| == kUpb_Extension_Begin.
bool UPB_PRIVATE(_upb_Message_NextExtension)(
    const upb_Message* msg, const upb_MiniTable* m,
    const upb_MiniTableExtension** out_e, upb_MessageValue* out_v,
    size_t* iter);

#ifdef __cplusplus
} /* extern "C" */
#endif

// Pairs with the def.inc include above so the port macros do not leak into
// files that include this header.
#include "upb/port/undef.inc"

#endif  // THIRD_PARTY_UPB_UPB_MESSAGE_INTERNAL_ITERATOR_H_

@ -14,23 +14,95 @@ cc_library(
],
hdrs = [
"encode.h",
"options.h",
],
copts = UPB_DEFAULT_COPTS,
visibility = ["//visibility:public"],
deps = [
"//third_party/utf8_range",
":internal",
"//upb:base",
"//upb:eps_copy_input_stream",
"//upb:message",
"//upb:port",
"//upb:reflection",
"//upb:wire_reader",
"//upb/lex",
"//upb/message:internal",
"//upb/message:types",
],
)
# Reflection-free debug-string encoder (upb_DebugString): prints field number
# to value entries using only the message's minitable.
cc_library(
name = "debug",
srcs = [
"debug_string.c",
],
hdrs = [
"debug_string.h",
"options.h",
],
copts = UPB_DEFAULT_COPTS,
visibility = ["//visibility:public"],
deps = [
":internal",
"//upb:base",
"//upb:eps_copy_input_stream",
"//upb:message",
"//upb:mini_table",
"//upb:port",
"//upb/lex",
"//upb/message:internal",
"//upb/message:iterator",
"//upb/message:types",
"//upb/mini_table:internal",
],
)
# Text-encoding helpers (internal/encode.{c,h}) that both the text encoder and
# the debug-string encoder in this package depend on.
cc_library(
name = "internal",
srcs = [
"internal/encode.c",
],
hdrs = [
"internal/encode.h",
"options.h",
],
copts = UPB_DEFAULT_COPTS,
visibility = ["//visibility:public"],
deps = [
"//third_party/utf8_range",
"//upb:base",
"//upb:eps_copy_input_stream",
"//upb:message",
"//upb:port",
"//upb:wire_reader",
"//upb/lex",
"//upb/message:internal",
],
)
# C++ test for the ":debug" library. Per the commit description, extensions are
# covered here because Rust-upb does not support them yet.
cc_test(
name = "encode_debug_test",
srcs = [
"encode_debug_test.cc",
],
deps = [
":debug",
"//upb:base",
"//upb:eps_copy_input_stream",
"//upb:mem",
"//upb:message",
"//upb:mini_table",
"//upb:port",
"//upb:wire_reader",
"//upb/message:internal",
"//upb/test:test_proto_upb_minitable",
"//upb/test:test_upb_proto",
"@com_google_absl//absl/log:absl_log",
"@com_google_googletest//:gtest",
"@com_google_googletest//:gtest_main",
],
)
# begin:github_only
filegroup(
name = "source_files",

@ -0,0 +1,235 @@
// Protocol Buffers - Google's data interchange format
// Copyright 2024 Google LLC. All rights reserved.
//
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file or at
// https://developers.google.com/open-source/licenses/bsd
#include "upb/text/debug_string.h"
#include <inttypes.h>
#include <stdarg.h>
#include <stddef.h>
#include <stdio.h>
#include <string.h>
#include "upb/base/descriptor_constants.h"
#include "upb/message/array.h"
#include "upb/message/internal/iterator.h"
#include "upb/message/internal/map_entry.h"
#include "upb/message/internal/map_sorter.h"
#include "upb/message/map.h"
#include "upb/message/message.h"
#include "upb/message/value.h"
#include "upb/mini_table/extension.h"
#include "upb/mini_table/field.h"
#include "upb/mini_table/internal/field.h"
#include "upb/mini_table/internal/message.h"
#include "upb/mini_table/message.h"
#include "upb/text/internal/encode.h"
#include "upb/wire/eps_copy_input_stream.h"
// Must be last.
#include "upb/port/def.inc"
static void _upb_MessageDebugString(txtenc* e, const upb_Message* msg,
                                    const upb_MiniTable* mt);

// Prints a single (non-repeated, non-map) field value.
//
// The line starts with the current indentation followed by |label|. When
// |label| is NULL the field number of |f| is used instead; callers pass
// "key" / "value" when printing the two halves of a map entry. Extension
// labels are wrapped in square brackets.
//
// Message values are printed as a nested `label { ... }` block, with the
// sub-message minitable taken from |ext| when it is non-NULL and from |mt|
// otherwise. All other values are printed as `label: value`; enums are always
// printed as their numeric value.
static void _upb_FieldDebugString(txtenc* e, upb_MessageValue val,
                                  const upb_MiniTableField* f,
                                  const upb_MiniTable* mt, const char* label,
                                  const upb_MiniTableExtension* ext) {
  UPB_PRIVATE(_upb_TextEncode_Indent)(e);
  const upb_CType ctype = upb_MiniTableField_CType(f);
  const bool is_ext = upb_MiniTableField_IsExtension(f);

  // A 32-bit unsigned integer prints as at most 10 decimal digits; one more
  // byte is needed for the NUL terminator so snprintf never truncates.
  char number[11];
  snprintf(number, sizeof(number), "%" PRIu32, upb_MiniTableField_Number(f));

  // label is to pass down whether we're dealing with a "key" of a map or
  // a "value" of a map.
  if (!label) label = number;

  if (is_ext) {
    UPB_PRIVATE(_upb_TextEncode_Printf)(e, "[%s]", label);
  } else {
    UPB_PRIVATE(_upb_TextEncode_Printf)(e, "%s", label);
  }

  if (ctype == kUpb_CType_Message) {
    UPB_PRIVATE(_upb_TextEncode_Printf)(e, " {");
    UPB_PRIVATE(_upb_TextEncode_EndField)(e);
    e->indent_depth++;
    const upb_MiniTable* subm = ext ? upb_MiniTableExtension_GetSubMessage(ext)
                                    : upb_MiniTable_SubMessage(mt, f);
    _upb_MessageDebugString(e, val.msg_val, subm);
    e->indent_depth--;
    UPB_PRIVATE(_upb_TextEncode_Indent)(e);
    UPB_PRIVATE(_upb_TextEncode_PutStr)(e, "}");
    UPB_PRIVATE(_upb_TextEncode_EndField)(e);
    return;
  }

  UPB_PRIVATE(_upb_TextEncode_Printf)(e, ": ");

  if (ctype == kUpb_CType_Enum) {
    // Enum has to be processed separately because of divergent behavior
    // between encoders: here it is always printed as a number.
    UPB_PRIVATE(_upb_TextEncode_Printf)(e, "%" PRId32, val.int32_val);
  } else {
    UPB_PRIVATE(_upb_TextEncode_Scalar)(e, val, ctype);
  }

  UPB_PRIVATE(_upb_TextEncode_EndField)(e);
}
/*
 * A repeated field is printed as one plain field line per element, all
 * carrying the same field number, e.g.
 *
 * 5: 1
 * 5: 2
 * 5: 3
 *
 * |ext| is forwarded unchanged to the per-element printer.
 */
static void _upb_ArrayDebugString(txtenc* e, const upb_Array* arr,
                                  const upb_MiniTableField* f,
                                  const upb_MiniTable* mt,
                                  const upb_MiniTableExtension* ext) {
  const size_t count = upb_Array_Size(arr);
  size_t idx = 0;
  while (idx < count) {
    const upb_MessageValue elem = upb_Array_Get(arr, idx);
    _upb_FieldDebugString(e, elem, f, mt, NULL, ext);
    idx++;
  }
}
// Prints one map entry as a block headed by the map field's number:
//
//   <number> {
//     key: <key>
//     value: <val>
//   }
//
// |f| is the map field inside |mt|; the key and value field descriptors are
// taken from the map-entry minitable that |f| refers to.
static void _upb_MapEntryDebugString(txtenc* e, upb_MessageValue key,
                                     upb_MessageValue val,
                                     const upb_MiniTableField* f,
                                     const upb_MiniTable* mt) {
  const upb_MiniTable* entry = upb_MiniTable_SubMessage(mt, f);
  const upb_MiniTableField* key_f = upb_MiniTable_MapKey(entry);
  const upb_MiniTableField* val_f = upb_MiniTable_MapValue(entry);

  UPB_PRIVATE(_upb_TextEncode_Indent)(e);
  // PRIu32 matches how the same accessor's result is printed elsewhere in this
  // file and stays correct on targets where uint32_t is not `unsigned int`.
  UPB_PRIVATE(_upb_TextEncode_Printf)(e, "%" PRIu32 " {",
                                      upb_MiniTableField_Number(f));
  UPB_PRIVATE(_upb_TextEncode_EndField)(e);
  e->indent_depth++;

  _upb_FieldDebugString(e, key, key_f, entry, "key", NULL);
  _upb_FieldDebugString(e, val, val_f, entry, "value", NULL);

  e->indent_depth--;
  UPB_PRIVATE(_upb_TextEncode_Indent)(e);
  UPB_PRIVATE(_upb_TextEncode_PutStr)(e, "}");
  UPB_PRIVATE(_upb_TextEncode_EndField)(e);
}
/*
 * A map field is printed as a sequence of key/value blocks, one per entry,
 * each headed by the map field's number, e.g.
 *
 * 1 {
 * key: "abc"
 * value: 123
 * }
 * 1 {
 * key: "def"
 * value: 456
 * }
 *
 * With UPB_TXTENC_NOSORT set, entries are printed in the order upb_Map_Next()
 * yields them; otherwise they go through the encoder's map sorter first.
 */
static void _upb_MapDebugString(txtenc* e, const upb_Map* map,
                                const upb_MiniTableField* f,
                                const upb_MiniTable* mt) {
  // Unsorted path: plain iteration over the map.
  if (e->options & UPB_TXTENC_NOSORT) {
    upb_MessageValue k, v;
    size_t pos = kUpb_Map_Begin;
    while (upb_Map_Next(map, &k, &v, &pos)) {
      _upb_MapEntryDebugString(e, k, v, f, mt);
    }
    return;
  }

  // Sorted path: nothing to do (and nothing to push) for an empty map.
  if (upb_Map_Size(map) == 0) return;

  const upb_MiniTable* entry_mt = upb_MiniTable_SubMessage(mt, f);
  const upb_MiniTableField* key_field =
      upb_MiniTable_GetFieldByIndex(entry_mt, 0);

  _upb_sortedmap sorted;
  _upb_mapsorter_pushmap(&e->sorter, upb_MiniTableField_Type(key_field), map,
                         &sorted);

  upb_MapEntry raw;
  while (_upb_sortedmap_next(&e->sorter, map, &sorted, &raw)) {
    // Copy the raw entry halves into message values for printing.
    upb_MessageValue k, v;
    memcpy(&k, &raw.k, sizeof(k));
    memcpy(&v, &raw.v, sizeof(v));
    _upb_MapEntryDebugString(e, k, v, f, mt);
  }

  _upb_mapsorter_popmap(&e->sorter, &sorted);
}
// Prints every populated part of |msg| in three passes: base fields first,
// then extensions, and finally unknown fields (unless UPB_TXTENC_SKIPUNKNOWN
// is set in the encoder options).
static void _upb_MessageDebugString(txtenc* e, const upb_Message* msg,
                                    const upb_MiniTable* mt) {
  // Pass 1: base fields. No extension is involved, so |ext| is NULL; |label|
  // is NULL because these are not map-entry members.
  const upb_MiniTableField* field;
  upb_MessageValue field_val;
  size_t pos = kUpb_BaseField_Begin;
  while (UPB_PRIVATE(_upb_Message_NextBaseField)(msg, mt, &field, &field_val,
                                                 &pos)) {
    if (upb_MiniTableField_IsMap(field)) {
      _upb_MapDebugString(e, field_val.map_val, field, mt);
      continue;
    }
    if (upb_MiniTableField_IsArray(field)) {
      _upb_ArrayDebugString(e, field_val.array_val, field, mt, NULL);
      continue;
    }
    _upb_FieldDebugString(e, field_val, field, mt, NULL, NULL);
  }

  // Pass 2: extensions. Passing only the embedded field would lose the
  // sub-message information, so the extension itself is passed along as well.
  const upb_MiniTableExtension* ext;
  upb_MessageValue ext_val;
  pos = kUpb_Extension_Begin;
  while (UPB_PRIVATE(_upb_Message_NextExtension)(msg, mt, &ext, &ext_val,
                                                 &pos)) {
    const upb_MiniTableField* ext_field = &ext->UPB_PRIVATE(field);
    if (upb_MiniTableField_IsMap(ext_field)) {
      UPB_UNREACHABLE();  // Maps cannot be extensions.
      break;
    }
    if (upb_MiniTableField_IsArray(ext_field)) {
      _upb_ArrayDebugString(e, ext_val.array_val, ext_field, mt, ext);
    } else {
      // label set to NULL as we're not currently working with a MapEntry
      _upb_FieldDebugString(e, ext_val, ext_field, mt, NULL, ext);
    }
  }

  // Pass 3: unknown fields.
  if ((e->options & UPB_TXTENC_SKIPUNKNOWN) != 0) return;

  size_t unknown_size;
  const char* unknown = upb_Message_GetUnknown(msg, &unknown_size);
  if (unknown_size == 0) return;

  char* rollback = e->ptr;
  upb_EpsCopyInputStream stream;
  upb_EpsCopyInputStream_Init(&stream, &unknown, unknown_size, true);
  if (!UPB_PRIVATE(_upb_TextEncode_Unknown)(e, unknown, &stream, -1)) {
    /* Unknown failed to parse, back up and don't print it at all. */
    e->ptr = rollback;
  }
}
// Public entry point: encodes |msg| (described by |mt|) as field number to
// value text into |buf|, which is |size| bytes long, honoring |options|.
// The return value is whatever _upb_TextEncode_Nullz() reports for the
// finished encoder state.
size_t upb_DebugString(const upb_Message* msg, const upb_MiniTable* mt,
                       int options, char* buf, size_t size) {
  txtenc enc;

  // Output window and overflow accounting.
  enc.buf = buf;
  enc.ptr = buf;
  enc.end = UPB_PTRADD(buf, size);
  enc.overflow = 0;

  // Formatting state. No extension pool is set for this encoder.
  enc.options = options;
  enc.indent_depth = 0;
  enc.ext_pool = NULL;

  // The map sorter lives only for the duration of this call.
  _upb_mapsorter_init(&enc.sorter);
  _upb_MessageDebugString(&enc, msg, mt);
  _upb_mapsorter_destroy(&enc.sorter);

  return UPB_PRIVATE(_upb_TextEncode_Nullz)(&enc, size);
}

@ -0,0 +1,42 @@
// Protocol Buffers - Google's data interchange format
// Copyright 2024 Google LLC. All rights reserved.
//
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file or at
// https://developers.google.com/open-source/licenses/bsd
#ifndef UPB_TEXT_ENCODE_DEBUG_H_
#define UPB_TEXT_ENCODE_DEBUG_H_
#include <stddef.h>
#include "upb/message/message.h"
#include "upb/mini_table/message.h"
#include "upb/text/options.h" // IWYU pragma: export
// Must be last.
#include "upb/port/def.inc"
#ifdef __cplusplus
extern "C" {
#endif
/* Encodes the given |msg| to a pseudo-text format: instead of printing field
 * name to value entries, it prints field number to value entries, much like
 * how unknown fields are printed by upb_TextEncode in this directory's
 * encode.h. |mt| must be the minitable that corresponds to |msg|. |options|
 * takes the flags declared in options.h.
 *
 * Output is placed in the given buffer, and always NULL-terminated. The output
 * size (excluding NULL) is returned. This means that a return value >= |size|
 * implies that the output was truncated. (These are the same semantics as
 * snprintf()). */
UPB_API size_t upb_DebugString(const upb_Message* msg, const upb_MiniTable* mt,
int options, char* buf, size_t size);
#ifdef __cplusplus
} /* extern "C" */
#endif
#include "upb/port/undef.inc"
#endif /* UPB_TEXT_ENCODE_DEBUG_H_ */

@ -7,10 +7,9 @@
#include "upb/text/encode.h"
#include <ctype.h>
#include <float.h>
#include <inttypes.h>
#include <stdarg.h>
#include <stddef.h>
#include <stdint.h>
#include <string.h>
@ -23,213 +22,32 @@
#include "upb/message/map.h"
#include "upb/message/message.h"
#include "upb/message/value.h"
#include "upb/port/vsnprintf_compat.h"
#include "upb/reflection/def.h"
#include "upb/reflection/message.h"
#include "upb/text/internal/encode.h"
#include "upb/wire/eps_copy_input_stream.h"
#include "upb/wire/reader.h"
#include "upb/wire/types.h"
#include "utf8_range.h"
// Must be last.
#include "upb/port/def.inc"
typedef struct {
char *buf, *ptr, *end;
size_t overflow;
int indent_depth;
int options;
const upb_DefPool* ext_pool;
_upb_mapsorter sorter;
} txtenc;
static void _upb_TextEncode_Msg(txtenc* e, const upb_Message* msg,
const upb_MessageDef* m);
static void txtenc_msg(txtenc* e, const upb_Message* msg,
const upb_MessageDef* m);
static void txtenc_putbytes(txtenc* e, const void* data, size_t len) {
size_t have = e->end - e->ptr;
if (UPB_LIKELY(have >= len)) {
memcpy(e->ptr, data, len);
e->ptr += len;
} else {
if (have) {
memcpy(e->ptr, data, have);
e->ptr += have;
}
e->overflow += (len - have);
}
}
static void txtenc_putstr(txtenc* e, const char* str) {
txtenc_putbytes(e, str, strlen(str));
}
static void txtenc_printf(txtenc* e, const char* fmt, ...) {
size_t n;
size_t have = e->end - e->ptr;
va_list args;
va_start(args, fmt);
n = _upb_vsnprintf(e->ptr, have, fmt, args);
va_end(args);
if (UPB_LIKELY(have > n)) {
e->ptr += n;
} else {
e->ptr = UPB_PTRADD(e->ptr, have);
e->overflow += (n - have);
}
}
static void txtenc_indent(txtenc* e) {
if ((e->options & UPB_TXTENC_SINGLELINE) == 0) {
int i = e->indent_depth;
while (i-- > 0) {
txtenc_putstr(e, " ");
}
}
}
static void txtenc_endfield(txtenc* e) {
if (e->options & UPB_TXTENC_SINGLELINE) {
txtenc_putstr(e, " ");
} else {
txtenc_putstr(e, "\n");
}
}
static void txtenc_enum(int32_t val, const upb_FieldDef* f, txtenc* e) {
static void _upb_TextEncode_Enum(int32_t val, const upb_FieldDef* f,
txtenc* e) {
const upb_EnumDef* e_def = upb_FieldDef_EnumSubDef(f);
const upb_EnumValueDef* ev = upb_EnumDef_FindValueByNumber(e_def, val);
if (ev) {
txtenc_printf(e, "%s", upb_EnumValueDef_Name(ev));
UPB_PRIVATE(_upb_TextEncode_Printf)(e, "%s", upb_EnumValueDef_Name(ev));
} else {
txtenc_printf(e, "%" PRId32, val);
}
}
static void txtenc_escaped(txtenc* e, unsigned char ch) {
switch (ch) {
case '\n':
txtenc_putstr(e, "\\n");
break;
case '\r':
txtenc_putstr(e, "\\r");
break;
case '\t':
txtenc_putstr(e, "\\t");
break;
case '\"':
txtenc_putstr(e, "\\\"");
break;
case '\'':
txtenc_putstr(e, "\\'");
break;
case '\\':
txtenc_putstr(e, "\\\\");
break;
default:
txtenc_printf(e, "\\%03o", ch);
break;
}
}
// Returns true if `ch` needs to be escaped in TextFormat, independent of any
// UTF-8 validity issues.
static bool upb_DefinitelyNeedsEscape(unsigned char ch) {
if (ch < 32) return true;
switch (ch) {
case '\"':
case '\'':
case '\\':
case 127:
return true;
}
return false;
}
static bool upb_AsciiIsPrint(unsigned char ch) { return ch >= 32 && ch < 127; }
// Returns true if this is a high byte that requires UTF-8 validation. If the
// UTF-8 validation fails, we must escape the byte.
static bool upb_NeedsUtf8Validation(unsigned char ch) { return ch > 127; }
// Returns the number of bytes in the prefix of `val` that do not need escaping.
// This is like utf8_range::SpanStructurallyValid(), except that it also
// terminates at any ASCII char that needs to be escaped in TextFormat (any char
// that has `DefinitelyNeedsEscape(ch) == true`).
//
// If we could get a variant of utf8_range::SpanStructurallyValid() that could
// terminate on any of these chars, that might be more efficient, but it would
// be much more complicated to modify that heavily SIMD code.
static size_t SkipPassthroughBytes(const char* ptr, size_t size) {
for (size_t i = 0; i < size; i++) {
unsigned char uc = ptr[i];
if (upb_DefinitelyNeedsEscape(uc)) return i;
if (upb_NeedsUtf8Validation(uc)) {
// Find the end of this region of consecutive high bytes, so that we only
// give high bytes to the UTF-8 checker. This avoids needing to perform
// a second scan of the ASCII characters looking for characters that
// need escaping.
//
// We assume that high bytes are less frequent than plain, printable ASCII
// bytes, so we accept the double-scan of high bytes.
size_t end = i + 1;
for (; end < size; end++) {
if (!upb_NeedsUtf8Validation(ptr[end])) break;
}
size_t n = end - i;
size_t ok = utf8_range_ValidPrefix(ptr + i, n);
if (ok != n) return i + ok;
i += ok - 1;
}
}
return size;
}
static void upb_HardenedPrintString(txtenc* e, const char* ptr, size_t len) {
// Print as UTF-8, while guarding against any invalid UTF-8 in the string
// field.
//
// If in the future we have a guaranteed invariant that invalid UTF-8 will
// never be present, we could avoid the UTF-8 check here.
txtenc_putstr(e, "\"");
const char* end = ptr + len;
while (ptr < end) {
size_t n = SkipPassthroughBytes(ptr, end - ptr);
if (n != 0) {
txtenc_putbytes(e, ptr, n);
ptr += n;
if (ptr == end) break;
}
// If repeated calls to CEscape() and PrintString() are expensive, we could
// consider batching them, at the cost of some complexity.
txtenc_escaped(e, *ptr);
ptr++;
}
txtenc_putstr(e, "\"");
}
static void txtenc_bytes(txtenc* e, upb_StringView data) {
const char* ptr = data.data;
const char* end = ptr + data.size;
txtenc_putstr(e, "\"");
for (; ptr < end; ptr++) {
unsigned char uc = *ptr;
if (upb_AsciiIsPrint(uc)) {
txtenc_putbytes(e, ptr, 1);
} else {
txtenc_escaped(e, uc);
}
UPB_PRIVATE(_upb_TextEncode_Printf)(e, "%" PRId32, val);
}
txtenc_putstr(e, "\"");
}
static void txtenc_field(txtenc* e, upb_MessageValue val,
const upb_FieldDef* f) {
txtenc_indent(e);
static void _upb_TextEncode_Field(txtenc* e, upb_MessageValue val,
const upb_FieldDef* f) {
UPB_PRIVATE(_upb_TextEncode_Indent)(e);
const upb_CType ctype = upb_FieldDef_CType(f);
const bool is_ext = upb_FieldDef_IsExtension(f);
const char* full = upb_FieldDef_FullName(f);
@ -244,68 +62,33 @@ static void txtenc_field(txtenc* e, upb_MessageValue val,
// }
// end:google_only
if (is_ext) {
txtenc_printf(e, "[%s] {", full);
UPB_PRIVATE(_upb_TextEncode_Printf)(e, "[%s] {", full);
} else {
txtenc_printf(e, "%s {", name);
UPB_PRIVATE(_upb_TextEncode_Printf)(e, "%s {", name);
}
txtenc_endfield(e);
UPB_PRIVATE(_upb_TextEncode_EndField)(e);
e->indent_depth++;
txtenc_msg(e, val.msg_val, upb_FieldDef_MessageSubDef(f));
_upb_TextEncode_Msg(e, val.msg_val, upb_FieldDef_MessageSubDef(f));
e->indent_depth--;
txtenc_indent(e);
txtenc_putstr(e, "}");
txtenc_endfield(e);
UPB_PRIVATE(_upb_TextEncode_Indent)(e);
UPB_PRIVATE(_upb_TextEncode_PutStr)(e, "}");
UPB_PRIVATE(_upb_TextEncode_EndField)(e);
return;
}
if (is_ext) {
txtenc_printf(e, "[%s]: ", full);
UPB_PRIVATE(_upb_TextEncode_Printf)(e, "[%s]: ", full);
} else {
txtenc_printf(e, "%s: ", name);
UPB_PRIVATE(_upb_TextEncode_Printf)(e, "%s: ", name);
}
switch (ctype) {
case kUpb_CType_Bool:
txtenc_putstr(e, val.bool_val ? "true" : "false");
break;
case kUpb_CType_Float: {
char buf[32];
_upb_EncodeRoundTripFloat(val.float_val, buf, sizeof(buf));
txtenc_putstr(e, buf);
break;
}
case kUpb_CType_Double: {
char buf[32];
_upb_EncodeRoundTripDouble(val.double_val, buf, sizeof(buf));
txtenc_putstr(e, buf);
break;
}
case kUpb_CType_Int32:
txtenc_printf(e, "%" PRId32, val.int32_val);
break;
case kUpb_CType_UInt32:
txtenc_printf(e, "%" PRIu32, val.uint32_val);
break;
case kUpb_CType_Int64:
txtenc_printf(e, "%" PRId64, val.int64_val);
break;
case kUpb_CType_UInt64:
txtenc_printf(e, "%" PRIu64, val.uint64_val);
break;
case kUpb_CType_String:
upb_HardenedPrintString(e, val.str_val.data, val.str_val.size);
break;
case kUpb_CType_Bytes:
txtenc_bytes(e, val.str_val);
break;
case kUpb_CType_Enum:
txtenc_enum(val.int32_val, f, e);
break;
default:
UPB_UNREACHABLE();
if (ctype == kUpb_CType_Enum) {
_upb_TextEncode_Enum(val.int32_val, f, e);
} else {
UPB_PRIVATE(_upb_TextEncode_Scalar)(e, val, ctype);
}
txtenc_endfield(e);
UPB_PRIVATE(_upb_TextEncode_EndField)(e);
}
/*
@ -315,33 +98,34 @@ static void txtenc_field(txtenc* e, upb_MessageValue val,
* foo_field: 2
* foo_field: 3
*/
static void txtenc_array(txtenc* e, const upb_Array* arr,
const upb_FieldDef* f) {
static void _upb_TextEncode_Array(txtenc* e, const upb_Array* arr,
const upb_FieldDef* f) {
size_t i;
size_t size = upb_Array_Size(arr);
for (i = 0; i < size; i++) {
txtenc_field(e, upb_Array_Get(arr, i), f);
_upb_TextEncode_Field(e, upb_Array_Get(arr, i), f);
}
}
static void txtenc_mapentry(txtenc* e, upb_MessageValue key,
upb_MessageValue val, const upb_FieldDef* f) {
static void _upb_TextEncode_MapEntry(txtenc* e, upb_MessageValue key,
upb_MessageValue val,
const upb_FieldDef* f) {
const upb_MessageDef* entry = upb_FieldDef_MessageSubDef(f);
const upb_FieldDef* key_f = upb_MessageDef_Field(entry, 0);
const upb_FieldDef* val_f = upb_MessageDef_Field(entry, 1);
txtenc_indent(e);
txtenc_printf(e, "%s {", upb_FieldDef_Name(f));
txtenc_endfield(e);
UPB_PRIVATE(_upb_TextEncode_Indent)(e);
UPB_PRIVATE(_upb_TextEncode_Printf)(e, "%s {", upb_FieldDef_Name(f));
UPB_PRIVATE(_upb_TextEncode_EndField)(e);
e->indent_depth++;
txtenc_field(e, key, key_f);
txtenc_field(e, val, val_f);
_upb_TextEncode_Field(e, key, key_f);
_upb_TextEncode_Field(e, val, val_f);
e->indent_depth--;
txtenc_indent(e);
txtenc_putstr(e, "}");
txtenc_endfield(e);
UPB_PRIVATE(_upb_TextEncode_Indent)(e);
UPB_PRIVATE(_upb_TextEncode_PutStr)(e, "}");
UPB_PRIVATE(_upb_TextEncode_EndField)(e);
}
/*
@ -356,12 +140,13 @@ static void txtenc_mapentry(txtenc* e, upb_MessageValue key,
* value: 456
* }
*/
static void txtenc_map(txtenc* e, const upb_Map* map, const upb_FieldDef* f) {
static void _upb_TextEncode_Map(txtenc* e, const upb_Map* map,
const upb_FieldDef* f) {
if (e->options & UPB_TXTENC_NOSORT) {
size_t iter = kUpb_Map_Begin;
upb_MessageValue key, val;
while (upb_Map_Next(map, &key, &val, &iter)) {
txtenc_mapentry(e, key, val, f);
_upb_TextEncode_MapEntry(e, key, val, f);
}
} else {
if (upb_Map_Size(map) == 0) return;
@ -376,135 +161,25 @@ static void txtenc_map(txtenc* e, const upb_Map* map, const upb_FieldDef* f) {
upb_MessageValue key, val;
memcpy(&key, &ent.k, sizeof(key));
memcpy(&val, &ent.v, sizeof(val));
txtenc_mapentry(e, key, val, f);
_upb_TextEncode_MapEntry(e, key, val, f);
}
_upb_mapsorter_popmap(&e->sorter, &sorted);
}
}
// Bails out of the enclosing function with NULL (its failure value) when `x`
// evaluates to zero/NULL.
#define CHK(x)     \
  do {             \
    if (!(x)) {    \
      return NULL; \
    }              \
  } while (0)

/*
 * Unknown fields are printed by number.
 *
 * 1001: 123
 * 1002: "hello"
 * 1006: 0xdeadbeef
 * 1003: {
 *   1: 111
 * }
 *
 * Reads wire-format data from `stream` starting at `ptr` and prints one entry
 * per tag. `groupnum` is the field number of the group being printed, or a
 * non-positive value when not inside a group. Returns the read position after
 * the consumed data, or NULL if the data could not be parsed (including
 * reaching the end of the stream while still inside a group).
 */
static const char* txtenc_unknown(txtenc* e, const char* ptr,
                                  upb_EpsCopyInputStream* stream,
                                  int groupnum) {
  // We are guaranteed that the unknown data is valid wire format, and will not
  // contain tag zero.
  uint32_t end_group = groupnum > 0
                           ? ((groupnum << kUpb_WireReader_WireTypeBits) |
                              kUpb_WireType_EndGroup)
                           : 0;

  while (!upb_EpsCopyInputStream_IsDone(stream, &ptr)) {
    uint32_t tag;
    CHK(ptr = upb_WireReader_ReadTag(ptr, &tag));
    if (tag == end_group) return ptr;

    txtenc_indent(e);
    txtenc_printf(e, "%d: ", (int)upb_WireReader_GetFieldNumber(tag));

    switch (upb_WireReader_GetWireType(tag)) {
      case kUpb_WireType_Varint: {
        uint64_t val;
        CHK(ptr = upb_WireReader_ReadVarint(ptr, &val));
        txtenc_printf(e, "%" PRIu64, val);
        break;
      }
      case kUpb_WireType_32Bit: {
        uint32_t val;
        ptr = upb_WireReader_ReadFixed32(ptr, &val);
        // Fixed-width values carry a "0x" prefix, so the digits must be hex.
        txtenc_printf(e, "0x%08" PRIx32, val);
        break;
      }
      case kUpb_WireType_64Bit: {
        uint64_t val;
        ptr = upb_WireReader_ReadFixed64(ptr, &val);
        // Fixed-width values carry a "0x" prefix, so the digits must be hex.
        txtenc_printf(e, "0x%016" PRIx64, val);
        break;
      }
      case kUpb_WireType_Delimited: {
        int size;
        // Remember the output position so the speculative output below can be
        // discarded if the payload turns out not to be a message.
        char* start = e->ptr;
        size_t start_overflow = e->overflow;
        CHK(ptr = upb_WireReader_ReadSize(ptr, &size));
        CHK(upb_EpsCopyInputStream_CheckDataSizeAvailable(stream, ptr, size));

        // Speculatively try to parse as message.
        txtenc_putstr(e, "{");
        txtenc_endfield(e);

        // EpsCopyInputStream can't back up, so create a sub-stream for the
        // speculative parse.
        upb_EpsCopyInputStream sub_stream;
        const char* sub_ptr = upb_EpsCopyInputStream_GetAliasedPtr(stream, ptr);
        upb_EpsCopyInputStream_Init(&sub_stream, &sub_ptr, size, true);

        e->indent_depth++;
        if (txtenc_unknown(e, sub_ptr, &sub_stream, -1)) {
          ptr = upb_EpsCopyInputStream_Skip(stream, ptr, size);
          e->indent_depth--;
          txtenc_indent(e);
          txtenc_putstr(e, "}");
        } else {
          // Didn't work out, print as raw bytes.
          e->indent_depth--;
          e->ptr = start;
          e->overflow = start_overflow;
          const char* str = ptr;
          ptr = upb_EpsCopyInputStream_ReadString(stream, &str, size, NULL);
          UPB_ASSERT(ptr);
          txtenc_bytes(e, (upb_StringView){.data = str, .size = size});
        }
        break;
      }
      case kUpb_WireType_StartGroup:
        txtenc_putstr(e, "{");
        txtenc_endfield(e);
        e->indent_depth++;
        CHK(ptr = txtenc_unknown(e, ptr, stream,
                                 upb_WireReader_GetFieldNumber(tag)));
        e->indent_depth--;
        txtenc_indent(e);
        txtenc_putstr(e, "}");
        break;
      default:
        // EndGroup without a matching StartGroup, or an unrecognized wire type.
        return NULL;
    }
    txtenc_endfield(e);
  }

  // Running out of data is only a success when we are not inside a group.
  return end_group == 0 && !upb_EpsCopyInputStream_IsError(stream) ? ptr : NULL;
}

#undef CHK
static void txtenc_msg(txtenc* e, const upb_Message* msg,
const upb_MessageDef* m) {
static void _upb_TextEncode_Msg(txtenc* e, const upb_Message* msg,
const upb_MessageDef* m) {
size_t iter = kUpb_Message_Begin;
const upb_FieldDef* f;
upb_MessageValue val;
while (upb_Message_Next(msg, m, e->ext_pool, &f, &val, &iter)) {
if (upb_FieldDef_IsMap(f)) {
txtenc_map(e, val.map_val, f);
_upb_TextEncode_Map(e, val.map_val, f);
} else if (upb_FieldDef_IsRepeated(f)) {
txtenc_array(e, val.array_val, f);
_upb_TextEncode_Array(e, val.array_val, f);
} else {
txtenc_field(e, val, f);
_upb_TextEncode_Field(e, val, f);
}
}
@ -515,7 +190,7 @@ static void txtenc_msg(txtenc* e, const upb_Message* msg,
char* start = e->ptr;
upb_EpsCopyInputStream stream;
upb_EpsCopyInputStream_Init(&stream, &ptr, size, true);
if (!txtenc_unknown(e, ptr, &stream, -1)) {
if (!UPB_PRIVATE(_upb_TextEncode_Unknown)(e, ptr, &stream, -1)) {
/* Unknown failed to parse, back up and don't print it at all. */
e->ptr = start;
}
@ -523,17 +198,6 @@ static void txtenc_msg(txtenc* e, const upb_Message* msg,
}
}
// NUL-terminates the output held by `e` and returns the number of bytes the
// full output required (bytes written plus bytes that overflowed). `size` is
// the capacity of the output buffer; when it is zero nothing is written.
size_t txtenc_nullz(txtenc* e, size_t size) {
  const size_t total = (size_t)(e->ptr - e->buf) + e->overflow;
  if (size == 0) return total;

  // If the buffer is completely full, give up the last byte for the
  // terminator.
  if (e->ptr == e->end) --e->ptr;
  *e->ptr = '\0';
  return total;
}
size_t upb_TextEncode(const upb_Message* msg, const upb_MessageDef* m,
const upb_DefPool* ext_pool, int options, char* buf,
size_t size) {
@ -548,7 +212,7 @@ size_t upb_TextEncode(const upb_Message* msg, const upb_MessageDef* m,
e.ext_pool = ext_pool;
_upb_mapsorter_init(&e.sorter);
txtenc_msg(&e, msg, m);
_upb_TextEncode_Msg(&e, msg, m);
_upb_mapsorter_destroy(&e.sorter);
return txtenc_nullz(&e, size);
return UPB_PRIVATE(_upb_TextEncode_Nullz)(&e, size);
}

@ -9,6 +9,7 @@
#define UPB_TEXT_ENCODE_H_
#include "upb/reflection/def.h"
#include "upb/text/options.h" // IWYU pragma: export
// Must be last.
#include "upb/port/def.inc"
@ -17,17 +18,6 @@
extern "C" {
#endif
enum {
// When set, prints everything on a single line.
UPB_TXTENC_SINGLELINE = 1,
// When set, unknown fields are not printed.
UPB_TXTENC_SKIPUNKNOWN = 2,
// When set, maps are *not* sorted (this avoids allocating tmp mem).
UPB_TXTENC_NOSORT = 4
};
/* Encodes the given |msg| to text format. The message's reflection is given in
* |m|. The symtab in |symtab| is used to find extensions (if NULL, extensions
* will not be printed).

@ -0,0 +1,63 @@
// Protocol Buffers - Google's data interchange format
// Copyright 2024 Google LLC. All rights reserved.
//
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file or at
// https://developers.google.com/open-source/licenses/bsd
#include <stddef.h>
#include <string>
#include <gtest/gtest.h>
#include "absl/log/absl_log.h"
#include "upb/base/string_view.h"
#include "upb/base/upcast.h"
#include "upb/mem/arena.h"
#include "upb/message/message.h"
#include "upb/mini_table/message.h"
#include "upb/test/test.upb.h"
#include "upb/test/test.upb_minitable.h"
#include "upb/text/debug_string.h"
// Verifies that upb_DebugString() prints extensions keyed by field number when
// no reflection data is available.
TEST(TextNoReflection, Extensions) {
  const upb_MiniTable* mt_main = upb_0test__ModelWithExtensions_msg_init_ptr;
  upb_Arena* arena = upb_Arena_New();

  upb_test_ModelExtension1* extension1 = upb_test_ModelExtension1_new(arena);
  upb_test_ModelExtension1_set_str(extension1,
                                   upb_StringView_FromString("Hello"));

  upb_test_ModelExtension2* extension2 = upb_test_ModelExtension2_new(arena);
  upb_test_ModelExtension2_set_i(extension2, 5);

  upb_test_ModelWithExtensions* msg = upb_test_ModelWithExtensions_new(arena);
  upb_test_ModelExtension1_set_model_ext(msg, extension1, arena);
  upb_test_ModelExtension2_set_model_ext(msg, extension2, arena);

  // Convert to a type of upb_Message*
  upb_Message* input = UPB_UPCAST(msg);

  // Resizing/reallocation of the buffer is not necessary since we're only
  // testing that we get the expected debug string. The buffer lives on the
  // stack so that nothing leaks if an expectation fails.
  constexpr size_t kBufSize = 100;
  char buf[kBufSize] = {};
  int options =
      UPB_TXTENC_NOSORT;  // Does not matter, but maps will not be sorted.
  size_t real_size = upb_DebugString(input, mt_main, options, buf, kBufSize);

  // Defensive: guarantee termination before the buffer is read as a C string.
  buf[kBufSize - 1] = '\0';

  ABSL_LOG(INFO) << "Buffer: \n"
                 << buf << "\n"
                 << "Size:" << real_size << "\n";

  std::string golden = R"([4135] {
9: 5
}
[1547] {
25: "Hello"
}
)";

  // Non-fatal expectations are used so the arena is always released below.
  // The output must have fit, otherwise `buf[real_size]` is out of bounds.
  EXPECT_LT(real_size, kBufSize);
  if (real_size < kBufSize) {
    EXPECT_EQ(buf[real_size], '\0');
  }
  std::string str(buf);
  EXPECT_EQ(str, golden);

  upb_Arena_Free(arena);
}

@ -0,0 +1,180 @@
// Protocol Buffers - Google's data interchange format
// Copyright 2024 Google LLC. All rights reserved.
//
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file or at
// https://developers.google.com/open-source/licenses/bsd
#include "upb/text/internal/encode.h"
#include <inttypes.h>
#include <stddef.h>
#include <stdint.h>
#include "upb/base/descriptor_constants.h"
#include "upb/base/string_view.h"
#include "upb/lex/round_trip.h"
#include "upb/message/array.h"
#include "upb/wire/eps_copy_input_stream.h"
#include "upb/wire/reader.h"
#include "upb/wire/types.h"
// Must be last.
#include "upb/port/def.inc"
// Bails out of the enclosing function with NULL (its failure value) when `x`
// evaluates to zero/NULL.
#define CHK(x)     \
  do {             \
    if (!(x)) {    \
      return NULL; \
    }              \
  } while (0)

/*
 * Unknown fields are printed by number.
 *
 * 1001: 123
 * 1002: "hello"
 * 1006: 0xdeadbeef
 * 1003: {
 *   1: 111
 * }
 *
 * Reads wire-format data from `stream` starting at `ptr` and prints one entry
 * per tag. `groupnum` is the field number of the group being printed, or a
 * non-positive value when not inside a group. Returns the read position after
 * the consumed data, or NULL if the data could not be parsed (including
 * reaching the end of the stream while still inside a group).
 */
const char* UPB_PRIVATE(_upb_TextEncode_Unknown)(txtenc* e, const char* ptr,
                                                 upb_EpsCopyInputStream* stream,
                                                 int groupnum) {
  // We are guaranteed that the unknown data is valid wire format, and will not
  // contain tag zero.
  uint32_t end_group = groupnum > 0
                           ? ((groupnum << kUpb_WireReader_WireTypeBits) |
                              kUpb_WireType_EndGroup)
                           : 0;

  while (!upb_EpsCopyInputStream_IsDone(stream, &ptr)) {
    uint32_t tag;
    CHK(ptr = upb_WireReader_ReadTag(ptr, &tag));
    if (tag == end_group) return ptr;

    UPB_PRIVATE(_upb_TextEncode_Indent)(e);
    UPB_PRIVATE(_upb_TextEncode_Printf)
    (e, "%d: ", (int)upb_WireReader_GetFieldNumber(tag));

    switch (upb_WireReader_GetWireType(tag)) {
      case kUpb_WireType_Varint: {
        uint64_t val;
        CHK(ptr = upb_WireReader_ReadVarint(ptr, &val));
        UPB_PRIVATE(_upb_TextEncode_Printf)(e, "%" PRIu64, val);
        break;
      }
      case kUpb_WireType_32Bit: {
        uint32_t val;
        ptr = upb_WireReader_ReadFixed32(ptr, &val);
        // Fixed-width values carry a "0x" prefix, so the digits must be hex.
        UPB_PRIVATE(_upb_TextEncode_Printf)(e, "0x%08" PRIx32, val);
        break;
      }
      case kUpb_WireType_64Bit: {
        uint64_t val;
        ptr = upb_WireReader_ReadFixed64(ptr, &val);
        // Fixed-width values carry a "0x" prefix, so the digits must be hex.
        UPB_PRIVATE(_upb_TextEncode_Printf)(e, "0x%016" PRIx64, val);
        break;
      }
      case kUpb_WireType_Delimited: {
        int size;
        // Remember the output position so the speculative output below can be
        // discarded if the payload turns out not to be a message.
        char* start = e->ptr;
        size_t start_overflow = e->overflow;
        CHK(ptr = upb_WireReader_ReadSize(ptr, &size));
        CHK(upb_EpsCopyInputStream_CheckDataSizeAvailable(stream, ptr, size));

        // Speculatively try to parse as message.
        UPB_PRIVATE(_upb_TextEncode_PutStr)(e, "{");
        UPB_PRIVATE(_upb_TextEncode_EndField)(e);

        // EpsCopyInputStream can't back up, so create a sub-stream for the
        // speculative parse.
        upb_EpsCopyInputStream sub_stream;
        const char* sub_ptr = upb_EpsCopyInputStream_GetAliasedPtr(stream, ptr);
        upb_EpsCopyInputStream_Init(&sub_stream, &sub_ptr, size, true);

        e->indent_depth++;
        if (UPB_PRIVATE(_upb_TextEncode_Unknown)(e, sub_ptr, &sub_stream, -1)) {
          ptr = upb_EpsCopyInputStream_Skip(stream, ptr, size);
          e->indent_depth--;
          UPB_PRIVATE(_upb_TextEncode_Indent)(e);
          UPB_PRIVATE(_upb_TextEncode_PutStr)(e, "}");
        } else {
          // Didn't work out, print as raw bytes.
          e->indent_depth--;
          e->ptr = start;
          e->overflow = start_overflow;
          const char* str = ptr;
          ptr = upb_EpsCopyInputStream_ReadString(stream, &str, size, NULL);
          UPB_ASSERT(ptr);
          UPB_PRIVATE(_upb_TextEncode_Bytes)
          (e, (upb_StringView){.data = str, .size = size});
        }
        break;
      }
      case kUpb_WireType_StartGroup:
        UPB_PRIVATE(_upb_TextEncode_PutStr)(e, "{");
        UPB_PRIVATE(_upb_TextEncode_EndField)(e);
        e->indent_depth++;
        CHK(ptr = UPB_PRIVATE(_upb_TextEncode_Unknown)(
                e, ptr, stream, upb_WireReader_GetFieldNumber(tag)));
        e->indent_depth--;
        UPB_PRIVATE(_upb_TextEncode_Indent)(e);
        UPB_PRIVATE(_upb_TextEncode_PutStr)(e, "}");
        break;
      default:
        // EndGroup without a matching StartGroup, or an unrecognized wire type.
        return NULL;
    }
    UPB_PRIVATE(_upb_TextEncode_EndField)(e);
  }

  // Running out of data is only a success when we are not inside a group.
  return end_group == 0 && !upb_EpsCopyInputStream_IsError(stream) ? ptr : NULL;
}

#undef CHK
// Prints the scalar `val`, interpreted according to `ctype`, with no field
// label and no trailing separator. Enum values are not accepted here: each
// encoder prints them itself.
void UPB_PRIVATE(_upb_TextEncode_Scalar)(txtenc* e, upb_MessageValue val,
                                         upb_CType ctype) {
  // Scratch space for the round-trip float/double formatters.
  char number[32];

  switch (ctype) {
    case kUpb_CType_Bool: {
      const char* word = val.bool_val ? "true" : "false";
      UPB_PRIVATE(_upb_TextEncode_PutStr)(e, word);
      return;
    }
    case kUpb_CType_Float:
      _upb_EncodeRoundTripFloat(val.float_val, number, sizeof(number));
      UPB_PRIVATE(_upb_TextEncode_PutStr)(e, number);
      return;
    case kUpb_CType_Double:
      _upb_EncodeRoundTripDouble(val.double_val, number, sizeof(number));
      UPB_PRIVATE(_upb_TextEncode_PutStr)(e, number);
      return;
    case kUpb_CType_Int32:
      UPB_PRIVATE(_upb_TextEncode_Printf)(e, "%" PRId32, val.int32_val);
      return;
    case kUpb_CType_UInt32:
      UPB_PRIVATE(_upb_TextEncode_Printf)(e, "%" PRIu32, val.uint32_val);
      return;
    case kUpb_CType_Int64:
      UPB_PRIVATE(_upb_TextEncode_Printf)(e, "%" PRId64, val.int64_val);
      return;
    case kUpb_CType_UInt64:
      UPB_PRIVATE(_upb_TextEncode_Printf)(e, "%" PRIu64, val.uint64_val);
      return;
    case kUpb_CType_String: {
      const upb_StringView text = val.str_val;
      UPB_PRIVATE(_upb_HardenedPrintString)(e, text.data, text.size);
      return;
    }
    case kUpb_CType_Bytes:
      UPB_PRIVATE(_upb_TextEncode_Bytes)(e, val.str_val);
      return;
    case kUpb_CType_Enum:
      UPB_ASSERT(false);  // handled separately in each encoder
      return;
    default:
      UPB_UNREACHABLE();
  }
}

@ -0,0 +1,240 @@
// Protocol Buffers - Google's data interchange format
// Copyright 2023 Google LLC. All rights reserved.
//
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file or at
// https://developers.google.com/open-source/licenses/bsd
#ifndef UPB_TEXT_ENCODE_INTERNAL_H_
#define UPB_TEXT_ENCODE_INTERNAL_H_
#include <stdarg.h>
#include <string.h>
#include "upb/base/descriptor_constants.h"
#include "upb/base/string_view.h"
#include "upb/message/array.h"
#include "upb/message/internal/map_sorter.h"
#include "upb/port/vsnprintf_compat.h"
#include "upb/text/options.h"
#include "upb/wire/eps_copy_input_stream.h"
#include "utf8_range.h"
// Must be last.
#include "upb/port/def.inc"
// State shared by the text encoders while they write into a fixed-size output
// buffer.
typedef struct {
// Output window: `buf` is the start of the buffer, `ptr` is the next write
// position, and `end` is the limit that writes never go past.
char *buf, *ptr, *end;
// Number of bytes that were requested but did not fit before `end`. Added to
// the written length when the total output size is reported.
size_t overflow;
// Current nesting level; the indent helper emits one indent unit per level.
int indent_depth;
// Bitwise OR of UPB_TXTENC_* flags.
int options;
// Definition pool handed to message iteration by the reflection-based
// encoder; per the upb_TextEncode() docs it is used to find extensions.
const struct upb_DefPool* ext_pool;
// Scratch state for printing map entries in sorted order.
_upb_mapsorter sorter;
} txtenc;
// Appends `len` bytes from `data` to the output. Whatever does not fit in the
// remaining buffer space is dropped and counted in `e->overflow`.
UPB_INLINE void UPB_PRIVATE(_upb_TextEncode_PutBytes)(txtenc* e,
                                                      const void* data,
                                                      size_t len) {
  const size_t room = (size_t)(e->end - e->ptr);

  // Common case: everything fits.
  if (UPB_LIKELY(len <= room)) {
    memcpy(e->ptr, data, len);
    e->ptr += len;
    return;
  }

  // Fill what is left of the buffer, then record the shortfall.
  if (room != 0) {
    memcpy(e->ptr, data, room);
    e->ptr += room;
  }
  e->overflow += len - room;
}
// Appends the NUL-terminated string `str` (without its terminator) to the
// output.
UPB_INLINE void UPB_PRIVATE(_upb_TextEncode_PutStr)(txtenc* e,
                                                    const char* str) {
  const size_t length = strlen(str);
  UPB_PRIVATE(_upb_TextEncode_PutBytes)(e, str, length);
}
// printf-style append to the output. The text is formatted directly into the
// remaining buffer space; if it does not fit completely, the buffer is treated
// as full and the shortfall is added to `e->overflow`.
UPB_INLINE void UPB_PRIVATE(_upb_TextEncode_Printf)(txtenc* e, const char* fmt,
                                                    ...) {
  va_list ap;
  const size_t room = (size_t)(e->end - e->ptr);

  va_start(ap, fmt);
  const size_t needed = _upb_vsnprintf(e->ptr, room, fmt, ap);
  va_end(ap);

  // Strictly less than: the formatter also needs one byte for its NUL.
  if (UPB_LIKELY(needed < room)) {
    e->ptr += needed;
    return;
  }

  e->ptr = UPB_PTRADD(e->ptr, room);
  e->overflow += needed - room;
}
// Emits the indentation for the current nesting depth: one indent unit per
// level. Nothing is emitted in single-line mode.
UPB_INLINE void UPB_PRIVATE(_upb_TextEncode_Indent)(txtenc* e) {
  if (e->options & UPB_TXTENC_SINGLELINE) return;

  for (int level = 0; level < e->indent_depth; level++) {
    UPB_PRIVATE(_upb_TextEncode_PutStr)(e, " ");
  }
}
// Emits the separator that follows a field: a space in single-line mode, a
// newline otherwise.
UPB_INLINE void UPB_PRIVATE(_upb_TextEncode_EndField)(txtenc* e) {
  const char* separator = (e->options & UPB_TXTENC_SINGLELINE) ? " " : "\n";
  UPB_PRIVATE(_upb_TextEncode_PutStr)(e, separator);
}
// Writes `ch` in escaped form: a two-character backslash escape for newline,
// carriage return, tab, both quote characters and backslash, and a
// three-digit octal escape for every other byte.
UPB_INLINE void UPB_PRIVATE(_upb_TextEncode_Escaped)(txtenc* e,
                                                     unsigned char ch) {
  // Short escape for `ch`, or NULL when only the octal form applies.
  const char* named = NULL;

  switch (ch) {
    case '\n':
      named = "\\n";
      break;
    case '\r':
      named = "\\r";
      break;
    case '\t':
      named = "\\t";
      break;
    case '\"':
      named = "\\\"";
      break;
    case '\'':
      named = "\\'";
      break;
    case '\\':
      named = "\\\\";
      break;
    default:
      break;
  }

  if (named != NULL) {
    UPB_PRIVATE(_upb_TextEncode_PutStr)(e, named);
  } else {
    UPB_PRIVATE(_upb_TextEncode_Printf)(e, "\\%03o", ch);
  }
}
// Returns true if `ch` needs to be escaped in TextFormat, independent of any
// UTF-8 validity issues.
UPB_INLINE bool UPB_PRIVATE(_upb_DefinitelyNeedsEscape)(unsigned char ch) {
  // Bytes below 32 and byte 127 (DEL), plus the three characters that are
  // special inside a quoted string.
  return ch < 32 || ch == 127 || ch == '\"' || ch == '\'' || ch == '\\';
}
// Returns true if `ch` lies in the printable ASCII range, 32 (space) through
// 126 ('~') inclusive.
UPB_INLINE bool UPB_PRIVATE(_upb_AsciiIsPrint)(unsigned char ch) {
  return 32 <= ch && ch <= 126;
}
// Returns true if this is a high byte that requires UTF-8 validation. If the
// UTF-8 validation fails, we must escape the byte.
UPB_INLINE bool UPB_PRIVATE(_upb_NeedsUtf8Validation)(unsigned char ch) {
  // A byte is "high" (outside 7-bit ASCII) exactly when its top bit is set.
  return (ch & 0x80) != 0;
}
// Returns the number of bytes in the prefix of `val` that do not need escaping.
// This is like utf8_range::SpanStructurallyValid(), except that it also
// terminates at any ASCII char that needs to be escaped in TextFormat (any char
// that has `DefinitelyNeedsEscape(ch) == true`).
//
// If we could get a variant of utf8_range::SpanStructurallyValid() that could
// terminate on any of these chars, that might be more efficient, but it would
// be much more complicated to modify that heavily SIMD code.
UPB_INLINE size_t UPB_PRIVATE(_SkipPassthroughBytes)(const char* ptr,
                                                     size_t size) {
  size_t pos = 0;
  while (pos < size) {
    const unsigned char byte = ptr[pos];

    // An ASCII char that always needs escaping ends the passthrough prefix.
    if (UPB_PRIVATE(_upb_DefinitelyNeedsEscape)(byte)) return pos;

    // Plain ASCII that can be copied verbatim: just step over it.
    if (!UPB_PRIVATE(_upb_NeedsUtf8Validation)(byte)) {
      pos++;
      continue;
    }

    // We hit a high byte. Collect the whole run of consecutive high bytes so
    // that only those are handed to the UTF-8 checker; the ASCII bytes around
    // the run are never scanned twice. High bytes are assumed to be rarer than
    // printable ASCII, so scanning them twice is an acceptable cost.
    size_t run_end = pos + 1;
    while (run_end < size &&
           UPB_PRIVATE(_upb_NeedsUtf8Validation)(ptr[run_end])) {
      run_end++;
    }

    const size_t run_len = run_end - pos;
    const size_t valid = utf8_range_ValidPrefix(ptr + pos, run_len);

    // Stop at the first byte of the run that is not part of valid UTF-8.
    if (valid != run_len) return pos + valid;

    // The whole run validated; resume scanning right after it.
    pos = run_end;
  }
  return size;
}
// Emits `len` bytes starting at `ptr` as a double-quoted string. Bytes that
// _SkipPassthroughBytes() accepts are copied through verbatim; every other
// byte (including any byte of invalid UTF-8) is written in escaped form.
//
// The UTF-8 check could be dropped if string fields were ever guaranteed to
// hold valid UTF-8.
UPB_INLINE void UPB_PRIVATE(_upb_HardenedPrintString)(txtenc* e,
                                                      const char* ptr,
                                                      size_t len) {
  size_t remaining = len;

  UPB_PRIVATE(_upb_TextEncode_PutStr)(e, "\"");
  while (remaining > 0) {
    // Copy the longest prefix that needs no escaping in a single call.
    const size_t clean = UPB_PRIVATE(_SkipPassthroughBytes)(ptr, remaining);
    if (clean > 0) {
      UPB_PRIVATE(_upb_TextEncode_PutBytes)(e, ptr, clean);
      ptr += clean;
      remaining -= clean;
    }
    if (remaining == 0) break;

    // The byte at `ptr` stopped the scan, so it must be escaped. Escaping is
    // done one byte at a time; batching is possible but adds complexity.
    UPB_PRIVATE(_upb_TextEncode_Escaped)(e, *ptr);
    ptr++;
    remaining--;
  }
  UPB_PRIVATE(_upb_TextEncode_PutStr)(e, "\"");
}
// Emits `data` as a double-quoted string in which printable ASCII bytes are
// copied as-is and every other byte is written in escaped form. No UTF-8
// interpretation is attempted.
UPB_INLINE void UPB_PRIVATE(_upb_TextEncode_Bytes)(txtenc* e,
                                                   upb_StringView data) {
  UPB_PRIVATE(_upb_TextEncode_PutStr)(e, "\"");
  for (size_t i = 0; i < data.size; i++) {
    const unsigned char byte = data.data[i];
    if (!UPB_PRIVATE(_upb_AsciiIsPrint)(byte)) {
      UPB_PRIVATE(_upb_TextEncode_Escaped)(e, byte);
      continue;
    }
    UPB_PRIVATE(_upb_TextEncode_PutBytes)(e, &data.data[i], 1);
  }
  UPB_PRIVATE(_upb_TextEncode_PutStr)(e, "\"");
}
// NUL-terminates the output held by `e` and returns the number of bytes the
// full output required (bytes written plus bytes that overflowed). `size` is
// the capacity of the output buffer; when it is zero nothing is written.
UPB_INLINE size_t UPB_PRIVATE(_upb_TextEncode_Nullz)(txtenc* e, size_t size) {
  const size_t total = (size_t)(e->ptr - e->buf) + e->overflow;
  if (size == 0) return total;

  // If the buffer is completely full, give up the last byte for the
  // terminator.
  if (e->ptr == e->end) --e->ptr;
  *e->ptr = '\0';
  return total;
}
// Prints the unknown-field wire data read from `stream` starting at `ptr`,
// labelling each entry with its field number. `groupnum` is the field number
// of the group being printed, or a non-positive value when not inside a group.
// Returns the read position after the consumed data, or NULL if the data
// could not be parsed.
const char* UPB_PRIVATE(_upb_TextEncode_Unknown)(txtenc* e, const char* ptr,
upb_EpsCopyInputStream* stream,
int groupnum);
// Must not be called for ctype = kUpb_CType_Enum, as they require different
// handling depending on whether or not we're doing reflection-based encoding.
void UPB_PRIVATE(_upb_TextEncode_Scalar)(txtenc* e, upb_MessageValue val,
upb_CType ctype);
#include "upb/port/undef.inc"
#endif // UPB_TEXT_ENCODE_INTERNAL_H_

@ -0,0 +1,22 @@
// Protocol Buffers - Google's data interchange format
// Copyright 2024 Google LLC. All rights reserved.
//
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file or at
// https://developers.google.com/open-source/licenses/bsd
#ifndef UPB_TEXT_OPTIONS_H_
#define UPB_TEXT_OPTIONS_H_
// Option flags accepted by the text encoders. Each flag occupies its own bit,
// so flags can be combined with bitwise OR; the encoders test them with
// bitwise AND.
enum {
// When set, prints everything on a single line.
UPB_TXTENC_SINGLELINE = 1,
// When set, unknown fields are not printed.
UPB_TXTENC_SKIPUNKNOWN = 2,
// When set, maps are *not* sorted (this avoids allocating tmp mem).
UPB_TXTENC_NOSORT = 4
};
#endif // UPB_TEXT_OPTIONS_H_
Loading…
Cancel
Save