Protocol Buffers - Google's data interchange format (grpc依赖)
https://developers.google.com/protocol-buffers/
You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
244 lines
7.7 KiB
244 lines
7.7 KiB
// Protocol Buffers - Google's data interchange format |
|
// Copyright 2023 Google LLC. All rights reserved. |
|
// |
|
// Use of this source code is governed by a BSD-style |
|
// license that can be found in the LICENSE file or at |
|
// https://developers.google.com/open-source/licenses/bsd |
|
|
|
#ifndef UPB_TEXT_ENCODE_INTERNAL_H_ |
|
#define UPB_TEXT_ENCODE_INTERNAL_H_ |
|
|
|
#include <stdarg.h> |
|
#include <string.h> |
|
|
|
#include "upb/base/descriptor_constants.h" |
|
#include "upb/base/string_view.h" |
|
#include "upb/message/array.h" |
|
#include "upb/message/internal/map_sorter.h" |
|
#include "upb/message/message.h" |
|
#include "upb/port/vsnprintf_compat.h" |
|
#include "upb/text/options.h" |
|
#include "upb/wire/eps_copy_input_stream.h" |
|
#include "utf8_range.h" |
|
|
|
// Must be last. |
|
#include "upb/port/def.inc" |
|
|
|
typedef struct { |
|
char *buf, *ptr, *end; |
|
size_t overflow; |
|
int indent_depth; |
|
int options; |
|
const struct upb_DefPool* ext_pool; |
|
_upb_mapsorter sorter; |
|
} txtenc; |
|
|
|
// Appends `len` bytes starting at `data` to the output.
//
// As many bytes as fit in the remaining buffer space are copied; the number of
// bytes that did not fit is added to `e->overflow` so that the full output
// length can still be computed afterwards.
UPB_INLINE void UPB_PRIVATE(_upb_TextEncode_PutBytes)(txtenc* e,
                                                      const void* data,
                                                      size_t len) {
  size_t have = e->end - e->ptr;
  size_t n = UPB_LIKELY(have >= len) ? len : have;

  // Never call memcpy() for a zero-length copy: passing a null pointer to
  // memcpy() is undefined behavior even when the length is zero, which would
  // otherwise be possible here if `data` or `e->ptr` is null.
  if (n != 0) {
    memcpy(e->ptr, data, n);
    e->ptr += n;
  }

  // Zero when everything fit; otherwise the number of bytes dropped.
  e->overflow += len - n;
}
|
|
|
// Appends the NUL-terminated string `str` to the output. The terminator itself
// is not written.
UPB_INLINE void UPB_PRIVATE(_upb_TextEncode_PutStr)(txtenc* e,
                                                    const char* str) {
  const size_t length = strlen(str);
  UPB_PRIVATE(_upb_TextEncode_PutBytes)(e, str, length);
}
|
|
|
// Appends printf-style formatted text to the output.
//
// The text is formatted directly into the remaining buffer space. If it does
// not fit completely, the write cursor is moved to the end of the buffer and
// the shortfall is added to `e->overflow`. If formatting fails, nothing is
// appended.
UPB_INLINE void UPB_PRIVATE(_upb_TextEncode_Printf)(txtenc* e, const char* fmt,
                                                    ...) {
  size_t have = e->end - e->ptr;
  va_list args;

  va_start(args, fmt);
  // NOTE(review): assumes _upb_vsnprintf() follows the vsnprintf() contract of
  // returning an `int` that is negative on failure -- confirm against
  // upb/port/vsnprintf_compat.h.
  int ret = _upb_vsnprintf(e->ptr, have, fmt, args);
  va_end(args);

  // A negative result must not be converted to size_t: it would become a huge
  // length, jump the cursor to the end of the buffer and corrupt the overflow
  // count.
  if (ret < 0) return;

  size_t n = (size_t)ret;

  // The comparison is strict because vsnprintf() reserves one byte for the
  // terminating NUL: when `n == have` the last character was not stored.
  if (UPB_LIKELY(have > n)) {
    e->ptr += n;
  } else {
    e->ptr = UPB_PTRADD(e->ptr, have);
    e->overflow += (n - have);
  }
}
|
|
|
// Writes the indentation for the current nesting depth: one indentation string
// per level of `e->indent_depth`. Writes nothing in single-line mode.
UPB_INLINE void UPB_PRIVATE(_upb_TextEncode_Indent)(txtenc* e) {
  if ((e->options & UPB_TXTENC_SINGLELINE) != 0) return;

  const int depth = e->indent_depth;
  for (int level = 0; level < depth; level++) {
    UPB_PRIVATE(_upb_TextEncode_PutStr)(e, " ");
  }
}
|
|
|
// Writes the separator that follows a field: a space in single-line mode,
// otherwise a newline.
UPB_INLINE void UPB_PRIVATE(_upb_TextEncode_EndField)(txtenc* e) {
  const char* separator = (e->options & UPB_TXTENC_SINGLELINE) ? " " : "\n";
  UPB_PRIVATE(_upb_TextEncode_PutStr)(e, separator);
}
|
|
|
// Writes the escaped form of the byte `ch`.
//
// Newline, carriage return, tab, double quote, single quote and backslash get
// their two-character backslash escapes; every other byte is written as a
// backslash followed by three octal digits.
UPB_INLINE void UPB_PRIVATE(_upb_TextEncode_Escaped)(txtenc* e,
                                                     unsigned char ch) {
  const char* named = NULL;

  switch (ch) {
    case '\n': named = "\\n";  break;
    case '\r': named = "\\r";  break;
    case '\t': named = "\\t";  break;
    case '\"': named = "\\\""; break;
    case '\'': named = "\\'";  break;
    case '\\': named = "\\\\"; break;
    default:                   break;
  }

  if (named != NULL) {
    UPB_PRIVATE(_upb_TextEncode_PutStr)(e, named);
  } else {
    // No short escape exists for this byte: fall back to octal.
    UPB_PRIVATE(_upb_TextEncode_Printf)(e, "\\%03o", ch);
  }
}
|
|
|
// Returns true if `ch` needs to be escaped in TextFormat, independent of any |
|
// UTF-8 validity issues. |
|
UPB_INLINE bool UPB_PRIVATE(_upb_DefinitelyNeedsEscape)(unsigned char ch) { |
|
if (ch < 32) return true; |
|
switch (ch) { |
|
case '\"': |
|
case '\'': |
|
case '\\': |
|
case 127: |
|
return true; |
|
} |
|
return false; |
|
} |
|
|
|
UPB_INLINE bool UPB_PRIVATE(_upb_AsciiIsPrint)(unsigned char ch) { |
|
return ch >= 32 && ch < 127; |
|
} |
|
|
|
// Returns true if this is a high byte that requires UTF-8 validation. If the |
|
// UTF-8 validation fails, we must escape the byte. |
|
UPB_INLINE bool UPB_PRIVATE(_upb_NeedsUtf8Validation)(unsigned char ch) { |
|
return ch > 127; |
|
} |
|
|
|
// Returns the number of bytes in the prefix of `val` that do not need escaping. |
|
// This is like utf8_range::SpanStructurallyValid(), except that it also |
|
// terminates at any ASCII char that needs to be escaped in TextFormat (any char |
|
// that has `DefinitelyNeedsEscape(ch) == true`). |
|
// |
|
// If we could get a variant of utf8_range::SpanStructurallyValid() that could |
|
// terminate on any of these chars, that might be more efficient, but it would |
|
// be much more complicated to modify that heavily SIMD code. |
|
UPB_INLINE size_t UPB_PRIVATE(_SkipPassthroughBytes)(const char* ptr, |
|
size_t size) { |
|
for (size_t i = 0; i < size; i++) { |
|
unsigned char uc = ptr[i]; |
|
if (UPB_PRIVATE(_upb_DefinitelyNeedsEscape)(uc)) return i; |
|
if (UPB_PRIVATE(_upb_NeedsUtf8Validation)(uc)) { |
|
// Find the end of this region of consecutive high bytes, so that we only |
|
// give high bytes to the UTF-8 checker. This avoids needing to perform |
|
// a second scan of the ASCII characters looking for characters that |
|
// need escaping. |
|
// |
|
// We assume that high bytes are less frequent than plain, printable ASCII |
|
// bytes, so we accept the double-scan of high bytes. |
|
size_t end = i + 1; |
|
for (; end < size; end++) { |
|
if (!UPB_PRIVATE(_upb_NeedsUtf8Validation)(ptr[end])) break; |
|
} |
|
size_t n = end - i; |
|
size_t ok = utf8_range_ValidPrefix(ptr + i, n); |
|
if (ok != n) return i + ok; |
|
i += ok - 1; |
|
} |
|
} |
|
return size; |
|
} |
|
|
|
// Writes the `len` bytes at `ptr` as a double-quoted string.
//
// The bytes are emitted as UTF-8 while guarding against invalid UTF-8 in the
// input: stretches that need no escaping are copied verbatim, and every byte
// at which such a stretch stops is written in escaped form.
//
// If a future invariant guarantees that invalid UTF-8 can never be present,
// the UTF-8 check could be dropped.
UPB_INLINE void UPB_PRIVATE(_upb_HardenedPrintString)(txtenc* e,
                                                      const char* ptr,
                                                      size_t len) {
  const char* const limit = ptr + len;

  UPB_PRIVATE(_upb_TextEncode_PutStr)(e, "\"");

  while (ptr < limit) {
    const size_t passthrough =
        UPB_PRIVATE(_SkipPassthroughBytes)(ptr, limit - ptr);

    if (passthrough > 0) {
      UPB_PRIVATE(_upb_TextEncode_PutBytes)(e, ptr, passthrough);
      ptr += passthrough;
    }

    // Anything left at this point stopped the scan and so must be escaped.
    //
    // If escaping one byte at a time turns out to be expensive, the escapes
    // could be batched at the cost of some complexity.
    if (ptr < limit) {
      UPB_PRIVATE(_upb_TextEncode_Escaped)(e, (unsigned char)*ptr);
      ptr++;
    }
  }

  UPB_PRIVATE(_upb_TextEncode_PutStr)(e, "\"");
}
|
|
|
UPB_INLINE void UPB_PRIVATE(_upb_TextEncode_Bytes)(txtenc* e, |
|
upb_StringView data) { |
|
const char* ptr = data.data; |
|
const char* end = ptr + data.size; |
|
UPB_PRIVATE(_upb_TextEncode_PutStr)(e, "\""); |
|
for (; ptr < end; ptr++) { |
|
unsigned char uc = *ptr; |
|
if (UPB_PRIVATE(_upb_AsciiIsPrint)(uc)) { |
|
UPB_PRIVATE(_upb_TextEncode_PutBytes)(e, ptr, 1); |
|
} else { |
|
UPB_PRIVATE(_upb_TextEncode_Escaped)(e, uc); |
|
} |
|
} |
|
UPB_PRIVATE(_upb_TextEncode_PutStr)(e, "\""); |
|
} |
|
|
|
// NUL-terminates the output and returns the total output length: the number of
// bytes stored in the buffer plus the number of bytes that overflowed. The
// terminator is not included in the returned length.
//
// Nothing is written when `size` is zero. When the buffer is completely full,
// the last stored byte is replaced by the terminator.
//
// NOTE(review): `size` is presumably the capacity of `e->buf` -- confirm
// against callers.
UPB_INLINE size_t UPB_PRIVATE(_upb_TextEncode_Nullz)(txtenc* e, size_t size) {
  const size_t total = (size_t)(e->ptr - e->buf) + e->overflow;

  if (size == 0) return total;

  // No free byte left: step back so the terminator stays inside the buffer.
  if (e->ptr == e->end) e->ptr--;
  *e->ptr = '\0';

  return total;
}
|
|
|
const char* UPB_PRIVATE(_upb_TextEncode_Unknown)(txtenc* e, const char* ptr, |
|
upb_EpsCopyInputStream* stream, |
|
int groupnum); |
|
|
|
void UPB_PRIVATE(_upb_TextEncode_ParseUnknown)(txtenc* e, |
|
const upb_Message* msg); |
|
|
|
// Must not be called for ctype = kUpb_CType_Enum, as they require different |
|
// handling depending on whether or not we're doing reflection-based encoding. |
|
void UPB_PRIVATE(_upb_TextEncode_Scalar)(txtenc* e, upb_MessageValue val, |
|
upb_CType ctype); |
|
|
|
#include "upb/port/undef.inc" |
|
|
|
#endif // UPB_TEXT_ENCODE_INTERNAL_H_
|
|
|