Consolidate some general parsing functions into upb/lex/

There are several other functions which might eventually end up here and, ideally, become unified across json/, text/, and io/. This is just a first step: it creates the new subdirectory and gets rid of upb/internal/.

PiperOrigin-RevId: 488954926
pull/13171/head
Eric Salo 2 years ago committed by Copybara-Service
parent afd43b8ba4
commit 4d3998b54b
  1. 44
      BUILD
  2. 14
      upb/io/BUILD
  3. 6
      upb/io/tokenizer.c
  4. 2
      upb/io/tokenizer_test.cc
  5. 4
      upb/json/decode.c
  6. 2
      upb/json/encode.c
  7. 2
      upb/lex/atoi.c
  8. 6
      upb/lex/atoi.h
  9. 2
      upb/lex/atoi_test.cc
  10. 4
      upb/lex/round_trip.c
  11. 17
      upb/lex/round_trip.h
  12. 4
      upb/lex/strtod.c
  13. 8
      upb/lex/strtod.h
  14. 2
      upb/lex/unicode.c
  15. 6
      upb/lex/unicode.h
  16. 2
      upb/text/encode.c

44
BUILD

@ -114,10 +114,8 @@ cc_library(
srcs = [
"upb/collections/map_sorter_internal.h",
"upb/collections/message_value.h",
"upb/internal/unicode.h",
"upb/msg.c",
"upb/msg_internal.h",
"upb/upb.c",
"upb/wire/decode.c",
"upb/wire/encode.c",
"upb/wire/swap_internal.h",
@ -152,9 +150,9 @@ cc_library(
":extension_registry",
":fastdecode",
":hash",
":lex",
":mem",
":port",
":unicode_internal",
":wire_internal",
],
)
@ -571,9 +569,9 @@ cc_library(
visibility = ["//visibility:public"],
deps = [
":collections_internal",
":lex",
":port",
":reflection",
":wire_internal",
],
)
@ -594,13 +592,11 @@ cc_library(
copts = UPB_DEFAULT_COPTS,
visibility = ["//visibility:public"],
deps = [
":atoi_internal",
":collections",
":lex",
":port",
":reflection",
":unicode_internal",
":upb",
":wire_internal",
],
)
@ -812,10 +808,10 @@ upb_proto_reflection_library(
cc_test(
name = "atoi_test",
srcs = ["upb/internal/atoi_test.cc"],
srcs = ["upb/lex/atoi_test.cc"],
copts = UPB_DEFAULT_CPPOPTS,
deps = [
":atoi_internal",
":lex",
"@com_google_absl//absl/strings",
"@com_google_googletest//:gtest_main",
],
@ -1022,15 +1018,6 @@ cc_library(
deps = [":port"],
)
cc_library(
name = "atoi_internal",
srcs = ["upb/internal/atoi.c"],
hdrs = ["upb/internal/atoi.h"],
copts = UPB_DEFAULT_COPTS,
visibility = ["//:__subpackages__"],
deps = [":port"],
)
cc_library(
name = "wire_internal",
srcs = [
@ -1042,7 +1029,6 @@ cc_library(
hdrs = [
"upb/wire/common_internal.h",
"upb/wire/decode_internal.h",
"upb/wire/encode_internal.h",
"upb/wire/swap_internal.h",
],
copts = UPB_DEFAULT_COPTS,
@ -1077,12 +1063,18 @@ cc_library(
)
cc_library(
name = "unicode_internal",
name = "lex",
srcs = [
"upb/internal/unicode.c",
"upb/lex/atoi.c",
"upb/lex/round_trip.c",
"upb/lex/strtod.c",
"upb/lex/unicode.c",
],
hdrs = [
"upb/internal/unicode.h",
"upb/lex/atoi.h",
"upb/lex/round_trip.h",
"upb/lex/strtod.h",
"upb/lex/unicode.h",
],
copts = UPB_DEFAULT_COPTS,
visibility = ["//:__subpackages__"],
@ -1100,13 +1092,13 @@ upb_amalgamation(
"upb.h",
],
libs = [
":atoi_internal",
":base",
":collections_internal",
":descriptor_upb_proto",
":extension_registry",
":fastdecode",
":hash",
":lex",
":mem_internal",
":mini_table",
":mini_table_accessors",
@ -1134,7 +1126,6 @@ upb_amalgamation(
"php-upb.h",
],
libs = [
":atoi_internal",
":base",
":collections_internal",
":descriptor_upb_proto",
@ -1143,13 +1134,13 @@ upb_amalgamation(
":fastdecode",
":hash",
":json",
":lex",
":mem_internal",
":mini_table",
":mini_table_accessors",
":port",
":reflection",
":reflection_internal",
":unicode_internal",
":upb",
":wire_internal",
],
@ -1173,7 +1164,6 @@ upb_amalgamation(
"ruby-upb.h",
],
libs = [
":atoi_internal",
":base",
":collections_internal",
":descriptor_upb_proto",
@ -1181,13 +1171,13 @@ upb_amalgamation(
":fastdecode",
":hash",
":json",
":lex",
":mem_internal",
":mini_table",
":mini_table_accessors",
":port",
":reflection",
":reflection_internal",
":unicode_internal",
":upb",
":wire_internal",
],

@ -7,27 +7,17 @@ cc_library(
],
)
cc_library(
name = "strtod",
srcs = ["strtod.c"],
hdrs = ["strtod.h"],
deps = [
"//:port",
],
)
cc_library(
name = "tokenizer",
srcs = ["tokenizer.c"],
hdrs = ["tokenizer.h"],
deps = [
":string",
":strtod",
":zero_copy_stream",
"//:base",
"//:lex",
"//:mem",
"//:port",
"//:unicode_internal",
],
)
@ -87,8 +77,8 @@ cc_test(
"@com_google_absl//absl/strings",
"@com_google_absl//absl/strings:str_format",
"//:upb",
"//:lex",
"//:port",
"//:unicode_internal",
"@com_google_googletest//:gtest_main",
],
)

@ -27,9 +27,9 @@
#include "upb/io/tokenizer.h"
#include "upb/internal/unicode.h"
#include "upb/io/string.h"
#include "upb/io/strtod.h"
#include "upb/lex/strtod.h"
#include "upb/lex/unicode.h"
// Must be included last.
#include "upb/port/def.inc"
@ -782,7 +782,7 @@ bool upb_Parse_Integer(const char* text, uint64_t max_value, uint64_t* output) {
double upb_Parse_Float(const char* text) {
char* end;
double result = NoLocaleStrtod(text, &end);
double result = _upb_NoLocaleStrtod(text, &end);
// "1e" is not a valid float, but if the tokenizer reads it, it will
// report an error but still return it as a valid token. We need to

@ -30,9 +30,9 @@
#include "gtest/gtest.h"
#include "absl/strings/escaping.h"
#include "absl/strings/str_format.h"
#include "upb/internal/unicode.h"
#include "upb/io/chunked_input_stream.h"
#include "upb/io/string.h"
#include "upb/lex/unicode.h"
#include "upb/upb.hpp"
// Must be last.

@ -36,8 +36,8 @@
#include <string.h>
#include "upb/collections/map.h"
#include "upb/internal/atoi.h"
#include "upb/internal/unicode.h"
#include "upb/lex/atoi.h"
#include "upb/lex/unicode.h"
#include "upb/reflection/message.h"
#include "upb/wire/encode.h"

@ -35,10 +35,10 @@
#include <string.h>
#include "upb/collections/map.h"
#include "upb/lex/round_trip.h"
#include "upb/port/vsnprintf_compat.h"
#include "upb/reflection/message.h"
#include "upb/wire/decode.h"
#include "upb/wire/encode_internal.h"
// Must be last.
#include "upb/port/def.inc"

@ -25,7 +25,7 @@
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "upb/internal/atoi.h"
#include "upb/lex/atoi.h"
// Must be last.
#include "upb/port/def.inc"

@ -25,8 +25,8 @@
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef UPB_INTERNAL_ATOI_H_
#define UPB_INTERNAL_ATOI_H_
#ifndef UPB_LEX_ATOI_H_
#define UPB_LEX_ATOI_H_
// Must be last.
#include "upb/port/def.inc"
@ -50,4 +50,4 @@ const char* upb_BufToInt64(const char* ptr, const char* end, int64_t* val,
#include "upb/port/undef.inc"
#endif /* UPB_INTERNAL_ATOI_H_ */
#endif /* UPB_LEX_ATOI_H_ */

@ -25,7 +25,7 @@
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "upb/internal/atoi.h"
#include "upb/lex/atoi.h"
#include "gtest/gtest.h"
#include "absl/strings/str_cat.h"

@ -25,11 +25,11 @@
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "upb/lex/round_trip.h"
#include <float.h>
#include <stdlib.h>
#include "upb/wire/encode_internal.h"
// Must be last.
#include "upb/port/def.inc"

@ -25,23 +25,24 @@
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef UPB_WIRE_ENCODE_INTERNAL_H_
#define UPB_WIRE_ENCODE_INTERNAL_H_
#ifndef UPB_LEX_ROUND_TRIP_H_
#define UPB_LEX_ROUND_TRIP_H_
// Must be last.
#include "upb/port/def.inc"
#ifdef __cplusplus
extern "C" {
#endif
// Encodes a float or double that is round-trippable, but as short as possible.
// These routines are not fully optimal (not guaranteed to be shortest), but are
// short-ish and match the implementation that has been used in protobuf since
// the beginning.
//
// The given buffer size must be at least kUpb_RoundTripBufferSize.
enum { kUpb_RoundTripBufferSize = 32 };
#ifdef __cplusplus
extern "C" {
#endif
void _upb_EncodeRoundTripDouble(double val, char* buf, size_t size);
void _upb_EncodeRoundTripFloat(float val, char* buf, size_t size);
@ -51,4 +52,4 @@ void _upb_EncodeRoundTripFloat(float val, char* buf, size_t size);
#include "upb/port/undef.inc"
#endif /* UPB_WIRE_ENCODE_INTERNAL_H_ */
#endif /* UPB_LEX_ROUND_TRIP_H_ */

@ -25,7 +25,7 @@
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "upb/io/strtod.h"
#include "upb/lex/strtod.h"
#include <stdlib.h>
#include <string.h>
@ -63,7 +63,7 @@ static void LocalizeRadix(const char *input, const char *pos, char *output) {
strcpy(output + len1 + len2, input + len1 + 1);
}
double NoLocaleStrtod(const char *str, char **endptr) {
double _upb_NoLocaleStrtod(const char *str, char **endptr) {
// We cannot simply set the locale to "C" temporarily with setlocale()
// as this is not thread-safe. Instead, we try to parse in the current
// locale first. If parsing stops at a '.' character, then this is a

@ -25,8 +25,8 @@
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef UPB_IO_STRTOD_H_
#define UPB_IO_STRTOD_H_
#ifndef UPB_LEX_STRTOD_H_
#define UPB_LEX_STRTOD_H_
// Must be last.
#include "upb/port/def.inc"
@ -35,7 +35,7 @@
extern "C" {
#endif
double NoLocaleStrtod(const char *str, char **endptr);
double _upb_NoLocaleStrtod(const char *str, char **endptr);
#ifdef __cplusplus
} /* extern "C" */
@ -43,4 +43,4 @@ double NoLocaleStrtod(const char *str, char **endptr);
#include "upb/port/undef.inc"
#endif /* UPB_IO_STRTOD_H_ */
#endif /* UPB_LEX_STRTOD_H_ */

@ -25,7 +25,7 @@
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "upb/internal/unicode.h"
#include "upb/lex/unicode.h"
// Must be last.
#include "upb/port/def.inc"

@ -25,8 +25,8 @@
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef UPB_INTERNAL_UNICODE_H_
#define UPB_INTERNAL_UNICODE_H_
#ifndef UPB_LEX_UNICODE_H_
#define UPB_LEX_UNICODE_H_
// Must be last.
#include "upb/port/def.inc"
@ -74,4 +74,4 @@ int upb_Unicode_ToUTF8(uint32_t cp, char* out);
#include "upb/port/undef.inc"
#endif /* UPB_INTERNAL_UNICODE_H_ */
#endif /* UPB_LEX_UNICODE_H_ */

@ -35,9 +35,9 @@
#include "upb/collections/map.h"
#include "upb/collections/map_sorter_internal.h"
#include "upb/lex/round_trip.h"
#include "upb/port/vsnprintf_compat.h"
#include "upb/reflection/message.h"
#include "upb/wire/encode_internal.h"
// Must be last.
#include "upb/port/def.inc"

Loading…
Cancel
Save