diff --git a/BUILD b/BUILD index c0b6655a0b..4853b3f9d5 100644 --- a/BUILD +++ b/BUILD @@ -210,85 +210,6 @@ cc_library( ], ) -# Legacy C/C++ Libraries (not recommended for new code) ######################## - -cc_library( - name = "handlers", - srcs = [ - "upb/handlers.c", - "upb/handlers-inl.h", - "upb/sink.c", - ], - hdrs = [ - "upb/handlers.h", - "upb/sink.h", - ], - copts = UPB_DEFAULT_COPTS, - visibility = ["//tests:__pkg__"], - deps = [ - ":port", - ":reflection", - ":table", - ":upb", - ], -) - -cc_library( - name = "upb_pb", - srcs = [ - "upb/pb/compile_decoder.c", - "upb/pb/decoder.c", - "upb/pb/decoder.int.h", - "upb/pb/encoder.c", - "upb/pb/textprinter.c", - "upb/pb/varint.c", - "upb/pb/varint.int.h", - ], - hdrs = [ - "upb/pb/decoder.h", - "upb/pb/encoder.h", - "upb/pb/textprinter.h", - ], - copts = UPB_DEFAULT_COPTS, - visibility = ["//tests:__pkg__"], - deps = [ - ":descriptor_upb_proto", - ":handlers", - ":port", - ":reflection", - ":table", - ":upb", - ], -) - -# copybara:strip_for_google3_begin -cc_library( - name = "upb_json", - srcs = [ - "upb/json/parser.c", - "upb/json/printer.c", - ], - hdrs = [ - "upb/json/parser.h", - "upb/json/printer.h", - ], - copts = UPB_DEFAULT_COPTS, - visibility = ["//tests:__pkg__"], - deps = [ - ":upb", - ":upb_pb", - ], -) - -genrule( - name = "generate_json_ragel", - srcs = ["//:upb/json/parser.rl"], - outs = ["upb/json/parser.c"], - cmd = "$(location @ragel//:ragelc) -C -o upb/json/parser.c $< && mv upb/json/parser.c $@", - tools = ["@ragel//:ragelc"], - visibility = ["//cmake:__pkg__"], -) - # Amalgamation ################################################################# py_binary( @@ -308,10 +229,7 @@ upb_amalgamation( ":fastdecode", ":descriptor_upb_proto", ":reflection", - ":handlers", ":port", - ":upb_pb", - ":upb_json", ], ) diff --git a/README.md b/README.md index a08aa0e909..91147eaf09 100644 --- a/README.md +++ b/README.md @@ -111,16 +111,6 @@ Then in your `.c` file you can #include the generated header: /* Insert code that uses generated types. */ ``` -## Old "handlers" interfaces - -This library contains several semi-deprecated interfaces (see BUILD -file for more info about which interfaces are deprecated). These -deprecated interfaces are still used in some significant projects, -such as the Ruby and PHP C bindings for protobuf in the [main protobuf -repo](https://github.com/protocolbuffers/protobuf). The goal is to -migrate the Ruby/PHP bindings to use the newer, simpler interfaces -instead. Please do not use the old interfaces in new code. - ## Lua bindings This repo has some Lua bindings for the core library. These are diff --git a/WORKSPACE b/WORKSPACE index a0b04ad044..f17c4458cf 100644 --- a/WORKSPACE +++ b/WORKSPACE @@ -17,14 +17,6 @@ http_archive( ], ) -http_archive( - name = "ragel", - build_file = "//bazel:ragel.BUILD", - sha256 = "5f156edb65d20b856d638dd9ee2dfb43285914d9aa2b6ec779dac0270cd56c3f", - strip_prefix = "ragel-6.10", - urls = ["http://www.colm.net/files/ragel/ragel-6.10.tar.gz"], -) - http_archive( name = "com_google_googletest", urls = ["https://github.com/google/googletest/archive/b6cd405286ed8635ece71c72f118e659f4ade3fb.zip"], # 2019-01-07 diff --git a/bazel/ragel.BUILD b/bazel/ragel.BUILD deleted file mode 100644 index 5916bea198..0000000000 --- a/bazel/ragel.BUILD +++ /dev/null @@ -1,195 +0,0 @@ -package( - default_visibility = ["//visibility:public"], -) - -cc_binary( - name = "ragelc", - srcs = [ - "ragel/rubycodegen.cpp", - "ragel/goipgoto.h", - "ragel/cdtable.h", - "ragel/rubycodegen.h", - "ragel/gotable.h", - "ragel/gocodegen.cpp", - "ragel/rubyfflat.cpp", - "ragel/common.cpp", - "ragel/gofflat.cpp", - "ragel/cdtable.cpp", - "ragel/cdsplit.cpp", - "ragel/rlparse.cpp", - "ragel/csfgoto.cpp", - "ragel/javacodegen.cpp", - "ragel/gocodegen.h", - "ragel/mlgoto.cpp", - "ragel/fsmgraph.cpp", - "ragel/version.h", - "ragel/mlfflat.h", - "ragel/fsmgraph.h", - "ragel/fsmbase.cpp", - "ragel/fsmstate.cpp", - "ragel/gotablish.cpp", - "ragel/rubyflat.cpp", - "ragel/cdfgoto.h", - "ragel/cscodegen.h", - "ragel/mlflat.cpp", - "ragel/rubyflat.h", - "ragel/goftable.h", - "ragel/rbxgoto.cpp", - "ragel/csfflat.cpp", - "ragel/gofgoto.cpp", - "ragel/gofgoto.h", - "ragel/ragel.h", - "ragel/goftable.cpp", - "ragel/cdcodegen.cpp", - "ragel/rlparse.h", - "ragel/cdsplit.h", - "ragel/xmlcodegen.cpp", - "ragel/goipgoto.cpp", - "ragel/dotcodegen.h", - "ragel/gogoto.cpp", - "ragel/csflat.h", - "ragel/csfflat.h", - #"ragel/config.h.in", - "ragel/csipgoto.cpp", - "ragel/mltable.cpp", - "ragel/mlflat.h", - "ragel/csftable.cpp", - "ragel/cdgoto.h", - "ragel/goflat.cpp", - "ragel/rubyfflat.h", - "ragel/mlftable.h", - "ragel/rubyftable.h", - "ragel/fsmap.cpp", - "ragel/redfsm.cpp", - "ragel/goflat.h", - "ragel/parsetree.cpp", - "ragel/fsmmin.cpp", - "ragel/dotcodegen.cpp", - "ragel/redfsm.h", - "ragel/mlcodegen.cpp", - "ragel/cdfgoto.cpp", - "ragel/cssplit.cpp", - "ragel/cstable.cpp", - "ragel/javacodegen.h", - "ragel/parsedata.cpp", - "ragel/buffer.h", - "ragel/gogoto.h", - "ragel/csgoto.h", - "ragel/pcheck.h", - "ragel/rubyftable.cpp", - "ragel/csfgoto.h", - "ragel/common.h", - "ragel/cdftable.h", - "ragel/mlgoto.h", - "ragel/csgoto.cpp", - "ragel/cdflat.h", - "ragel/cdipgoto.h", - "ragel/cstable.h", - "ragel/gendata.h", - "ragel/cdfflat.cpp", - "ragel/gotable.cpp", - "ragel/cdcodegen.h", - "ragel/gendata.cpp", - "ragel/rubytable.h", - "ragel/csflat.cpp", - "ragel/inputdata.h", - "ragel/inputdata.cpp", - "ragel/rubytable.cpp", - "ragel/fsmattach.cpp", - "ragel/csipgoto.h", - "ragel/cscodegen.cpp", - "ragel/cdfflat.h", - "ragel/rbxgoto.h", - "ragel/xmlcodegen.h", - "ragel/gofflat.h", - "ragel/parsedata.h", - "ragel/mlfgoto.h", - "ragel/cdflat.cpp", - "ragel/config.h", - "ragel/rlscan.cpp", - "ragel/mlcodegen.h", - "ragel/mlfflat.cpp", - "ragel/mlftable.cpp", - "ragel/mltable.h", - "ragel/cdipgoto.cpp", - "ragel/cdftable.cpp", - "ragel/parsetree.h", - "ragel/rlscan.h", - "ragel/main.cpp", - "ragel/cssplit.h", - "ragel/mlfgoto.cpp", - "ragel/csftable.h", - "ragel/gotablish.h", - "ragel/cdgoto.cpp", - "aapl/avlmelkey.h", - "aapl/dlistmel.h", - "aapl/avliset.h", - "aapl/avlkeyless.h", - "aapl/sbstset.h", - "aapl/sbsttable.h", - "aapl/quicksort.h", - "aapl/avlitree.h", - "aapl/avlcommon.h", - "aapl/bstset.h", - "aapl/avlmel.h", - "aapl/insertsort.h", - "aapl/dlist.h", - "aapl/avlmap.h", - "aapl/mergesort.h", - "aapl/resize.h", - "aapl/bstcommon.h", - "aapl/bstmap.h", - "aapl/compare.h", - "aapl/svector.h", - "aapl/avlset.h", - "aapl/bsttable.h", - "aapl/avlikeyless.h", - "aapl/bubblesort.h", - "aapl/table.h", - "aapl/avlbasic.h", - "aapl/vector.h", - "aapl/avlimap.h", - "aapl/dlistval.h", - "aapl/dlcommon.h", - "aapl/avlibasic.h", - "aapl/sbstmap.h", - "aapl/avlimel.h", - "aapl/avlimelkey.h", - "aapl/avltree.h", - ], - includes = [ - "aapl", - "ragel", - ], -) - -config_h_contents = """ -#define PACKAGE "ragel" - -/* Define to the address where bug reports for this package should be sent. */ -#define PACKAGE_BUGREPORT "" - -/* Define to the full name of this package. */ -#define PACKAGE_NAME "ragel" - -/* Define to the full name and version of this package. */ -#define PACKAGE_STRING "ragel 6.10" - -/* Define to the one symbol short name of this package. */ -#define PACKAGE_TARNAME "ragel" - -/* Define to the home page for this package. */ -#define PACKAGE_URL "" - -/* Define to the version of this package. */ -#define PACKAGE_VERSION "6.10" - -/* Version number of package */ -#define VERSION "6.10" -""" - -genrule( - name = "gen_config_h", - outs = ["ragel/config.h"], - cmd = "(cat <<'HEREDOC'\n%s\nHEREDOC\n) > $@" % config_h_contents, -) diff --git a/cmake/BUILD b/cmake/BUILD index 53fbd07abe..052ea912a6 100644 --- a/cmake/BUILD +++ b/cmake/BUILD @@ -36,13 +36,6 @@ genrule( tools = [":make_cmakelists"], ) -genrule( - name = "copy_json_ragel", - srcs = ["//:upb/json/parser.c"], - outs = ["generated-in/upb/json/parser.c"], - cmd = "cp $< $@", -) - genrule( name = "copy_protos", srcs = ["//:descriptor_upb_proto"], @@ -59,7 +52,6 @@ generated_file_staleness_test( "CMakeLists.txt", "google/protobuf/descriptor.upb.c", "google/protobuf/descriptor.upb.h", - "upb/json/parser.c", ], generated_pattern = "generated-in/%s", ) diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt index 2b1d457423..375328d1ae 100644 --- a/cmake/CMakeLists.txt +++ b/cmake/CMakeLists.txt @@ -124,43 +124,6 @@ target_link_libraries(json add_library(table INTERFACE) target_link_libraries(table INTERFACE port) -add_library(handlers - ../upb/handlers.c - ../upb/handlers-inl.h - ../upb/sink.c - ../upb/handlers.h - ../upb/sink.h) -target_link_libraries(handlers - port - reflection - table - upb) -add_library(upb_pb - ../upb/pb/compile_decoder.c - ../upb/pb/decoder.c - ../upb/pb/decoder.int.h - ../upb/pb/encoder.c - ../upb/pb/textprinter.c - ../upb/pb/varint.c - ../upb/pb/varint.int.h - ../upb/pb/decoder.h - ../upb/pb/encoder.h - ../upb/pb/textprinter.h) -target_link_libraries(upb_pb - descriptor_upb_proto - handlers - port - reflection - table - upb) -add_library(upb_json - ../cmake/upb/json/parser.c - ../upb/json/printer.c - ../upb/json/parser.h - ../upb/json/printer.h) -target_link_libraries(upb_json - upb - upb_pb) add_library(wyhash INTERFACE) diff --git a/cmake/upb/json/parser.c b/cmake/upb/json/parser.c deleted file mode 100644 index bbd66d33eb..0000000000 --- a/cmake/upb/json/parser.c +++ /dev/null @@ -1,3435 +0,0 @@ - -#line 1 "upb/json/parser.rl" -/* -** upb::json::Parser (upb_json_parser) -** -** A parser that uses the Ragel State Machine Compiler to generate -** the finite automata. -** -** Ragel only natively handles regular languages, but we can manually -** program it a bit to handle context-free languages like JSON, by using -** the "fcall" and "fret" constructs. -** -** This parser can handle the basics, but needs several things to be fleshed -** out: -** -** - handling of unicode escape sequences (including high surrogate pairs). -** - properly check and report errors for unknown fields, stack overflow, -** improper array nesting (or lack of nesting). -** - handling of base64 sequences with padding characters. -** - handling of push-back (non-success returns from sink functions). -** - handling of keys/escape-sequences/etc that span input buffers. -*/ - -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#include "upb/json/parser.h" -#include "upb/pb/encoder.h" - -#include "upb/port_def.inc" - -#define UPB_JSON_MAX_DEPTH 64 - -/* Type of value message */ -enum { - VALUE_NULLVALUE = 0, - VALUE_NUMBERVALUE = 1, - VALUE_STRINGVALUE = 2, - VALUE_BOOLVALUE = 3, - VALUE_STRUCTVALUE = 4, - VALUE_LISTVALUE = 5 -}; - -/* Forward declare */ -static bool is_top_level(upb_json_parser *p); -static bool is_wellknown_msg(upb_json_parser *p, upb_wellknowntype_t type); -static bool is_wellknown_field(upb_json_parser *p, upb_wellknowntype_t type); - -static bool is_number_wrapper_object(upb_json_parser *p); -static bool does_number_wrapper_start(upb_json_parser *p); -static bool does_number_wrapper_end(upb_json_parser *p); - -static bool is_string_wrapper_object(upb_json_parser *p); -static bool does_string_wrapper_start(upb_json_parser *p); -static bool does_string_wrapper_end(upb_json_parser *p); - -static bool does_fieldmask_start(upb_json_parser *p); -static bool does_fieldmask_end(upb_json_parser *p); -static void start_fieldmask_object(upb_json_parser *p); -static void end_fieldmask_object(upb_json_parser *p); - -static void start_wrapper_object(upb_json_parser *p); -static void end_wrapper_object(upb_json_parser *p); - -static void start_value_object(upb_json_parser *p, int value_type); -static void end_value_object(upb_json_parser *p); - -static void start_listvalue_object(upb_json_parser *p); -static void end_listvalue_object(upb_json_parser *p); - -static void start_structvalue_object(upb_json_parser *p); -static void end_structvalue_object(upb_json_parser *p); - -static void start_object(upb_json_parser *p); -static void end_object(upb_json_parser *p); - -static void start_any_object(upb_json_parser *p, const char *ptr); -static bool end_any_object(upb_json_parser *p, const char *ptr); - -static bool start_subobject(upb_json_parser *p); -static void end_subobject(upb_json_parser *p); - -static void start_member(upb_json_parser *p); -static void end_member(upb_json_parser *p); -static bool end_membername(upb_json_parser *p); - -static void start_any_member(upb_json_parser *p, const char *ptr); -static void end_any_member(upb_json_parser *p, const char *ptr); -static bool end_any_membername(upb_json_parser *p); - -size_t parse(void *closure, const void *hd, const char *buf, size_t size, - const upb_bufhandle *handle); -static bool end(void *closure, const void *hd); - -static const char eof_ch = 'e'; - -/* stringsink */ -typedef struct { - upb_byteshandler handler; - upb_bytessink sink; - char *ptr; - size_t len, size; -} upb_stringsink; - - -static void *stringsink_start(void *_sink, const void *hd, size_t size_hint) { - upb_stringsink *sink = _sink; - sink->len = 0; - UPB_UNUSED(hd); - UPB_UNUSED(size_hint); - return sink; -} - -static size_t stringsink_string(void *_sink, const void *hd, const char *ptr, - size_t len, const upb_bufhandle *handle) { - upb_stringsink *sink = _sink; - size_t new_size = sink->size; - - UPB_UNUSED(hd); - UPB_UNUSED(handle); - - while (sink->len + len > new_size) { - new_size *= 2; - } - - if (new_size != sink->size) { - sink->ptr = realloc(sink->ptr, new_size); - sink->size = new_size; - } - - memcpy(sink->ptr + sink->len, ptr, len); - sink->len += len; - - return len; -} - -void upb_stringsink_init(upb_stringsink *sink) { - upb_byteshandler_init(&sink->handler); - upb_byteshandler_setstartstr(&sink->handler, stringsink_start, NULL); - upb_byteshandler_setstring(&sink->handler, stringsink_string, NULL); - - upb_bytessink_reset(&sink->sink, &sink->handler, sink); - - sink->size = 32; - sink->ptr = malloc(sink->size); - sink->len = 0; -} - -void upb_stringsink_uninit(upb_stringsink *sink) { free(sink->ptr); } - -typedef struct { - /* For encoding Any value field in binary format. */ - upb_handlercache *encoder_handlercache; - upb_stringsink stringsink; - - /* For decoding Any value field in json format. */ - upb_json_codecache *parser_codecache; - upb_sink sink; - upb_json_parser *parser; - - /* Mark the range of uninterpreted values in json input before type url. */ - const char *before_type_url_start; - const char *before_type_url_end; - - /* Mark the range of uninterpreted values in json input after type url. */ - const char *after_type_url_start; -} upb_jsonparser_any_frame; - -typedef struct { - upb_sink sink; - - /* The current message in which we're parsing, and the field whose value we're - * expecting next. */ - const upb_msgdef *m; - const upb_fielddef *f; - - /* The table mapping json name to fielddef for this message. */ - const upb_strtable *name_table; - - /* We are in a repeated-field context. We need this flag to decide whether to - * handle the array as a normal repeated field or a - * google.protobuf.ListValue/google.protobuf.Value. */ - bool is_repeated; - - /* We are in a repeated-field context, ready to emit mapentries as - * submessages. This flag alters the start-of-object (open-brace) behavior to - * begin a sequence of mapentry messages rather than a single submessage. */ - bool is_map; - - /* We are in a map-entry message context. This flag is set when parsing the - * value field of a single map entry and indicates to all value-field parsers - * (subobjects, strings, numbers, and bools) that the map-entry submessage - * should end as soon as the value is parsed. */ - bool is_mapentry; - - /* If |is_map| or |is_mapentry| is true, |mapfield| refers to the parent - * message's map field that we're currently parsing. This differs from |f| - * because |f| is the field in the *current* message (i.e., the map-entry - * message itself), not the parent's field that leads to this map. */ - const upb_fielddef *mapfield; - - /* We are in an Any message context. This flag is set when parsing the Any - * message and indicates to all field parsers (subobjects, strings, numbers, - * and bools) that the parsed field should be serialized as binary data or - * cached (type url not found yet). */ - bool is_any; - - /* The type of packed message in Any. */ - upb_jsonparser_any_frame *any_frame; - - /* True if the field to be parsed is unknown. */ - bool is_unknown_field; -} upb_jsonparser_frame; - -static void init_frame(upb_jsonparser_frame* frame) { - frame->m = NULL; - frame->f = NULL; - frame->name_table = NULL; - frame->is_repeated = false; - frame->is_map = false; - frame->is_mapentry = false; - frame->mapfield = NULL; - frame->is_any = false; - frame->any_frame = NULL; - frame->is_unknown_field = false; -} - -struct upb_json_parser { - upb_arena *arena; - const upb_json_parsermethod *method; - upb_bytessink input_; - - /* Stack to track the JSON scopes we are in. */ - upb_jsonparser_frame stack[UPB_JSON_MAX_DEPTH]; - upb_jsonparser_frame *top; - upb_jsonparser_frame *limit; - - upb_status *status; - - /* Ragel's internal parsing stack for the parsing state machine. */ - int current_state; - int parser_stack[UPB_JSON_MAX_DEPTH]; - int parser_top; - - /* The handle for the current buffer. */ - const upb_bufhandle *handle; - - /* Accumulate buffer. See details in parser.rl. */ - const char *accumulated; - size_t accumulated_len; - char *accumulate_buf; - size_t accumulate_buf_size; - - /* Multi-part text data. See details in parser.rl. */ - int multipart_state; - upb_selector_t string_selector; - - /* Input capture. See details in parser.rl. */ - const char *capture; - - /* Intermediate result of parsing a unicode escape sequence. */ - uint32_t digit; - - /* For resolve type url in Any. */ - const upb_symtab *symtab; - - /* Whether to proceed if unknown field is met. */ - bool ignore_json_unknown; - - /* Cache for parsing timestamp due to base and zone are handled in different - * handlers. */ - struct tm tm; -}; - -static upb_jsonparser_frame* start_jsonparser_frame(upb_json_parser *p) { - upb_jsonparser_frame *inner; - inner = p->top + 1; - init_frame(inner); - return inner; -} - -struct upb_json_codecache { - upb_arena *arena; - upb_inttable methods; /* upb_msgdef* -> upb_json_parsermethod* */ -}; - -struct upb_json_parsermethod { - const upb_json_codecache *cache; - upb_byteshandler input_handler_; - - /* Maps json_name -> fielddef */ - upb_strtable name_table; -}; - -#define PARSER_CHECK_RETURN(x) if (!(x)) return false - -static upb_jsonparser_any_frame *json_parser_any_frame_new( - upb_json_parser *p) { - upb_jsonparser_any_frame *frame; - - frame = upb_arena_malloc(p->arena, sizeof(upb_jsonparser_any_frame)); - - frame->encoder_handlercache = upb_pb_encoder_newcache(); - frame->parser_codecache = upb_json_codecache_new(); - frame->parser = NULL; - frame->before_type_url_start = NULL; - frame->before_type_url_end = NULL; - frame->after_type_url_start = NULL; - - upb_stringsink_init(&frame->stringsink); - - return frame; -} - -static void json_parser_any_frame_set_payload_type( - upb_json_parser *p, - upb_jsonparser_any_frame *frame, - const upb_msgdef *payload_type) { - const upb_handlers *h; - const upb_json_parsermethod *parser_method; - upb_pb_encoder *encoder; - - /* Initialize encoder. */ - h = upb_handlercache_get(frame->encoder_handlercache, payload_type); - encoder = upb_pb_encoder_create(p->arena, h, frame->stringsink.sink); - - /* Initialize parser. */ - parser_method = upb_json_codecache_get(frame->parser_codecache, payload_type); - upb_sink_reset(&frame->sink, h, encoder); - frame->parser = - upb_json_parser_create(p->arena, parser_method, p->symtab, frame->sink, - p->status, p->ignore_json_unknown); -} - -static void json_parser_any_frame_free(upb_jsonparser_any_frame *frame) { - upb_handlercache_free(frame->encoder_handlercache); - upb_json_codecache_free(frame->parser_codecache); - upb_stringsink_uninit(&frame->stringsink); -} - -static bool json_parser_any_frame_has_type_url( - upb_jsonparser_any_frame *frame) { - return frame->parser != NULL; -} - -static bool json_parser_any_frame_has_value_before_type_url( - upb_jsonparser_any_frame *frame) { - return frame->before_type_url_start != frame->before_type_url_end; -} - -static bool json_parser_any_frame_has_value_after_type_url( - upb_jsonparser_any_frame *frame) { - return frame->after_type_url_start != NULL; -} - -static bool json_parser_any_frame_has_value( - upb_jsonparser_any_frame *frame) { - return json_parser_any_frame_has_value_before_type_url(frame) || - json_parser_any_frame_has_value_after_type_url(frame); -} - -static void json_parser_any_frame_set_before_type_url_end( - upb_jsonparser_any_frame *frame, - const char *ptr) { - if (frame->parser == NULL) { - frame->before_type_url_end = ptr; - } -} - -static void json_parser_any_frame_set_after_type_url_start_once( - upb_jsonparser_any_frame *frame, - const char *ptr) { - if (json_parser_any_frame_has_type_url(frame) && - frame->after_type_url_start == NULL) { - frame->after_type_url_start = ptr; - } -} - -/* Used to signal that a capture has been suspended. */ -static char suspend_capture; - -static upb_selector_t getsel_for_handlertype(upb_json_parser *p, - upb_handlertype_t type) { - upb_selector_t sel; - bool ok = upb_handlers_getselector(p->top->f, type, &sel); - UPB_ASSUME(ok); - return sel; -} - -static upb_selector_t parser_getsel(upb_json_parser *p) { - return getsel_for_handlertype( - p, upb_handlers_getprimitivehandlertype(p->top->f)); -} - -static bool check_stack(upb_json_parser *p) { - if ((p->top + 1) == p->limit) { - upb_status_seterrmsg(p->status, "Nesting too deep"); - return false; - } - - return true; -} - -static void set_name_table(upb_json_parser *p, upb_jsonparser_frame *frame) { - upb_value v; - const upb_json_codecache *cache = p->method->cache; - bool ok; - const upb_json_parsermethod *method; - - ok = upb_inttable_lookupptr(&cache->methods, frame->m, &v); - UPB_ASSUME(ok); - method = upb_value_getconstptr(v); - - frame->name_table = &method->name_table; -} - -/* There are GCC/Clang built-ins for overflow checking which we could start - * using if there was any performance benefit to it. */ - -static bool checked_add(size_t a, size_t b, size_t *c) { - if (SIZE_MAX - a < b) return false; - *c = a + b; - return true; -} - -static size_t saturating_multiply(size_t a, size_t b) { - /* size_t is unsigned, so this is defined behavior even on overflow. */ - size_t ret = a * b; - if (b != 0 && ret / b != a) { - ret = SIZE_MAX; - } - return ret; -} - - -/* Base64 decoding ************************************************************/ - -/* TODO(haberman): make this streaming. */ - -static const signed char b64table[] = { - -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, 62/*+*/, -1, -1, -1, 63/*/ */, - 52/*0*/, 53/*1*/, 54/*2*/, 55/*3*/, 56/*4*/, 57/*5*/, 58/*6*/, 59/*7*/, - 60/*8*/, 61/*9*/, -1, -1, -1, -1, -1, -1, - -1, 0/*A*/, 1/*B*/, 2/*C*/, 3/*D*/, 4/*E*/, 5/*F*/, 6/*G*/, - 07/*H*/, 8/*I*/, 9/*J*/, 10/*K*/, 11/*L*/, 12/*M*/, 13/*N*/, 14/*O*/, - 15/*P*/, 16/*Q*/, 17/*R*/, 18/*S*/, 19/*T*/, 20/*U*/, 21/*V*/, 22/*W*/, - 23/*X*/, 24/*Y*/, 25/*Z*/, -1, -1, -1, -1, -1, - -1, 26/*a*/, 27/*b*/, 28/*c*/, 29/*d*/, 30/*e*/, 31/*f*/, 32/*g*/, - 33/*h*/, 34/*i*/, 35/*j*/, 36/*k*/, 37/*l*/, 38/*m*/, 39/*n*/, 40/*o*/, - 41/*p*/, 42/*q*/, 43/*r*/, 44/*s*/, 45/*t*/, 46/*u*/, 47/*v*/, 48/*w*/, - 49/*x*/, 50/*y*/, 51/*z*/, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1 -}; - -/* Returns the table value sign-extended to 32 bits. Knowing that the upper - * bits will be 1 for unrecognized characters makes it easier to check for - * this error condition later (see below). */ -int32_t b64lookup(unsigned char ch) { return b64table[ch]; } - -/* Returns true if the given character is not a valid base64 character or - * padding. */ -bool nonbase64(unsigned char ch) { return b64lookup(ch) == -1 && ch != '='; } - -static bool base64_push(upb_json_parser *p, upb_selector_t sel, const char *ptr, - size_t len) { - const char *limit = ptr + len; - for (; ptr < limit; ptr += 4) { - uint32_t val; - char output[3]; - - if (limit - ptr < 4) { - upb_status_seterrf(p->status, - "Base64 input for bytes field not a multiple of 4: %s", - upb_fielddef_name(p->top->f)); - return false; - } - - val = b64lookup(ptr[0]) << 18 | - b64lookup(ptr[1]) << 12 | - b64lookup(ptr[2]) << 6 | - b64lookup(ptr[3]); - - /* Test the upper bit; returns true if any of the characters returned -1. */ - if (val & 0x80000000) { - goto otherchar; - } - - output[0] = val >> 16; - output[1] = (val >> 8) & 0xff; - output[2] = val & 0xff; - upb_sink_putstring(p->top->sink, sel, output, 3, NULL); - } - return true; - -otherchar: - if (nonbase64(ptr[0]) || nonbase64(ptr[1]) || nonbase64(ptr[2]) || - nonbase64(ptr[3]) ) { - upb_status_seterrf(p->status, - "Non-base64 characters in bytes field: %s", - upb_fielddef_name(p->top->f)); - return false; - } if (ptr[2] == '=') { - uint32_t val; - char output; - - /* Last group contains only two input bytes, one output byte. */ - if (ptr[0] == '=' || ptr[1] == '=' || ptr[3] != '=') { - goto badpadding; - } - - val = b64lookup(ptr[0]) << 18 | - b64lookup(ptr[1]) << 12; - - UPB_ASSERT(!(val & 0x80000000)); - output = val >> 16; - upb_sink_putstring(p->top->sink, sel, &output, 1, NULL); - return true; - } else { - uint32_t val; - char output[2]; - - /* Last group contains only three input bytes, two output bytes. */ - if (ptr[0] == '=' || ptr[1] == '=' || ptr[2] == '=') { - goto badpadding; - } - - val = b64lookup(ptr[0]) << 18 | - b64lookup(ptr[1]) << 12 | - b64lookup(ptr[2]) << 6; - - output[0] = val >> 16; - output[1] = (val >> 8) & 0xff; - upb_sink_putstring(p->top->sink, sel, output, 2, NULL); - return true; - } - -badpadding: - upb_status_seterrf(p->status, - "Incorrect base64 padding for field: %s (%.*s)", - upb_fielddef_name(p->top->f), - 4, ptr); - return false; -} - - -/* Accumulate buffer **********************************************************/ - -/* Functionality for accumulating a buffer. - * - * Some parts of the parser need an entire value as a contiguous string. For - * example, to look up a member name in a hash table, or to turn a string into - * a number, the relevant library routines need the input string to be in - * contiguous memory, even if the value spanned two or more buffers in the - * input. These routines handle that. - * - * In the common case we can just point to the input buffer to get this - * contiguous string and avoid any actual copy. So we optimistically begin - * this way. But there are a few cases where we must instead copy into a - * separate buffer: - * - * 1. The string was not contiguous in the input (it spanned buffers). - * - * 2. The string included escape sequences that need to be interpreted to get - * the true value in a contiguous buffer. */ - -static void assert_accumulate_empty(upb_json_parser *p) { - UPB_ASSERT(p->accumulated == NULL); - UPB_ASSERT(p->accumulated_len == 0); -} - -static void accumulate_clear(upb_json_parser *p) { - p->accumulated = NULL; - p->accumulated_len = 0; -} - -/* Used internally by accumulate_append(). */ -static bool accumulate_realloc(upb_json_parser *p, size_t need) { - void *mem; - size_t old_size = p->accumulate_buf_size; - size_t new_size = UPB_MAX(old_size, 128); - while (new_size < need) { - new_size = saturating_multiply(new_size, 2); - } - - mem = upb_arena_realloc(p->arena, p->accumulate_buf, old_size, new_size); - if (!mem) { - upb_status_seterrmsg(p->status, "Out of memory allocating buffer."); - return false; - } - - p->accumulate_buf = mem; - p->accumulate_buf_size = new_size; - return true; -} - -/* Logically appends the given data to the append buffer. - * If "can_alias" is true, we will try to avoid actually copying, but the buffer - * must be valid until the next accumulate_append() call (if any). */ -static bool accumulate_append(upb_json_parser *p, const char *buf, size_t len, - bool can_alias) { - size_t need; - - if (!p->accumulated && can_alias) { - p->accumulated = buf; - p->accumulated_len = len; - return true; - } - - if (!checked_add(p->accumulated_len, len, &need)) { - upb_status_seterrmsg(p->status, "Integer overflow."); - return false; - } - - if (need > p->accumulate_buf_size && !accumulate_realloc(p, need)) { - return false; - } - - if (p->accumulated != p->accumulate_buf) { - if (p->accumulated_len) { - memcpy(p->accumulate_buf, p->accumulated, p->accumulated_len); - } - p->accumulated = p->accumulate_buf; - } - - memcpy(p->accumulate_buf + p->accumulated_len, buf, len); - p->accumulated_len += len; - return true; -} - -/* Returns a pointer to the data accumulated since the last accumulate_clear() - * call, and writes the length to *len. This with point either to the input - * buffer or a temporary accumulate buffer. */ -static const char *accumulate_getptr(upb_json_parser *p, size_t *len) { - UPB_ASSERT(p->accumulated); - *len = p->accumulated_len; - return p->accumulated; -} - - -/* Mult-part text data ********************************************************/ - -/* When we have text data in the input, it can often come in multiple segments. - * For example, there may be some raw string data followed by an escape - * sequence. The two segments are processed with different logic. Also buffer - * seams in the input can cause multiple segments. - * - * As we see segments, there are two main cases for how we want to process them: - * - * 1. we want to push the captured input directly to string handlers. - * - * 2. we need to accumulate all the parts into a contiguous buffer for further - * processing (field name lookup, string->number conversion, etc). */ - -/* This is the set of states for p->multipart_state. */ -enum { - /* We are not currently processing multipart data. */ - MULTIPART_INACTIVE = 0, - - /* We are processing multipart data by accumulating it into a contiguous - * buffer. */ - MULTIPART_ACCUMULATE = 1, - - /* We are processing multipart data by pushing each part directly to the - * current string handlers. */ - MULTIPART_PUSHEAGERLY = 2 -}; - -/* Start a multi-part text value where we accumulate the data for processing at - * the end. */ -static void multipart_startaccum(upb_json_parser *p) { - assert_accumulate_empty(p); - UPB_ASSERT(p->multipart_state == MULTIPART_INACTIVE); - p->multipart_state = MULTIPART_ACCUMULATE; -} - -/* Start a multi-part text value where we immediately push text data to a string - * value with the given selector. */ -static void multipart_start(upb_json_parser *p, upb_selector_t sel) { - assert_accumulate_empty(p); - UPB_ASSERT(p->multipart_state == MULTIPART_INACTIVE); - p->multipart_state = MULTIPART_PUSHEAGERLY; - p->string_selector = sel; -} - -static bool multipart_text(upb_json_parser *p, const char *buf, size_t len, - bool can_alias) { - switch (p->multipart_state) { - case MULTIPART_INACTIVE: - upb_status_seterrmsg( - p->status, "Internal error: unexpected state MULTIPART_INACTIVE"); - return false; - - case MULTIPART_ACCUMULATE: - if (!accumulate_append(p, buf, len, can_alias)) { - return false; - } - break; - - case MULTIPART_PUSHEAGERLY: { - const upb_bufhandle *handle = can_alias ? p->handle : NULL; - upb_sink_putstring(p->top->sink, p->string_selector, buf, len, handle); - break; - } - } - - return true; -} - -/* Note: this invalidates the accumulate buffer! Call only after reading its - * contents. */ -static void multipart_end(upb_json_parser *p) { - /* This is false sometimes. Probably a bug of some sort, but this code is - * intended for deletion soon. */ - /* UPB_ASSERT(p->multipart_state != MULTIPART_INACTIVE); */ - p->multipart_state = MULTIPART_INACTIVE; - accumulate_clear(p); -} - - -/* Input capture **************************************************************/ - -/* Functionality for capturing a region of the input as text. Gracefully - * handles the case where a buffer seam occurs in the middle of the captured - * region. */ - -static void capture_begin(upb_json_parser *p, const char *ptr) { - UPB_ASSERT(p->multipart_state != MULTIPART_INACTIVE); - UPB_ASSERT(p->capture == NULL); - p->capture = ptr; -} - -static bool capture_end(upb_json_parser *p, const char *ptr) { - UPB_ASSERT(p->capture); - if (multipart_text(p, p->capture, ptr - p->capture, true)) { - p->capture = NULL; - return true; - } else { - return false; - } -} - -/* This is called at the end of each input buffer (ie. when we have hit a - * buffer seam). If we are in the middle of capturing the input, this - * processes the unprocessed capture region. */ -static void capture_suspend(upb_json_parser *p, const char **ptr) { - if (!p->capture) return; - - if (multipart_text(p, p->capture, *ptr - p->capture, false)) { - /* We use this as a signal that we were in the middle of capturing, and - * that capturing should resume at the beginning of the next buffer. - * - * We can't use *ptr here, because we have no guarantee that this pointer - * will be valid when we resume (if the underlying memory is freed, then - * using the pointer at all, even to compare to NULL, is likely undefined - * behavior). */ - p->capture = &suspend_capture; - } else { - /* Need to back up the pointer to the beginning of the capture, since - * we were not able to actually preserve it. */ - *ptr = p->capture; - } -} - -static void capture_resume(upb_json_parser *p, const char *ptr) { - if (p->capture) { - UPB_ASSERT(p->capture == &suspend_capture); - p->capture = ptr; - } -} - - -/* Callbacks from the parser **************************************************/ - -/* These are the functions called directly from the parser itself. - * We define these in the same order as their declarations in the parser. */ - -static char escape_char(char in) { - switch (in) { - case 'r': return '\r'; - case 't': return '\t'; - case 'n': return '\n'; - case 'f': return '\f'; - case 'b': return '\b'; - case '/': return '/'; - case '"': return '"'; - case '\\': return '\\'; - default: - UPB_ASSERT(0); - return 'x'; - } -} - -static bool escape(upb_json_parser *p, const char *ptr) { - char ch = escape_char(*ptr); - return multipart_text(p, &ch, 1, false); -} - -static void start_hex(upb_json_parser *p) { - p->digit = 0; -} - -static void hexdigit(upb_json_parser *p, const char *ptr) { - char ch = *ptr; - - p->digit <<= 4; - - if (ch >= '0' && ch <= '9') { - p->digit += (ch - '0'); - } else if (ch >= 'a' && ch <= 'f') { - p->digit += ((ch - 'a') + 10); - } else { - UPB_ASSERT(ch >= 'A' && ch <= 'F'); - p->digit += ((ch - 'A') + 10); - } -} - -static bool end_hex(upb_json_parser *p) { - uint32_t codepoint = p->digit; - - /* emit the codepoint as UTF-8. */ - char utf8[3]; /* support \u0000 -- \uFFFF -- need only three bytes. */ - int length = 0; - if (codepoint <= 0x7F) { - utf8[0] = codepoint; - length = 1; - } else if (codepoint <= 0x07FF) { - utf8[1] = (codepoint & 0x3F) | 0x80; - codepoint >>= 6; - utf8[0] = (codepoint & 0x1F) | 0xC0; - length = 2; - } else /* codepoint <= 0xFFFF */ { - utf8[2] = (codepoint & 0x3F) | 0x80; - codepoint >>= 6; - utf8[1] = (codepoint & 0x3F) | 0x80; - codepoint >>= 6; - utf8[0] = (codepoint & 0x0F) | 0xE0; - length = 3; - } - /* TODO(haberman): Handle high surrogates: if codepoint is a high surrogate - * we have to wait for the next escape to get the full code point). */ - - return multipart_text(p, utf8, length, false); -} - -static void start_text(upb_json_parser *p, const char *ptr) { - capture_begin(p, ptr); -} - -static bool end_text(upb_json_parser *p, const char *ptr) { - return capture_end(p, ptr); -} - -static bool start_number(upb_json_parser *p, const char *ptr) { - if (is_top_level(p)) { - if (is_number_wrapper_object(p)) { - start_wrapper_object(p); - } else if (is_wellknown_msg(p, UPB_WELLKNOWN_VALUE)) { - start_value_object(p, VALUE_NUMBERVALUE); - } else { - return false; - } - } else if (does_number_wrapper_start(p)) { - if (!start_subobject(p)) { - return false; - } - start_wrapper_object(p); - } else if (is_wellknown_field(p, UPB_WELLKNOWN_VALUE)) { - if (!start_subobject(p)) { - return false; - } - start_value_object(p, VALUE_NUMBERVALUE); - } - - multipart_startaccum(p); - capture_begin(p, ptr); - return true; -} - -static bool parse_number(upb_json_parser *p, bool is_quoted); - -static bool end_number_nontop(upb_json_parser *p, const char *ptr) { - if (!capture_end(p, ptr)) { - return false; - } - - if (p->top->f == NULL) { - multipart_end(p); - return true; - } - - return parse_number(p, false); -} - -static bool end_number(upb_json_parser *p, const char *ptr) { - if (!end_number_nontop(p, ptr)) { - return false; - } - - if (does_number_wrapper_end(p)) { - end_wrapper_object(p); - if (!is_top_level(p)) { - end_subobject(p); - } - return true; - } - - if (is_wellknown_msg(p, UPB_WELLKNOWN_VALUE)) { - end_value_object(p); - if (!is_top_level(p)) { - end_subobject(p); - } - return true; - } - - return true; -} - -/* |buf| is NULL-terminated. |buf| itself will never include quotes; - * |is_quoted| tells us whether this text originally appeared inside quotes. */ -static bool parse_number_from_buffer(upb_json_parser *p, const char *buf, - bool is_quoted) { - size_t len = strlen(buf); - const char *bufend = buf + len; - char *end; - upb_fieldtype_t type = upb_fielddef_type(p->top->f); - double val; - double dummy; - double inf = INFINITY; - - errno = 0; - - if (len == 0 || buf[0] == ' ') { - return false; - } - - /* For integer types, first try parsing with integer-specific routines. - * If these succeed, they will be more accurate for int64/uint64 than - * strtod(). - */ - switch (type) { - case UPB_TYPE_ENUM: - case UPB_TYPE_INT32: { - long val = strtol(buf, &end, 0); - if (errno == ERANGE || end != bufend) { - break; - } else if (val > INT32_MAX || val < INT32_MIN) { - return false; - } else { - upb_sink_putint32(p->top->sink, parser_getsel(p), (int32_t)val); - return true; - } - UPB_UNREACHABLE(); - } - case UPB_TYPE_UINT32: { - unsigned long val = strtoul(buf, &end, 0); - if (end != bufend) { - break; - } else if (val > UINT32_MAX || errno == ERANGE) { - return false; - } else { - upb_sink_putuint32(p->top->sink, parser_getsel(p), (uint32_t)val); - return true; - } - UPB_UNREACHABLE(); - } - /* XXX: We can't handle [u]int64 properly on 32-bit machines because - * strto[u]ll isn't in C89. */ - case UPB_TYPE_INT64: { - long val = strtol(buf, &end, 0); - if (errno == ERANGE || end != bufend) { - break; - } else { - upb_sink_putint64(p->top->sink, parser_getsel(p), val); - return true; - } - UPB_UNREACHABLE(); - } - case UPB_TYPE_UINT64: { - unsigned long val = strtoul(p->accumulated, &end, 0); - if (end != bufend) { - break; - } else if (errno == ERANGE) { - return false; - } else { - upb_sink_putuint64(p->top->sink, parser_getsel(p), val); - return true; - } - UPB_UNREACHABLE(); - } - default: - break; - } - - if (type != UPB_TYPE_DOUBLE && type != UPB_TYPE_FLOAT && is_quoted) { - /* Quoted numbers for integer types are not allowed to be in double form. */ - return false; - } - - if (len == strlen("Infinity") && strcmp(buf, "Infinity") == 0) { - /* C89 does not have an INFINITY macro. */ - val = inf; - } else if (len == strlen("-Infinity") && strcmp(buf, "-Infinity") == 0) { - val = -inf; - } else { - val = strtod(buf, &end); - if (errno == ERANGE || end != bufend) { - return false; - } - } - - switch (type) { -#define CASE(capitaltype, smalltype, ctype, min, max) \ - case UPB_TYPE_ ## capitaltype: { \ - if (modf(val, &dummy) != 0 || val > max || val < min) { \ - return false; \ - } else { \ - upb_sink_put ## smalltype(p->top->sink, parser_getsel(p), \ - (ctype)val); \ - return true; \ - } \ - break; \ - } - case UPB_TYPE_ENUM: - CASE(INT32, int32, int32_t, INT32_MIN, INT32_MAX); - CASE(INT64, int64, int64_t, INT64_MIN, INT64_MAX); - CASE(UINT32, uint32, uint32_t, 0, UINT32_MAX); - CASE(UINT64, uint64, uint64_t, 0, UINT64_MAX); -#undef CASE - - case UPB_TYPE_DOUBLE: - upb_sink_putdouble(p->top->sink, parser_getsel(p), val); - return true; - case UPB_TYPE_FLOAT: - if ((val > FLT_MAX || val < -FLT_MAX) && val != inf && val != -inf) { - return false; - } else { - upb_sink_putfloat(p->top->sink, parser_getsel(p), val); - return true; - } - default: - return false; - } -} - -static bool parse_number(upb_json_parser *p, bool is_quoted) { - size_t len; - const char *buf; - - /* strtol() and friends unfortunately do not support specifying the length of - * the input string, so we need to force a copy into a NULL-terminated buffer. */ - if (!multipart_text(p, "\0", 1, false)) { - return false; - } - - buf = accumulate_getptr(p, &len); - - if (parse_number_from_buffer(p, buf, is_quoted)) { - multipart_end(p); - return true; - } else { - upb_status_seterrf(p->status, "error parsing number: %s", buf); - multipart_end(p); - return false; - } -} - -static bool parser_putbool(upb_json_parser *p, bool val) { - bool ok; - - if (p->top->f == NULL) { - return true; - } - - if (upb_fielddef_type(p->top->f) != UPB_TYPE_BOOL) { - upb_status_seterrf(p->status, - "Boolean value specified for non-bool field: %s", - upb_fielddef_name(p->top->f)); - return false; - } - - ok = upb_sink_putbool(p->top->sink, parser_getsel(p), val); - UPB_ASSERT(ok); - - return true; -} - -static bool end_bool(upb_json_parser *p, bool val) { - if (is_top_level(p)) { - if (is_wellknown_msg(p, UPB_WELLKNOWN_BOOLVALUE)) { - start_wrapper_object(p); - } else if (is_wellknown_msg(p, UPB_WELLKNOWN_VALUE)) { - start_value_object(p, VALUE_BOOLVALUE); - } else { - return false; - } - } else if (is_wellknown_field(p, UPB_WELLKNOWN_BOOLVALUE)) { - if (!start_subobject(p)) { - return false; - } - start_wrapper_object(p); - } else if (is_wellknown_field(p, UPB_WELLKNOWN_VALUE)) { - if (!start_subobject(p)) { - return false; - } - start_value_object(p, VALUE_BOOLVALUE); - } - - if (p->top->is_unknown_field) { - return true; - } - - if (!parser_putbool(p, val)) { - return false; - } - - if (is_wellknown_msg(p, UPB_WELLKNOWN_BOOLVALUE)) { - end_wrapper_object(p); - if (!is_top_level(p)) { - end_subobject(p); - } - return true; - } - - if (is_wellknown_msg(p, UPB_WELLKNOWN_VALUE)) { - end_value_object(p); - if (!is_top_level(p)) { - end_subobject(p); - } - return true; - } - - return true; -} - -static bool end_null(upb_json_parser *p) { - const char *zero_ptr = "0"; - - if (is_top_level(p)) { - if (is_wellknown_msg(p, UPB_WELLKNOWN_VALUE)) { - start_value_object(p, VALUE_NULLVALUE); - } else { - return true; - } - } else if (is_wellknown_field(p, UPB_WELLKNOWN_VALUE)) { - if (!start_subobject(p)) { - return false; - } - start_value_object(p, VALUE_NULLVALUE); - } else { - return true; - } - - /* Fill null_value field. */ - multipart_startaccum(p); - capture_begin(p, zero_ptr); - capture_end(p, zero_ptr + 1); - parse_number(p, false); - - end_value_object(p); - if (!is_top_level(p)) { - end_subobject(p); - } - - return true; -} - -static bool start_any_stringval(upb_json_parser *p) { - multipart_startaccum(p); - return true; -} - -static bool start_stringval(upb_json_parser *p) { - if (is_top_level(p)) { - if (is_string_wrapper_object(p) || - is_number_wrapper_object(p)) { - start_wrapper_object(p); - } else if (is_wellknown_msg(p, UPB_WELLKNOWN_FIELDMASK)) { - start_fieldmask_object(p); - return true; - } else if (is_wellknown_msg(p, UPB_WELLKNOWN_TIMESTAMP) || - is_wellknown_msg(p, UPB_WELLKNOWN_DURATION)) { - start_object(p); - } else if (is_wellknown_msg(p, UPB_WELLKNOWN_VALUE)) { - start_value_object(p, VALUE_STRINGVALUE); - } else { - return false; - } - } else if (does_string_wrapper_start(p) || - does_number_wrapper_start(p)) { - if (!start_subobject(p)) { - return false; - } - start_wrapper_object(p); - } else if (does_fieldmask_start(p)) { - if (!start_subobject(p)) { - return false; - } - start_fieldmask_object(p); - return true; - } else if (is_wellknown_field(p, UPB_WELLKNOWN_TIMESTAMP) || - is_wellknown_field(p, UPB_WELLKNOWN_DURATION)) { - if (!start_subobject(p)) { - return false; - } - start_object(p); - } else if (is_wellknown_field(p, UPB_WELLKNOWN_VALUE)) { - if (!start_subobject(p)) { - return false; - } - start_value_object(p, VALUE_STRINGVALUE); - } - - if (p->top->f == NULL) { - multipart_startaccum(p); - return true; - } - - if (p->top->is_any) { - return start_any_stringval(p); - } - - if (upb_fielddef_isstring(p->top->f)) { - upb_jsonparser_frame *inner; - upb_selector_t sel; - - if (!check_stack(p)) return false; - - /* Start a new parser frame: parser frames correspond one-to-one with - * handler frames, and string events occur in a sub-frame. */ - inner = start_jsonparser_frame(p); - sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSTR); - upb_sink_startstr(p->top->sink, sel, 0, &inner->sink); - inner->m = p->top->m; - inner->f = p->top->f; - p->top = inner; - - if (upb_fielddef_type(p->top->f) == UPB_TYPE_STRING) { - /* For STRING fields we push data directly to the handlers as it is - * parsed. We don't do this yet for BYTES fields, because our base64 - * decoder is not streaming. - * - * TODO(haberman): make base64 decoding streaming also. */ - multipart_start(p, getsel_for_handlertype(p, UPB_HANDLER_STRING)); - return true; - } else { - multipart_startaccum(p); - return true; - } - } else if (upb_fielddef_type(p->top->f) != UPB_TYPE_BOOL && - upb_fielddef_type(p->top->f) != UPB_TYPE_MESSAGE) { - /* No need to push a frame -- numeric values in quotes remain in the - * current parser frame. These values must accmulate so we can convert - * them all at once at the end. */ - multipart_startaccum(p); - return true; - } else { - upb_status_seterrf(p->status, - "String specified for bool or submessage field: %s", - upb_fielddef_name(p->top->f)); - return false; - } -} - -static bool end_any_stringval(upb_json_parser *p) { - size_t len; - const char *buf = accumulate_getptr(p, &len); - - /* Set type_url */ - upb_selector_t sel; - upb_jsonparser_frame *inner; - if (!check_stack(p)) return false; - inner = p->top + 1; - - sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSTR); - upb_sink_startstr(p->top->sink, sel, 0, &inner->sink); - sel = getsel_for_handlertype(p, UPB_HANDLER_STRING); - upb_sink_putstring(inner->sink, sel, buf, len, NULL); - sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSTR); - upb_sink_endstr(inner->sink, sel); - - multipart_end(p); - - /* Resolve type url */ - if (strncmp(buf, "type.googleapis.com/", 20) == 0 && len > 20) { - const upb_msgdef *payload_type = NULL; - buf += 20; - len -= 20; - - payload_type = upb_symtab_lookupmsg2(p->symtab, buf, len); - if (payload_type == NULL) { - upb_status_seterrf( - p->status, "Cannot find packed type: %.*s\n", (int)len, buf); - return false; - } - - json_parser_any_frame_set_payload_type(p, p->top->any_frame, payload_type); - - return true; - } else { - upb_status_seterrf( - p->status, "Invalid type url: %.*s\n", (int)len, buf); - return false; - } -} - -static bool end_stringval_nontop(upb_json_parser *p) { - bool ok = true; - - if (is_wellknown_msg(p, UPB_WELLKNOWN_TIMESTAMP) || - is_wellknown_msg(p, UPB_WELLKNOWN_DURATION)) { - multipart_end(p); - return true; - } - - if (p->top->f == NULL) { - multipart_end(p); - return true; - } - - if (p->top->is_any) { - return end_any_stringval(p); - } - - switch (upb_fielddef_type(p->top->f)) { - case UPB_TYPE_BYTES: - if (!base64_push(p, getsel_for_handlertype(p, UPB_HANDLER_STRING), - p->accumulated, p->accumulated_len)) { - return false; - } - /* Fall through. */ - - case UPB_TYPE_STRING: { - upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSTR); - upb_sink_endstr(p->top->sink, sel); - p->top--; - break; - } - - case UPB_TYPE_ENUM: { - /* Resolve enum symbolic name to integer value. */ - const upb_enumdef *enumdef = upb_fielddef_enumsubdef(p->top->f); - - size_t len; - const char *buf = accumulate_getptr(p, &len); - - int32_t int_val = 0; - ok = upb_enumdef_ntoi(enumdef, buf, len, &int_val); - - if (ok) { - upb_selector_t sel = parser_getsel(p); - upb_sink_putint32(p->top->sink, sel, int_val); - } else { - if (p->ignore_json_unknown) { - ok = true; - /* TODO(teboring): Should also clean this field. */ - } else { - upb_status_seterrf(p->status, "Enum value unknown: '%.*s'", (int)len, - buf); - } - } - - break; - } - - case UPB_TYPE_INT32: - case UPB_TYPE_INT64: - case UPB_TYPE_UINT32: - case UPB_TYPE_UINT64: - case UPB_TYPE_DOUBLE: - case UPB_TYPE_FLOAT: - ok = parse_number(p, true); - break; - - default: - UPB_ASSERT(false); - upb_status_seterrmsg(p->status, "Internal error in JSON decoder"); - ok = false; - break; - } - - multipart_end(p); - - return ok; -} - -static bool end_stringval(upb_json_parser *p) { - /* FieldMask's stringvals have been ended when handling them. Only need to - * close FieldMask here.*/ - if (does_fieldmask_end(p)) { - end_fieldmask_object(p); - if (!is_top_level(p)) { - end_subobject(p); - } - return true; - } - - if (!end_stringval_nontop(p)) { - return false; - } - - if (does_string_wrapper_end(p) || - does_number_wrapper_end(p)) { - end_wrapper_object(p); - if (!is_top_level(p)) { - end_subobject(p); - } - return true; - } - - if (is_wellknown_msg(p, UPB_WELLKNOWN_VALUE)) { - end_value_object(p); - if (!is_top_level(p)) { - end_subobject(p); - } - return true; - } - - if (is_wellknown_msg(p, UPB_WELLKNOWN_TIMESTAMP) || - is_wellknown_msg(p, UPB_WELLKNOWN_DURATION) || - is_wellknown_msg(p, UPB_WELLKNOWN_FIELDMASK)) { - end_object(p); - if (!is_top_level(p)) { - end_subobject(p); - } - return true; - } - - return true; -} - -static void start_duration_base(upb_json_parser *p, const char *ptr) { - capture_begin(p, ptr); -} - -static bool end_duration_base(upb_json_parser *p, const char *ptr) { - size_t len; - const char *buf; - char seconds_buf[14]; - char nanos_buf[12]; - char *end; - int64_t seconds = 0; - int32_t nanos = 0; - double val = 0.0; - const char *seconds_membername = "seconds"; - const char *nanos_membername = "nanos"; - size_t fraction_start; - - if (!capture_end(p, ptr)) { - return false; - } - - buf = accumulate_getptr(p, &len); - - memset(seconds_buf, 0, 14); - memset(nanos_buf, 0, 12); - - /* Find out base end. The maximus duration is 315576000000, which cannot be - * represented by double without losing precision. Thus, we need to handle - * fraction and base separately. */ - for (fraction_start = 0; fraction_start < len && buf[fraction_start] != '.'; - fraction_start++); - - /* Parse base */ - memcpy(seconds_buf, buf, fraction_start); - seconds = strtol(seconds_buf, &end, 10); - if (errno == ERANGE || end != seconds_buf + fraction_start) { - upb_status_seterrf(p->status, "error parsing duration: %s", - seconds_buf); - return false; - } - - if (seconds > 315576000000) { - upb_status_seterrf(p->status, "error parsing duration: " - "maximum acceptable value is " - "315576000000"); - return false; - } - - if (seconds < -315576000000) { - upb_status_seterrf(p->status, "error parsing duration: " - "minimum acceptable value is " - "-315576000000"); - return false; - } - - /* Parse fraction */ - nanos_buf[0] = '0'; - memcpy(nanos_buf + 1, buf + fraction_start, len - fraction_start); - val = strtod(nanos_buf, &end); - if (errno == ERANGE || end != nanos_buf + len - fraction_start + 1) { - upb_status_seterrf(p->status, "error parsing duration: %s", - nanos_buf); - return false; - } - - nanos = val * 1000000000; - if (seconds < 0) nanos = -nanos; - - /* Clean up buffer */ - multipart_end(p); - - /* Set seconds */ - start_member(p); - capture_begin(p, seconds_membername); - capture_end(p, seconds_membername + 7); - end_membername(p); - upb_sink_putint64(p->top->sink, parser_getsel(p), seconds); - end_member(p); - - /* Set nanos */ - start_member(p); - capture_begin(p, nanos_membername); - capture_end(p, nanos_membername + 5); - end_membername(p); - upb_sink_putint32(p->top->sink, parser_getsel(p), nanos); - end_member(p); - - /* Continue previous arena */ - multipart_startaccum(p); - - return true; -} - -static int parse_timestamp_number(upb_json_parser *p) { - size_t len; - const char *buf; - int val; - - /* atoi() and friends unfortunately do not support specifying the length of - * the input string, so we need to force a copy into a NULL-terminated buffer. */ - multipart_text(p, "\0", 1, false); - - buf = accumulate_getptr(p, &len); - val = atoi(buf); - multipart_end(p); - multipart_startaccum(p); - - return val; -} - -static void start_year(upb_json_parser *p, const char *ptr) { - capture_begin(p, ptr); -} - -static bool end_year(upb_json_parser *p, const char *ptr) { - if (!capture_end(p, ptr)) { - return false; - } - p->tm.tm_year = parse_timestamp_number(p) - 1900; - return true; -} - -static void start_month(upb_json_parser *p, const char *ptr) { - capture_begin(p, ptr); -} - -static bool end_month(upb_json_parser *p, const char *ptr) { - if (!capture_end(p, ptr)) { - return false; - } - p->tm.tm_mon = parse_timestamp_number(p) - 1; - return true; -} - -static void start_day(upb_json_parser *p, const char *ptr) { - capture_begin(p, ptr); -} - -static bool end_day(upb_json_parser *p, const char *ptr) { - if (!capture_end(p, ptr)) { - return false; - } - p->tm.tm_mday = parse_timestamp_number(p); - return true; -} - -static void start_hour(upb_json_parser *p, const char *ptr) { - capture_begin(p, ptr); -} - -static bool end_hour(upb_json_parser *p, const char *ptr) { - if (!capture_end(p, ptr)) { - return false; - } - p->tm.tm_hour = parse_timestamp_number(p); - return true; -} - -static void start_minute(upb_json_parser *p, const char *ptr) { - capture_begin(p, ptr); -} - -static bool end_minute(upb_json_parser *p, const char *ptr) { - if (!capture_end(p, ptr)) { - return false; - } - p->tm.tm_min = parse_timestamp_number(p); - return true; -} - -static void start_second(upb_json_parser *p, const char *ptr) { - capture_begin(p, ptr); -} - -static bool end_second(upb_json_parser *p, const char *ptr) { - if (!capture_end(p, ptr)) { - return false; - } - p->tm.tm_sec = parse_timestamp_number(p); - return true; -} - -static void start_timestamp_base(upb_json_parser *p) { - memset(&p->tm, 0, sizeof(struct tm)); -} - -static void start_timestamp_fraction(upb_json_parser *p, const char *ptr) { - capture_begin(p, ptr); -} - -static bool end_timestamp_fraction(upb_json_parser *p, const char *ptr) { - size_t len; - const char *buf; - char nanos_buf[12]; - char *end; - double val = 0.0; - int32_t nanos; - const char *nanos_membername = "nanos"; - - memset(nanos_buf, 0, 12); - - if (!capture_end(p, ptr)) { - return false; - } - - buf = accumulate_getptr(p, &len); - - if (len > 10) { - upb_status_seterrf(p->status, - "error parsing timestamp: at most 9-digit fraction."); - return false; - } - - /* Parse nanos */ - nanos_buf[0] = '0'; - memcpy(nanos_buf + 1, buf, len); - val = strtod(nanos_buf, &end); - - if (errno == ERANGE || end != nanos_buf + len + 1) { - upb_status_seterrf(p->status, "error parsing timestamp nanos: %s", - nanos_buf); - return false; - } - - nanos = val * 1000000000; - - /* Clean up previous environment */ - multipart_end(p); - - /* Set nanos */ - start_member(p); - capture_begin(p, nanos_membername); - capture_end(p, nanos_membername + 5); - end_membername(p); - upb_sink_putint32(p->top->sink, parser_getsel(p), nanos); - end_member(p); - - /* Continue previous environment */ - multipart_startaccum(p); - - return true; -} - -static void start_timestamp_zone(upb_json_parser *p, const char *ptr) { - capture_begin(p, ptr); -} - -/* epoch_days(1970, 1, 1) == 1970-01-01 == 0. */ -static int epoch_days(int year, int month, int day) { - static const uint16_t month_yday[12] = {0, 31, 59, 90, 120, 151, - 181, 212, 243, 273, 304, 334}; - uint32_t year_adj = year + 4800; /* Ensure positive year, multiple of 400. */ - uint32_t febs = year_adj - (month <= 2 ? 1 : 0); /* Februaries since base. */ - uint32_t leap_days = 1 + (febs / 4) - (febs / 100) + (febs / 400); - uint32_t days = 365 * year_adj + leap_days + month_yday[month - 1] + day - 1; - return days - 2472692; /* Adjust to Unix epoch. */ -} - -static int64_t upb_timegm(const struct tm *tp) { - int64_t ret = epoch_days(tp->tm_year + 1900, tp->tm_mon + 1, tp->tm_mday); - ret = (ret * 24) + tp->tm_hour; - ret = (ret * 60) + tp->tm_min; - ret = (ret * 60) + tp->tm_sec; - return ret; -} - -static bool end_timestamp_zone(upb_json_parser *p, const char *ptr) { - size_t len; - const char *buf; - int hours; - int64_t seconds; - const char *seconds_membername = "seconds"; - - if (!capture_end(p, ptr)) { - return false; - } - - buf = accumulate_getptr(p, &len); - - if (buf[0] != 'Z') { - if (sscanf(buf + 1, "%2d:00", &hours) != 1) { - upb_status_seterrf(p->status, "error parsing timestamp offset"); - return false; - } - - if (buf[0] == '+') { - hours = -hours; - } - - p->tm.tm_hour += hours; - } - - /* Normalize tm */ - seconds = upb_timegm(&p->tm); - - /* Check timestamp boundary */ - if (seconds < -62135596800) { - upb_status_seterrf(p->status, "error parsing timestamp: " - "minimum acceptable value is " - "0001-01-01T00:00:00Z"); - return false; - } - - /* Clean up previous environment */ - multipart_end(p); - - /* Set seconds */ - start_member(p); - capture_begin(p, seconds_membername); - capture_end(p, seconds_membername + 7); - end_membername(p); - upb_sink_putint64(p->top->sink, parser_getsel(p), seconds); - end_member(p); - - /* Continue previous environment */ - multipart_startaccum(p); - - return true; -} - -static void start_fieldmask_path_text(upb_json_parser *p, const char *ptr) { - capture_begin(p, ptr); -} - -static bool end_fieldmask_path_text(upb_json_parser *p, const char *ptr) { - return capture_end(p, ptr); -} - -static bool start_fieldmask_path(upb_json_parser *p) { - upb_jsonparser_frame *inner; - upb_selector_t sel; - - if (!check_stack(p)) return false; - - /* Start a new parser frame: parser frames correspond one-to-one with - * handler frames, and string events occur in a sub-frame. */ - inner = start_jsonparser_frame(p); - sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSTR); - upb_sink_startstr(p->top->sink, sel, 0, &inner->sink); - inner->m = p->top->m; - inner->f = p->top->f; - p->top = inner; - - multipart_startaccum(p); - return true; -} - -static bool lower_camel_push( - upb_json_parser *p, upb_selector_t sel, const char *ptr, size_t len) { - const char *limit = ptr + len; - bool first = true; - for (;ptr < limit; ptr++) { - if (*ptr >= 'A' && *ptr <= 'Z' && !first) { - char lower = tolower(*ptr); - upb_sink_putstring(p->top->sink, sel, "_", 1, NULL); - upb_sink_putstring(p->top->sink, sel, &lower, 1, NULL); - } else { - upb_sink_putstring(p->top->sink, sel, ptr, 1, NULL); - } - first = false; - } - return true; -} - -static bool end_fieldmask_path(upb_json_parser *p) { - upb_selector_t sel; - - if (!lower_camel_push( - p, getsel_for_handlertype(p, UPB_HANDLER_STRING), - p->accumulated, p->accumulated_len)) { - return false; - } - - sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSTR); - upb_sink_endstr(p->top->sink, sel); - p->top--; - - multipart_end(p); - return true; -} - -static void start_member(upb_json_parser *p) { - UPB_ASSERT(!p->top->f); - multipart_startaccum(p); -} - -/* Helper: invoked during parse_mapentry() to emit the mapentry message's key - * field based on the current contents of the accumulate buffer. */ -static bool parse_mapentry_key(upb_json_parser *p) { - - size_t len; - const char *buf = accumulate_getptr(p, &len); - - /* Emit the key field. We do a bit of ad-hoc parsing here because the - * parser state machine has already decided that this is a string field - * name, and we are reinterpreting it as some arbitrary key type. In - * particular, integer and bool keys are quoted, so we need to parse the - * quoted string contents here. */ - - p->top->f = upb_msgdef_itof(p->top->m, UPB_MAPENTRY_KEY); - if (p->top->f == NULL) { - upb_status_seterrmsg(p->status, "mapentry message has no key"); - return false; - } - switch (upb_fielddef_type(p->top->f)) { - case UPB_TYPE_INT32: - case UPB_TYPE_INT64: - case UPB_TYPE_UINT32: - case UPB_TYPE_UINT64: - /* Invoke end_number. The accum buffer has the number's text already. */ - if (!parse_number(p, true)) { - return false; - } - break; - case UPB_TYPE_BOOL: - if (len == 4 && !strncmp(buf, "true", 4)) { - if (!parser_putbool(p, true)) { - return false; - } - } else if (len == 5 && !strncmp(buf, "false", 5)) { - if (!parser_putbool(p, false)) { - return false; - } - } else { - upb_status_seterrmsg(p->status, - "Map bool key not 'true' or 'false'"); - return false; - } - multipart_end(p); - break; - case UPB_TYPE_STRING: - case UPB_TYPE_BYTES: { - upb_sink subsink; - upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSTR); - upb_sink_startstr(p->top->sink, sel, len, &subsink); - sel = getsel_for_handlertype(p, UPB_HANDLER_STRING); - upb_sink_putstring(subsink, sel, buf, len, NULL); - sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSTR); - upb_sink_endstr(subsink, sel); - multipart_end(p); - break; - } - default: - upb_status_seterrmsg(p->status, "Invalid field type for map key"); - return false; - } - - return true; -} - -/* Helper: emit one map entry (as a submessage in the map field sequence). This - * is invoked from end_membername(), at the end of the map entry's key string, - * with the map key in the accumulate buffer. It parses the key from that - * buffer, emits the handler calls to start the mapentry submessage (setting up - * its subframe in the process), and sets up state in the subframe so that the - * value parser (invoked next) will emit the mapentry's value field and then - * end the mapentry message. */ - -static bool handle_mapentry(upb_json_parser *p) { - const upb_fielddef *mapfield; - const upb_msgdef *mapentrymsg; - upb_jsonparser_frame *inner; - upb_selector_t sel; - - /* Map entry: p->top->sink is the seq frame, so we need to start a frame - * for the mapentry itself, and then set |f| in that frame so that the map - * value field is parsed, and also set a flag to end the frame after the - * map-entry value is parsed. */ - if (!check_stack(p)) return false; - - mapfield = p->top->mapfield; - mapentrymsg = upb_fielddef_msgsubdef(mapfield); - - inner = start_jsonparser_frame(p); - p->top->f = mapfield; - sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSUBMSG); - upb_sink_startsubmsg(p->top->sink, sel, &inner->sink); - inner->m = mapentrymsg; - inner->mapfield = mapfield; - - /* Don't set this to true *yet* -- we reuse parsing handlers below to push - * the key field value to the sink, and these handlers will pop the frame - * if they see is_mapentry (when invoked by the parser state machine, they - * would have just seen the map-entry value, not key). */ - inner->is_mapentry = false; - p->top = inner; - - /* send STARTMSG in submsg frame. */ - upb_sink_startmsg(p->top->sink); - - parse_mapentry_key(p); - - /* Set up the value field to receive the map-entry value. */ - p->top->f = upb_msgdef_itof(p->top->m, UPB_MAPENTRY_VALUE); - p->top->is_mapentry = true; /* set up to pop frame after value is parsed. */ - p->top->mapfield = mapfield; - if (p->top->f == NULL) { - upb_status_seterrmsg(p->status, "mapentry message has no value"); - return false; - } - - return true; -} - -static bool end_membername(upb_json_parser *p) { - UPB_ASSERT(!p->top->f); - - if (!p->top->m) { - p->top->is_unknown_field = true; - multipart_end(p); - return true; - } - - if (p->top->is_any) { - return end_any_membername(p); - } else if (p->top->is_map) { - return handle_mapentry(p); - } else { - size_t len; - const char *buf = accumulate_getptr(p, &len); - upb_value v; - - if (upb_strtable_lookup2(p->top->name_table, buf, len, &v)) { - p->top->f = upb_value_getconstptr(v); - multipart_end(p); - - return true; - } else if (p->ignore_json_unknown) { - p->top->is_unknown_field = true; - multipart_end(p); - return true; - } else { - upb_status_seterrf(p->status, "No such field: %.*s\n", (int)len, buf); - return false; - } - } -} - -static bool end_any_membername(upb_json_parser *p) { - size_t len; - const char *buf = accumulate_getptr(p, &len); - upb_value v; - - if (len == 5 && strncmp(buf, "@type", len) == 0) { - upb_strtable_lookup2(p->top->name_table, "type_url", 8, &v); - p->top->f = upb_value_getconstptr(v); - multipart_end(p); - return true; - } else { - p->top->is_unknown_field = true; - multipart_end(p); - return true; - } -} - -static void end_member(upb_json_parser *p) { - /* If we just parsed a map-entry value, end that frame too. */ - if (p->top->is_mapentry) { - upb_selector_t sel; - bool ok; - const upb_fielddef *mapfield; - - UPB_ASSERT(p->top > p->stack); - /* send ENDMSG on submsg. */ - upb_sink_endmsg(p->top->sink, p->status); - mapfield = p->top->mapfield; - - /* send ENDSUBMSG in repeated-field-of-mapentries frame. */ - p->top--; - ok = upb_handlers_getselector(mapfield, UPB_HANDLER_ENDSUBMSG, &sel); - UPB_ASSUME(ok); - upb_sink_endsubmsg(p->top->sink, (p->top + 1)->sink, sel); - } - - p->top->f = NULL; - p->top->is_unknown_field = false; -} - -static void start_any_member(upb_json_parser *p, const char *ptr) { - start_member(p); - json_parser_any_frame_set_after_type_url_start_once(p->top->any_frame, ptr); -} - -static void end_any_member(upb_json_parser *p, const char *ptr) { - json_parser_any_frame_set_before_type_url_end(p->top->any_frame, ptr); - end_member(p); -} - -static bool start_subobject(upb_json_parser *p) { - if (p->top->is_unknown_field) { - if (!check_stack(p)) return false; - - p->top = start_jsonparser_frame(p); - return true; - } - - if (upb_fielddef_ismap(p->top->f)) { - upb_jsonparser_frame *inner; - upb_selector_t sel; - - /* Beginning of a map. Start a new parser frame in a repeated-field - * context. */ - if (!check_stack(p)) return false; - - inner = start_jsonparser_frame(p); - sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSEQ); - upb_sink_startseq(p->top->sink, sel, &inner->sink); - inner->m = upb_fielddef_msgsubdef(p->top->f); - inner->mapfield = p->top->f; - inner->is_map = true; - p->top = inner; - - return true; - } else if (upb_fielddef_issubmsg(p->top->f)) { - upb_jsonparser_frame *inner; - upb_selector_t sel; - - /* Beginning of a subobject. Start a new parser frame in the submsg - * context. */ - if (!check_stack(p)) return false; - - inner = start_jsonparser_frame(p); - sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSUBMSG); - upb_sink_startsubmsg(p->top->sink, sel, &inner->sink); - inner->m = upb_fielddef_msgsubdef(p->top->f); - set_name_table(p, inner); - p->top = inner; - - if (is_wellknown_msg(p, UPB_WELLKNOWN_ANY)) { - p->top->is_any = true; - p->top->any_frame = json_parser_any_frame_new(p); - } else { - p->top->is_any = false; - p->top->any_frame = NULL; - } - - return true; - } else { - upb_status_seterrf(p->status, - "Object specified for non-message/group field: %s", - upb_fielddef_name(p->top->f)); - return false; - } -} - -static bool start_subobject_full(upb_json_parser *p) { - if (is_top_level(p)) { - if (is_wellknown_msg(p, UPB_WELLKNOWN_VALUE)) { - start_value_object(p, VALUE_STRUCTVALUE); - if (!start_subobject(p)) return false; - start_structvalue_object(p); - } else if (is_wellknown_msg(p, UPB_WELLKNOWN_STRUCT)) { - start_structvalue_object(p); - } else { - return true; - } - } else if (is_wellknown_field(p, UPB_WELLKNOWN_STRUCT)) { - if (!start_subobject(p)) return false; - start_structvalue_object(p); - } else if (is_wellknown_field(p, UPB_WELLKNOWN_VALUE)) { - if (!start_subobject(p)) return false; - start_value_object(p, VALUE_STRUCTVALUE); - if (!start_subobject(p)) return false; - start_structvalue_object(p); - } - - return start_subobject(p); -} - -static void end_subobject(upb_json_parser *p) { - if (is_top_level(p)) { - return; - } - - if (p->top->is_map) { - upb_selector_t sel; - p->top--; - sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSEQ); - upb_sink_endseq(p->top->sink, sel); - } else { - upb_selector_t sel; - bool is_unknown = p->top->m == NULL; - p->top--; - if (!is_unknown) { - sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSUBMSG); - upb_sink_endsubmsg(p->top->sink, (p->top + 1)->sink, sel); - } - } -} - -static void end_subobject_full(upb_json_parser *p) { - end_subobject(p); - - if (is_wellknown_msg(p, UPB_WELLKNOWN_STRUCT)) { - end_structvalue_object(p); - if (!is_top_level(p)) { - end_subobject(p); - } - } - - if (is_wellknown_msg(p, UPB_WELLKNOWN_VALUE)) { - end_value_object(p); - if (!is_top_level(p)) { - end_subobject(p); - } - } -} - -static bool start_array(upb_json_parser *p) { - upb_jsonparser_frame *inner; - upb_selector_t sel; - - if (is_top_level(p)) { - if (is_wellknown_msg(p, UPB_WELLKNOWN_VALUE)) { - start_value_object(p, VALUE_LISTVALUE); - if (!start_subobject(p)) return false; - start_listvalue_object(p); - } else if (is_wellknown_msg(p, UPB_WELLKNOWN_LISTVALUE)) { - start_listvalue_object(p); - } else { - return false; - } - } else if (is_wellknown_field(p, UPB_WELLKNOWN_LISTVALUE) && - (!upb_fielddef_isseq(p->top->f) || - p->top->is_repeated)) { - if (!start_subobject(p)) return false; - start_listvalue_object(p); - } else if (is_wellknown_field(p, UPB_WELLKNOWN_VALUE) && - (!upb_fielddef_isseq(p->top->f) || - p->top->is_repeated)) { - if (!start_subobject(p)) return false; - start_value_object(p, VALUE_LISTVALUE); - if (!start_subobject(p)) return false; - start_listvalue_object(p); - } - - if (p->top->is_unknown_field) { - inner = start_jsonparser_frame(p); - inner->is_unknown_field = true; - p->top = inner; - - return true; - } - - if (!upb_fielddef_isseq(p->top->f)) { - upb_status_seterrf(p->status, - "Array specified for non-repeated field: %s", - upb_fielddef_name(p->top->f)); - return false; - } - - if (!check_stack(p)) return false; - - inner = start_jsonparser_frame(p); - sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSEQ); - upb_sink_startseq(p->top->sink, sel, &inner->sink); - inner->m = p->top->m; - inner->f = p->top->f; - inner->is_repeated = true; - p->top = inner; - - return true; -} - -static void end_array(upb_json_parser *p) { - upb_selector_t sel; - - UPB_ASSERT(p->top > p->stack); - - p->top--; - - if (p->top->is_unknown_field) { - return; - } - - sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSEQ); - upb_sink_endseq(p->top->sink, sel); - - if (is_wellknown_msg(p, UPB_WELLKNOWN_LISTVALUE)) { - end_listvalue_object(p); - if (!is_top_level(p)) { - end_subobject(p); - } - } - - if (is_wellknown_msg(p, UPB_WELLKNOWN_VALUE)) { - end_value_object(p); - if (!is_top_level(p)) { - end_subobject(p); - } - } -} - -static void start_object(upb_json_parser *p) { - if (!p->top->is_map && p->top->m != NULL) { - upb_sink_startmsg(p->top->sink); - } -} - -static void end_object(upb_json_parser *p) { - if (!p->top->is_map && p->top->m != NULL) { - upb_sink_endmsg(p->top->sink, p->status); - } -} - -static void start_any_object(upb_json_parser *p, const char *ptr) { - start_object(p); - p->top->any_frame->before_type_url_start = ptr; - p->top->any_frame->before_type_url_end = ptr; -} - -static bool end_any_object(upb_json_parser *p, const char *ptr) { - const char *value_membername = "value"; - bool is_well_known_packed = false; - const char *packed_end = ptr + 1; - upb_selector_t sel; - upb_jsonparser_frame *inner; - - if (json_parser_any_frame_has_value(p->top->any_frame) && - !json_parser_any_frame_has_type_url(p->top->any_frame)) { - upb_status_seterrmsg(p->status, "No valid type url"); - return false; - } - - /* Well known types data is represented as value field. */ - if (upb_msgdef_wellknowntype(p->top->any_frame->parser->top->m) != - UPB_WELLKNOWN_UNSPECIFIED) { - is_well_known_packed = true; - - if (json_parser_any_frame_has_value_before_type_url(p->top->any_frame)) { - p->top->any_frame->before_type_url_start = - memchr(p->top->any_frame->before_type_url_start, ':', - p->top->any_frame->before_type_url_end - - p->top->any_frame->before_type_url_start); - if (p->top->any_frame->before_type_url_start == NULL) { - upb_status_seterrmsg(p->status, "invalid data for well known type."); - return false; - } - p->top->any_frame->before_type_url_start++; - } - - if (json_parser_any_frame_has_value_after_type_url(p->top->any_frame)) { - p->top->any_frame->after_type_url_start = - memchr(p->top->any_frame->after_type_url_start, ':', - (ptr + 1) - - p->top->any_frame->after_type_url_start); - if (p->top->any_frame->after_type_url_start == NULL) { - upb_status_seterrmsg(p->status, "Invalid data for well known type."); - return false; - } - p->top->any_frame->after_type_url_start++; - packed_end = ptr; - } - } - - if (json_parser_any_frame_has_value_before_type_url(p->top->any_frame)) { - if (!parse(p->top->any_frame->parser, NULL, - p->top->any_frame->before_type_url_start, - p->top->any_frame->before_type_url_end - - p->top->any_frame->before_type_url_start, NULL)) { - return false; - } - } else { - if (!is_well_known_packed) { - if (!parse(p->top->any_frame->parser, NULL, "{", 1, NULL)) { - return false; - } - } - } - - if (json_parser_any_frame_has_value_before_type_url(p->top->any_frame) && - json_parser_any_frame_has_value_after_type_url(p->top->any_frame)) { - if (!parse(p->top->any_frame->parser, NULL, ",", 1, NULL)) { - return false; - } - } - - if (json_parser_any_frame_has_value_after_type_url(p->top->any_frame)) { - if (!parse(p->top->any_frame->parser, NULL, - p->top->any_frame->after_type_url_start, - packed_end - p->top->any_frame->after_type_url_start, NULL)) { - return false; - } - } else { - if (!is_well_known_packed) { - if (!parse(p->top->any_frame->parser, NULL, "}", 1, NULL)) { - return false; - } - } - } - - if (!end(p->top->any_frame->parser, NULL)) { - return false; - } - - p->top->is_any = false; - - /* Set value */ - start_member(p); - capture_begin(p, value_membername); - capture_end(p, value_membername + 5); - end_membername(p); - - if (!check_stack(p)) return false; - inner = p->top + 1; - - sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSTR); - upb_sink_startstr(p->top->sink, sel, 0, &inner->sink); - sel = getsel_for_handlertype(p, UPB_HANDLER_STRING); - upb_sink_putstring(inner->sink, sel, p->top->any_frame->stringsink.ptr, - p->top->any_frame->stringsink.len, NULL); - sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSTR); - upb_sink_endstr(inner->sink, sel); - - end_member(p); - - end_object(p); - - /* Deallocate any parse frame. */ - json_parser_any_frame_free(p->top->any_frame); - - return true; -} - -static bool is_string_wrapper(const upb_msgdef *m) { - upb_wellknowntype_t type = upb_msgdef_wellknowntype(m); - return type == UPB_WELLKNOWN_STRINGVALUE || - type == UPB_WELLKNOWN_BYTESVALUE; -} - -static bool is_fieldmask(const upb_msgdef *m) { - upb_wellknowntype_t type = upb_msgdef_wellknowntype(m); - return type == UPB_WELLKNOWN_FIELDMASK; -} - -static void start_fieldmask_object(upb_json_parser *p) { - const char *membername = "paths"; - - start_object(p); - - /* Set up context for parsing value */ - start_member(p); - capture_begin(p, membername); - capture_end(p, membername + 5); - end_membername(p); - - start_array(p); -} - -static void end_fieldmask_object(upb_json_parser *p) { - end_array(p); - end_member(p); - end_object(p); -} - -static void start_wrapper_object(upb_json_parser *p) { - const char *membername = "value"; - - start_object(p); - - /* Set up context for parsing value */ - start_member(p); - capture_begin(p, membername); - capture_end(p, membername + 5); - end_membername(p); -} - -static void end_wrapper_object(upb_json_parser *p) { - end_member(p); - end_object(p); -} - -static void start_value_object(upb_json_parser *p, int value_type) { - const char *nullmember = "null_value"; - const char *numbermember = "number_value"; - const char *stringmember = "string_value"; - const char *boolmember = "bool_value"; - const char *structmember = "struct_value"; - const char *listmember = "list_value"; - const char *membername = ""; - - switch (value_type) { - case VALUE_NULLVALUE: - membername = nullmember; - break; - case VALUE_NUMBERVALUE: - membername = numbermember; - break; - case VALUE_STRINGVALUE: - membername = stringmember; - break; - case VALUE_BOOLVALUE: - membername = boolmember; - break; - case VALUE_STRUCTVALUE: - membername = structmember; - break; - case VALUE_LISTVALUE: - membername = listmember; - break; - } - - start_object(p); - - /* Set up context for parsing value */ - start_member(p); - capture_begin(p, membername); - capture_end(p, membername + strlen(membername)); - end_membername(p); -} - -static void end_value_object(upb_json_parser *p) { - end_member(p); - end_object(p); -} - -static void start_listvalue_object(upb_json_parser *p) { - const char *membername = "values"; - - start_object(p); - - /* Set up context for parsing value */ - start_member(p); - capture_begin(p, membername); - capture_end(p, membername + strlen(membername)); - end_membername(p); -} - -static void end_listvalue_object(upb_json_parser *p) { - end_member(p); - end_object(p); -} - -static void start_structvalue_object(upb_json_parser *p) { - const char *membername = "fields"; - - start_object(p); - - /* Set up context for parsing value */ - start_member(p); - capture_begin(p, membername); - capture_end(p, membername + strlen(membername)); - end_membername(p); -} - -static void end_structvalue_object(upb_json_parser *p) { - end_member(p); - end_object(p); -} - -static bool is_top_level(upb_json_parser *p) { - return p->top == p->stack && p->top->f == NULL && !p->top->is_unknown_field; -} - -static bool is_wellknown_msg(upb_json_parser *p, upb_wellknowntype_t type) { - return p->top->m != NULL && upb_msgdef_wellknowntype(p->top->m) == type; -} - -static bool is_wellknown_field(upb_json_parser *p, upb_wellknowntype_t type) { - return p->top->f != NULL && - upb_fielddef_issubmsg(p->top->f) && - (upb_msgdef_wellknowntype(upb_fielddef_msgsubdef(p->top->f)) - == type); -} - -static bool does_number_wrapper_start(upb_json_parser *p) { - return p->top->f != NULL && - upb_fielddef_issubmsg(p->top->f) && - upb_msgdef_isnumberwrapper(upb_fielddef_msgsubdef(p->top->f)); -} - -static bool does_number_wrapper_end(upb_json_parser *p) { - return p->top->m != NULL && upb_msgdef_isnumberwrapper(p->top->m); -} - -static bool is_number_wrapper_object(upb_json_parser *p) { - return p->top->m != NULL && upb_msgdef_isnumberwrapper(p->top->m); -} - -static bool does_string_wrapper_start(upb_json_parser *p) { - return p->top->f != NULL && - upb_fielddef_issubmsg(p->top->f) && - is_string_wrapper(upb_fielddef_msgsubdef(p->top->f)); -} - -static bool does_string_wrapper_end(upb_json_parser *p) { - return p->top->m != NULL && is_string_wrapper(p->top->m); -} - -static bool is_string_wrapper_object(upb_json_parser *p) { - return p->top->m != NULL && is_string_wrapper(p->top->m); -} - -static bool does_fieldmask_start(upb_json_parser *p) { - return p->top->f != NULL && - upb_fielddef_issubmsg(p->top->f) && - is_fieldmask(upb_fielddef_msgsubdef(p->top->f)); -} - -static bool does_fieldmask_end(upb_json_parser *p) { - return p->top->m != NULL && is_fieldmask(p->top->m); -} - -#define CHECK_RETURN_TOP(x) if (!(x)) goto error - - -/* The actual parser **********************************************************/ - -/* What follows is the Ragel parser itself. The language is specified in Ragel - * and the actions call our C functions above. - * - * Ragel has an extensive set of functionality, and we use only a small part of - * it. There are many action types but we only use a few: - * - * ">" -- transition into a machine - * "%" -- transition out of a machine - * "@" -- transition into a final state of a machine. - * - * "@" transitions are tricky because a machine can transition into a final - * state repeatedly. But in some cases we know this can't happen, for example - * a string which is delimited by a final '"' can only transition into its - * final state once, when the closing '"' is seen. */ - - -#line 2785 "upb/json/parser.rl" - - - -#line 2588 "upb/json/parser.c" -static const char _json_actions[] = { - 0, 1, 0, 1, 1, 1, 3, 1, - 4, 1, 6, 1, 7, 1, 8, 1, - 9, 1, 11, 1, 12, 1, 13, 1, - 14, 1, 15, 1, 16, 1, 17, 1, - 18, 1, 19, 1, 20, 1, 22, 1, - 23, 1, 24, 1, 35, 1, 37, 1, - 39, 1, 40, 1, 42, 1, 43, 1, - 44, 1, 46, 1, 48, 1, 49, 1, - 50, 1, 51, 1, 53, 1, 54, 2, - 4, 9, 2, 5, 6, 2, 7, 3, - 2, 7, 9, 2, 21, 26, 2, 25, - 10, 2, 27, 28, 2, 29, 30, 2, - 32, 34, 2, 33, 31, 2, 38, 36, - 2, 40, 42, 2, 45, 2, 2, 46, - 54, 2, 47, 36, 2, 49, 54, 2, - 50, 54, 2, 51, 54, 2, 52, 41, - 2, 53, 54, 3, 32, 34, 35, 4, - 21, 26, 27, 28 -}; - -static const short _json_key_offsets[] = { - 0, 0, 12, 13, 18, 23, 28, 29, - 30, 31, 32, 33, 34, 35, 36, 37, - 38, 43, 44, 48, 53, 58, 63, 67, - 71, 74, 77, 79, 83, 87, 89, 91, - 96, 98, 100, 109, 115, 121, 127, 133, - 135, 139, 142, 144, 146, 149, 150, 154, - 156, 158, 160, 162, 163, 165, 167, 168, - 170, 172, 173, 175, 177, 178, 180, 182, - 183, 185, 187, 191, 193, 195, 196, 197, - 198, 199, 201, 206, 208, 210, 212, 221, - 222, 222, 222, 227, 232, 237, 238, 239, - 240, 241, 241, 242, 243, 244, 244, 245, - 246, 247, 247, 252, 253, 257, 262, 267, - 272, 276, 276, 279, 282, 285, 288, 291, - 294, 294, 294, 294, 294, 294 -}; - -static const char _json_trans_keys[] = { - 32, 34, 45, 91, 102, 110, 116, 123, - 9, 13, 48, 57, 34, 32, 93, 125, - 9, 13, 32, 44, 93, 9, 13, 32, - 93, 125, 9, 13, 97, 108, 115, 101, - 117, 108, 108, 114, 117, 101, 32, 34, - 125, 9, 13, 34, 32, 58, 9, 13, - 32, 93, 125, 9, 13, 32, 44, 125, - 9, 13, 32, 44, 125, 9, 13, 32, - 34, 9, 13, 45, 48, 49, 57, 48, - 49, 57, 46, 69, 101, 48, 57, 69, - 101, 48, 57, 43, 45, 48, 57, 48, - 57, 48, 57, 46, 69, 101, 48, 57, - 34, 92, 34, 92, 34, 47, 92, 98, - 102, 110, 114, 116, 117, 48, 57, 65, - 70, 97, 102, 48, 57, 65, 70, 97, - 102, 48, 57, 65, 70, 97, 102, 48, - 57, 65, 70, 97, 102, 34, 92, 45, - 48, 49, 57, 48, 49, 57, 46, 115, - 48, 57, 115, 48, 57, 34, 46, 115, - 48, 57, 48, 57, 48, 57, 48, 57, - 48, 57, 45, 48, 57, 48, 57, 45, - 48, 57, 48, 57, 84, 48, 57, 48, - 57, 58, 48, 57, 48, 57, 58, 48, - 57, 48, 57, 43, 45, 46, 90, 48, - 57, 48, 57, 58, 48, 48, 34, 48, - 57, 43, 45, 90, 48, 57, 34, 44, - 34, 44, 34, 44, 34, 45, 91, 102, - 110, 116, 123, 48, 57, 34, 32, 93, - 125, 9, 13, 32, 44, 93, 9, 13, - 32, 93, 125, 9, 13, 97, 108, 115, - 101, 117, 108, 108, 114, 117, 101, 32, - 34, 125, 9, 13, 34, 32, 58, 9, - 13, 32, 93, 125, 9, 13, 32, 44, - 125, 9, 13, 32, 44, 125, 9, 13, - 32, 34, 9, 13, 32, 9, 13, 32, - 9, 13, 32, 9, 13, 32, 9, 13, - 32, 9, 13, 32, 9, 13, 0 -}; - -static const char _json_single_lengths[] = { - 0, 8, 1, 3, 3, 3, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, - 3, 1, 2, 3, 3, 3, 2, 2, - 1, 3, 0, 2, 2, 0, 0, 3, - 2, 2, 9, 0, 0, 0, 0, 2, - 2, 1, 2, 0, 1, 1, 2, 0, - 0, 0, 0, 1, 0, 0, 1, 0, - 0, 1, 0, 0, 1, 0, 0, 1, - 0, 0, 4, 0, 0, 1, 1, 1, - 1, 0, 3, 2, 2, 2, 7, 1, - 0, 0, 3, 3, 3, 1, 1, 1, - 1, 0, 1, 1, 1, 0, 1, 1, - 1, 0, 3, 1, 2, 3, 3, 3, - 2, 0, 1, 1, 1, 1, 1, 1, - 0, 0, 0, 0, 0, 0 -}; - -static const char _json_range_lengths[] = { - 0, 2, 0, 1, 1, 1, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 1, 0, 1, 1, 1, 1, 1, 1, - 1, 0, 1, 1, 1, 1, 1, 1, - 0, 0, 0, 3, 3, 3, 3, 0, - 1, 1, 0, 1, 1, 0, 1, 1, - 1, 1, 1, 0, 1, 1, 0, 1, - 1, 0, 1, 1, 0, 1, 1, 0, - 1, 1, 0, 1, 1, 0, 0, 0, - 0, 1, 1, 0, 0, 0, 1, 0, - 0, 0, 1, 1, 1, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 1, 0, 1, 1, 1, 1, - 1, 0, 1, 1, 1, 1, 1, 1, - 0, 0, 0, 0, 0, 0 -}; - -static const short _json_index_offsets[] = { - 0, 0, 11, 13, 18, 23, 28, 30, - 32, 34, 36, 38, 40, 42, 44, 46, - 48, 53, 55, 59, 64, 69, 74, 78, - 82, 85, 89, 91, 95, 99, 101, 103, - 108, 111, 114, 124, 128, 132, 136, 140, - 143, 147, 150, 153, 155, 158, 160, 164, - 166, 168, 170, 172, 174, 176, 178, 180, - 182, 184, 186, 188, 190, 192, 194, 196, - 198, 200, 202, 207, 209, 211, 213, 215, - 217, 219, 221, 226, 229, 232, 235, 244, - 246, 247, 248, 253, 258, 263, 265, 267, - 269, 271, 272, 274, 276, 278, 279, 281, - 283, 285, 286, 291, 293, 297, 302, 307, - 312, 316, 317, 320, 323, 326, 329, 332, - 335, 336, 337, 338, 339, 340 -}; - -static const unsigned char _json_indicies[] = { - 0, 2, 3, 4, 5, 6, 7, 8, - 0, 3, 1, 9, 1, 11, 12, 1, - 11, 10, 13, 14, 12, 13, 1, 14, - 1, 1, 14, 10, 15, 1, 16, 1, - 17, 1, 18, 1, 19, 1, 20, 1, - 21, 1, 22, 1, 23, 1, 24, 1, - 25, 26, 27, 25, 1, 28, 1, 29, - 30, 29, 1, 30, 1, 1, 30, 31, - 32, 33, 34, 32, 1, 35, 36, 27, - 35, 1, 36, 26, 36, 1, 37, 38, - 39, 1, 38, 39, 1, 41, 42, 42, - 40, 43, 1, 42, 42, 43, 40, 44, - 44, 45, 1, 45, 1, 45, 40, 41, - 42, 42, 39, 40, 47, 48, 46, 50, - 51, 49, 52, 52, 52, 52, 52, 52, - 52, 52, 53, 1, 54, 54, 54, 1, - 55, 55, 55, 1, 56, 56, 56, 1, - 57, 57, 57, 1, 59, 60, 58, 61, - 62, 63, 1, 64, 65, 1, 66, 67, - 1, 68, 1, 67, 68, 1, 69, 1, - 66, 67, 65, 1, 70, 1, 71, 1, - 72, 1, 73, 1, 74, 1, 75, 1, - 76, 1, 77, 1, 78, 1, 79, 1, - 80, 1, 81, 1, 82, 1, 83, 1, - 84, 1, 85, 1, 86, 1, 87, 1, - 88, 1, 89, 89, 90, 91, 1, 92, - 1, 93, 1, 94, 1, 95, 1, 96, - 1, 97, 1, 98, 1, 99, 99, 100, - 98, 1, 102, 1, 101, 104, 105, 103, - 1, 1, 101, 106, 107, 108, 109, 110, - 111, 112, 107, 1, 113, 1, 114, 115, - 117, 118, 1, 117, 116, 119, 120, 118, - 119, 1, 120, 1, 1, 120, 116, 121, - 1, 122, 1, 123, 1, 124, 1, 125, - 126, 1, 127, 1, 128, 1, 129, 130, - 1, 131, 1, 132, 1, 133, 134, 135, - 136, 134, 1, 137, 1, 138, 139, 138, - 1, 139, 1, 1, 139, 140, 141, 142, - 143, 141, 1, 144, 145, 136, 144, 1, - 145, 135, 145, 1, 146, 147, 147, 1, - 148, 148, 1, 149, 149, 1, 150, 150, - 1, 151, 151, 1, 152, 152, 1, 1, - 1, 1, 1, 1, 1, 0 -}; - -static const char _json_trans_targs[] = { - 1, 0, 2, 107, 3, 6, 10, 13, - 16, 106, 4, 3, 106, 4, 5, 7, - 8, 9, 108, 11, 12, 109, 14, 15, - 110, 16, 17, 111, 18, 18, 19, 20, - 21, 22, 111, 21, 22, 24, 25, 31, - 112, 26, 28, 27, 29, 30, 33, 113, - 34, 33, 113, 34, 32, 35, 36, 37, - 38, 39, 33, 113, 34, 41, 42, 46, - 42, 46, 43, 45, 44, 114, 48, 49, - 50, 51, 52, 53, 54, 55, 56, 57, - 58, 59, 60, 61, 62, 63, 64, 65, - 66, 67, 73, 72, 68, 69, 70, 71, - 72, 115, 74, 67, 72, 76, 116, 76, - 116, 77, 79, 81, 82, 85, 90, 94, - 98, 80, 117, 117, 83, 82, 80, 83, - 84, 86, 87, 88, 89, 117, 91, 92, - 93, 117, 95, 96, 97, 117, 98, 99, - 105, 100, 100, 101, 102, 103, 104, 105, - 103, 104, 117, 106, 106, 106, 106, 106, - 106 -}; - -static const unsigned char _json_trans_actions[] = { - 0, 0, 113, 107, 53, 0, 0, 0, - 125, 59, 45, 0, 55, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 101, 51, 47, 0, 0, 45, - 49, 49, 104, 0, 0, 0, 0, 0, - 3, 0, 0, 0, 0, 0, 5, 15, - 0, 0, 71, 7, 13, 0, 74, 9, - 9, 9, 77, 80, 11, 37, 37, 37, - 0, 0, 0, 39, 0, 41, 86, 0, - 0, 0, 17, 19, 0, 21, 23, 0, - 25, 27, 0, 29, 31, 0, 33, 35, - 0, 135, 83, 135, 0, 0, 0, 0, - 0, 92, 0, 89, 89, 98, 43, 0, - 131, 95, 113, 107, 53, 0, 0, 0, - 125, 59, 69, 110, 45, 0, 55, 0, - 0, 0, 0, 0, 0, 119, 0, 0, - 0, 122, 0, 0, 0, 116, 0, 101, - 51, 47, 0, 0, 45, 49, 49, 104, - 0, 0, 128, 0, 57, 63, 65, 61, - 67 -}; - -static const unsigned char _json_eof_actions[] = { - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 1, 0, 1, 0, 0, 1, 1, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 57, 63, 65, 61, 67, - 0, 0, 0, 0, 0, 0 -}; - -static const int json_start = 1; - -static const int json_en_number_machine = 23; -static const int json_en_string_machine = 32; -static const int json_en_duration_machine = 40; -static const int json_en_timestamp_machine = 47; -static const int json_en_fieldmask_machine = 75; -static const int json_en_value_machine = 78; -static const int json_en_main = 1; - - -#line 2788 "upb/json/parser.rl" - -size_t parse(void *closure, const void *hd, const char *buf, size_t size, - const upb_bufhandle *handle) { - upb_json_parser *parser = closure; - - /* Variables used by Ragel's generated code. */ - int cs = parser->current_state; - int *stack = parser->parser_stack; - int top = parser->parser_top; - - const char *p = buf; - const char *pe = buf + size; - const char *eof = &eof_ch; - - parser->handle = handle; - - UPB_UNUSED(hd); - UPB_UNUSED(handle); - - capture_resume(parser, buf); - - -#line 2866 "upb/json/parser.c" - { - int _klen; - unsigned int _trans; - const char *_acts; - unsigned int _nacts; - const char *_keys; - - if ( p == pe ) - goto _test_eof; - if ( cs == 0 ) - goto _out; -_resume: - _keys = _json_trans_keys + _json_key_offsets[cs]; - _trans = _json_index_offsets[cs]; - - _klen = _json_single_lengths[cs]; - if ( _klen > 0 ) { - const char *_lower = _keys; - const char *_mid; - const char *_upper = _keys + _klen - 1; - while (1) { - if ( _upper < _lower ) - break; - - _mid = _lower + ((_upper-_lower) >> 1); - if ( (*p) < *_mid ) - _upper = _mid - 1; - else if ( (*p) > *_mid ) - _lower = _mid + 1; - else { - _trans += (unsigned int)(_mid - _keys); - goto _match; - } - } - _keys += _klen; - _trans += _klen; - } - - _klen = _json_range_lengths[cs]; - if ( _klen > 0 ) { - const char *_lower = _keys; - const char *_mid; - const char *_upper = _keys + (_klen<<1) - 2; - while (1) { - if ( _upper < _lower ) - break; - - _mid = _lower + (((_upper-_lower) >> 1) & ~1); - if ( (*p) < _mid[0] ) - _upper = _mid - 2; - else if ( (*p) > _mid[1] ) - _lower = _mid + 2; - else { - _trans += (unsigned int)((_mid - _keys)>>1); - goto _match; - } - } - _trans += _klen; - } - -_match: - _trans = _json_indicies[_trans]; - cs = _json_trans_targs[_trans]; - - if ( _json_trans_actions[_trans] == 0 ) - goto _again; - - _acts = _json_actions + _json_trans_actions[_trans]; - _nacts = (unsigned int) *_acts++; - while ( _nacts-- > 0 ) - { - switch ( *_acts++ ) - { - case 1: -#line 2593 "upb/json/parser.rl" - { p--; {cs = stack[--top]; goto _again;} } - break; - case 2: -#line 2595 "upb/json/parser.rl" - { p--; {stack[top++] = cs; cs = 23;goto _again;} } - break; - case 3: -#line 2599 "upb/json/parser.rl" - { start_text(parser, p); } - break; - case 4: -#line 2600 "upb/json/parser.rl" - { CHECK_RETURN_TOP(end_text(parser, p)); } - break; - case 5: -#line 2606 "upb/json/parser.rl" - { start_hex(parser); } - break; - case 6: -#line 2607 "upb/json/parser.rl" - { hexdigit(parser, p); } - break; - case 7: -#line 2608 "upb/json/parser.rl" - { CHECK_RETURN_TOP(end_hex(parser)); } - break; - case 8: -#line 2614 "upb/json/parser.rl" - { CHECK_RETURN_TOP(escape(parser, p)); } - break; - case 9: -#line 2620 "upb/json/parser.rl" - { p--; {cs = stack[--top]; goto _again;} } - break; - case 10: -#line 2625 "upb/json/parser.rl" - { start_year(parser, p); } - break; - case 11: -#line 2626 "upb/json/parser.rl" - { CHECK_RETURN_TOP(end_year(parser, p)); } - break; - case 12: -#line 2630 "upb/json/parser.rl" - { start_month(parser, p); } - break; - case 13: -#line 2631 "upb/json/parser.rl" - { CHECK_RETURN_TOP(end_month(parser, p)); } - break; - case 14: -#line 2635 "upb/json/parser.rl" - { start_day(parser, p); } - break; - case 15: -#line 2636 "upb/json/parser.rl" - { CHECK_RETURN_TOP(end_day(parser, p)); } - break; - case 16: -#line 2640 "upb/json/parser.rl" - { start_hour(parser, p); } - break; - case 17: -#line 2641 "upb/json/parser.rl" - { CHECK_RETURN_TOP(end_hour(parser, p)); } - break; - case 18: -#line 2645 "upb/json/parser.rl" - { start_minute(parser, p); } - break; - case 19: -#line 2646 "upb/json/parser.rl" - { CHECK_RETURN_TOP(end_minute(parser, p)); } - break; - case 20: -#line 2650 "upb/json/parser.rl" - { start_second(parser, p); } - break; - case 21: -#line 2651 "upb/json/parser.rl" - { CHECK_RETURN_TOP(end_second(parser, p)); } - break; - case 22: -#line 2656 "upb/json/parser.rl" - { start_duration_base(parser, p); } - break; - case 23: -#line 2657 "upb/json/parser.rl" - { CHECK_RETURN_TOP(end_duration_base(parser, p)); } - break; - case 24: -#line 2659 "upb/json/parser.rl" - { p--; {cs = stack[--top]; goto _again;} } - break; - case 25: -#line 2664 "upb/json/parser.rl" - { start_timestamp_base(parser); } - break; - case 26: -#line 2666 "upb/json/parser.rl" - { start_timestamp_fraction(parser, p); } - break; - case 27: -#line 2667 "upb/json/parser.rl" - { CHECK_RETURN_TOP(end_timestamp_fraction(parser, p)); } - break; - case 28: -#line 2669 "upb/json/parser.rl" - { start_timestamp_zone(parser, p); } - break; - case 29: -#line 2670 "upb/json/parser.rl" - { CHECK_RETURN_TOP(end_timestamp_zone(parser, p)); } - break; - case 30: -#line 2672 "upb/json/parser.rl" - { p--; {cs = stack[--top]; goto _again;} } - break; - case 31: -#line 2677 "upb/json/parser.rl" - { start_fieldmask_path_text(parser, p); } - break; - case 32: -#line 2678 "upb/json/parser.rl" - { end_fieldmask_path_text(parser, p); } - break; - case 33: -#line 2683 "upb/json/parser.rl" - { start_fieldmask_path(parser); } - break; - case 34: -#line 2684 "upb/json/parser.rl" - { end_fieldmask_path(parser); } - break; - case 35: -#line 2690 "upb/json/parser.rl" - { p--; {cs = stack[--top]; goto _again;} } - break; - case 36: -#line 2695 "upb/json/parser.rl" - { - if (is_wellknown_msg(parser, UPB_WELLKNOWN_TIMESTAMP)) { - {stack[top++] = cs; cs = 47;goto _again;} - } else if (is_wellknown_msg(parser, UPB_WELLKNOWN_DURATION)) { - {stack[top++] = cs; cs = 40;goto _again;} - } else if (is_wellknown_msg(parser, UPB_WELLKNOWN_FIELDMASK)) { - {stack[top++] = cs; cs = 75;goto _again;} - } else { - {stack[top++] = cs; cs = 32;goto _again;} - } - } - break; - case 37: -#line 2708 "upb/json/parser.rl" - { p--; {stack[top++] = cs; cs = 78;goto _again;} } - break; - case 38: -#line 2713 "upb/json/parser.rl" - { - if (is_wellknown_msg(parser, UPB_WELLKNOWN_ANY)) { - start_any_member(parser, p); - } else { - start_member(parser); - } - } - break; - case 39: -#line 2720 "upb/json/parser.rl" - { CHECK_RETURN_TOP(end_membername(parser)); } - break; - case 40: -#line 2723 "upb/json/parser.rl" - { - if (is_wellknown_msg(parser, UPB_WELLKNOWN_ANY)) { - end_any_member(parser, p); - } else { - end_member(parser); - } - } - break; - case 41: -#line 2734 "upb/json/parser.rl" - { - if (is_wellknown_msg(parser, UPB_WELLKNOWN_ANY)) { - start_any_object(parser, p); - } else { - start_object(parser); - } - } - break; - case 42: -#line 2743 "upb/json/parser.rl" - { - if (is_wellknown_msg(parser, UPB_WELLKNOWN_ANY)) { - CHECK_RETURN_TOP(end_any_object(parser, p)); - } else { - end_object(parser); - } - } - break; - case 43: -#line 2755 "upb/json/parser.rl" - { CHECK_RETURN_TOP(start_array(parser)); } - break; - case 44: -#line 2759 "upb/json/parser.rl" - { end_array(parser); } - break; - case 45: -#line 2764 "upb/json/parser.rl" - { CHECK_RETURN_TOP(start_number(parser, p)); } - break; - case 46: -#line 2765 "upb/json/parser.rl" - { CHECK_RETURN_TOP(end_number(parser, p)); } - break; - case 47: -#line 2767 "upb/json/parser.rl" - { CHECK_RETURN_TOP(start_stringval(parser)); } - break; - case 48: -#line 2768 "upb/json/parser.rl" - { CHECK_RETURN_TOP(end_stringval(parser)); } - break; - case 49: -#line 2770 "upb/json/parser.rl" - { CHECK_RETURN_TOP(end_bool(parser, true)); } - break; - case 50: -#line 2772 "upb/json/parser.rl" - { CHECK_RETURN_TOP(end_bool(parser, false)); } - break; - case 51: -#line 2774 "upb/json/parser.rl" - { CHECK_RETURN_TOP(end_null(parser)); } - break; - case 52: -#line 2776 "upb/json/parser.rl" - { CHECK_RETURN_TOP(start_subobject_full(parser)); } - break; - case 53: -#line 2777 "upb/json/parser.rl" - { end_subobject_full(parser); } - break; - case 54: -#line 2782 "upb/json/parser.rl" - { p--; {cs = stack[--top]; goto _again;} } - break; -#line 3190 "upb/json/parser.c" - } - } - -_again: - if ( cs == 0 ) - goto _out; - if ( ++p != pe ) - goto _resume; - _test_eof: {} - if ( p == eof ) - { - const char *__acts = _json_actions + _json_eof_actions[cs]; - unsigned int __nacts = (unsigned int) *__acts++; - while ( __nacts-- > 0 ) { - switch ( *__acts++ ) { - case 0: -#line 2591 "upb/json/parser.rl" - { p--; {cs = stack[--top]; if ( p == pe ) - goto _test_eof; -goto _again;} } - break; - case 46: -#line 2765 "upb/json/parser.rl" - { CHECK_RETURN_TOP(end_number(parser, p)); } - break; - case 49: -#line 2770 "upb/json/parser.rl" - { CHECK_RETURN_TOP(end_bool(parser, true)); } - break; - case 50: -#line 2772 "upb/json/parser.rl" - { CHECK_RETURN_TOP(end_bool(parser, false)); } - break; - case 51: -#line 2774 "upb/json/parser.rl" - { CHECK_RETURN_TOP(end_null(parser)); } - break; - case 53: -#line 2777 "upb/json/parser.rl" - { end_subobject_full(parser); } - break; -#line 3232 "upb/json/parser.c" - } - } - } - - _out: {} - } - -#line 2810 "upb/json/parser.rl" - - if (p != pe) { - upb_status_seterrf(parser->status, "Parse error at '%.*s'\n", (int)(pe - p), - p); - } else { - capture_suspend(parser, &p); - } - -error: - /* Save parsing state back to parser. */ - parser->current_state = cs; - parser->parser_top = top; - - return p - buf; -} - -static bool end(void *closure, const void *hd) { - upb_json_parser *parser = closure; - - /* Prevent compile warning on unused static constants. */ - UPB_UNUSED(json_start); - UPB_UNUSED(json_en_duration_machine); - UPB_UNUSED(json_en_fieldmask_machine); - UPB_UNUSED(json_en_number_machine); - UPB_UNUSED(json_en_string_machine); - UPB_UNUSED(json_en_timestamp_machine); - UPB_UNUSED(json_en_value_machine); - UPB_UNUSED(json_en_main); - - parse(parser, hd, &eof_ch, 0, NULL); - - return parser->current_state >= 106; -} - -static void json_parser_reset(upb_json_parser *p) { - int cs; - int top; - - p->top = p->stack; - init_frame(p->top); - - /* Emit Ragel initialization of the parser. */ - -#line 3284 "upb/json/parser.c" - { - cs = json_start; - top = 0; - } - -#line 2853 "upb/json/parser.rl" - p->current_state = cs; - p->parser_top = top; - accumulate_clear(p); - p->multipart_state = MULTIPART_INACTIVE; - p->capture = NULL; - p->accumulated = NULL; -} - -static upb_json_parsermethod *parsermethod_new(upb_json_codecache *c, - const upb_msgdef *md) { - int i, n; - upb_alloc *alloc = upb_arena_alloc(c->arena); - - upb_json_parsermethod *m = upb_malloc(alloc, sizeof(*m)); - - m->cache = c; - - upb_byteshandler_init(&m->input_handler_); - upb_byteshandler_setstring(&m->input_handler_, parse, m); - upb_byteshandler_setendstr(&m->input_handler_, end, m); - - upb_strtable_init2(&m->name_table, UPB_CTYPE_CONSTPTR, 4, alloc); - - /* Build name_table */ - - n = upb_msgdef_fieldcount(md); - for(i = 0; i < n; i++) { - const upb_fielddef *f = upb_msgdef_field(md, i); - upb_value v = upb_value_constptr(f); - const char *name; - - /* Add an entry for the JSON name. */ - name = upb_fielddef_jsonname(f); - upb_strtable_insert3(&m->name_table, name, strlen(name), v, alloc); - - if (strcmp(name, upb_fielddef_name(f)) != 0) { - /* Since the JSON name is different from the regular field name, add an - * entry for the raw name (compliant proto3 JSON parsers must accept - * both). */ - const char *name = upb_fielddef_name(f); - upb_strtable_insert3(&m->name_table, name, strlen(name), v, alloc); - } - } - - return m; -} - -/* Public API *****************************************************************/ - -upb_json_parser *upb_json_parser_create(upb_arena *arena, - const upb_json_parsermethod *method, - const upb_symtab* symtab, - upb_sink output, - upb_status *status, - bool ignore_json_unknown) { - upb_json_parser *p = upb_arena_malloc(arena, sizeof(upb_json_parser)); - if (!p) return false; - - p->arena = arena; - p->method = method; - p->status = status; - p->limit = p->stack + UPB_JSON_MAX_DEPTH; - p->accumulate_buf = NULL; - p->accumulate_buf_size = 0; - upb_bytessink_reset(&p->input_, &method->input_handler_, p); - - json_parser_reset(p); - p->top->sink = output; - p->top->m = upb_handlers_msgdef(output.handlers); - if (is_wellknown_msg(p, UPB_WELLKNOWN_ANY)) { - p->top->is_any = true; - p->top->any_frame = json_parser_any_frame_new(p); - } else { - p->top->is_any = false; - p->top->any_frame = NULL; - } - set_name_table(p, p->top); - p->symtab = symtab; - - p->ignore_json_unknown = ignore_json_unknown; - - return p; -} - -upb_bytessink upb_json_parser_input(upb_json_parser *p) { - return p->input_; -} - -const upb_byteshandler *upb_json_parsermethod_inputhandler( - const upb_json_parsermethod *m) { - return &m->input_handler_; -} - -upb_json_codecache *upb_json_codecache_new(void) { - upb_alloc *alloc; - upb_json_codecache *c; - - c = upb_gmalloc(sizeof(*c)); - - c->arena = upb_arena_new(); - alloc = upb_arena_alloc(c->arena); - - upb_inttable_init2(&c->methods, UPB_CTYPE_CONSTPTR, alloc); - - return c; -} - -void upb_json_codecache_free(upb_json_codecache *c) { - upb_arena_free(c->arena); - upb_gfree(c); -} - -const upb_json_parsermethod *upb_json_codecache_get(upb_json_codecache *c, - const upb_msgdef *md) { - upb_json_parsermethod *m; - upb_value v; - int i, n; - upb_alloc *alloc = upb_arena_alloc(c->arena); - - if (upb_inttable_lookupptr(&c->methods, md, &v)) { - return upb_value_getconstptr(v); - } - - m = parsermethod_new(c, md); - v = upb_value_constptr(m); - - if (!m) return NULL; - if (!upb_inttable_insertptr2(&c->methods, md, v, alloc)) return NULL; - - /* Populate parser methods for all submessages, so the name tables will - * be available during parsing. */ - n = upb_msgdef_fieldcount(md); - for(i = 0; i < n; i++) { - const upb_fielddef *f = upb_msgdef_field(md, i); - - if (upb_fielddef_issubmsg(f)) { - const upb_msgdef *subdef = upb_fielddef_msgsubdef(f); - const upb_json_parsermethod *sub_method = - upb_json_codecache_get(c, subdef); - - if (!sub_method) return NULL; - } - } - - return m; -} diff --git a/tests/BUILD b/tests/BUILD index ad12fce14a..ccc930720e 100644 --- a/tests/BUILD +++ b/tests/BUILD @@ -29,7 +29,6 @@ cc_library( ], copts = UPB_DEFAULT_CPPOPTS, deps = [ - "//:handlers", "//:port", "//:upb", ], @@ -76,32 +75,6 @@ upb_proto_library( deps = ["@com_google_protobuf//:test_messages_proto3_proto"], ) -proto_library( - name = "test_decoder_proto", - srcs = [ - "pb/test_decoder.proto", - ], -) - -upb_proto_reflection_library( - name = "test_decoder_upb_proto", - deps = [":test_decoder_proto"], -) - -cc_test( - name = "test_decoder", - srcs = ["pb/test_decoder.cc"], - copts = UPB_DEFAULT_CPPOPTS, - deps = [ - ":test_decoder_upb_proto", - ":upb_test", - "//:handlers", - "//:port", - "//:upb", - "//:upb_pb", - ], -) - proto_library( name = "test_cpp_proto", srcs = [ @@ -121,11 +94,9 @@ cc_test( deps = [ ":test_cpp_upb_proto", ":upb_test", - "//:handlers", "//:port", "//:reflection", "//:upb", - "//:upb_pb", ], ) @@ -160,61 +131,6 @@ cc_binary( ], ) -# copybara:strip_for_google3_begin -cc_test( - name = "test_encoder", - srcs = ["pb/test_encoder.cc"], - copts = UPB_DEFAULT_CPPOPTS, - deps = [ - ":upb_test", - "//:descriptor_upb_proto", - "//:descriptor_upb_proto_reflection", - "//:upb", - "//:upb_pb", - ], -) - -proto_library( - name = "test_json_enum_from_separate", - srcs = ["json/enum_from_separate_file.proto"], - deps = [":test_json_proto"], -) - -proto_library( - name = "test_json_proto", - srcs = ["json/test.proto"], -) - -upb_proto_reflection_library( - name = "test_json_upb_proto_reflection", - deps = ["test_json_proto"], -) - -upb_proto_library( - name = "test_json_enum_from_separate_upb_proto", - deps = [":test_json_enum_from_separate"], -) - -upb_proto_library( - name = "test_json_upb_proto", - deps = [":test_json_proto"], -) - -cc_test( - name = "test_json", - srcs = [ - "json/test_json.cc", - ], - copts = UPB_DEFAULT_CPPOPTS, - deps = [ - ":test_json_upb_proto", - ":test_json_upb_proto_reflection", - ":upb_test", - "//:upb_json", - ], -) -# copybara:strip_end - upb_proto_library( name = "conformance_proto_upb", testonly = 1, diff --git a/tests/json/enum_from_separate_file.proto b/tests/json/enum_from_separate_file.proto deleted file mode 100644 index ceb9b42aa1..0000000000 --- a/tests/json/enum_from_separate_file.proto +++ /dev/null @@ -1,9 +0,0 @@ -syntax = "proto2"; - -import "tests/json/test.proto"; - -package upb.test.json; - -message ImportEnum { - optional MyEnum e = 1; -} diff --git a/tests/json/test.proto b/tests/json/test.proto deleted file mode 100644 index 2db0388d77..0000000000 --- a/tests/json/test.proto +++ /dev/null @@ -1,47 +0,0 @@ -syntax = "proto3"; - -package upb.test.json; - -message TestMessage { - int32 optional_int32 = 1; - int64 optional_int64 = 2; - int32 optional_uint32 = 3; - int64 optional_uint64 = 4; - string optional_string = 5; - bytes optional_bytes = 6; - bool optional_bool = 7; - SubMessage optional_msg = 8; - MyEnum optional_enum = 9; - - repeated int32 repeated_int32 = 11; - repeated int64 repeated_int64 = 12; - repeated uint32 repeated_uint32 = 13; - repeated uint64 repeated_uint64 = 14; - repeated string repeated_string = 15; - repeated bytes repeated_bytes = 16; - repeated bool repeated_bool = 17; - repeated SubMessage repeated_msg = 18; - repeated MyEnum repeated_enum = 19; - - map map_string_string = 20; - map map_int32_string = 21; - map map_bool_string = 22; - map map_string_int32 = 23; - map map_string_bool = 24; - map map_string_msg = 25; - - oneof o { - int32 oneof_int32 = 26; - int64 oneof_int64 = 27; - } -} - -message SubMessage { - int32 foo = 1; -} - -enum MyEnum { - A = 0; - B = 1; - C = 2; -} diff --git a/tests/json/test.proto.pb b/tests/json/test.proto.pb deleted file mode 100644 index 94b8b92e61..0000000000 Binary files a/tests/json/test.proto.pb and /dev/null differ diff --git a/tests/json/test_json.cc b/tests/json/test_json.cc deleted file mode 100644 index 18ab07fe50..0000000000 --- a/tests/json/test_json.cc +++ /dev/null @@ -1,336 +0,0 @@ -/* - * - * A set of tests for JSON parsing and serialization. - */ - -#include - -#include "tests/json/test.upb.h" // Test that it compiles for C++. -#include "tests/json/test.upbdefs.h" -#include "tests/test_util.h" -#include "tests/upb_test.h" -#include "upb/def.hpp" -#include "upb/handlers.h" -#include "upb/json/parser.h" -#include "upb/json/printer.h" -#include "upb/port_def.inc" -#include "upb/upb.h" - -// Macros for readability in test case list: allows us to give TEST("...") / -// EXPECT("...") pairs. -#define TEST(x) x -#define EXPECT_SAME NULL -#define EXPECT(x) x -#define TEST_SENTINEL { NULL, NULL } - -struct TestCase { - const char* input; - const char* expected; -}; - -bool verbose = false; - -static TestCase kTestRoundtripMessages[] = { - // Test most fields here. - { - TEST("{\"optionalInt32\":-42,\"optionalString\":\"Test\\u0001Message\"," - "\"optionalMsg\":{\"foo\":42}," - "\"optionalBool\":true,\"repeatedMsg\":[{\"foo\":1}," - "{\"foo\":2}]}"), - EXPECT_SAME - }, - // We must also recognize raw proto names. - { - TEST("{\"optional_int32\":-42,\"optional_string\":\"Test\\u0001Message\"," - "\"optional_msg\":{\"foo\":42}," - "\"optional_bool\":true,\"repeated_msg\":[{\"foo\":1}," - "{\"foo\":2}]}"), - EXPECT("{\"optionalInt32\":-42,\"optionalString\":\"Test\\u0001Message\"," - "\"optionalMsg\":{\"foo\":42}," - "\"optionalBool\":true,\"repeatedMsg\":[{\"foo\":1}," - "{\"foo\":2}]}") - }, - // Test special escapes in strings. - { - TEST("{\"repeatedString\":[\"\\b\",\"\\r\",\"\\n\",\"\\f\",\"\\t\"," - "\"\uFFFF\"]}"), - EXPECT_SAME - }, - // Test enum symbolic names. - { - // The common case: parse and print the symbolic name. - TEST("{\"optionalEnum\":\"A\"}"), - EXPECT_SAME - }, - { - // Unknown enum value: will be printed as an integer. - TEST("{\"optionalEnum\":42}"), - EXPECT_SAME - }, - { - // Known enum value: we're happy to parse an integer but we will re-emit the - // symbolic name. - TEST("{\"optionalEnum\":1}"), - EXPECT("{\"optionalEnum\":\"B\"}") - }, - // UTF-8 tests: escapes -> literal UTF8 in output. - { - // Note double escape on \uXXXX: we want the escape to be processed by the - // JSON parser, not by the C++ compiler! - TEST("{\"optionalString\":\"\\u007F\"}"), - EXPECT("{\"optionalString\":\"\x7F\"}") - }, - { - TEST("{\"optionalString\":\"\\u0080\"}"), - EXPECT("{\"optionalString\":\"\xC2\x80\"}") - }, - { - TEST("{\"optionalString\":\"\\u07FF\"}"), - EXPECT("{\"optionalString\":\"\xDF\xBF\"}") - }, - { - TEST("{\"optionalString\":\"\\u0800\"}"), - EXPECT("{\"optionalString\":\"\xE0\xA0\x80\"}") - }, - { - TEST("{\"optionalString\":\"\\uFFFF\"}"), - EXPECT("{\"optionalString\":\"\xEF\xBF\xBF\"}") - }, - // map-field tests - { - TEST("{\"mapStringString\":{\"a\":\"value1\",\"b\":\"value2\"," - "\"c\":\"value3\"}}"), - EXPECT_SAME - }, - { - TEST("{\"mapInt32String\":{\"1\":\"value1\",\"-1\":\"value2\"," - "\"1234\":\"value3\"}}"), - EXPECT_SAME - }, - { - TEST("{\"mapBoolString\":{\"false\":\"value1\",\"true\":\"value2\"}}"), - EXPECT_SAME - }, - { - TEST("{\"mapStringInt32\":{\"asdf\":1234,\"jkl;\":-1}}"), - EXPECT_SAME - }, - { - TEST("{\"mapStringBool\":{\"asdf\":true,\"jkl;\":false}}"), - EXPECT_SAME - }, - { - TEST("{\"mapStringMsg\":{\"asdf\":{\"foo\":42},\"jkl;\":{\"foo\":84}}}"), - EXPECT_SAME - }, - TEST_SENTINEL -}; - -static TestCase kTestRoundtripMessagesPreserve[] = { - // Test most fields here. - { - TEST("{\"optional_int32\":-42,\"optional_string\":\"Test\\u0001Message\"," - "\"optional_msg\":{\"foo\":42}," - "\"optional_bool\":true,\"repeated_msg\":[{\"foo\":1}," - "{\"foo\":2}]}"), - EXPECT_SAME - }, - TEST_SENTINEL -}; - -static TestCase kTestSkipUnknown[] = { - { - TEST("{\"optionalEnum\":\"UNKNOWN_ENUM_VALUE\"}"), - EXPECT("{}"), - }, - TEST_SENTINEL -}; - -static TestCase kTestFailure[] = { - { - TEST("{\"optionalEnum\":\"UNKNOWN_ENUM_VALUE\"}"), - EXPECT("{}"), /* Actually we expect error, this is checked later. */ - }, - TEST_SENTINEL -}; - -class StringSink { - public: - StringSink() { - upb_byteshandler_init(&byteshandler_); - upb_byteshandler_setstring(&byteshandler_, &str_handler, NULL); - upb_bytessink_reset(&bytessink_, &byteshandler_, &s_); - } - ~StringSink() { } - - upb_bytessink Sink() { return bytessink_; } - - const std::string& Data() { return s_; } - - private: - - static size_t str_handler(void* _closure, const void* hd, - const char* data, size_t len, - const upb_bufhandle* handle) { - UPB_UNUSED(hd); - UPB_UNUSED(handle); - std::string* s = static_cast(_closure); - std::string appended(data, len); - s->append(data, len); - return len; - } - - upb_byteshandler byteshandler_; - upb_bytessink bytessink_; - std::string s_; -}; - -void test_json_roundtrip_message(const char* json_src, - const char* json_expected, - const upb::Handlers* serialize_handlers, - const upb::json::ParserMethodPtr parser_method, - int seam, - bool ignore_unknown) { - VerboseParserEnvironment env(verbose); - StringSink data_sink; - upb::json::PrinterPtr printer = upb::json::PrinterPtr::Create( - env.arena(), serialize_handlers, data_sink.Sink()); - upb::json::ParserPtr parser = upb::json::ParserPtr::Create( - env.arena(), parser_method, NULL, printer.input(), - env.status(), ignore_unknown); - env.ResetBytesSink(parser.input()); - env.Reset(json_src, strlen(json_src), false, false); - - bool ok = env.Start() && - env.ParseBuffer(seam) && - env.ParseBuffer(-1) && - env.End(); - - ASSERT(ok); - ASSERT(env.CheckConsistency()); - - if (memcmp(json_expected, - data_sink.Data().data(), - data_sink.Data().size())) { - fprintf(stderr, - "JSON parse/serialize roundtrip result differs:\n" - "Expected:\n%s\nParsed/Serialized:\n%s\n", - json_expected, data_sink.Data().c_str()); - abort(); - } -} - -// Starts with a message in JSON format, parses and directly serializes again, -// and compares the result. -void test_json_roundtrip() { - upb::SymbolTable symtab; - upb::HandlerCache serialize_handlercache( - upb::json::PrinterPtr::NewCache(false)); - upb::json::CodeCache parse_codecache; - - upb::MessageDefPtr md(upb_test_json_TestMessage_getmsgdef(symtab.ptr())); - ASSERT(md); - const upb::Handlers* serialize_handlers = serialize_handlercache.Get(md); - const upb::json::ParserMethodPtr parser_method = parse_codecache.Get(md); - ASSERT(serialize_handlers); - - for (const TestCase* test_case = kTestRoundtripMessages; - test_case->input != NULL; test_case++) { - const char *expected = - (test_case->expected == EXPECT_SAME) ? - test_case->input : - test_case->expected; - - for (size_t i = 0; i < strlen(test_case->input); i++) { - test_json_roundtrip_message(test_case->input, expected, - serialize_handlers, parser_method, (int)i, - false); - } - } - - // Tests ignore unknown. - for (const TestCase* test_case = kTestSkipUnknown; - test_case->input != NULL; test_case++) { - const char *expected = - (test_case->expected == EXPECT_SAME) ? - test_case->input : - test_case->expected; - - for (size_t i = 0; i < strlen(test_case->input); i++) { - test_json_roundtrip_message(test_case->input, expected, - serialize_handlers, parser_method, (int)i, - true); - } - } - - serialize_handlercache = upb::json::PrinterPtr::NewCache(true); - serialize_handlers = serialize_handlercache.Get(md); - - for (const TestCase* test_case = kTestRoundtripMessagesPreserve; - test_case->input != NULL; test_case++) { - const char *expected = - (test_case->expected == EXPECT_SAME) ? - test_case->input : - test_case->expected; - - for (size_t i = 0; i < strlen(test_case->input); i++) { - test_json_roundtrip_message(test_case->input, expected, - serialize_handlers, parser_method, (int)i, - false); - } - } -} - -void test_json_parse_failure(const char* json_src, - const upb::Handlers* serialize_handlers, - const upb::json::ParserMethodPtr parser_method, - int seam) { - VerboseParserEnvironment env(verbose); - StringSink data_sink; - upb::json::PrinterPtr printer = upb::json::PrinterPtr::Create( - env.arena(), serialize_handlers, data_sink.Sink()); - upb::json::ParserPtr parser = upb::json::ParserPtr::Create( - env.arena(), parser_method, NULL, printer.input(), env.status(), false); - env.ResetBytesSink(parser.input()); - env.Reset(json_src, strlen(json_src), false, true); - - bool ok = env.Start() && - env.ParseBuffer(seam) && - env.ParseBuffer(-1) && - env.End(); - - ASSERT(!ok); - ASSERT(env.CheckConsistency()); -} - -// Starts with a proto message in JSON format, parses and expects failre. -void test_json_failure() { - upb::SymbolTable symtab; - upb::HandlerCache serialize_handlercache( - upb::json::PrinterPtr::NewCache(false)); - upb::json::CodeCache parse_codecache; - - upb::MessageDefPtr md(upb_test_json_TestMessage_getmsgdef(symtab.ptr())); - ASSERT(md); - const upb::Handlers* serialize_handlers = serialize_handlercache.Get(md); - const upb::json::ParserMethodPtr parser_method = parse_codecache.Get(md); - ASSERT(serialize_handlers); - - for (const TestCase* test_case = kTestFailure; - test_case->input != NULL; test_case++) { - for (size_t i = 0; i < strlen(test_case->input); i++) { - test_json_parse_failure(test_case->input, serialize_handlers, - parser_method, (int)i); - } - } -} - -extern "C" { -int run_tests(int argc, char *argv[]) { - UPB_UNUSED(argc); - UPB_UNUSED(argv); - test_json_roundtrip(); - test_json_failure(); - return 0; -} -} diff --git a/tests/pb/test_decoder.cc b/tests/pb/test_decoder.cc deleted file mode 100644 index 44478f0df7..0000000000 --- a/tests/pb/test_decoder.cc +++ /dev/null @@ -1,1194 +0,0 @@ -/* - * - * An exhaustive set of tests for parsing both valid and invalid protobuf - * input, with buffer breaks in arbitrary places. - * - * Tests to add: - * - string/bytes - * - unknown field handler called appropriately - * - unknown fields can be inserted in random places - * - fuzzing of valid input - * - resource limits (max stack depth, max string len) - * - testing of groups - * - more throrough testing of sequences - * - test skipping of submessages - * - test suspending the decoder - * - buffers that are close enough to the end of the address space that - * pointers overflow (this might be difficult). - * - a few "kitchen sink" examples (one proto that uses all types, lots - * of submsg/sequences, etc. - * - test different handlers at every level and whether handlers fire at - * the correct field path. - * - test skips that extend past the end of current buffer (where decoder - * returns value greater than the size param). - */ - -#ifndef __STDC_FORMAT_MACROS -#define __STDC_FORMAT_MACROS // For PRIuS, etc. -#endif - -#include -#include -#include -#include -#include -#include - -#include "tests/test_util.h" -#include "tests/upb_test.h" -#include "tests/pb/test_decoder.upbdefs.h" - -#ifdef AMALGAMATED -#include "upb.h" -#else // AMALGAMATED -#include "upb/handlers.h" -#include "upb/pb/decoder.h" -#include "upb/upb.h" -#endif // !AMALGAMATED - -#include "upb/port_def.inc" - -#undef PRINT_FAILURE -#define PRINT_FAILURE(expr) \ - fprintf(stderr, "Assertion failed: %s:%d\n", __FILE__, __LINE__); \ - fprintf(stderr, "expr: %s\n", #expr); \ - -#define MAX_NESTING 64 - -#define LINE(x) x "\n" - -uint32_t filter_hash = 0; -double completed; -double total; -double *count; - -enum TestMode { - COUNT_ONLY = 1, - NO_HANDLERS = 2, - ALL_HANDLERS = 3 -} test_mode; - -// Copied from decoder.c, since this is not a public interface. -typedef struct { - uint8_t native_wire_type; - bool is_numeric; -} upb_decoder_typeinfo; - -static const upb_decoder_typeinfo upb_decoder_types[] = { - {UPB_WIRE_TYPE_END_GROUP, false}, // ENDGROUP - {UPB_WIRE_TYPE_64BIT, true}, // DOUBLE - {UPB_WIRE_TYPE_32BIT, true}, // FLOAT - {UPB_WIRE_TYPE_VARINT, true}, // INT64 - {UPB_WIRE_TYPE_VARINT, true}, // UINT64 - {UPB_WIRE_TYPE_VARINT, true}, // INT32 - {UPB_WIRE_TYPE_64BIT, true}, // FIXED64 - {UPB_WIRE_TYPE_32BIT, true}, // FIXED32 - {UPB_WIRE_TYPE_VARINT, true}, // BOOL - {UPB_WIRE_TYPE_DELIMITED, false}, // STRING - {UPB_WIRE_TYPE_START_GROUP, false}, // GROUP - {UPB_WIRE_TYPE_DELIMITED, false}, // MESSAGE - {UPB_WIRE_TYPE_DELIMITED, false}, // BYTES - {UPB_WIRE_TYPE_VARINT, true}, // UINT32 - {UPB_WIRE_TYPE_VARINT, true}, // ENUM - {UPB_WIRE_TYPE_32BIT, true}, // SFIXED32 - {UPB_WIRE_TYPE_64BIT, true}, // SFIXED64 - {UPB_WIRE_TYPE_VARINT, true}, // SINT32 - {UPB_WIRE_TYPE_VARINT, true}, // SINT64 -}; - -#ifndef USE_GOOGLE -using std::string; -#endif - -void vappendf(string* str, const char *format, va_list args) { - va_list copy; - va_copy(copy, args); - - int count = vsnprintf(NULL, 0, format, args); - if (count >= 0) - { - UPB_ASSERT(count < 32768); - char *buffer = new char[count + 1]; - UPB_ASSERT(buffer); - count = vsnprintf(buffer, count + 1, format, copy); - UPB_ASSERT(count >= 0); - str->append(buffer, count); - delete [] buffer; - } - va_end(copy); -} - -void appendf(string* str, const char *fmt, ...) { - va_list args; - va_start(args, fmt); - vappendf(str, fmt, args); - va_end(args); -} - -void PrintBinary(const string& str) { - for (size_t i = 0; i < str.size(); i++) { - if (isprint(str[i])) { - fprintf(stderr, "%c", str[i]); - } else { - fprintf(stderr, "\\x%02x", (int)(uint8_t)str[i]); - } - } -} - -#define UPB_PB_VARINT_MAX_LEN 10 - -static size_t upb_vencode64(uint64_t val, char *buf) { - size_t i; - if (val == 0) { buf[0] = 0; return 1; } - i = 0; - while (val) { - uint8_t byte = val & 0x7fU; - val >>= 7; - if (val) byte |= 0x80U; - buf[i++] = byte; - } - return i; -} - -static uint32_t upb_zzenc_32(int32_t n) { - return ((uint32_t)n << 1) ^ (n >> 31); -} - -static uint64_t upb_zzenc_64(int64_t n) { - return ((uint64_t)n << 1) ^ (n >> 63); -} - -/* Routines for building arbitrary protos *************************************/ - -const string empty; - -string cat(const string& a, const string& b, - const string& c = empty, - const string& d = empty, - const string& e = empty, - const string& f = empty, - const string& g = empty, - const string& h = empty, - const string& i = empty, - const string& j = empty, - const string& k = empty, - const string& l = empty) { - string ret; - ret.reserve(a.size() + b.size() + c.size() + d.size() + e.size() + f.size() + - g.size() + h.size() + i.size() + j.size() + k.size() + l.size()); - ret.append(a); - ret.append(b); - ret.append(c); - ret.append(d); - ret.append(e); - ret.append(f); - ret.append(g); - ret.append(h); - ret.append(i); - ret.append(j); - ret.append(k); - ret.append(l); - return ret; -} - -template -string num2string(T num) { - std::ostringstream ss; - ss << num; - return ss.str(); -} - -string varint(uint64_t x) { - char buf[UPB_PB_VARINT_MAX_LEN]; - size_t len = upb_vencode64(x, buf); - return string(buf, len); -} - -// TODO: proper byte-swapping for big-endian machines. -string fixed32(void *data) { return string(static_cast(data), 4); } -string fixed64(void *data) { return string(static_cast(data), 8); } - -string delim(const string& buf) { return cat(varint(buf.size()), buf); } -string uint32(uint32_t u32) { return fixed32(&u32); } -string uint64(uint64_t u64) { return fixed64(&u64); } -string flt(float f) { return fixed32(&f); } -string dbl(double d) { return fixed64(&d); } -string zz32(int32_t x) { return varint(upb_zzenc_32(x)); } -string zz64(int64_t x) { return varint(upb_zzenc_64(x)); } - -string tag(uint32_t fieldnum, char wire_type) { - return varint((fieldnum << 3) | wire_type); -} - -string submsg(uint32_t fn, const string& buf) { - return cat( tag(fn, UPB_WIRE_TYPE_DELIMITED), delim(buf) ); -} - -string group(uint32_t fn, const string& buf) { - return cat(tag(fn, UPB_WIRE_TYPE_START_GROUP), buf, - tag(fn, UPB_WIRE_TYPE_END_GROUP)); -} - -// Like delim()/submsg(), but intentionally encodes an incorrect length. -// These help test when a delimited boundary doesn't land in the right place. -string badlen_delim(int err, const string& buf) { - return cat(varint(buf.size() + err), buf); -} - -string badlen_submsg(int err, uint32_t fn, const string& buf) { - return cat( tag(fn, UPB_WIRE_TYPE_DELIMITED), badlen_delim(err, buf) ); -} - - -/* A set of handlers that covers all .proto types *****************************/ - -// The handlers simply append to a string indicating what handlers were called. -// This string is similar to protobuf text format but fields are referred to by -// number instead of name and sequences are explicitly delimited. We indent -// using the closure depth to test that the stack of closures is properly -// handled. - -int closures[MAX_NESTING]; -string output; - -void indentbuf(string *buf, int depth) { - buf->append(2 * depth, ' '); -} - -#define NUMERIC_VALUE_HANDLER(member, ctype, fmt) \ - bool value_##member(int* depth, const uint32_t* num, ctype val) { \ - indentbuf(&output, *depth); \ - appendf(&output, "%" PRIu32 ":%" fmt "\n", *num, val); \ - return true; \ - } - -NUMERIC_VALUE_HANDLER(uint32, uint32_t, PRIu32) -NUMERIC_VALUE_HANDLER(uint64, uint64_t, PRIu64) -NUMERIC_VALUE_HANDLER(int32, int32_t, PRId32) -NUMERIC_VALUE_HANDLER(int64, int64_t, PRId64) -NUMERIC_VALUE_HANDLER(float, float, "g") -NUMERIC_VALUE_HANDLER(double, double, "g") - -bool value_bool(int* depth, const uint32_t* num, bool val) { - indentbuf(&output, *depth); - appendf(&output, "%" PRIu32 ":%s\n", *num, val ? "true" : "false"); - return true; -} - -int* startstr(int* depth, const uint32_t* num, size_t size_hint) { - indentbuf(&output, *depth); - appendf(&output, "%" PRIu32 ":(%zu)\"", *num, size_hint); - return depth + 1; -} - -size_t value_string(int* depth, const uint32_t* num, const char* buf, - size_t n, const upb_bufhandle* handle) { - UPB_UNUSED(num); - UPB_UNUSED(depth); - output.append(buf, n); - ASSERT(handle == &global_handle); - return n; -} - -bool endstr(int* depth, const uint32_t* num) { - UPB_UNUSED(num); - output.append("\n"); - indentbuf(&output, *depth); - appendf(&output, "%" PRIu32 ":\"\n", *num); - return true; -} - -int* startsubmsg(int* depth, const uint32_t* num) { - indentbuf(&output, *depth); - appendf(&output, "%" PRIu32 ":{\n", *num); - return depth + 1; -} - -bool endsubmsg(int* depth, const uint32_t* num) { - UPB_UNUSED(num); - indentbuf(&output, *depth); - output.append("}\n"); - return true; -} - -int* startseq(int* depth, const uint32_t* num) { - indentbuf(&output, *depth); - appendf(&output, "%" PRIu32 ":[\n", *num); - return depth + 1; -} - -bool endseq(int* depth, const uint32_t* num) { - UPB_UNUSED(num); - indentbuf(&output, *depth); - output.append("]\n"); - return true; -} - -bool startmsg(int* depth) { - indentbuf(&output, *depth); - output.append("<\n"); - return true; -} - -bool endmsg(int* depth, upb_status* status) { - UPB_UNUSED(status); - indentbuf(&output, *depth); - output.append(">\n"); - return true; -} - -void free_uint32(void *val) { - uint32_t *u32 = static_cast(val); - delete u32; -} - -template -void doreg(upb::HandlersPtr h, uint32_t num) { - upb::FieldDefPtr f = h.message_def().FindFieldByNumber(num); - ASSERT(f); - ASSERT(h.SetValueHandler(f, UpbBind(F, new uint32_t(num)))); - if (f.IsSequence()) { - ASSERT(h.SetStartSequenceHandler(f, UpbBind(startseq, new uint32_t(num)))); - ASSERT(h.SetEndSequenceHandler(f, UpbBind(endseq, new uint32_t(num)))); - } -} - -// The repeated field number to correspond to the given non-repeated field -// number. -uint32_t rep_fn(uint32_t fn) { - return (UPB_MAX_FIELDNUMBER - 1000) + fn; -} - -#define NOP_FIELD 40 -#define UNKNOWN_FIELD 666 - -template -void reg(upb::HandlersPtr h, upb_descriptortype_t type) { - // We register both a repeated and a non-repeated field for every type. - // For the non-repeated field we make the field number the same as the - // type. For the repeated field we make it a function of the type. - doreg(h, type); - doreg(h, rep_fn(type)); -} - -void regseq(upb::HandlersPtr h, upb::FieldDefPtr f, uint32_t num) { - ASSERT(h.SetStartSequenceHandler(f, UpbBind(startseq, new uint32_t(num)))); - ASSERT(h.SetEndSequenceHandler(f, UpbBind(endseq, new uint32_t(num)))); -} - -void reg_subm(upb::HandlersPtr h, uint32_t num) { - upb::FieldDefPtr f = h.message_def().FindFieldByNumber(num); - ASSERT(f); - if (f.IsSequence()) regseq(h, f, num); - ASSERT( - h.SetStartSubMessageHandler(f, UpbBind(startsubmsg, new uint32_t(num)))); - ASSERT(h.SetEndSubMessageHandler(f, UpbBind(endsubmsg, new uint32_t(num)))); -} - -void reg_str(upb::HandlersPtr h, uint32_t num) { - upb::FieldDefPtr f = h.message_def().FindFieldByNumber(num); - ASSERT(f); - if (f.IsSequence()) regseq(h, f, num); - ASSERT(h.SetStartStringHandler(f, UpbBind(startstr, new uint32_t(num)))); - ASSERT(h.SetEndStringHandler(f, UpbBind(endstr, new uint32_t(num)))); - ASSERT(h.SetStringHandler(f, UpbBind(value_string, new uint32_t(num)))); -} - -struct HandlerRegisterData { - TestMode mode; -}; - -void callback(const void *closure, upb::Handlers* h_ptr) { - upb::HandlersPtr h(h_ptr); - const HandlerRegisterData* data = - static_cast(closure); - if (data->mode == ALL_HANDLERS) { - h.SetStartMessageHandler(UpbMakeHandler(startmsg)); - h.SetEndMessageHandler(UpbMakeHandler(endmsg)); - - // Register handlers for each type. - reg(h, UPB_DESCRIPTOR_TYPE_DOUBLE); - reg (h, UPB_DESCRIPTOR_TYPE_FLOAT); - reg (h, UPB_DESCRIPTOR_TYPE_INT64); - reg(h, UPB_DESCRIPTOR_TYPE_UINT64); - reg (h, UPB_DESCRIPTOR_TYPE_INT32); - reg(h, UPB_DESCRIPTOR_TYPE_FIXED64); - reg(h, UPB_DESCRIPTOR_TYPE_FIXED32); - reg (h, UPB_DESCRIPTOR_TYPE_BOOL); - reg(h, UPB_DESCRIPTOR_TYPE_UINT32); - reg (h, UPB_DESCRIPTOR_TYPE_ENUM); - reg (h, UPB_DESCRIPTOR_TYPE_SFIXED32); - reg (h, UPB_DESCRIPTOR_TYPE_SFIXED64); - reg (h, UPB_DESCRIPTOR_TYPE_SINT32); - reg (h, UPB_DESCRIPTOR_TYPE_SINT64); - - reg_str(h, UPB_DESCRIPTOR_TYPE_STRING); - reg_str(h, UPB_DESCRIPTOR_TYPE_BYTES); - reg_str(h, rep_fn(UPB_DESCRIPTOR_TYPE_STRING)); - reg_str(h, rep_fn(UPB_DESCRIPTOR_TYPE_BYTES)); - - // Register submessage/group handlers that are self-recursive - // to this type, eg: message M { optional M m = 1; } - reg_subm(h, UPB_DESCRIPTOR_TYPE_MESSAGE); - reg_subm(h, rep_fn(UPB_DESCRIPTOR_TYPE_MESSAGE)); - - if (h.message_def().full_name() == std::string("DecoderTest")) { - reg_subm(h, UPB_DESCRIPTOR_TYPE_GROUP); - reg_subm(h, rep_fn(UPB_DESCRIPTOR_TYPE_GROUP)); - } - - // For NOP_FIELD we register no handlers, so we can pad a proto freely without - // changing the output. - } -} - -/* Running of test cases ******************************************************/ - -const upb::Handlers *global_handlers; -upb::pb::DecoderMethodPtr global_method; - -upb::pb::DecoderPtr CreateDecoder(upb::Arena* arena, - upb::pb::DecoderMethodPtr method, - upb::Sink sink, upb::Status* status) { - upb::pb::DecoderPtr ret = - upb::pb::DecoderPtr::Create(arena, method, sink, status); - ret.set_max_nesting(MAX_NESTING); - return ret; -} - -void CheckBytesParsed(upb::pb::DecoderPtr decoder, size_t ofs) { - // We can't have parsed more data than the decoder callback is telling us it - // parsed. - ASSERT(decoder.BytesParsed() <= ofs); - - // The difference between what we've decoded and what the decoder has accepted - // represents the internally buffered amount. This amount should not exceed - // this value which comes from decoder.int.h. - ASSERT(ofs <= (decoder.BytesParsed() + UPB_DECODER_MAX_RESIDUAL_BYTES)); -} - -static bool parse(VerboseParserEnvironment* env, - upb::pb::DecoderPtr decoder, int bytes) { - CheckBytesParsed(decoder, env->ofs()); - bool ret = env->ParseBuffer(bytes); - if (ret) { - CheckBytesParsed(decoder, env->ofs()); - } - - return ret; -} - -void do_run_decoder(VerboseParserEnvironment* env, upb::pb::DecoderPtr decoder, - const string& proto, const string* expected_output, - size_t i, size_t j, bool may_skip) { - env->Reset(proto.c_str(), proto.size(), may_skip, expected_output == NULL); - decoder.Reset(); - - if (test_mode != COUNT_ONLY) { - output.clear(); - - if (filter_hash) { - fprintf(stderr, "RUNNING TEST CASE\n"); - fprintf(stderr, "Input (len=%u): ", (unsigned)proto.size()); - PrintBinary(proto); - fprintf(stderr, "\n"); - if (expected_output) { - if (test_mode == ALL_HANDLERS) { - fprintf(stderr, "Expected output: %s\n", expected_output->c_str()); - } else if (test_mode == NO_HANDLERS) { - fprintf(stderr, - "No handlers are registered, BUT if they were " - "the expected output would be: %s\n", - expected_output->c_str()); - } - } else { - fprintf(stderr, "Expected to FAIL\n"); - } - } - - bool ok = env->Start() && - parse(env, decoder, (int)i) && - parse(env, decoder, (int)(j - i)) && - parse(env, decoder, -1) && - env->End(); - - ASSERT(env->CheckConsistency()); - - if (test_mode == ALL_HANDLERS) { - if (expected_output) { - if (output != *expected_output) { - fprintf(stderr, "Text mismatch: '%s' vs '%s'\n", - output.c_str(), expected_output->c_str()); - } - ASSERT(ok); - ASSERT(output == *expected_output); - } else { - if (ok) { - fprintf(stderr, "Didn't expect ok result, but got output: '%s'\n", - output.c_str()); - } - ASSERT(!ok); - } - } - } - (*count)++; -} - -void run_decoder(const string& proto, const string* expected_output) { - VerboseParserEnvironment env(filter_hash != 0); - upb::Sink sink(global_handlers, &closures[0]); - upb::pb::DecoderPtr decoder = CreateDecoder(env.arena(), global_method, sink, env.status()); - env.ResetBytesSink(decoder.input()); - for (size_t i = 0; i < proto.size(); i++) { - for (size_t j = i; j < UPB_MIN(proto.size(), i + 5); j++) { - do_run_decoder(&env, decoder, proto, expected_output, i, j, true); - if (env.SkippedWithNull()) { - do_run_decoder(&env, decoder, proto, expected_output, i, j, false); - } - } - } -} - -const static string thirty_byte_nop = cat( - tag(NOP_FIELD, UPB_WIRE_TYPE_DELIMITED), delim(string(30, 'X')) ); - -// Indents and wraps text as if it were a submessage with this field number -string wrap_text(int32_t fn, const string& text) { - string wrapped_text = text; - size_t pos = 0; - string replace_with = "\n "; - while ((pos = wrapped_text.find('\n', pos)) != string::npos && - pos != wrapped_text.size() - 1) { - wrapped_text.replace(pos, 1, replace_with); - pos += replace_with.size(); - } - wrapped_text = cat( - LINE("<"), - num2string(fn), LINE(":{") - " ", wrapped_text, - LINE(" }") - LINE(">")); - return wrapped_text; -} - -void assert_successful_parse(const string& proto, - const char *expected_fmt, ...) { - string expected_text; - va_list args; - va_start(args, expected_fmt); - vappendf(&expected_text, expected_fmt, args); - va_end(args); - // To test both middle-of-buffer and end-of-buffer code paths, - // repeat once with no-op padding data at the end of buffer. - run_decoder(proto, &expected_text); - run_decoder(cat( proto, thirty_byte_nop ), &expected_text); - - // Test that this also works when wrapped in a submessage or group. - // Indent the expected text one level and wrap it. - string wrapped_text1 = wrap_text(UPB_DESCRIPTOR_TYPE_MESSAGE, expected_text); - string wrapped_text2 = wrap_text(UPB_DESCRIPTOR_TYPE_GROUP, expected_text); - - run_decoder(submsg(UPB_DESCRIPTOR_TYPE_MESSAGE, proto), &wrapped_text1); - run_decoder(group(UPB_DESCRIPTOR_TYPE_GROUP, proto), &wrapped_text2); -} - -void assert_does_not_parse_at_eof(const string& proto) { - run_decoder(proto, NULL); - - // Also test that we fail to parse at end-of-submessage, not just - // end-of-message. But skip this if we have no handlers, because in that - // case we won't descend into the submessage. - if (test_mode != NO_HANDLERS) { - run_decoder(submsg(UPB_DESCRIPTOR_TYPE_MESSAGE, proto), NULL); - run_decoder(cat(submsg(UPB_DESCRIPTOR_TYPE_MESSAGE, proto), - thirty_byte_nop), NULL); - } -} - -void assert_does_not_parse(const string& proto) { - // Test that the error is caught both at end-of-buffer and middle-of-buffer. - assert_does_not_parse_at_eof(proto); - assert_does_not_parse_at_eof(cat( proto, thirty_byte_nop )); -} - - -/* The actual tests ***********************************************************/ - -void test_premature_eof_for_type(upb_descriptortype_t type) { - // Incomplete values for each wire type. - static const string incompletes[6] = { - string("\x80"), // UPB_WIRE_TYPE_VARINT - string("abcdefg"), // UPB_WIRE_TYPE_64BIT - string("\x80"), // UPB_WIRE_TYPE_DELIMITED (partial length) - string(), // UPB_WIRE_TYPE_START_GROUP (no value required) - string(), // UPB_WIRE_TYPE_END_GROUP (no value required) - string("abc") // UPB_WIRE_TYPE_32BIT - }; - - uint32_t fieldnum = type; - uint32_t rep_fieldnum = rep_fn(type); - int wire_type = upb_decoder_types[type].native_wire_type; - const string& incomplete = incompletes[wire_type]; - - // EOF before a known non-repeated value. - assert_does_not_parse_at_eof(tag(fieldnum, wire_type)); - - // EOF before a known repeated value. - assert_does_not_parse_at_eof(tag(rep_fieldnum, wire_type)); - - // EOF before an unknown value. - assert_does_not_parse_at_eof(tag(UNKNOWN_FIELD, wire_type)); - - // EOF inside a known non-repeated value. - assert_does_not_parse_at_eof( - cat( tag(fieldnum, wire_type), incomplete )); - - // EOF inside a known repeated value. - assert_does_not_parse_at_eof( - cat( tag(rep_fieldnum, wire_type), incomplete )); - - // EOF inside an unknown value. - assert_does_not_parse_at_eof( - cat( tag(UNKNOWN_FIELD, wire_type), incomplete )); - - if (wire_type == UPB_WIRE_TYPE_DELIMITED) { - // EOF in the middle of delimited data for known non-repeated value. - assert_does_not_parse_at_eof( - cat( tag(fieldnum, wire_type), varint(1) )); - - // EOF in the middle of delimited data for known repeated value. - assert_does_not_parse_at_eof( - cat( tag(rep_fieldnum, wire_type), varint(1) )); - - // EOF in the middle of delimited data for unknown value. - assert_does_not_parse_at_eof( - cat( tag(UNKNOWN_FIELD, wire_type), varint(1) )); - - if (type == UPB_DESCRIPTOR_TYPE_MESSAGE) { - // Submessage ends in the middle of a value. - string incomplete_submsg = - cat ( tag(UPB_DESCRIPTOR_TYPE_INT32, UPB_WIRE_TYPE_VARINT), - incompletes[UPB_WIRE_TYPE_VARINT] ); - assert_does_not_parse( - cat( tag(fieldnum, UPB_WIRE_TYPE_DELIMITED), - varint(incomplete_submsg.size()), - incomplete_submsg )); - } - } else { - // Packed region ends in the middle of a value. - assert_does_not_parse( - cat( tag(rep_fieldnum, UPB_WIRE_TYPE_DELIMITED), - varint(incomplete.size()), - incomplete )); - - // EOF in the middle of packed region. - assert_does_not_parse_at_eof( - cat( tag(rep_fieldnum, UPB_WIRE_TYPE_DELIMITED), varint(1) )); - } -} - -// "33" and "66" are just two random values that all numeric types can -// represent. -void test_valid_data_for_type(upb_descriptortype_t type, - const string& enc33, const string& enc66) { - uint32_t fieldnum = type; - uint32_t rep_fieldnum = rep_fn(type); - int wire_type = upb_decoder_types[type].native_wire_type; - - // Non-repeated - assert_successful_parse( - cat( tag(fieldnum, wire_type), enc33, - tag(fieldnum, wire_type), enc66 ), - LINE("<") - LINE("%u:33") - LINE("%u:66") - LINE(">"), fieldnum, fieldnum); - - // Non-packed repeated. - assert_successful_parse( - cat( tag(rep_fieldnum, wire_type), enc33, - tag(rep_fieldnum, wire_type), enc66 ), - LINE("<") - LINE("%u:[") - LINE(" %u:33") - LINE(" %u:66") - LINE("]") - LINE(">"), rep_fieldnum, rep_fieldnum, rep_fieldnum); - - // Packed repeated. - assert_successful_parse( - cat( tag(rep_fieldnum, UPB_WIRE_TYPE_DELIMITED), - delim(cat( enc33, enc66 )) ), - LINE("<") - LINE("%u:[") - LINE(" %u:33") - LINE(" %u:66") - LINE("]") - LINE(">"), rep_fieldnum, rep_fieldnum, rep_fieldnum); -} - -void test_valid_data_for_signed_type(upb_descriptortype_t type, - const string& enc33, const string& enc66) { - uint32_t fieldnum = type; - uint32_t rep_fieldnum = rep_fn(type); - int wire_type = upb_decoder_types[type].native_wire_type; - - // Non-repeated - assert_successful_parse( - cat( tag(fieldnum, wire_type), enc33, - tag(fieldnum, wire_type), enc66 ), - LINE("<") - LINE("%u:33") - LINE("%u:-66") - LINE(">"), fieldnum, fieldnum); - - // Non-packed repeated. - assert_successful_parse( - cat( tag(rep_fieldnum, wire_type), enc33, - tag(rep_fieldnum, wire_type), enc66 ), - LINE("<") - LINE("%u:[") - LINE(" %u:33") - LINE(" %u:-66") - LINE("]") - LINE(">"), rep_fieldnum, rep_fieldnum, rep_fieldnum); - - // Packed repeated. - assert_successful_parse( - cat( tag(rep_fieldnum, UPB_WIRE_TYPE_DELIMITED), - delim(cat( enc33, enc66 )) ), - LINE("<") - LINE("%u:[") - LINE(" %u:33") - LINE(" %u:-66") - LINE("]") - LINE(">"), rep_fieldnum, rep_fieldnum, rep_fieldnum); -} - -// Test that invalid protobufs are properly detected (without crashing) and -// have an error reported. Field numbers match registered handlers above. -void test_invalid() { - test_premature_eof_for_type(UPB_DESCRIPTOR_TYPE_DOUBLE); - test_premature_eof_for_type(UPB_DESCRIPTOR_TYPE_FLOAT); - test_premature_eof_for_type(UPB_DESCRIPTOR_TYPE_INT64); - test_premature_eof_for_type(UPB_DESCRIPTOR_TYPE_UINT64); - test_premature_eof_for_type(UPB_DESCRIPTOR_TYPE_INT32); - test_premature_eof_for_type(UPB_DESCRIPTOR_TYPE_FIXED64); - test_premature_eof_for_type(UPB_DESCRIPTOR_TYPE_FIXED32); - test_premature_eof_for_type(UPB_DESCRIPTOR_TYPE_BOOL); - test_premature_eof_for_type(UPB_DESCRIPTOR_TYPE_STRING); - test_premature_eof_for_type(UPB_DESCRIPTOR_TYPE_BYTES); - test_premature_eof_for_type(UPB_DESCRIPTOR_TYPE_UINT32); - test_premature_eof_for_type(UPB_DESCRIPTOR_TYPE_ENUM); - test_premature_eof_for_type(UPB_DESCRIPTOR_TYPE_SFIXED32); - test_premature_eof_for_type(UPB_DESCRIPTOR_TYPE_SFIXED64); - test_premature_eof_for_type(UPB_DESCRIPTOR_TYPE_SINT32); - test_premature_eof_for_type(UPB_DESCRIPTOR_TYPE_SINT64); - - // EOF inside a tag's varint. - assert_does_not_parse_at_eof( string("\x80") ); - - // EOF inside a known group. - // TODO(haberman): add group to decoder test schema. - //assert_does_not_parse_at_eof( tag(4, UPB_WIRE_TYPE_START_GROUP) ); - - // EOF inside an unknown group. - assert_does_not_parse_at_eof( tag(UNKNOWN_FIELD, UPB_WIRE_TYPE_START_GROUP) ); - - // End group that we are not currently in. - assert_does_not_parse( tag(4, UPB_WIRE_TYPE_END_GROUP) ); - - // Field number is 0. - assert_does_not_parse( - cat( tag(0, UPB_WIRE_TYPE_DELIMITED), varint(0) )); - // The previous test alone did not catch this particular pattern which could - // corrupt the internal state. - assert_does_not_parse( - cat( tag(0, UPB_WIRE_TYPE_64BIT), uint64(0) )); - - // Field number is too large. - assert_does_not_parse( - cat( tag(UPB_MAX_FIELDNUMBER + 1, UPB_WIRE_TYPE_DELIMITED), - varint(0) )); - - // Known group inside a submessage has ENDGROUP tag AFTER submessage end. - assert_does_not_parse( - cat ( submsg(UPB_DESCRIPTOR_TYPE_MESSAGE, - tag(UPB_DESCRIPTOR_TYPE_GROUP, UPB_WIRE_TYPE_START_GROUP)), - tag(UPB_DESCRIPTOR_TYPE_GROUP, UPB_WIRE_TYPE_END_GROUP))); - - // Unknown string extends past enclosing submessage. - assert_does_not_parse( - cat (badlen_submsg(-1, UPB_DESCRIPTOR_TYPE_MESSAGE, - submsg(12345, string(" "))), - submsg(UPB_DESCRIPTOR_TYPE_MESSAGE, string(" ")))); - - // Unknown fixed-length field extends past enclosing submessage. - assert_does_not_parse( - cat (badlen_submsg(-1, UPB_DESCRIPTOR_TYPE_MESSAGE, - cat( tag(12345, UPB_WIRE_TYPE_64BIT), uint64(0))), - submsg(UPB_DESCRIPTOR_TYPE_MESSAGE, string(" ")))); - - // Test exceeding the resource limit of stack depth. - if (test_mode != NO_HANDLERS) { - string buf; - for (int i = 0; i <= MAX_NESTING; i++) { - buf.assign(submsg(UPB_DESCRIPTOR_TYPE_MESSAGE, buf)); - } - assert_does_not_parse(buf); - } -} - -void test_valid() { - // Empty protobuf. - assert_successful_parse(string(""), "<\n>\n"); - - // Empty protobuf where we never call PutString between - // StartString/EndString. - - upb::Status status; - upb::Arena arena; - upb::Sink sink(global_handlers, &closures[0]); - upb::pb::DecoderPtr decoder = - CreateDecoder(&arena, global_method, sink, &status); - output.clear(); - bool ok = upb::PutBuffer(std::string(), decoder.input()); - ASSERT(ok); - ASSERT(status.ok()); - if (test_mode == ALL_HANDLERS) { - ASSERT(output == string("<\n>\n")); - } - - test_valid_data_for_signed_type(UPB_DESCRIPTOR_TYPE_DOUBLE, - dbl(33), - dbl(-66)); - test_valid_data_for_signed_type(UPB_DESCRIPTOR_TYPE_FLOAT, flt(33), flt(-66)); - test_valid_data_for_signed_type(UPB_DESCRIPTOR_TYPE_INT64, - varint(33), - varint(-66)); - test_valid_data_for_signed_type(UPB_DESCRIPTOR_TYPE_INT32, - varint(33), - varint(-66)); - test_valid_data_for_signed_type(UPB_DESCRIPTOR_TYPE_ENUM, - varint(33), - varint(-66)); - test_valid_data_for_signed_type(UPB_DESCRIPTOR_TYPE_SFIXED32, - uint32(33), - uint32(-66)); - test_valid_data_for_signed_type(UPB_DESCRIPTOR_TYPE_SFIXED64, - uint64(33), - uint64(-66)); - test_valid_data_for_signed_type(UPB_DESCRIPTOR_TYPE_SINT32, - zz32(33), - zz32(-66)); - test_valid_data_for_signed_type(UPB_DESCRIPTOR_TYPE_SINT64, - zz64(33), - zz64(-66)); - - test_valid_data_for_type(UPB_DESCRIPTOR_TYPE_UINT64, varint(33), varint(66)); - test_valid_data_for_type(UPB_DESCRIPTOR_TYPE_UINT32, varint(33), varint(66)); - test_valid_data_for_type(UPB_DESCRIPTOR_TYPE_FIXED64, uint64(33), uint64(66)); - test_valid_data_for_type(UPB_DESCRIPTOR_TYPE_FIXED32, uint32(33), uint32(66)); - - // Unknown fields. - int int32_type = UPB_DESCRIPTOR_TYPE_INT32; - int msg_type = UPB_DESCRIPTOR_TYPE_MESSAGE; - assert_successful_parse( - cat( tag(12345, UPB_WIRE_TYPE_VARINT), varint(2345678) ), - "<\n>\n"); - assert_successful_parse( - cat( tag(12345, UPB_WIRE_TYPE_32BIT), uint32(2345678) ), - "<\n>\n"); - assert_successful_parse( - cat( tag(12345, UPB_WIRE_TYPE_64BIT), uint64(2345678) ), - "<\n>\n"); - assert_successful_parse( - submsg(12345, string(" ")), - "<\n>\n"); - - // Unknown field inside a known submessage. - assert_successful_parse( - submsg(UPB_DESCRIPTOR_TYPE_MESSAGE, submsg(12345, string(" "))), - LINE("<") - LINE("%u:{") - LINE(" <") - LINE(" >") - LINE(" }") - LINE(">"), UPB_DESCRIPTOR_TYPE_MESSAGE); - - assert_successful_parse( - cat (submsg(UPB_DESCRIPTOR_TYPE_MESSAGE, submsg(12345, string(" "))), - tag(UPB_DESCRIPTOR_TYPE_INT32, UPB_WIRE_TYPE_VARINT), - varint(5)), - LINE("<") - LINE("%u:{") - LINE(" <") - LINE(" >") - LINE(" }") - LINE("%u:5") - LINE(">"), UPB_DESCRIPTOR_TYPE_MESSAGE, UPB_DESCRIPTOR_TYPE_INT32); - - // This triggered a previous bug in the decoder. - assert_successful_parse( - cat( tag(UPB_DESCRIPTOR_TYPE_SFIXED32, UPB_WIRE_TYPE_VARINT), - varint(0) ), - "<\n>\n"); - - assert_successful_parse( - cat( - submsg(UPB_DESCRIPTOR_TYPE_MESSAGE, - submsg(UPB_DESCRIPTOR_TYPE_MESSAGE, - cat( tag(int32_type, UPB_WIRE_TYPE_VARINT), varint(2345678), - tag(12345, UPB_WIRE_TYPE_VARINT), varint(2345678) ))), - tag(int32_type, UPB_WIRE_TYPE_VARINT), varint(22222)), - LINE("<") - LINE("%u:{") - LINE(" <") - LINE(" %u:{") - LINE(" <") - LINE(" %u:2345678") - LINE(" >") - LINE(" }") - LINE(" >") - LINE(" }") - LINE("%u:22222") - LINE(">"), msg_type, msg_type, int32_type, int32_type); - - assert_successful_parse( - cat( tag(UPB_DESCRIPTOR_TYPE_INT32, UPB_WIRE_TYPE_VARINT), varint(1), - tag(12345, UPB_WIRE_TYPE_VARINT), varint(2345678) ), - LINE("<") - LINE("%u:1") - LINE(">"), UPB_DESCRIPTOR_TYPE_INT32); - - // String inside submsg. - uint32_t msg_fn = UPB_DESCRIPTOR_TYPE_MESSAGE; - assert_successful_parse( - submsg(msg_fn, - cat ( tag(UPB_DESCRIPTOR_TYPE_STRING, UPB_WIRE_TYPE_DELIMITED), - delim(string("abcde")) - ) - ), - LINE("<") - LINE("%u:{") - LINE(" <") - LINE(" %u:(5)\"abcde") - LINE(" %u:\"") - LINE(" >") - LINE(" }") - LINE(">"), msg_fn, UPB_DESCRIPTOR_TYPE_STRING, - UPB_DESCRIPTOR_TYPE_STRING); - - // Test implicit startseq/endseq. - uint32_t repfl_fn = rep_fn(UPB_DESCRIPTOR_TYPE_FLOAT); - uint32_t repdb_fn = rep_fn(UPB_DESCRIPTOR_TYPE_DOUBLE); - assert_successful_parse( - cat( tag(repfl_fn, UPB_WIRE_TYPE_32BIT), flt(33), - tag(repdb_fn, UPB_WIRE_TYPE_64BIT), dbl(66) ), - LINE("<") - LINE("%u:[") - LINE(" %u:33") - LINE("]") - LINE("%u:[") - LINE(" %u:66") - LINE("]") - LINE(">"), repfl_fn, repfl_fn, repdb_fn, repdb_fn); - - // Submessage tests. - assert_successful_parse( - submsg(msg_fn, submsg(msg_fn, submsg(msg_fn, string()))), - LINE("<") - LINE("%u:{") - LINE(" <") - LINE(" %u:{") - LINE(" <") - LINE(" %u:{") - LINE(" <") - LINE(" >") - LINE(" }") - LINE(" >") - LINE(" }") - LINE(" >") - LINE(" }") - LINE(">"), msg_fn, msg_fn, msg_fn); - - uint32_t repm_fn = rep_fn(UPB_DESCRIPTOR_TYPE_MESSAGE); - assert_successful_parse( - submsg(repm_fn, submsg(repm_fn, string())), - LINE("<") - LINE("%u:[") - LINE(" %u:{") - LINE(" <") - LINE(" %u:[") - LINE(" %u:{") - LINE(" <") - LINE(" >") - LINE(" }") - LINE(" ]") - LINE(" >") - LINE(" }") - LINE("]") - LINE(">"), repm_fn, repm_fn, repm_fn, repm_fn); - - // Test unknown group. - uint32_t unknown_group_fn = 12321; - assert_successful_parse( - cat( tag(unknown_group_fn, UPB_WIRE_TYPE_START_GROUP), - tag(unknown_group_fn, UPB_WIRE_TYPE_END_GROUP) ), - LINE("<") - LINE(">") - ); - - // Test some unknown fields inside an unknown group. - const string unknown_group_with_data = - cat( - tag(unknown_group_fn, UPB_WIRE_TYPE_START_GROUP), - tag(12345, UPB_WIRE_TYPE_VARINT), varint(2345678), - tag(123456789, UPB_WIRE_TYPE_32BIT), uint32(2345678), - tag(123477, UPB_WIRE_TYPE_64BIT), uint64(2345678), - tag(123, UPB_WIRE_TYPE_DELIMITED), varint(0), - tag(unknown_group_fn, UPB_WIRE_TYPE_END_GROUP) - ); - - // Nested unknown group with data. - assert_successful_parse( - cat( - tag(unknown_group_fn, UPB_WIRE_TYPE_START_GROUP), - unknown_group_with_data, - tag(unknown_group_fn, UPB_WIRE_TYPE_END_GROUP), - tag(UPB_DESCRIPTOR_TYPE_INT32, UPB_WIRE_TYPE_VARINT), varint(1) - ), - LINE("<") - LINE("%u:1") - LINE(">"), - UPB_DESCRIPTOR_TYPE_INT32 - ); - - assert_successful_parse( - cat( tag(unknown_group_fn, UPB_WIRE_TYPE_START_GROUP), - tag(unknown_group_fn + 1, UPB_WIRE_TYPE_START_GROUP), - tag(unknown_group_fn + 1, UPB_WIRE_TYPE_END_GROUP), - tag(unknown_group_fn, UPB_WIRE_TYPE_END_GROUP) ), - LINE("<") - LINE(">") - ); - - // Staying within the stack limit should work properly. - string buf; - string textbuf; - int total = MAX_NESTING - 1; - for (int i = 0; i < total; i++) { - buf.assign(submsg(UPB_DESCRIPTOR_TYPE_MESSAGE, buf)); - indentbuf(&textbuf, i); - textbuf.append("<\n"); - indentbuf(&textbuf, i); - appendf(&textbuf, "%u:{\n", UPB_DESCRIPTOR_TYPE_MESSAGE); - } - indentbuf(&textbuf, total); - textbuf.append("<\n"); - indentbuf(&textbuf, total); - textbuf.append(">\n"); - for (int i = 0; i < total; i++) { - indentbuf(&textbuf, total - i - 1); - textbuf.append(" }\n"); - indentbuf(&textbuf, total - i - 1); - textbuf.append(">\n"); - } - // Have to use run_decoder directly, because we are at max nesting and can't - // afford the extra nesting that assert_successful_parse() will do. - run_decoder(buf, &textbuf); -} - -void empty_callback(const void* /* closure */, upb::Handlers* /* h_ptr */) {} - -void test_emptyhandlers(upb::SymbolTable* symtab) { - // Create an empty handlers to make sure that the decoder can handle empty - // messages. - HandlerRegisterData handlerdata; - handlerdata.mode = test_mode; - - upb::HandlerCache handler_cache(empty_callback, &handlerdata); - upb::pb::CodeCache pb_code_cache(&handler_cache); - - upb::MessageDefPtr md = upb::MessageDefPtr(Empty_getmsgdef(symtab->ptr())); - global_handlers = handler_cache.Get(md); - global_method = pb_code_cache.Get(md); - - // TODO: also test the case where a message has fields, but the fields are - // submessage fields and have no handlers. This also results in a decoder - // method with no field-handling code. - - // Ensure that the method can run with empty and non-empty input. - string test_unknown_field_msg = - cat(tag(1, UPB_WIRE_TYPE_VARINT), varint(42), - tag(2, UPB_WIRE_TYPE_DELIMITED), delim("My test data")); - const struct { - const char* data; - size_t length; - } testdata[] = { - { "", 0 }, - { test_unknown_field_msg.data(), test_unknown_field_msg.size() }, - { NULL, 0 }, - }; - for (int i = 0; testdata[i].data; i++) { - VerboseParserEnvironment env(filter_hash != 0); - upb::Sink sink(global_method.dest_handlers(), &closures[0]); - upb::pb::DecoderPtr decoder = - CreateDecoder(env.arena(), global_method, sink, env.status()); - env.ResetBytesSink(decoder.input()); - env.Reset(testdata[i].data, testdata[i].length, true, false); - ASSERT(env.Start()); - ASSERT(env.ParseBuffer(-1)); - ASSERT(env.End()); - ASSERT(env.CheckConsistency()); - } -} - -void run_tests() { - HandlerRegisterData handlerdata; - handlerdata.mode = test_mode; - - upb::SymbolTable symtab; - upb::HandlerCache handler_cache(callback, &handlerdata); - upb::pb::CodeCache pb_code_cache(&handler_cache); - - upb::MessageDefPtr md(DecoderTest_getmsgdef(symtab.ptr())); - global_handlers = handler_cache.Get(md); - global_method = pb_code_cache.Get(md); - completed = 0; - - test_invalid(); - test_valid(); - - test_emptyhandlers(&symtab); -} - -extern "C" { - -int run_tests(int argc, char *argv[]) { - if (argc > 1) - filter_hash = (uint32_t)strtol(argv[1], NULL, 16); - for (int i = 0; i < MAX_NESTING; i++) { - closures[i] = i; - } - - // Count tests. - count = &total; - total = 0; - test_mode = COUNT_ONLY; - run_tests(); - count = &completed; - - total *= 2; // NO_HANDLERS, ALL_HANDLERS. - - test_mode = NO_HANDLERS; - run_tests(); - - test_mode = ALL_HANDLERS; - run_tests(); - - printf("All tests passed, %d assertions.\n", num_assertions); - return 0; -} - -} diff --git a/tests/pb/test_decoder.proto b/tests/pb/test_decoder.proto deleted file mode 100644 index e9fa6ad32c..0000000000 --- a/tests/pb/test_decoder.proto +++ /dev/null @@ -1,128 +0,0 @@ - -syntax = "proto2"; - -enum TestEnum { - FOO = 1; -} - -message Empty {} - -message DecoderTest { - optional double f_double = 1; - optional float f_float = 2; - optional int64 f_int64 = 3; - optional uint64 f_uint64 = 4; - optional int32 f_int32 = 5; - optional fixed64 f_fixed64 = 6; - optional fixed32 f_fixed32 = 7; - optional bool f_bool = 8; - optional string f_string = 9; - optional DecoderTest f_message = 11; - optional bytes f_bytes = 12; - optional uint32 f_uint32 = 13; - optional TestEnum f_enum = 14; - optional sfixed32 f_sfixed32 = 15; - optional sfixed64 f_sfixed64 = 16; - optional sint32 f_sint32 = 17; - optional sint64 f_sint64 = 18; - - optional string nop_field = 40; - - repeated double r_double = 536869912; - repeated float r_float = 536869913; - repeated int64 r_int64 = 536869914; - repeated uint64 r_uint64 = 536869915; - repeated int32 r_int32 = 536869916; - repeated fixed64 r_fixed64 = 536869917; - repeated fixed32 r_fixed32 = 536869918; - repeated bool r_bool = 536869919; - repeated string r_string = 536869920; - repeated DecoderTest r_message = 536869922; - repeated bytes r_bytes = 536869923; - repeated uint32 r_uint32 = 536869924; - repeated TestEnum r_enum = 536869925; - repeated sfixed32 r_sfixed32 = 536869926; - repeated sfixed64 r_sfixed64 = 536869927; - repeated sint32 r_sint32 = 536869928; - repeated sint64 r_sint64 = 536869929; - - optional group F_group = 10 { - optional double f_double = 1; - optional float f_float = 2; - optional int64 f_int64 = 3; - optional uint64 f_uint64 = 4; - optional int32 f_int32 = 5; - optional fixed64 f_fixed64 = 6; - optional fixed32 f_fixed32 = 7; - optional bool f_bool = 8; - optional string f_string = 9; - optional DecoderTest f_message = 11; - optional bytes f_bytes = 12; - optional uint32 f_uint32 = 13; - optional TestEnum f_enum = 14; - optional sfixed32 f_sfixed32 = 15; - optional sfixed64 f_sfixed64 = 16; - optional sint32 f_sint32 = 17; - optional sint64 f_sint64 = 18; - - optional string nop_field = 40; - - repeated double r_double = 536869912; - repeated float r_float = 536869913; - repeated int64 r_int64 = 536869914; - repeated uint64 r_uint64 = 536869915; - repeated int32 r_int32 = 536869916; - repeated fixed64 r_fixed64 = 536869917; - repeated fixed32 r_fixed32 = 536869918; - repeated bool r_bool = 536869919; - repeated string r_string = 536869920; - repeated DecoderTest r_message = 536869922; - repeated bytes r_bytes = 536869923; - repeated uint32 r_uint32 = 536869924; - repeated TestEnum r_enum = 536869925; - repeated sfixed32 r_sfixed32 = 536869926; - repeated sfixed64 r_sfixed64 = 536869927; - repeated sint32 r_sint32 = 536869928; - repeated sint64 r_sint64 = 536869929; - } - - optional group R_group = 536869921 { - optional double f_double = 1; - optional float f_float = 2; - optional int64 f_int64 = 3; - optional uint64 f_uint64 = 4; - optional int32 f_int32 = 5; - optional fixed64 f_fixed64 = 6; - optional fixed32 f_fixed32 = 7; - optional bool f_bool = 8; - optional string f_string = 9; - optional DecoderTest f_message = 11; - optional bytes f_bytes = 12; - optional uint32 f_uint32 = 13; - optional TestEnum f_enum = 14; - optional sfixed32 f_sfixed32 = 15; - optional sfixed64 f_sfixed64 = 16; - optional sint32 f_sint32 = 17; - optional sint64 f_sint64 = 18; - - optional string nop_field = 40; - - repeated double r_double = 536869912; - repeated float r_float = 536869913; - repeated int64 r_int64 = 536869914; - repeated uint64 r_uint64 = 536869915; - repeated int32 r_int32 = 536869916; - repeated fixed64 r_fixed64 = 536869917; - repeated fixed32 r_fixed32 = 536869918; - repeated bool r_bool = 536869919; - repeated string r_string = 536869920; - repeated DecoderTest r_message = 536869922; - repeated bytes r_bytes = 536869923; - repeated uint32 r_uint32 = 536869924; - repeated TestEnum r_enum = 536869925; - repeated sfixed32 r_sfixed32 = 536869926; - repeated sfixed64 r_sfixed64 = 536869927; - repeated sint32 r_sint32 = 536869928; - repeated sint64 r_sint64 = 536869929; - } -} diff --git a/tests/pb/test_encoder.cc b/tests/pb/test_encoder.cc deleted file mode 100644 index b358ef592c..0000000000 --- a/tests/pb/test_encoder.cc +++ /dev/null @@ -1,102 +0,0 @@ - -#include - -#include "google/protobuf/descriptor.upb.h" -#include "google/protobuf/descriptor.upbdefs.h" -#include "tests/test_util.h" -#include "tests/upb_test.h" -#include "upb/pb/decoder.h" -#include "upb/pb/encoder.h" -#include "upb/port_def.inc" -#include "upb/upb.hpp" - -template -class FillStringHandler { - public: - static void SetHandler(upb_byteshandler* handler) { - upb_byteshandler_setstartstr(handler, &FillStringHandler::StartString, - NULL); - upb_byteshandler_setstring(handler, &FillStringHandler::StringBuf, NULL); - } - - private: - // TODO(haberman): add UpbBind/UpbMakeHandler support to BytesHandler so these - // can be prettier callbacks. - static void* StartString(void *c, const void *hd, size_t size) { - UPB_UNUSED(hd); - UPB_UNUSED(size); - - T* str = static_cast(c); - str->clear(); - return c; - } - - static size_t StringBuf(void* c, const void* hd, const char* buf, size_t n, - const upb_bufhandle* h) { - UPB_UNUSED(hd); - UPB_UNUSED(h); - - T* str = static_cast(c); - try { - str->append(buf, n); - return n; - } catch (const std::exception&) { - return 0; - } - } -}; - -class StringSink { - public: - template - explicit StringSink(T* target) { - // TODO(haberman): we need to avoid rebuilding a new handler every time, - // but with class globals disallowed for google3 C++ this is tricky. - upb_byteshandler_init(&handler_); - FillStringHandler::SetHandler(&handler_); - input_.Reset(&handler_, target); - } - - upb::BytesSink input() { return input_; } - - private: - upb_byteshandler handler_; - upb::BytesSink input_; -}; - -void test_pb_roundtrip() { - std::string input( - google_protobuf_descriptor_proto_upbdefinit.descriptor.data, - google_protobuf_descriptor_proto_upbdefinit.descriptor.size); - std::cout << input.size() << "\n"; - upb::SymbolTable symtab; - upb::HandlerCache encoder_cache(upb::pb::EncoderPtr::NewCache()); - upb::pb::CodeCache decoder_cache(&encoder_cache); - upb::Arena arena; - upb::Status status; - upb::MessageDefPtr md( - google_protobuf_FileDescriptorProto_getmsgdef(symtab.ptr())); - ASSERT(md); - const upb::Handlers *encoder_handlers = encoder_cache.Get(md); - ASSERT(encoder_handlers); - const upb::pb::DecoderMethodPtr method = decoder_cache.Get(md); - - std::string output; - StringSink string_sink(&output); - upb::pb::EncoderPtr encoder = - upb::pb::EncoderPtr::Create(&arena, encoder_handlers, string_sink.input()); - upb::pb::DecoderPtr decoder = - upb::pb::DecoderPtr::Create(&arena, method, encoder.input(), &status); - bool ok = upb::PutBuffer(input, decoder.input()); - ASSERT(ok); - ASSERT(input == output); -} - -extern "C" { -int run_tests(int argc, char *argv[]) { - UPB_UNUSED(argc); - UPB_UNUSED(argv); - test_pb_roundtrip(); - return 0; -} -} diff --git a/tests/test_cpp.cc b/tests/test_cpp.cc index 15fa4b5aad..22a30a5429 100644 --- a/tests/test_cpp.cc +++ b/tests/test_cpp.cc @@ -14,887 +14,11 @@ #include "tests/test_cpp.upbdefs.h" #include "tests/upb_test.h" #include "upb/def.h" -#include "upb/handlers.h" -#include "upb/pb/decoder.h" -#include "upb/pb/textprinter.h" -#include "upb/port_def.inc" +#include "upb/def.hpp" #include "upb/upb.h" -template -void AssertInsert(T* const container, const typename T::value_type& val) { - bool inserted = container->insert(val).second; - ASSERT(inserted); -} - -// -// Tests for registering and calling handlers in all their variants. -// This test code is very repetitive because we have to declare each -// handler function variant separately, and they all have different -// signatures so it does not lend itself well to templates. -// -// We test three handler types: -// StartMessage (no data params) -// Int32 (1 data param (int32_t)) -// String Buf (2 data params (const char*, size_t)) -// -// For each handler type we test all 8 handler variants: -// (handler data?) x (function/method) x (returns {void, success}) -// -// The one notable thing we don't test at the moment is -// StartSequence/StartString handlers: these are different from StartMessage() -// in that they return void* for the sub-closure. But this is exercised in -// other tests. -// - -static const int kExpectedHandlerData = 1232323; - -class StringBufTesterBase { - public: - static constexpr int kFieldNumber = 3; - - StringBufTesterBase() : seen_(false), handler_data_val_(0) {} - - void CallAndVerify(upb::Sink sink, upb::FieldDefPtr f) { - upb_selector_t start; - ASSERT(upb_handlers_getselector(f.ptr(), UPB_HANDLER_STARTSTR, &start)); - upb_selector_t str; - ASSERT(upb_handlers_getselector(f.ptr(), UPB_HANDLER_STRING, &str)); - - ASSERT(!seen_); - upb::Sink sub; - sink.StartMessage(); - sink.StartString(start, 0, &sub); - size_t ret = sub.PutStringBuffer(str, &buf_, 5, &handle_); - ASSERT(seen_); - ASSERT(len_ == 5); - ASSERT(ret == 5); - ASSERT(handler_data_val_ == kExpectedHandlerData); - } - - protected: - bool seen_; - int handler_data_val_; - size_t len_; - char buf_; - upb_bufhandle handle_; -}; - -// Test 8 combinations of: -// (handler data?) x (buffer handle?) x (function/method) -// -// Then we add one test each for this variation: to prevent combinatorial -// explosion of these tests we don't test the full 16 combinations, but -// rely on our knowledge that the implementation processes the return wrapping -// in a second separate and independent stage: -// -// (function/method) - -class StringBufTesterVoidMethodNoHandlerDataNoHandle - : public StringBufTesterBase { - public: - typedef StringBufTesterVoidMethodNoHandlerDataNoHandle ME; - void Register(upb::HandlersPtr h, upb::FieldDefPtr f) { - UPB_UNUSED(f); - ASSERT(h.SetStringHandler(f, UpbMakeHandler(&ME::Handler))); - handler_data_val_ = kExpectedHandlerData; - } - - private: - void Handler(const char *buf, size_t len) { - ASSERT(buf == &buf_); - seen_ = true; - len_ = len; - } -}; - -class StringBufTesterVoidMethodNoHandlerDataWithHandle - : public StringBufTesterBase { - public: - typedef StringBufTesterVoidMethodNoHandlerDataWithHandle ME; - void Register(upb::HandlersPtr h, upb::FieldDefPtr f) { - UPB_UNUSED(f); - ASSERT(h.SetStringHandler(f, UpbMakeHandler(&ME::Handler))); - handler_data_val_ = kExpectedHandlerData; - } - - private: - void Handler(const char *buf, size_t len, const upb_bufhandle* handle) { - ASSERT(buf == &buf_); - ASSERT(handle == &handle_); - seen_ = true; - len_ = len; - } -}; - -class StringBufTesterVoidMethodWithHandlerDataNoHandle - : public StringBufTesterBase { - public: - typedef StringBufTesterVoidMethodWithHandlerDataNoHandle ME; - void Register(upb::HandlersPtr h, upb::FieldDefPtr f) { - UPB_UNUSED(f); - ASSERT(h.SetStringHandler( - f, UpbBind(&ME::Handler, new int(kExpectedHandlerData)))); - } - - private: - void Handler(const int* hd, const char *buf, size_t len) { - ASSERT(buf == &buf_); - handler_data_val_ = *hd; - seen_ = true; - len_ = len; - } -}; - -class StringBufTesterVoidMethodWithHandlerDataWithHandle - : public StringBufTesterBase { - public: - typedef StringBufTesterVoidMethodWithHandlerDataWithHandle ME; - void Register(upb::HandlersPtr h, upb::FieldDefPtr f) { - UPB_UNUSED(f); - ASSERT(h.SetStringHandler( - f, UpbBind(&ME::Handler, new int(kExpectedHandlerData)))); - } - - private: - void Handler(const int* hd, const char* buf, size_t len, - const upb_bufhandle* handle) { - ASSERT(buf == &buf_); - ASSERT(handle == &handle_); - handler_data_val_ = *hd; - seen_ = true; - len_ = len; - } -}; - -class StringBufTesterVoidFunctionNoHandlerDataNoHandle - : public StringBufTesterBase { - public: - typedef StringBufTesterVoidFunctionNoHandlerDataNoHandle ME; - void Register(upb::HandlersPtr h, upb::FieldDefPtr f) { - UPB_UNUSED(f); - ASSERT(h.SetStringHandler(f, UpbMakeHandler(&ME::Handler))); - handler_data_val_ = kExpectedHandlerData; - } - - private: - static void Handler(ME* t, const char *buf, size_t len) { - ASSERT(buf == &t->buf_); - t->seen_ = true; - t->len_ = len; - } -}; - -class StringBufTesterVoidFunctionNoHandlerDataWithHandle - : public StringBufTesterBase { - public: - typedef StringBufTesterVoidFunctionNoHandlerDataWithHandle ME; - void Register(upb::HandlersPtr h, upb::FieldDefPtr f) { - UPB_UNUSED(f); - ASSERT(h.SetStringHandler(f, UpbMakeHandler(&ME::Handler))); - handler_data_val_ = kExpectedHandlerData; - } - - private: - static void Handler(ME* t, const char* buf, size_t len, - const upb_bufhandle* handle) { - ASSERT(buf == &t->buf_); - ASSERT(handle == &t->handle_); - t->seen_ = true; - t->len_ = len; - } -}; - -class StringBufTesterVoidFunctionWithHandlerDataNoHandle - : public StringBufTesterBase { - public: - typedef StringBufTesterVoidFunctionWithHandlerDataNoHandle ME; - void Register(upb::HandlersPtr h, upb::FieldDefPtr f) { - UPB_UNUSED(f); - ASSERT(h.SetStringHandler( - f, UpbBind(&ME::Handler, new int(kExpectedHandlerData)))); - } - - private: - static void Handler(ME* t, const int* hd, const char *buf, size_t len) { - ASSERT(buf == &t->buf_); - t->handler_data_val_ = *hd; - t->seen_ = true; - t->len_ = len; - } -}; - -class StringBufTesterVoidFunctionWithHandlerDataWithHandle - : public StringBufTesterBase { - public: - typedef StringBufTesterVoidFunctionWithHandlerDataWithHandle ME; - void Register(upb::HandlersPtr h, upb::FieldDefPtr f) { - UPB_UNUSED(f); - ASSERT(h.SetStringHandler( - f, UpbBind(&ME::Handler, new int(kExpectedHandlerData)))); - } - - private: - static void Handler(ME* t, const int* hd, const char* buf, size_t len, - const upb_bufhandle* handle) { - ASSERT(buf == &t->buf_); - ASSERT(handle == &t->handle_); - t->handler_data_val_ = *hd; - t->seen_ = true; - t->len_ = len; - } -}; - -class StringBufTesterSizeTMethodNoHandlerDataNoHandle - : public StringBufTesterBase { - public: - typedef StringBufTesterSizeTMethodNoHandlerDataNoHandle ME; - void Register(upb::HandlersPtr h, upb::FieldDefPtr f) { - UPB_UNUSED(f); - ASSERT(h.SetStringHandler(f, UpbMakeHandler(&ME::Handler))); - handler_data_val_ = kExpectedHandlerData; - } - - private: - size_t Handler(const char *buf, size_t len) { - ASSERT(buf == &buf_); - seen_ = true; - len_ = len; - return len; - } -}; - -class StringBufTesterBoolMethodNoHandlerDataNoHandle - : public StringBufTesterBase { - public: - typedef StringBufTesterBoolMethodNoHandlerDataNoHandle ME; - void Register(upb::HandlersPtr h, upb::FieldDefPtr f) { - UPB_UNUSED(f); - ASSERT(h.SetStringHandler(f, UpbMakeHandler(&ME::Handler))); - handler_data_val_ = kExpectedHandlerData; - } - - private: - bool Handler(const char *buf, size_t len) { - ASSERT(buf == &buf_); - seen_ = true; - len_ = len; - return true; - } -}; - -class StartMsgTesterBase { - public: - // We don't need the FieldDef it will create, but the test harness still - // requires that we provide one. - static constexpr int kFieldNumber = 3; - - StartMsgTesterBase() : seen_(false), handler_data_val_(0) {} - - void CallAndVerify(upb::Sink sink, upb::FieldDefPtr f) { - UPB_UNUSED(f); - ASSERT(!seen_); - sink.StartMessage(); - ASSERT(seen_); - ASSERT(handler_data_val_ == kExpectedHandlerData); - } - - protected: - bool seen_; - int handler_data_val_; -}; - -// Test all 8 combinations of: -// (handler data?) x (function/method) x (returns {void, bool}) - -class StartMsgTesterVoidFunctionNoHandlerData : public StartMsgTesterBase { - public: - typedef StartMsgTesterVoidFunctionNoHandlerData ME; - void Register(upb::HandlersPtr h, upb::FieldDefPtr f) { - UPB_UNUSED(f); - ASSERT(h.SetStartMessageHandler(UpbMakeHandler(&Handler))); - handler_data_val_ = kExpectedHandlerData; - } - - private: - //static void Handler(ME* t) { - static void Handler(ME* t) { - t->seen_ = true; - } -}; - -class StartMsgTesterBoolFunctionNoHandlerData : public StartMsgTesterBase { - public: - typedef StartMsgTesterBoolFunctionNoHandlerData ME; - void Register(upb::HandlersPtr h, upb::FieldDefPtr f) { - UPB_UNUSED(f); - ASSERT(h.SetStartMessageHandler(UpbMakeHandler(&Handler))); - handler_data_val_ = kExpectedHandlerData; - } - - private: - static bool Handler(ME* t) { - t->seen_ = true; - return true; - } -}; - -class StartMsgTesterVoidMethodNoHandlerData : public StartMsgTesterBase { - public: - typedef StartMsgTesterVoidMethodNoHandlerData ME; - void Register(upb::HandlersPtr h, upb::FieldDefPtr f) { - UPB_UNUSED(f); - ASSERT(h.SetStartMessageHandler(UpbMakeHandler(&ME::Handler))); - handler_data_val_ = kExpectedHandlerData; - } - - private: - void Handler() { - seen_ = true; - } -}; - -class StartMsgTesterBoolMethodNoHandlerData : public StartMsgTesterBase { - public: - typedef StartMsgTesterBoolMethodNoHandlerData ME; - void Register(upb::HandlersPtr h, upb::FieldDefPtr f) { - UPB_UNUSED(f); - ASSERT(h.SetStartMessageHandler(UpbMakeHandler(&ME::Handler))); - handler_data_val_ = kExpectedHandlerData; - } - - private: - bool Handler() { - seen_ = true; - return true; - } -}; - -class StartMsgTesterVoidFunctionWithHandlerData : public StartMsgTesterBase { - public: - typedef StartMsgTesterVoidFunctionWithHandlerData ME; - void Register(upb::HandlersPtr h, upb::FieldDefPtr f) { - UPB_UNUSED(f); - ASSERT(h.SetStartMessageHandler( - UpbBind(&Handler, new int(kExpectedHandlerData)))); - } - - private: - static void Handler(ME* t, const int* hd) { - t->handler_data_val_ = *hd; - t->seen_ = true; - } -}; - -class StartMsgTesterBoolFunctionWithHandlerData : public StartMsgTesterBase { - public: - typedef StartMsgTesterBoolFunctionWithHandlerData ME; - void Register(upb::HandlersPtr h, upb::FieldDefPtr f) { - UPB_UNUSED(f); - ASSERT(h.SetStartMessageHandler( - UpbBind(&Handler, new int(kExpectedHandlerData)))); - } - - private: - static bool Handler(ME* t, const int* hd) { - t->handler_data_val_ = *hd; - t->seen_ = true; - return true; - } -}; - -class StartMsgTesterVoidMethodWithHandlerData : public StartMsgTesterBase { - public: - typedef StartMsgTesterVoidMethodWithHandlerData ME; - void Register(upb::HandlersPtr h, upb::FieldDefPtr f) { - UPB_UNUSED(f); - ASSERT(h.SetStartMessageHandler( - UpbBind(&ME::Handler, new int(kExpectedHandlerData)))); - } - - private: - void Handler(const int* hd) { - handler_data_val_ = *hd; - seen_ = true; - } -}; - -class StartMsgTesterBoolMethodWithHandlerData : public StartMsgTesterBase { - public: - typedef StartMsgTesterBoolMethodWithHandlerData ME; - void Register(upb::HandlersPtr h, upb::FieldDefPtr f) { - UPB_UNUSED(f); - ASSERT(h.SetStartMessageHandler( - UpbBind(&ME::Handler, new int(kExpectedHandlerData)))); - } - - private: - bool Handler(const int* hd) { - handler_data_val_ = *hd; - seen_ = true; - return true; - } -}; - -class Int32ValueTesterBase { - public: - static constexpr int kFieldNumber = 1; - - Int32ValueTesterBase() : seen_(false), val_(0), handler_data_val_(0) {} - - void CallAndVerify(upb::Sink sink, upb::FieldDefPtr f) { - upb_selector_t s; - ASSERT(upb_handlers_getselector(f.ptr(), UPB_HANDLER_INT32, &s)); - - ASSERT(!seen_); - sink.PutInt32(s, 5); - ASSERT(seen_); - ASSERT(handler_data_val_ == kExpectedHandlerData); - ASSERT(val_ == 5); - } - - protected: - bool seen_; - int32_t val_; - int handler_data_val_; -}; - -// Test all 8 combinations of: -// (handler data?) x (function/method) x (returns {void, bool}) - -class ValueTesterInt32VoidFunctionNoHandlerData - : public Int32ValueTesterBase { - public: - typedef ValueTesterInt32VoidFunctionNoHandlerData ME; - void Register(upb::HandlersPtr h, upb::FieldDefPtr f) { - ASSERT(h.SetInt32Handler(f, UpbMakeHandler(&Handler))); - handler_data_val_ = kExpectedHandlerData; - } - - private: - static void Handler(ME* t, int32_t val) { - t->val_ = val; - t->seen_ = true; - } -}; - -class ValueTesterInt32BoolFunctionNoHandlerData - : public Int32ValueTesterBase { - public: - typedef ValueTesterInt32BoolFunctionNoHandlerData ME; - void Register(upb::HandlersPtr h, upb::FieldDefPtr f) { - ASSERT(h.SetInt32Handler(f, UpbMakeHandler(&Handler))); - handler_data_val_ = kExpectedHandlerData; - } - - private: - static bool Handler(ME* t, int32_t val) { - t->val_ = val; - t->seen_ = true; - return true; - } -}; - -class ValueTesterInt32VoidMethodNoHandlerData : public Int32ValueTesterBase { - public: - typedef ValueTesterInt32VoidMethodNoHandlerData ME; - void Register(upb::HandlersPtr h, upb::FieldDefPtr f) { - ASSERT(h.SetInt32Handler(f, UpbMakeHandler(&ME::Handler))); - handler_data_val_ = kExpectedHandlerData; - } - - private: - void Handler(int32_t val) { - val_ = val; - seen_ = true; - } -}; - -class ValueTesterInt32BoolMethodNoHandlerData : public Int32ValueTesterBase { - public: - typedef ValueTesterInt32BoolMethodNoHandlerData ME; - void Register(upb::HandlersPtr h, upb::FieldDefPtr f) { - ASSERT(h.SetInt32Handler(f, UpbMakeHandler(&ME::Handler))); - handler_data_val_ = kExpectedHandlerData; - } - - private: - bool Handler(int32_t val) { - val_ = val; - seen_ = true; - return true; - } -}; - -class ValueTesterInt32VoidFunctionWithHandlerData - : public Int32ValueTesterBase { - public: - typedef ValueTesterInt32VoidFunctionWithHandlerData ME; - void Register(upb::HandlersPtr h, upb::FieldDefPtr f) { - ASSERT(h.SetInt32Handler( - f, UpbBind(&Handler, new int(kExpectedHandlerData)))); - } - - private: - static void Handler(ME* t, const int* hd, int32_t val) { - t->val_ = val; - t->handler_data_val_ = *hd; - t->seen_ = true; - } -}; - -class ValueTesterInt32BoolFunctionWithHandlerData - : public Int32ValueTesterBase { - public: - typedef ValueTesterInt32BoolFunctionWithHandlerData ME; - void Register(upb::HandlersPtr h, upb::FieldDefPtr f) { - ASSERT(h.SetInt32Handler( - f, UpbBind(&Handler, new int(kExpectedHandlerData)))); - } - - private: - static bool Handler(ME* t, const int* hd, int32_t val) { - t->val_ = val; - t->handler_data_val_ = *hd; - t->seen_ = true; - return true; - } -}; - -class ValueTesterInt32VoidMethodWithHandlerData : public Int32ValueTesterBase { - public: - typedef ValueTesterInt32VoidMethodWithHandlerData ME; - void Register(upb::HandlersPtr h, upb::FieldDefPtr f) { - ASSERT(h.SetInt32Handler( - f, UpbBind(&ME::Handler, new int(kExpectedHandlerData)))); - } - - private: - void Handler(const int* hd, int32_t val) { - val_ = val; - handler_data_val_ = *hd; - seen_ = true; - } -}; - -class ValueTesterInt32BoolMethodWithHandlerData : public Int32ValueTesterBase { - public: - typedef ValueTesterInt32BoolMethodWithHandlerData ME; - void Register(upb::HandlersPtr h, upb::FieldDefPtr f) { - ASSERT(h.SetInt32Handler( - f, UpbBind(&ME::Handler, new int(kExpectedHandlerData)))); - } - - private: - bool Handler(const int* hd, int32_t val) { - val_ = val; - handler_data_val_ = *hd; - seen_ = true; - return true; - } -}; - -template -void RegisterHandlers(const void* closure, upb::Handlers* h_ptr) { - T* tester = const_cast(static_cast(closure)); - upb::HandlersPtr h(h_ptr); - upb::FieldDefPtr f = h.message_def().FindFieldByNumber(T::kFieldNumber); - ASSERT(f); - tester->Register(h, f); -} - -template -void TestHandler() { - T tester; - upb::SymbolTable symtab; - upb::HandlerCache cache(&RegisterHandlers, &tester); - upb::MessageDefPtr md(upb_test_TestMessage_getmsgdef(symtab.ptr())); - ASSERT(md); - upb::FieldDefPtr f = md.FindFieldByNumber(T::kFieldNumber); - ASSERT(f); - - const upb::Handlers* h = cache.Get(md); - - upb::Sink sink(h, &tester); - tester.CallAndVerify(sink, f); -} - -class T1 {}; -class T2 {}; - -template -void DoNothingHandler(C* closure) { - UPB_UNUSED(closure); -} - -template -void DoNothingInt32Handler(C* closure, int32_t val) { - UPB_UNUSED(closure); - UPB_UNUSED(val); -} - -template -class DoNothingStartHandler { - public: - // We wrap these functions inside of a class for a somewhat annoying reason. - // UpbMakeHandler() is a macro, so we can't say - // UpbMakeHandler(DoNothingStartHandler) - // - // because otherwise the preprocessor gets confused at the comma and tries to - // make it two macro arguments. The usual solution doesn't work either: - // UpbMakeHandler((DoNothingStartHandler)) - // - // If we do that the macro expands correctly, but then it tries to pass that - // parenthesized expression as a template parameter, ie. Type<(F)>, which - // isn't legal C++ (Clang will compile it but complains with - // warning: address non-type template argument cannot be surrounded by - // parentheses - // - // This two-level thing allows us to effectively pass two template parameters, - // but without any commas: - // UpbMakeHandler(DoNothingStartHandler::Handler) - template - static R* Handler(C* closure) { - UPB_UNUSED(closure); - return NULL; - } - - template - static R* String(C* closure, size_t size_len) { - UPB_UNUSED(closure); - UPB_UNUSED(size_len); - return NULL; - } -}; - -template -void DoNothingStringBufHandler(C* closure, const char *buf, size_t len) { - UPB_UNUSED(closure); - UPB_UNUSED(buf); - UPB_UNUSED(len); -} - -template -void DoNothingEndMessageHandler(C* closure, upb_status *status) { - UPB_UNUSED(closure); - UPB_UNUSED(status); -} - -void RegisterMismatchedTypes(const void* closure, upb::Handlers* h_ptr) { - upb::HandlersPtr h(h_ptr); - UPB_UNUSED(closure); - - upb::MessageDefPtr md(h.message_def()); - ASSERT(md); - upb::FieldDefPtr i32 = md.FindFieldByName("i32"); - upb::FieldDefPtr r_i32 = md.FindFieldByName("r_i32"); - upb::FieldDefPtr str = md.FindFieldByName("str"); - upb::FieldDefPtr r_str = md.FindFieldByName("r_str"); - upb::FieldDefPtr msg = md.FindFieldByName("msg"); - upb::FieldDefPtr r_msg = md.FindFieldByName("r_msg"); - ASSERT(i32); - ASSERT(r_i32); - ASSERT(str); - ASSERT(r_str); - ASSERT(msg); - ASSERT(r_msg); - - // Establish T1 as the top-level closure type. - ASSERT(h.SetInt32Handler(i32, UpbMakeHandler(DoNothingInt32Handler))); - - // Now any other attempt to set another handler with T2 as the top-level - // closure should fail. But setting these same handlers with T1 as the - // top-level closure will succeed. - ASSERT(!h.SetStartMessageHandler(UpbMakeHandler(DoNothingHandler))); - ASSERT(h.SetStartMessageHandler(UpbMakeHandler(DoNothingHandler))); - - ASSERT( - !h.SetEndMessageHandler(UpbMakeHandler(DoNothingEndMessageHandler))); - ASSERT( - h.SetEndMessageHandler(UpbMakeHandler(DoNothingEndMessageHandler))); - - ASSERT(!h.SetStartStringHandler( - str, UpbMakeHandler(DoNothingStartHandler::String))); - ASSERT(h.SetStartStringHandler( - str, UpbMakeHandler(DoNothingStartHandler::String))); - - ASSERT(!h.SetEndStringHandler(str, UpbMakeHandler(DoNothingHandler))); - ASSERT(h.SetEndStringHandler(str, UpbMakeHandler(DoNothingHandler))); - - ASSERT(!h.SetStartSubMessageHandler( - msg, UpbMakeHandler(DoNothingStartHandler::Handler))); - ASSERT(h.SetStartSubMessageHandler( - msg, UpbMakeHandler(DoNothingStartHandler::Handler))); - - ASSERT( - !h.SetEndSubMessageHandler(msg, UpbMakeHandler(DoNothingHandler))); - ASSERT( - h.SetEndSubMessageHandler(msg, UpbMakeHandler(DoNothingHandler))); - - ASSERT(!h.SetStartSequenceHandler( - r_i32, UpbMakeHandler(DoNothingStartHandler::Handler))); - ASSERT(h.SetStartSequenceHandler( - r_i32, UpbMakeHandler(DoNothingStartHandler::Handler))); - - ASSERT(!h.SetEndSequenceHandler( - r_i32, UpbMakeHandler(DoNothingHandler))); - ASSERT(h.SetEndSequenceHandler( - r_i32, UpbMakeHandler(DoNothingHandler))); - - ASSERT(!h.SetStartSequenceHandler( - r_msg, UpbMakeHandler(DoNothingStartHandler::Handler))); - ASSERT(h.SetStartSequenceHandler( - r_msg, UpbMakeHandler(DoNothingStartHandler::Handler))); - - ASSERT(!h.SetEndSequenceHandler( - r_msg, UpbMakeHandler(DoNothingHandler))); - ASSERT(h.SetEndSequenceHandler( - r_msg, UpbMakeHandler(DoNothingHandler))); - - ASSERT(!h.SetStartSequenceHandler( - r_str, UpbMakeHandler(DoNothingStartHandler::Handler))); - ASSERT(h.SetStartSequenceHandler( - r_str, UpbMakeHandler(DoNothingStartHandler::Handler))); - - ASSERT(!h.SetEndSequenceHandler( - r_str, UpbMakeHandler(DoNothingHandler))); - ASSERT(h.SetEndSequenceHandler( - r_str, UpbMakeHandler(DoNothingHandler))); - - // By setting T1 as the return type for the Start* handlers we have - // established T1 as the type of the sequence and string frames. - // Setting callbacks that use T2 should fail, but T1 should succeed. - ASSERT( - !h.SetStringHandler(str, UpbMakeHandler(DoNothingStringBufHandler))); - ASSERT( - h.SetStringHandler(str, UpbMakeHandler(DoNothingStringBufHandler))); - - ASSERT(!h.SetInt32Handler(r_i32, UpbMakeHandler(DoNothingInt32Handler))); - ASSERT(h.SetInt32Handler(r_i32, UpbMakeHandler(DoNothingInt32Handler))); - - ASSERT(!h.SetStartSubMessageHandler( - r_msg, UpbMakeHandler(DoNothingStartHandler::Handler))); - ASSERT(h.SetStartSubMessageHandler( - r_msg, UpbMakeHandler(DoNothingStartHandler::Handler))); - - ASSERT(!h.SetEndSubMessageHandler(r_msg, - UpbMakeHandler(DoNothingHandler))); - ASSERT(h.SetEndSubMessageHandler(r_msg, - UpbMakeHandler(DoNothingHandler))); - - ASSERT(!h.SetStartStringHandler( - r_str, UpbMakeHandler(DoNothingStartHandler::String))); - ASSERT(h.SetStartStringHandler( - r_str, UpbMakeHandler(DoNothingStartHandler::String))); - - ASSERT( - !h.SetEndStringHandler(r_str, UpbMakeHandler(DoNothingHandler))); - ASSERT(h.SetEndStringHandler(r_str, UpbMakeHandler(DoNothingHandler))); - - ASSERT(!h.SetStringHandler(r_str, - UpbMakeHandler(DoNothingStringBufHandler))); - ASSERT(h.SetStringHandler(r_str, - UpbMakeHandler(DoNothingStringBufHandler))); -} - -void RegisterMismatchedTypes2(const void* closure, upb::Handlers* h_ptr) { - upb::HandlersPtr h(h_ptr); - UPB_UNUSED(closure); - - upb::MessageDefPtr md(h.message_def()); - ASSERT(md); - upb::FieldDefPtr i32 = md.FindFieldByName("i32"); - upb::FieldDefPtr r_i32 = md.FindFieldByName("r_i32"); - upb::FieldDefPtr str = md.FindFieldByName("str"); - upb::FieldDefPtr r_str = md.FindFieldByName("r_str"); - upb::FieldDefPtr msg = md.FindFieldByName("msg"); - upb::FieldDefPtr r_msg = md.FindFieldByName("r_msg"); - ASSERT(i32); - ASSERT(r_i32); - ASSERT(str); - ASSERT(r_str); - ASSERT(msg); - ASSERT(r_msg); - - // For our second test we do the same in reverse. We directly set the type of - // the frame and then observe failures at registering a Start* handler that - // returns a different type. - - // First establish the type of a sequence frame directly. - ASSERT(h.SetInt32Handler(r_i32, UpbMakeHandler(DoNothingInt32Handler))); - - // Now setting a StartSequence callback that returns a different type should - // fail. - ASSERT(!h.SetStartSequenceHandler( - r_i32, UpbMakeHandler(DoNothingStartHandler::Handler))); - ASSERT(h.SetStartSequenceHandler( - r_i32, UpbMakeHandler(DoNothingStartHandler::Handler))); - - // Establish a string frame directly. - ASSERT(h.SetStringHandler(r_str, - UpbMakeHandler(DoNothingStringBufHandler))); - - // Fail setting a StartString callback that returns a different type. - ASSERT(!h.SetStartStringHandler( - r_str, UpbMakeHandler(DoNothingStartHandler::String))); - ASSERT(h.SetStartStringHandler( - r_str, UpbMakeHandler(DoNothingStartHandler::String))); - - // The previous established T1 as the frame for the r_str sequence. - ASSERT(!h.SetStartSequenceHandler( - r_str, UpbMakeHandler(DoNothingStartHandler::Handler))); - ASSERT(h.SetStartSequenceHandler( - r_str, UpbMakeHandler(DoNothingStartHandler::Handler))); -} - -void TestMismatchedTypes() { - // First create a schema for our test. - upb::SymbolTable symtab; - upb::HandlerCache handler_cache(&RegisterMismatchedTypes, nullptr); - upb::HandlerCache handler_cache2(&RegisterMismatchedTypes2, nullptr); - const upb::MessageDefPtr md(upb_test_TestMessage_getmsgdef(symtab.ptr())); - - // Now test the type-checking in handler registration. - handler_cache.Get(md); - handler_cache2.Get(md); -} - -class IntIncrementer { - public: - explicit IntIncrementer(int* x) : x_(x) { (*x_)++; } - ~IntIncrementer() { (*x_)--; } - - static void Handler(void* closure, const IntIncrementer* incrementer, - int32_t x) { - UPB_UNUSED(closure); - UPB_UNUSED(incrementer); - UPB_UNUSED(x); - } - - private: - int* x_; -}; - -void RegisterIncrementor(const void* closure, upb::Handlers* h_ptr) { - const int* x = static_cast(closure); - upb::HandlersPtr h(h_ptr); - upb::FieldDefPtr f = h.message_def().FindFieldByName("i32"); - h.SetInt32Handler(f, UpbBind(&IntIncrementer::Handler, - new IntIncrementer(const_cast(x)))); -} - -void TestHandlerDataDestruction() { - int x = 0; - - { - upb::SymbolTable symtab; - upb::HandlerCache cache(&RegisterIncrementor, &x); - upb::MessageDefPtr md(upb_test_TestMessage_getmsgdef(symtab.ptr())); - cache.Get(md); - ASSERT(x == 1); - } - - ASSERT(x == 0); -} +// Must be last. +#include "upb/port_def.inc" void TestIteration() { upb::SymbolTable symtab; @@ -991,38 +115,6 @@ void TestDefault() { extern "C" { int run_tests() { - TestHandler(); - TestHandler(); - TestHandler(); - TestHandler(); - TestHandler(); - TestHandler(); - TestHandler(); - TestHandler(); - - TestHandler(); - TestHandler(); - TestHandler(); - TestHandler(); - TestHandler(); - TestHandler(); - TestHandler(); - TestHandler(); - - TestHandler(); - TestHandler(); - TestHandler(); - TestHandler(); - TestHandler(); - TestHandler(); - TestHandler(); - TestHandler(); - TestHandler(); - TestHandler(); - - TestMismatchedTypes(); - - TestHandlerDataDestruction(); TestIteration(); TestArena(); TestDefault(); diff --git a/upb/def.c b/upb/def.c index 30819bdd9e..59cf309910 100644 --- a/upb/def.c +++ b/upb/def.c @@ -40,7 +40,6 @@ struct upb_fielddef { uint32_t number_; uint16_t index_; uint16_t layout_index; - uint32_t selector_base; /* Used to index into a upb::Handlers table. */ bool is_extension_; bool lazy_; bool packed_; @@ -53,8 +52,6 @@ struct upb_msgdef { const upb_msglayout *layout; const upb_filedef *file; const char *full_name; - uint32_t selector_count; - uint32_t submsg_field_count; /* Tables for looking up fields by number and name. */ upb_inttable itof; @@ -184,30 +181,6 @@ int cmp_fields(const void *p1, const void *p2) { return field_rank(f1) - field_rank(f2); } -/* A few implementation details of handlers. We put these here to avoid - * a def -> handlers dependency. */ - -#define UPB_STATIC_SELECTOR_COUNT 3 /* Warning: also in upb/handlers.h. */ - -static uint32_t upb_handlers_selectorbaseoffset(const upb_fielddef *f) { - return upb_fielddef_isseq(f) ? 2 : 0; -} - -static uint32_t upb_handlers_selectorcount(const upb_fielddef *f) { - uint32_t ret = 1; - if (upb_fielddef_isseq(f)) ret += 2; /* STARTSEQ/ENDSEQ */ - if (upb_fielddef_isstring(f)) ret += 2; /* [STRING]/STARTSTR/ENDSTR */ - if (upb_fielddef_issubmsg(f)) { - /* ENDSUBMSG (STARTSUBMSG is at table beginning) */ - ret += 0; - if (upb_fielddef_lazy(f)) { - /* STARTSTR/ENDSTR/STRING (for lazy) */ - ret += 3; - } - } - return ret; -} - static void upb_status_setoom(upb_status *status) { upb_status_seterrmsg(status, "out of memory"); } @@ -389,10 +362,6 @@ const char *upb_fielddef_jsonname(const upb_fielddef *f) { return f->json_name; } -uint32_t upb_fielddef_selectorbase(const upb_fielddef *f) { - return f->selector_base; -} - const upb_filedef *upb_fielddef_file(const upb_fielddef *f) { return f->file; } @@ -555,14 +524,6 @@ upb_syntax_t upb_msgdef_syntax(const upb_msgdef *m) { return m->file->syntax; } -size_t upb_msgdef_selectorcount(const upb_msgdef *m) { - return m->selector_count; -} - -uint32_t upb_msgdef_submsgfieldcount(const upb_msgdef *m) { - return m->submsg_field_count; -} - const upb_fielddef *upb_msgdef_itof(const upb_msgdef *m, uint32_t i) { upb_value val; return upb_inttable_lookup32(&m->itof, i, &val) ? @@ -1052,13 +1013,21 @@ static void make_layout(symtab_addctx *ctx, const upb_msgdef *m) { upb_msg_field_iter it; upb_msg_oneof_iter oit; size_t hasbit; - size_t submsg_count = m->submsg_field_count; + size_t field_count = upb_msgdef_numfields(m); + size_t submsg_count = 0; const upb_msglayout **submsgs; upb_msglayout_field *fields; memset(l, 0, sizeof(*l) + sizeof(_upb_fasttable_entry)); - fields = symtab_alloc(ctx, upb_msgdef_numfields(m) * sizeof(*fields)); + /* Count sub-messages. */ + for (size_t i = 0; i < field_count; i++) { + if (upb_fielddef_issubmsg(&m->fields[i])) { + submsg_count++; + } + } + + fields = symtab_alloc(ctx, field_count * sizeof(*fields)); submsgs = symtab_alloc(ctx, submsg_count * sizeof(*submsgs)); l->field_count = upb_msgdef_numfields(m); @@ -1209,49 +1178,6 @@ static void make_layout(symtab_addctx *ctx, const upb_msgdef *m) { assign_layout_indices(m, fields); } -static void assign_msg_indices(symtab_addctx *ctx, upb_msgdef *m) { - /* Sort fields. upb internally relies on UPB_TYPE_MESSAGE fields having the - * lowest indexes, but we do not publicly guarantee this. */ - upb_msg_field_iter j; - int i; - uint32_t selector; - int n = upb_msgdef_numfields(m); - upb_fielddef **fields; - - if (n == 0) { - m->selector_count = UPB_STATIC_SELECTOR_COUNT; - m->submsg_field_count = 0; - return; - } - - fields = upb_gmalloc(n * sizeof(*fields)); - - m->submsg_field_count = 0; - for(i = 0, upb_msg_field_begin(&j, m); - !upb_msg_field_done(&j); - upb_msg_field_next(&j), i++) { - upb_fielddef *f = upb_msg_iter_field(&j); - UPB_ASSERT(f->msgdef == m); - if (upb_fielddef_issubmsg(f)) { - m->submsg_field_count++; - } - fields[i] = f; - } - - qsort(fields, n, sizeof(*fields), cmp_fields); - - selector = UPB_STATIC_SELECTOR_COUNT + m->submsg_field_count; - for (i = 0; i < n; i++) { - upb_fielddef *f = fields[i]; - f->index_ = i; - f->selector_base = selector + upb_handlers_selectorbaseoffset(f); - selector += upb_handlers_selectorcount(f); - } - m->selector_count = selector; - - upb_gfree(fields); -} - static char *strviewdup(symtab_addctx *ctx, upb_strview view) { return upb_strdup2(view.data, view.size, ctx->alloc); } @@ -1614,7 +1540,8 @@ static void create_fielddef( upb_value v, field_v, json_v; size_t json_size; - f = (upb_fielddef*)&m->fields[m->field_count++]; + f = (upb_fielddef*)&m->fields[m->field_count]; + f->index_ = m->field_count++; f->msgdef = m; f->is_extension_ = false; @@ -1849,7 +1776,6 @@ static void create_msgdef(symtab_addctx *ctx, const char *prefix, create_fielddef(ctx, m->full_name, m, fields[i]); } - assign_msg_indices(ctx, m); finalize_oneofs(ctx, m); assign_msg_wellknowntype(m); upb_inttable_compact2(&m->itof, ctx->alloc); diff --git a/upb/def.h b/upb/def.h index c9eb9ceb0d..50f2ad50ca 100644 --- a/upb/def.h +++ b/upb/def.h @@ -109,9 +109,6 @@ const upb_msgdef *upb_fielddef_msgsubdef(const upb_fielddef *f); const upb_enumdef *upb_fielddef_enumsubdef(const upb_fielddef *f); const upb_msglayout_field *upb_fielddef_layout(const upb_fielddef *f); -/* Internal only. */ -uint32_t upb_fielddef_selectorbase(const upb_fielddef *f); - /* upb_oneofdef ***************************************************************/ typedef upb_inttable_iter upb_oneof_iter; @@ -196,10 +193,6 @@ UPB_INLINE const upb_fielddef *upb_msgdef_ntofz(const upb_msgdef *m, return upb_msgdef_ntof(m, name, strlen(name)); } -/* Internal-only. */ -size_t upb_msgdef_selectorcount(const upb_msgdef *m); -uint32_t upb_msgdef_submsgfieldcount(const upb_msgdef *m); - /* Lookup of either field or oneof by name. Returns whether either was found. * If the return is true, then the found def will be set, and the non-found * one set to NULL. */ diff --git a/upb/handlers-inl.h b/upb/handlers-inl.h deleted file mode 100644 index 8f8634bfaa..0000000000 --- a/upb/handlers-inl.h +++ /dev/null @@ -1,923 +0,0 @@ -/* -** Inline definitions for handlers.h, which are particularly long and a bit -** tricky. -*/ - -#ifndef UPB_HANDLERS_INL_H_ -#define UPB_HANDLERS_INL_H_ - -#include -#include -#include "upb/handlers.h" - -#include "upb/port_def.inc" - -#ifdef __cplusplus - -/* Type detection and typedefs for integer types. - * For platforms where there are multiple 32-bit or 64-bit types, we need to be - * able to enumerate them so we can properly create overloads for all variants. - * - * If any platform existed where there were three integer types with the same - * size, this would have to become more complicated. For example, short, int, - * and long could all be 32-bits. Even more diabolically, short, int, long, - * and long long could all be 64 bits and still be standard-compliant. - * However, few platforms are this strange, and it's unlikely that upb will be - * used on the strangest ones. */ - -/* Can't count on stdint.h limits like INT32_MAX, because in C++ these are - * only defined when __STDC_LIMIT_MACROS are defined before the *first* include - * of stdint.h. We can't guarantee that someone else didn't include these first - * without defining __STDC_LIMIT_MACROS. */ -#define UPB_INT32_MAX 0x7fffffffLL -#define UPB_INT32_MIN (-UPB_INT32_MAX - 1) -#define UPB_INT64_MAX 0x7fffffffffffffffLL -#define UPB_INT64_MIN (-UPB_INT64_MAX - 1) - -#if INT_MAX == UPB_INT32_MAX && INT_MIN == UPB_INT32_MIN -#define UPB_INT_IS_32BITS 1 -#endif - -#if LONG_MAX == UPB_INT32_MAX && LONG_MIN == UPB_INT32_MIN -#define UPB_LONG_IS_32BITS 1 -#endif - -#if LONG_MAX == UPB_INT64_MAX && LONG_MIN == UPB_INT64_MIN -#define UPB_LONG_IS_64BITS 1 -#endif - -#if LLONG_MAX == UPB_INT64_MAX && LLONG_MIN == UPB_INT64_MIN -#define UPB_LLONG_IS_64BITS 1 -#endif - -/* We use macros instead of typedefs so we can undefine them later and avoid - * leaking them outside this header file. */ -#if UPB_INT_IS_32BITS -#define UPB_INT32_T int -#define UPB_UINT32_T unsigned int - -#if UPB_LONG_IS_32BITS -#define UPB_TWO_32BIT_TYPES 1 -#define UPB_INT32ALT_T long -#define UPB_UINT32ALT_T unsigned long -#endif /* UPB_LONG_IS_32BITS */ - -#elif UPB_LONG_IS_32BITS /* && !UPB_INT_IS_32BITS */ -#define UPB_INT32_T long -#define UPB_UINT32_T unsigned long -#endif /* UPB_INT_IS_32BITS */ - - -#if UPB_LONG_IS_64BITS -#define UPB_INT64_T long -#define UPB_UINT64_T unsigned long - -#if UPB_LLONG_IS_64BITS -#define UPB_TWO_64BIT_TYPES 1 -#define UPB_INT64ALT_T long long -#define UPB_UINT64ALT_T unsigned long long -#endif /* UPB_LLONG_IS_64BITS */ - -#elif UPB_LLONG_IS_64BITS /* && !UPB_LONG_IS_64BITS */ -#define UPB_INT64_T long long -#define UPB_UINT64_T unsigned long long -#endif /* UPB_LONG_IS_64BITS */ - -#undef UPB_INT32_MAX -#undef UPB_INT32_MIN -#undef UPB_INT64_MAX -#undef UPB_INT64_MIN -#undef UPB_INT_IS_32BITS -#undef UPB_LONG_IS_32BITS -#undef UPB_LONG_IS_64BITS -#undef UPB_LLONG_IS_64BITS - - -namespace upb { - -typedef void CleanupFunc(void *ptr); - -/* Template to remove "const" from "const T*" and just return "T*". - * - * We define a nonsense default because otherwise it will fail to instantiate as - * a function parameter type even in cases where we don't expect any caller to - * actually match the overload. */ -class CouldntRemoveConst {}; -template struct remove_constptr { typedef CouldntRemoveConst type; }; -template struct remove_constptr { typedef T *type; }; - -/* Template that we use below to remove a template specialization from - * consideration if it matches a specific type. */ -template struct disable_if_same { typedef void Type; }; -template struct disable_if_same {}; - -template void DeletePointer(void *p) { delete static_cast(p); } - -template -struct FirstUnlessVoidOrBool { - typedef T1 value; -}; - -template -struct FirstUnlessVoidOrBool { - typedef T2 value; -}; - -template -struct FirstUnlessVoidOrBool { - typedef T2 value; -}; - -template -struct is_same { - static bool value; -}; - -template -struct is_same { - static bool value; -}; - -template -bool is_same::value = false; - -template -bool is_same::value = true; - -/* FuncInfo *******************************************************************/ - -/* Info about the user's original, pre-wrapped function. */ -template -struct FuncInfo { - /* The type of the closure that the function takes (its first param). */ - typedef C Closure; - - /* The return type. */ - typedef R Return; -}; - -/* Func ***********************************************************************/ - -/* Func1, Func2, Func3: Template classes representing a function and its - * signature. - * - * Since the function is a template parameter, calling the function can be - * inlined at compile-time and does not require a function pointer at runtime. - * These functions are not bound to a handler data so have no data or cleanup - * handler. */ -struct UnboundFunc { - CleanupFunc *GetCleanup() { return nullptr; } - void *GetData() { return nullptr; } -}; - -template -struct Func1 : public UnboundFunc { - typedef R Return; - typedef I FuncInfo; - static R Call(P1 p1) { return F(p1); } -}; - -template -struct Func2 : public UnboundFunc { - typedef R Return; - typedef I FuncInfo; - static R Call(P1 p1, P2 p2) { return F(p1, p2); } -}; - -template -struct Func3 : public UnboundFunc { - typedef R Return; - typedef I FuncInfo; - static R Call(P1 p1, P2 p2, P3 p3) { return F(p1, p2, p3); } -}; - -template -struct Func4 : public UnboundFunc { - typedef R Return; - typedef I FuncInfo; - static R Call(P1 p1, P2 p2, P3 p3, P4 p4) { return F(p1, p2, p3, p4); } -}; - -template -struct Func5 : public UnboundFunc { - typedef R Return; - typedef I FuncInfo; - static R Call(P1 p1, P2 p2, P3 p3, P4 p4, P5 p5) { - return F(p1, p2, p3, p4, p5); - } -}; - -/* BoundFunc ******************************************************************/ - -/* BoundFunc2, BoundFunc3: Like Func2/Func3 except also contains a value that - * shall be bound to the function's second parameter. - * - * Note that the second parameter is a const pointer, but our stored bound value - * is non-const so we can free it when the handlers are destroyed. */ -template -struct BoundFunc { - typedef typename remove_constptr::type MutableP2; - explicit BoundFunc(MutableP2 data_) : data(data_) {} - CleanupFunc *GetCleanup() { return &DeletePointer; } - MutableP2 GetData() { return data; } - MutableP2 data; -}; - -template -struct BoundFunc2 : public BoundFunc { - typedef BoundFunc Base; - typedef I FuncInfo; - explicit BoundFunc2(typename Base::MutableP2 arg) : Base(arg) {} -}; - -template -struct BoundFunc3 : public BoundFunc { - typedef BoundFunc Base; - typedef I FuncInfo; - explicit BoundFunc3(typename Base::MutableP2 arg) : Base(arg) {} -}; - -template -struct BoundFunc4 : public BoundFunc { - typedef BoundFunc Base; - typedef I FuncInfo; - explicit BoundFunc4(typename Base::MutableP2 arg) : Base(arg) {} -}; - -template -struct BoundFunc5 : public BoundFunc { - typedef BoundFunc Base; - typedef I FuncInfo; - explicit BoundFunc5(typename Base::MutableP2 arg) : Base(arg) {} -}; - -/* FuncSig ********************************************************************/ - -/* FuncSig1, FuncSig2, FuncSig3: template classes reflecting a function - * *signature*, but without a specific function attached. - * - * These classes contain member functions that can be invoked with a - * specific function to return a Func/BoundFunc class. */ -template -struct FuncSig1 { - template - Func1 > GetFunc() { - return Func1 >(); - } -}; - -template -struct FuncSig2 { - template - Func2 > GetFunc() { - return Func2 >(); - } - - template - BoundFunc2 > GetFunc( - typename remove_constptr::type param2) { - return BoundFunc2 >(param2); - } -}; - -template -struct FuncSig3 { - template - Func3 > GetFunc() { - return Func3 >(); - } - - template - BoundFunc3 > GetFunc( - typename remove_constptr::type param2) { - return BoundFunc3 >(param2); - } -}; - -template -struct FuncSig4 { - template - Func4 > GetFunc() { - return Func4 >(); - } - - template - BoundFunc4 > GetFunc( - typename remove_constptr::type param2) { - return BoundFunc4 >(param2); - } -}; - -template -struct FuncSig5 { - template - Func5 > GetFunc() { - return Func5 >(); - } - - template - BoundFunc5 > GetFunc( - typename remove_constptr::type param2) { - return BoundFunc5 >(param2); - } -}; - -/* Overloaded template function that can construct the appropriate FuncSig* - * class given a function pointer by deducing the template parameters. */ -template -inline FuncSig1 MatchFunc(R (*f)(P1)) { - UPB_UNUSED(f); /* Only used for template parameter deduction. */ - return FuncSig1(); -} - -template -inline FuncSig2 MatchFunc(R (*f)(P1, P2)) { - UPB_UNUSED(f); /* Only used for template parameter deduction. */ - return FuncSig2(); -} - -template -inline FuncSig3 MatchFunc(R (*f)(P1, P2, P3)) { - UPB_UNUSED(f); /* Only used for template parameter deduction. */ - return FuncSig3(); -} - -template -inline FuncSig4 MatchFunc(R (*f)(P1, P2, P3, P4)) { - UPB_UNUSED(f); /* Only used for template parameter deduction. */ - return FuncSig4(); -} - -template -inline FuncSig5 MatchFunc(R (*f)(P1, P2, P3, P4, P5)) { - UPB_UNUSED(f); /* Only used for template parameter deduction. */ - return FuncSig5(); -} - -/* MethodSig ******************************************************************/ - -/* CallMethod*: a function template that calls a given method. */ -template -R CallMethod0(C *obj) { - return ((*obj).*F)(); -} - -template -R CallMethod1(C *obj, P1 arg1) { - return ((*obj).*F)(arg1); -} - -template -R CallMethod2(C *obj, P1 arg1, P2 arg2) { - return ((*obj).*F)(arg1, arg2); -} - -template -R CallMethod3(C *obj, P1 arg1, P2 arg2, P3 arg3) { - return ((*obj).*F)(arg1, arg2, arg3); -} - -template -R CallMethod4(C *obj, P1 arg1, P2 arg2, P3 arg3, P4 arg4) { - return ((*obj).*F)(arg1, arg2, arg3, arg4); -} - -/* MethodSig: like FuncSig, but for member functions. - * - * GetFunc() returns a normal FuncN object, so after calling GetFunc() no - * more logic is required to special-case methods. */ -template -struct MethodSig0 { - template - Func1, FuncInfo > GetFunc() { - return Func1, FuncInfo >(); - } -}; - -template -struct MethodSig1 { - template - Func2, FuncInfo > GetFunc() { - return Func2, FuncInfo >(); - } - - template - BoundFunc2, FuncInfo > GetFunc( - typename remove_constptr::type param1) { - return BoundFunc2, FuncInfo >( - param1); - } -}; - -template -struct MethodSig2 { - template - Func3, FuncInfo > - GetFunc() { - return Func3, - FuncInfo >(); - } - - template - BoundFunc3, FuncInfo > - GetFunc(typename remove_constptr::type param1) { - return BoundFunc3, - FuncInfo >(param1); - } -}; - -template -struct MethodSig3 { - template - Func4, FuncInfo > - GetFunc() { - return Func4, - FuncInfo >(); - } - - template - BoundFunc4, - FuncInfo > - GetFunc(typename remove_constptr::type param1) { - return BoundFunc4, - FuncInfo >(param1); - } -}; - -template -struct MethodSig4 { - template - Func5, - FuncInfo > - GetFunc() { - return Func5, - FuncInfo >(); - } - - template - BoundFunc5, - FuncInfo > - GetFunc(typename remove_constptr::type param1) { - return BoundFunc5, FuncInfo >( - param1); - } -}; - -template -inline MethodSig0 MatchFunc(R (C::*f)()) { - UPB_UNUSED(f); /* Only used for template parameter deduction. */ - return MethodSig0(); -} - -template -inline MethodSig1 MatchFunc(R (C::*f)(P1)) { - UPB_UNUSED(f); /* Only used for template parameter deduction. */ - return MethodSig1(); -} - -template -inline MethodSig2 MatchFunc(R (C::*f)(P1, P2)) { - UPB_UNUSED(f); /* Only used for template parameter deduction. */ - return MethodSig2(); -} - -template -inline MethodSig3 MatchFunc(R (C::*f)(P1, P2, P3)) { - UPB_UNUSED(f); /* Only used for template parameter deduction. */ - return MethodSig3(); -} - -template -inline MethodSig4 MatchFunc(R (C::*f)(P1, P2, P3, P4)) { - UPB_UNUSED(f); /* Only used for template parameter deduction. */ - return MethodSig4(); -} - -/* MaybeWrapReturn ************************************************************/ - -/* Template class that attempts to wrap the return value of the function so it - * matches the expected type. There are two main adjustments it may make: - * - * 1. If the function returns void, make it return the expected type and with - * a value that always indicates success. - * 2. If the function returns bool, make it return the expected type with a - * value that indicates success or failure. - * - * The "expected type" for return is: - * 1. void* for start handlers. If the closure parameter has a different type - * we will cast it to void* for the return in the success case. - * 2. size_t for string buffer handlers. - * 3. bool for everything else. */ - -/* Template parameters are FuncN type and desired return type. */ -template -struct MaybeWrapReturn; - -/* If the return type matches, return the given function unwrapped. */ -template -struct MaybeWrapReturn { - typedef F Func; -}; - -/* Function wrapper that munges the return value from void to (bool)true. */ -template -bool ReturnTrue2(P1 p1, P2 p2) { - F(p1, p2); - return true; -} - -template -bool ReturnTrue3(P1 p1, P2 p2, P3 p3) { - F(p1, p2, p3); - return true; -} - -/* Function wrapper that munges the return value from void to (void*)arg1 */ -template -void *ReturnClosure2(P1 p1, P2 p2) { - F(p1, p2); - return p1; -} - -template -void *ReturnClosure3(P1 p1, P2 p2, P3 p3) { - F(p1, p2, p3); - return p1; -} - -/* Function wrapper that munges the return value from R to void*. */ -template -void *CastReturnToVoidPtr2(P1 p1, P2 p2) { - return F(p1, p2); -} - -template -void *CastReturnToVoidPtr3(P1 p1, P2 p2, P3 p3) { - return F(p1, p2, p3); -} - -/* Function wrapper that munges the return value from bool to void*. */ -template -void *ReturnClosureOrBreak2(P1 p1, P2 p2) { - return F(p1, p2) ? p1 : UPB_BREAK; -} - -template -void *ReturnClosureOrBreak3(P1 p1, P2 p2, P3 p3) { - return F(p1, p2, p3) ? p1 : UPB_BREAK; -} - -/* For the string callback, which takes five params, returns the size param. */ -template -size_t ReturnStringLen(P1 p1, P2 p2, const char *p3, size_t p4, - const upb_bufhandle *p5) { - F(p1, p2, p3, p4, p5); - return p4; -} - -/* For the string callback, which takes five params, returns the size param or - * zero. */ -template -size_t ReturnNOr0(P1 p1, P2 p2, const char *p3, size_t p4, - const upb_bufhandle *p5) { - return F(p1, p2, p3, p4, p5) ? p4 : 0; -} - -/* If we have a function returning void but want a function returning bool, wrap - * it in a function that returns true. */ -template -struct MaybeWrapReturn, bool> { - typedef Func2, I> Func; -}; - -template -struct MaybeWrapReturn, bool> { - typedef Func3, I> Func; -}; - -/* If our function returns void but we want one returning void*, wrap it in a - * function that returns the first argument. */ -template -struct MaybeWrapReturn, void *> { - typedef Func2, I> Func; -}; - -template -struct MaybeWrapReturn, void *> { - typedef Func3, I> Func; -}; - -/* If our function returns R* but we want one returning void*, wrap it in a - * function that casts to void*. */ -template -struct MaybeWrapReturn, void *, - typename disable_if_same::Type> { - typedef Func2, I> Func; -}; - -template -struct MaybeWrapReturn, void *, - typename disable_if_same::Type> { - typedef Func3, I> - Func; -}; - -/* If our function returns bool but we want one returning void*, wrap it in a - * function that returns either the first param or UPB_BREAK. */ -template -struct MaybeWrapReturn, void *> { - typedef Func2, I> Func; -}; - -template -struct MaybeWrapReturn, void *> { - typedef Func3, I> - Func; -}; - -/* If our function returns void but we want one returning size_t, wrap it in a - * function that returns the size argument. */ -template -struct MaybeWrapReturn< - Func5, - size_t> { - typedef Func5, I> Func; -}; - -/* If our function returns bool but we want one returning size_t, wrap it in a - * function that returns either 0 or the buf size. */ -template -struct MaybeWrapReturn< - Func5, - size_t> { - typedef Func5, I> Func; -}; - -/* ConvertParams **************************************************************/ - -/* Template class that converts the function parameters if necessary, and - * ignores the HandlerData parameter if appropriate. - * - * Template parameter is the are FuncN function type. */ -template -struct ConvertParams; - -/* Function that discards the handler data parameter. */ -template -R IgnoreHandlerData2(void *p1, const void *hd) { - UPB_UNUSED(hd); - return F(static_cast(p1)); -} - -template -R IgnoreHandlerData3(void *p1, const void *hd, P2Wrapper p2) { - UPB_UNUSED(hd); - return F(static_cast(p1), p2); -} - -template -R IgnoreHandlerData4(void *p1, const void *hd, P2 p2, P3 p3) { - UPB_UNUSED(hd); - return F(static_cast(p1), p2, p3); -} - -template -R IgnoreHandlerData5(void *p1, const void *hd, P2 p2, P3 p3, P4 p4) { - UPB_UNUSED(hd); - return F(static_cast(p1), p2, p3, p4); -} - -template -R IgnoreHandlerDataIgnoreHandle(void *p1, const void *hd, const char *p2, - size_t p3, const upb_bufhandle *handle) { - UPB_UNUSED(hd); - UPB_UNUSED(handle); - return F(static_cast(p1), p2, p3); -} - -/* Function that casts the handler data parameter. */ -template -R CastHandlerData2(void *c, const void *hd) { - return F(static_cast(c), static_cast(hd)); -} - -template -R CastHandlerData3(void *c, const void *hd, P3Wrapper p3) { - return F(static_cast(c), static_cast(hd), p3); -} - -template -R CastHandlerData5(void *c, const void *hd, P3 p3, P4 p4, P5 p5) { - return F(static_cast(c), static_cast(hd), p3, p4, p5); -} - -template -R CastHandlerDataIgnoreHandle(void *c, const void *hd, const char *p3, - size_t p4, const upb_bufhandle *handle) { - UPB_UNUSED(handle); - return F(static_cast(c), static_cast(hd), p3, p4); -} - -/* For unbound functions, ignore the handler data. */ -template -struct ConvertParams, T> { - typedef Func2, I> Func; -}; - -template -struct ConvertParams, - R2 (*)(P1_2, P2_2, P3_2)> { - typedef Func3, I> Func; -}; - -/* For StringBuffer only; this ignores both the handler data and the - * upb_bufhandle. */ -template -struct ConvertParams, T> { - typedef Func5, - I> Func; -}; - -template -struct ConvertParams, T> { - typedef Func5, I> Func; -}; - -/* For bound functions, cast the handler data. */ -template -struct ConvertParams, T> { - typedef Func2, I> - Func; -}; - -template -struct ConvertParams, - R2 (*)(P1_2, P2_2, P3_2)> { - typedef Func3, I> Func; -}; - -/* For StringBuffer only; this ignores the upb_bufhandle. */ -template -struct ConvertParams, T> { - typedef Func5, I> - Func; -}; - -template -struct ConvertParams, T> { - typedef Func5, I> Func; -}; - -/* utype/ltype are upper/lower-case, ctype is canonical C type, vtype is - * variant C type. */ -#define TYPE_METHODS(utype, ltype, ctype, vtype) \ - template <> \ - struct CanonicalType { \ - typedef ctype Type; \ - }; \ - template <> \ - inline bool HandlersPtr::SetValueHandler( \ - FieldDefPtr f, const HandlersPtr::utype##Handler &handler) { \ - handler.AddCleanup(ptr()); \ - return upb_handlers_set##ltype(ptr(), f.ptr(), handler.handler(), \ - &handler.attr()); \ - } - -TYPE_METHODS(Double, double, double, double) -TYPE_METHODS(Float, float, float, float) -TYPE_METHODS(UInt64, uint64, uint64_t, UPB_UINT64_T) -TYPE_METHODS(UInt32, uint32, uint32_t, UPB_UINT32_T) -TYPE_METHODS(Int64, int64, int64_t, UPB_INT64_T) -TYPE_METHODS(Int32, int32, int32_t, UPB_INT32_T) -TYPE_METHODS(Bool, bool, bool, bool) - -#ifdef UPB_TWO_32BIT_TYPES -TYPE_METHODS(Int32, int32, int32_t, UPB_INT32ALT_T) -TYPE_METHODS(UInt32, uint32, uint32_t, UPB_UINT32ALT_T) -#endif - -#ifdef UPB_TWO_64BIT_TYPES -TYPE_METHODS(Int64, int64, int64_t, UPB_INT64ALT_T) -TYPE_METHODS(UInt64, uint64, uint64_t, UPB_UINT64ALT_T) -#endif -#undef TYPE_METHODS - -template <> struct CanonicalType { - typedef Status* Type; -}; - -template struct ReturnOf; - -template -struct ReturnOf { - typedef R Return; -}; - -template -struct ReturnOf { - typedef R Return; -}; - -template -struct ReturnOf { - typedef R Return; -}; - -template -struct ReturnOf { - typedef R Return; -}; - - -template -template -inline Handler::Handler(F func) - : registered_(false), - cleanup_data_(func.GetData()), - cleanup_func_(func.GetCleanup()) { - attr_.handler_data = func.GetData(); - typedef typename ReturnOf::Return Return; - typedef typename ConvertParams::Func ConvertedParamsFunc; - typedef typename MaybeWrapReturn::Func - ReturnWrappedFunc; - handler_ = ReturnWrappedFunc().Call; - - /* Set attributes based on what templates can statically tell us about the - * user's function. */ - - /* If the original function returns void, then we know that we wrapped it to - * always return ok. */ - bool always_ok = is_same::value; - attr_.alwaysok = always_ok; - - /* Closure parameter and return type. */ - attr_.closure_type = UniquePtrForType(); - - /* We use the closure type (from the first parameter) if the return type is - * void or bool, since these are the two cases we wrap to return the closure's - * type anyway. - * - * This is all nonsense for non START* handlers, but it doesn't matter because - * in that case the value will be ignored. */ - typedef typename FirstUnlessVoidOrBool::value - EffectiveReturn; - attr_.return_closure_type = UniquePtrForType(); -} - -template -inline void Handler::AddCleanup(upb_handlers* h) const { - UPB_ASSERT(!registered_); - registered_ = true; - if (cleanup_func_) { - bool ok = upb_handlers_addcleanup(h, cleanup_data_, cleanup_func_); - UPB_ASSERT(ok); - } -} - -} /* namespace upb */ - -#endif /* __cplusplus */ - - -#undef UPB_TWO_32BIT_TYPES -#undef UPB_TWO_64BIT_TYPES -#undef UPB_INT32_T -#undef UPB_UINT32_T -#undef UPB_INT32ALT_T -#undef UPB_UINT32ALT_T -#undef UPB_INT64_T -#undef UPB_UINT64_T -#undef UPB_INT64ALT_T -#undef UPB_UINT64ALT_T - -#include "upb/port_undef.inc" - -#endif /* UPB_HANDLERS_INL_H_ */ diff --git a/upb/handlers.c b/upb/handlers.c deleted file mode 100644 index 4168a4fb09..0000000000 --- a/upb/handlers.c +++ /dev/null @@ -1,545 +0,0 @@ -/* -** TODO(haberman): it's unclear whether a lot of the consistency checks should -** UPB_ASSERT() or return false. -*/ - -#include "upb/handlers.h" - -#include - -#include "upb/sink.h" - -#include "upb/port_def.inc" - -struct upb_handlers { - upb_handlercache *cache; - const upb_msgdef *msg; - const upb_handlers **sub; - const void *top_closure_type; - upb_handlers_tabent table[1]; /* Dynamically-sized field handler array. */ -}; - -static void *upb_calloc(upb_arena *arena, size_t size) { - void *mem = upb_malloc(upb_arena_alloc(arena), size); - if (mem) { - memset(mem, 0, size); - } - return mem; -} - -/* Defined for the sole purpose of having a unique pointer value for - * UPB_NO_CLOSURE. */ -char _upb_noclosure; - -/* Given a selector for a STARTSUBMSG handler, resolves to a pointer to the - * subhandlers for this submessage field. */ -#define SUBH(h, selector) (h->sub[selector]) - -/* The selector for a submessage field is the field index. */ -#define SUBH_F(h, f) SUBH(h, upb_fielddef_index(f)) - -static int32_t trygetsel(upb_handlers *h, const upb_fielddef *f, - upb_handlertype_t type) { - upb_selector_t sel; - bool ok; - - ok = upb_handlers_getselector(f, type, &sel); - - UPB_ASSERT(upb_handlers_msgdef(h) == upb_fielddef_containingtype(f)); - UPB_ASSERT(ok); - - return sel; -} - -static upb_selector_t handlers_getsel(upb_handlers *h, const upb_fielddef *f, - upb_handlertype_t type) { - int32_t sel = trygetsel(h, f, type); - UPB_ASSERT(sel >= 0); - return sel; -} - -static const void **returntype(upb_handlers *h, const upb_fielddef *f, - upb_handlertype_t type) { - return &h->table[handlers_getsel(h, f, type)].attr.return_closure_type; -} - -static bool doset(upb_handlers *h, int32_t sel, const upb_fielddef *f, - upb_handlertype_t type, upb_func *func, - const upb_handlerattr *attr) { - upb_handlerattr set_attr = UPB_HANDLERATTR_INIT; - const void *closure_type; - const void **context_closure_type; - - UPB_ASSERT(!h->table[sel].func); - - if (attr) { - set_attr = *attr; - } - - /* Check that the given closure type matches the closure type that has been - * established for this context (if any). */ - closure_type = set_attr.closure_type; - - if (type == UPB_HANDLER_STRING) { - context_closure_type = returntype(h, f, UPB_HANDLER_STARTSTR); - } else if (f && upb_fielddef_isseq(f) && - type != UPB_HANDLER_STARTSEQ && - type != UPB_HANDLER_ENDSEQ) { - context_closure_type = returntype(h, f, UPB_HANDLER_STARTSEQ); - } else { - context_closure_type = &h->top_closure_type; - } - - if (closure_type && *context_closure_type && - closure_type != *context_closure_type) { - return false; - } - - if (closure_type) - *context_closure_type = closure_type; - - /* If this is a STARTSEQ or STARTSTR handler, check that the returned pointer - * matches any pre-existing expectations about what type is expected. */ - if (type == UPB_HANDLER_STARTSEQ || type == UPB_HANDLER_STARTSTR) { - const void *return_type = set_attr.return_closure_type; - const void *table_return_type = h->table[sel].attr.return_closure_type; - if (return_type && table_return_type && return_type != table_return_type) { - return false; - } - - if (table_return_type && !return_type) { - set_attr.return_closure_type = table_return_type; - } - } - - h->table[sel].func = (upb_func*)func; - h->table[sel].attr = set_attr; - return true; -} - -/* Returns the effective closure type for this handler (which will propagate - * from outer frames if this frame has no START* handler). Not implemented for - * UPB_HANDLER_STRING at the moment since this is not needed. Returns NULL is - * the effective closure type is unspecified (either no handler was registered - * to specify it or the handler that was registered did not specify the closure - * type). */ -const void *effective_closure_type(upb_handlers *h, const upb_fielddef *f, - upb_handlertype_t type) { - const void *ret; - upb_selector_t sel; - - UPB_ASSERT(type != UPB_HANDLER_STRING); - ret = h->top_closure_type; - - if (upb_fielddef_isseq(f) && - type != UPB_HANDLER_STARTSEQ && - type != UPB_HANDLER_ENDSEQ && - h->table[sel = handlers_getsel(h, f, UPB_HANDLER_STARTSEQ)].func) { - ret = h->table[sel].attr.return_closure_type; - } - - if (type == UPB_HANDLER_STRING && - h->table[sel = handlers_getsel(h, f, UPB_HANDLER_STARTSTR)].func) { - ret = h->table[sel].attr.return_closure_type; - } - - /* The effective type of the submessage; not used yet. - * if (type == SUBMESSAGE && - * h->table[sel = handlers_getsel(h, f, UPB_HANDLER_STARTSUBMSG)].func) { - * ret = h->table[sel].attr.return_closure_type; - * } */ - - return ret; -} - -static upb_handlers *upb_handlers_new(const upb_msgdef *md, - upb_handlercache *cache, - upb_arena *arena) { - int extra; - upb_handlers *h; - - extra = - (int)(sizeof(upb_handlers_tabent) * (upb_msgdef_selectorcount(md) - 1)); - h = upb_calloc(arena, sizeof(*h) + extra); - if (!h) return NULL; - - h->cache = cache; - h->msg = md; - - if (upb_msgdef_submsgfieldcount(md) > 0) { - size_t bytes = upb_msgdef_submsgfieldcount(md) * sizeof(*h->sub); - h->sub = upb_calloc(arena, bytes); - if (!h->sub) return NULL; - } else { - h->sub = 0; - } - - /* calloc() above initialized all handlers to NULL. */ - return h; -} - -/* Public interface ***********************************************************/ - -#define SETTER(name, handlerctype, handlertype) \ - bool upb_handlers_set##name(upb_handlers *h, const upb_fielddef *f, \ - handlerctype func, \ - const upb_handlerattr *attr) { \ - int32_t sel = trygetsel(h, f, handlertype); \ - return doset(h, sel, f, handlertype, (upb_func *)func, attr); \ - } - -SETTER(int32, upb_int32_handlerfunc*, UPB_HANDLER_INT32) -SETTER(int64, upb_int64_handlerfunc*, UPB_HANDLER_INT64) -SETTER(uint32, upb_uint32_handlerfunc*, UPB_HANDLER_UINT32) -SETTER(uint64, upb_uint64_handlerfunc*, UPB_HANDLER_UINT64) -SETTER(float, upb_float_handlerfunc*, UPB_HANDLER_FLOAT) -SETTER(double, upb_double_handlerfunc*, UPB_HANDLER_DOUBLE) -SETTER(bool, upb_bool_handlerfunc*, UPB_HANDLER_BOOL) -SETTER(startstr, upb_startstr_handlerfunc*, UPB_HANDLER_STARTSTR) -SETTER(string, upb_string_handlerfunc*, UPB_HANDLER_STRING) -SETTER(endstr, upb_endfield_handlerfunc*, UPB_HANDLER_ENDSTR) -SETTER(startseq, upb_startfield_handlerfunc*, UPB_HANDLER_STARTSEQ) -SETTER(startsubmsg, upb_startfield_handlerfunc*, UPB_HANDLER_STARTSUBMSG) -SETTER(endsubmsg, upb_endfield_handlerfunc*, UPB_HANDLER_ENDSUBMSG) -SETTER(endseq, upb_endfield_handlerfunc*, UPB_HANDLER_ENDSEQ) - -#undef SETTER - -bool upb_handlers_setunknown(upb_handlers *h, upb_unknown_handlerfunc *func, - const upb_handlerattr *attr) { - return doset(h, UPB_UNKNOWN_SELECTOR, NULL, UPB_HANDLER_INT32, - (upb_func *)func, attr); -} - -bool upb_handlers_setstartmsg(upb_handlers *h, upb_startmsg_handlerfunc *func, - const upb_handlerattr *attr) { - return doset(h, UPB_STARTMSG_SELECTOR, NULL, UPB_HANDLER_INT32, - (upb_func *)func, attr); -} - -bool upb_handlers_setendmsg(upb_handlers *h, upb_endmsg_handlerfunc *func, - const upb_handlerattr *attr) { - return doset(h, UPB_ENDMSG_SELECTOR, NULL, UPB_HANDLER_INT32, - (upb_func *)func, attr); -} - -bool upb_handlers_setsubhandlers(upb_handlers *h, const upb_fielddef *f, - const upb_handlers *sub) { - UPB_ASSERT(sub); - UPB_ASSERT(upb_fielddef_issubmsg(f)); - if (SUBH_F(h, f)) return false; /* Can't reset. */ - if (upb_handlers_msgdef(sub) != upb_fielddef_msgsubdef(f)) { - return false; - } - SUBH_F(h, f) = sub; - return true; -} - -const upb_handlers *upb_handlers_getsubhandlers(const upb_handlers *h, - const upb_fielddef *f) { - UPB_ASSERT(upb_fielddef_issubmsg(f)); - return SUBH_F(h, f); -} - -upb_func *upb_handlers_gethandler(const upb_handlers *h, upb_selector_t s, - const void **handler_data) { - upb_func *ret = (upb_func *)h->table[s].func; - if (ret && handler_data) { - *handler_data = h->table[s].attr.handler_data; - } - return ret; -} - -bool upb_handlers_getattr(const upb_handlers *h, upb_selector_t sel, - upb_handlerattr *attr) { - if (!upb_handlers_gethandler(h, sel, NULL)) - return false; - *attr = h->table[sel].attr; - return true; -} - -const upb_handlers *upb_handlers_getsubhandlers_sel(const upb_handlers *h, - upb_selector_t sel) { - /* STARTSUBMSG selector in sel is the field's selector base. */ - return SUBH(h, sel - UPB_STATIC_SELECTOR_COUNT); -} - -const upb_msgdef *upb_handlers_msgdef(const upb_handlers *h) { return h->msg; } - -bool upb_handlers_addcleanup(upb_handlers *h, void *p, upb_handlerfree *func) { - return upb_handlercache_addcleanup(h->cache, p, func); -} - -upb_handlertype_t upb_handlers_getprimitivehandlertype(const upb_fielddef *f) { - switch (upb_fielddef_type(f)) { - case UPB_TYPE_INT32: - case UPB_TYPE_ENUM: return UPB_HANDLER_INT32; - case UPB_TYPE_INT64: return UPB_HANDLER_INT64; - case UPB_TYPE_UINT32: return UPB_HANDLER_UINT32; - case UPB_TYPE_UINT64: return UPB_HANDLER_UINT64; - case UPB_TYPE_FLOAT: return UPB_HANDLER_FLOAT; - case UPB_TYPE_DOUBLE: return UPB_HANDLER_DOUBLE; - case UPB_TYPE_BOOL: return UPB_HANDLER_BOOL; - default: UPB_ASSERT(false); return -1; /* Invalid input. */ - } -} - -bool upb_handlers_getselector(const upb_fielddef *f, upb_handlertype_t type, - upb_selector_t *s) { - uint32_t selector_base = upb_fielddef_selectorbase(f); - switch (type) { - case UPB_HANDLER_INT32: - case UPB_HANDLER_INT64: - case UPB_HANDLER_UINT32: - case UPB_HANDLER_UINT64: - case UPB_HANDLER_FLOAT: - case UPB_HANDLER_DOUBLE: - case UPB_HANDLER_BOOL: - if (!upb_fielddef_isprimitive(f) || - upb_handlers_getprimitivehandlertype(f) != type) - return false; - *s = selector_base; - break; - case UPB_HANDLER_STRING: - if (upb_fielddef_isstring(f)) { - *s = selector_base; - } else if (upb_fielddef_lazy(f)) { - *s = selector_base + 3; - } else { - return false; - } - break; - case UPB_HANDLER_STARTSTR: - if (upb_fielddef_isstring(f) || upb_fielddef_lazy(f)) { - *s = selector_base + 1; - } else { - return false; - } - break; - case UPB_HANDLER_ENDSTR: - if (upb_fielddef_isstring(f) || upb_fielddef_lazy(f)) { - *s = selector_base + 2; - } else { - return false; - } - break; - case UPB_HANDLER_STARTSEQ: - if (!upb_fielddef_isseq(f)) return false; - *s = selector_base - 2; - break; - case UPB_HANDLER_ENDSEQ: - if (!upb_fielddef_isseq(f)) return false; - *s = selector_base - 1; - break; - case UPB_HANDLER_STARTSUBMSG: - if (!upb_fielddef_issubmsg(f)) return false; - /* Selectors for STARTSUBMSG are at the beginning of the table so that the - * selector can also be used as an index into the "sub" array of - * subhandlers. The indexes for the two into these two tables are the - * same, except that in the handler table the static selectors come first. */ - *s = upb_fielddef_index(f) + UPB_STATIC_SELECTOR_COUNT; - break; - case UPB_HANDLER_ENDSUBMSG: - if (!upb_fielddef_issubmsg(f)) return false; - *s = selector_base; - break; - } - UPB_ASSERT((size_t)*s < upb_msgdef_selectorcount(upb_fielddef_containingtype(f))); - return true; -} - -/* upb_handlercache ***********************************************************/ - -struct upb_handlercache { - upb_arena *arena; - upb_inttable tab; /* maps upb_msgdef* -> upb_handlers*. */ - upb_handlers_callback *callback; - const void *closure; -}; - -const upb_handlers *upb_handlercache_get(upb_handlercache *c, - const upb_msgdef *md) { - int i, n; - upb_value v; - upb_handlers *h; - - if (upb_inttable_lookupptr(&c->tab, md, &v)) { - return upb_value_getptr(v); - } - - h = upb_handlers_new(md, c, c->arena); - v = upb_value_ptr(h); - - if (!h) return NULL; - if (!upb_inttable_insertptr(&c->tab, md, v)) return NULL; - - c->callback(c->closure, h); - - /* For each submessage field, get or create a handlers object and set it as - * the subhandlers. */ - n = upb_msgdef_fieldcount(md); - for (i = 0; i < n; i++) { - const upb_fielddef *f = upb_msgdef_field(md, i); - - if (upb_fielddef_issubmsg(f)) { - const upb_msgdef *subdef = upb_fielddef_msgsubdef(f); - const upb_handlers *sub_mh = upb_handlercache_get(c, subdef); - - if (!sub_mh) return NULL; - - upb_handlers_setsubhandlers(h, f, sub_mh); - } - } - - return h; -} - - -upb_handlercache *upb_handlercache_new(upb_handlers_callback *callback, - const void *closure) { - upb_handlercache *cache = upb_gmalloc(sizeof(*cache)); - - if (!cache) return NULL; - - cache->arena = upb_arena_new(); - - cache->callback = callback; - cache->closure = closure; - - if (!upb_inttable_init(&cache->tab, UPB_CTYPE_PTR)) goto oom; - - return cache; - -oom: - upb_gfree(cache); - return NULL; -} - -void upb_handlercache_free(upb_handlercache *cache) { - upb_inttable_uninit(&cache->tab); - upb_arena_free(cache->arena); - upb_gfree(cache); -} - -bool upb_handlercache_addcleanup(upb_handlercache *c, void *p, - upb_handlerfree *func) { - return upb_arena_addcleanup(c->arena, p, func); -} - -/* upb_byteshandler ***********************************************************/ - -bool upb_byteshandler_setstartstr(upb_byteshandler *h, - upb_startstr_handlerfunc *func, void *d) { - h->table[UPB_STARTSTR_SELECTOR].func = (upb_func*)func; - h->table[UPB_STARTSTR_SELECTOR].attr.handler_data = d; - return true; -} - -bool upb_byteshandler_setstring(upb_byteshandler *h, - upb_string_handlerfunc *func, void *d) { - h->table[UPB_STRING_SELECTOR].func = (upb_func*)func; - h->table[UPB_STRING_SELECTOR].attr.handler_data = d; - return true; -} - -bool upb_byteshandler_setendstr(upb_byteshandler *h, - upb_endfield_handlerfunc *func, void *d) { - h->table[UPB_ENDSTR_SELECTOR].func = (upb_func*)func; - h->table[UPB_ENDSTR_SELECTOR].attr.handler_data = d; - return true; -} - -/** Handlers for upb_msg ******************************************************/ - -typedef struct { - size_t offset; - int32_t hasbit; -} upb_msg_handlerdata; - -/* Fallback implementation if the handler is not specialized by the producer. */ -#define MSG_WRITER(type, ctype) \ - bool upb_msg_set ## type (void *c, const void *hd, ctype val) { \ - uint8_t *m = c; \ - const upb_msg_handlerdata *d = hd; \ - if (d->hasbit > 0) \ - *(uint8_t*)&m[d->hasbit / 8] |= 1 << (d->hasbit % 8); \ - *(ctype*)&m[d->offset] = val; \ - return true; \ - } \ - -MSG_WRITER(double, double) -MSG_WRITER(float, float) -MSG_WRITER(int32, int32_t) -MSG_WRITER(int64, int64_t) -MSG_WRITER(uint32, uint32_t) -MSG_WRITER(uint64, uint64_t) -MSG_WRITER(bool, bool) - -bool upb_msg_setscalarhandler(upb_handlers *h, const upb_fielddef *f, - size_t offset, int32_t hasbit) { - upb_handlerattr attr = UPB_HANDLERATTR_INIT; - bool ok; - - upb_msg_handlerdata *d = upb_gmalloc(sizeof(*d)); - if (!d) return false; - d->offset = offset; - d->hasbit = hasbit; - - attr.handler_data = d; - attr.alwaysok = true; - upb_handlers_addcleanup(h, d, upb_gfree); - -#define TYPE(u, l) \ - case UPB_TYPE_##u: \ - ok = upb_handlers_set##l(h, f, upb_msg_set##l, &attr); break; - - ok = false; - - switch (upb_fielddef_type(f)) { - TYPE(INT64, int64); - TYPE(INT32, int32); - TYPE(ENUM, int32); - TYPE(UINT64, uint64); - TYPE(UINT32, uint32); - TYPE(DOUBLE, double); - TYPE(FLOAT, float); - TYPE(BOOL, bool); - default: UPB_ASSERT(false); break; - } -#undef TYPE - - return ok; -} - -bool upb_msg_getscalarhandlerdata(const upb_handlers *h, - upb_selector_t s, - upb_fieldtype_t *type, - size_t *offset, - int32_t *hasbit) { - const upb_msg_handlerdata *d; - const void *p; - upb_func *f = upb_handlers_gethandler(h, s, &p); - - if ((upb_int64_handlerfunc*)f == upb_msg_setint64) { - *type = UPB_TYPE_INT64; - } else if ((upb_int32_handlerfunc*)f == upb_msg_setint32) { - *type = UPB_TYPE_INT32; - } else if ((upb_uint64_handlerfunc*)f == upb_msg_setuint64) { - *type = UPB_TYPE_UINT64; - } else if ((upb_uint32_handlerfunc*)f == upb_msg_setuint32) { - *type = UPB_TYPE_UINT32; - } else if ((upb_double_handlerfunc*)f == upb_msg_setdouble) { - *type = UPB_TYPE_DOUBLE; - } else if ((upb_float_handlerfunc*)f == upb_msg_setfloat) { - *type = UPB_TYPE_FLOAT; - } else if ((upb_bool_handlerfunc*)f == upb_msg_setbool) { - *type = UPB_TYPE_BOOL; - } else { - return false; - } - - d = p; - *offset = d->offset; - *hasbit = d->hasbit; - return true; -} diff --git a/upb/handlers.h b/upb/handlers.h deleted file mode 100644 index 3ec212225a..0000000000 --- a/upb/handlers.h +++ /dev/null @@ -1,735 +0,0 @@ -/* -** upb::Handlers (upb_handlers) -** -** A upb_handlers is like a virtual table for a upb_msgdef. Each field of the -** message can have associated functions that will be called when we are -** parsing or visiting a stream of data. This is similar to how handlers work -** in SAX (the Simple API for XML). -** -** The handlers have no idea where the data is coming from, so a single set of -** handlers could be used with two completely different data sources (for -** example, a parser and a visitor over in-memory objects). This decoupling is -** the most important feature of upb, because it allows parsers and serializers -** to be highly reusable. -** -** This is a mixed C/C++ interface that offers a full API to both languages. -** See the top-level README for more information. -*/ - -#ifndef UPB_HANDLERS_H -#define UPB_HANDLERS_H - -#include "upb/def.h" -#include "upb/table.int.h" - -#ifdef __cplusplus -#include "upb/def.hpp" -namespace upb { -class HandlersPtr; -class HandlerCache; -template class Handler; -template struct CanonicalType; -} /* namespace upb */ -#endif - -/* Must be last. */ -#include "upb/port_def.inc" - -/* The maximum depth that the handler graph can have. This is a resource limit - * for the C stack since we sometimes need to recursively traverse the graph. - * Cycles are ok; the traversal will stop when it detects a cycle, but we must - * hit the cycle before the maximum depth is reached. - * - * If having a single static limit is too inflexible, we can add another variant - * of Handlers::Freeze that allows specifying this as a parameter. */ -#define UPB_MAX_HANDLER_DEPTH 64 - -/* All the different types of handlers that can be registered. - * Only needed for the advanced functions in upb::Handlers. */ -typedef enum { - UPB_HANDLER_INT32, - UPB_HANDLER_INT64, - UPB_HANDLER_UINT32, - UPB_HANDLER_UINT64, - UPB_HANDLER_FLOAT, - UPB_HANDLER_DOUBLE, - UPB_HANDLER_BOOL, - UPB_HANDLER_STARTSTR, - UPB_HANDLER_STRING, - UPB_HANDLER_ENDSTR, - UPB_HANDLER_STARTSUBMSG, - UPB_HANDLER_ENDSUBMSG, - UPB_HANDLER_STARTSEQ, - UPB_HANDLER_ENDSEQ -} upb_handlertype_t; - -#define UPB_HANDLER_MAX (UPB_HANDLER_ENDSEQ+1) - -#define UPB_BREAK NULL - -/* A convenient definition for when no closure is needed. */ -extern char _upb_noclosure; -#define UPB_NO_CLOSURE &_upb_noclosure - -/* A selector refers to a specific field handler in the Handlers object - * (for example: the STARTSUBMSG handler for field "field15"). */ -typedef int32_t upb_selector_t; - -/* Static selectors for upb::Handlers. */ -#define UPB_STARTMSG_SELECTOR 0 -#define UPB_ENDMSG_SELECTOR 1 -#define UPB_UNKNOWN_SELECTOR 2 -#define UPB_STATIC_SELECTOR_COUNT 3 /* Warning: also in upb/def.c. */ - -/* Static selectors for upb::BytesHandler. */ -#define UPB_STARTSTR_SELECTOR 0 -#define UPB_STRING_SELECTOR 1 -#define UPB_ENDSTR_SELECTOR 2 - -#ifdef __cplusplus -template const void *UniquePtrForType() { - static const char ch = 0; - return &ch; -} -#endif - -/* upb_handlers ************************************************************/ - -/* Handler attributes, to be registered with the handler itself. */ -typedef struct { - const void *handler_data; - const void *closure_type; - const void *return_closure_type; - bool alwaysok; -} upb_handlerattr; - -#define UPB_HANDLERATTR_INIT {NULL, NULL, NULL, false} - -/* Bufhandle, data passed along with a buffer to indicate its provenance. */ -struct upb_bufhandle { - /* The beginning of the buffer. This may be different than the pointer - * passed to a StringBuf handler because the handler may receive data - * that is from the middle or end of a larger buffer. */ - const char *buf; - - /* The offset within the attached object where this buffer begins. Only - * meaningful if there is an attached object. */ - size_t objofs; - - /* The attached object (if any) and a pointer representing its type. */ - const void *obj; - const void *objtype; - -#ifdef __cplusplus - template - void SetAttachedObject(const T* _obj) { - obj = _obj; - objtype = UniquePtrForType(); - } - - template - const T *GetAttachedObject() const { - return objtype == UniquePtrForType() ? static_cast(obj) - : NULL; - } -#endif -}; - -typedef struct upb_bufhandle upb_bufhandle; - -#define UPB_BUFHANDLE_INIT {NULL, 0, NULL, NULL} - -/* Handler function typedefs. */ -typedef void upb_handlerfree(void *d); -typedef bool upb_unknown_handlerfunc(void *c, const void *hd, const char *buf, - size_t n); -typedef bool upb_startmsg_handlerfunc(void *c, const void*); -typedef bool upb_endmsg_handlerfunc(void *c, const void *, upb_status *status); -typedef void* upb_startfield_handlerfunc(void *c, const void *hd); -typedef bool upb_endfield_handlerfunc(void *c, const void *hd); -typedef bool upb_int32_handlerfunc(void *c, const void *hd, int32_t val); -typedef bool upb_int64_handlerfunc(void *c, const void *hd, int64_t val); -typedef bool upb_uint32_handlerfunc(void *c, const void *hd, uint32_t val); -typedef bool upb_uint64_handlerfunc(void *c, const void *hd, uint64_t val); -typedef bool upb_float_handlerfunc(void *c, const void *hd, float val); -typedef bool upb_double_handlerfunc(void *c, const void *hd, double val); -typedef bool upb_bool_handlerfunc(void *c, const void *hd, bool val); -typedef void *upb_startstr_handlerfunc(void *c, const void *hd, - size_t size_hint); -typedef size_t upb_string_handlerfunc(void *c, const void *hd, const char *buf, - size_t n, const upb_bufhandle* handle); - -struct upb_handlers; -typedef struct upb_handlers upb_handlers; - -#ifdef __cplusplus -extern "C" { -#endif - -/* Mutating accessors. */ -const upb_status *upb_handlers_status(upb_handlers *h); -void upb_handlers_clearerr(upb_handlers *h); -const upb_msgdef *upb_handlers_msgdef(const upb_handlers *h); -bool upb_handlers_addcleanup(upb_handlers *h, void *p, upb_handlerfree *hfree); -bool upb_handlers_setunknown(upb_handlers *h, upb_unknown_handlerfunc *func, - const upb_handlerattr *attr); -bool upb_handlers_setstartmsg(upb_handlers *h, upb_startmsg_handlerfunc *func, - const upb_handlerattr *attr); -bool upb_handlers_setendmsg(upb_handlers *h, upb_endmsg_handlerfunc *func, - const upb_handlerattr *attr); -bool upb_handlers_setint32(upb_handlers *h, const upb_fielddef *f, - upb_int32_handlerfunc *func, - const upb_handlerattr *attr); -bool upb_handlers_setint64(upb_handlers *h, const upb_fielddef *f, - upb_int64_handlerfunc *func, - const upb_handlerattr *attr); -bool upb_handlers_setuint32(upb_handlers *h, const upb_fielddef *f, - upb_uint32_handlerfunc *func, - const upb_handlerattr *attr); -bool upb_handlers_setuint64(upb_handlers *h, const upb_fielddef *f, - upb_uint64_handlerfunc *func, - const upb_handlerattr *attr); -bool upb_handlers_setfloat(upb_handlers *h, const upb_fielddef *f, - upb_float_handlerfunc *func, - const upb_handlerattr *attr); -bool upb_handlers_setdouble(upb_handlers *h, const upb_fielddef *f, - upb_double_handlerfunc *func, - const upb_handlerattr *attr); -bool upb_handlers_setbool(upb_handlers *h, const upb_fielddef *f, - upb_bool_handlerfunc *func, - const upb_handlerattr *attr); -bool upb_handlers_setstartstr(upb_handlers *h, const upb_fielddef *f, - upb_startstr_handlerfunc *func, - const upb_handlerattr *attr); -bool upb_handlers_setstring(upb_handlers *h, const upb_fielddef *f, - upb_string_handlerfunc *func, - const upb_handlerattr *attr); -bool upb_handlers_setendstr(upb_handlers *h, const upb_fielddef *f, - upb_endfield_handlerfunc *func, - const upb_handlerattr *attr); -bool upb_handlers_setstartseq(upb_handlers *h, const upb_fielddef *f, - upb_startfield_handlerfunc *func, - const upb_handlerattr *attr); -bool upb_handlers_setstartsubmsg(upb_handlers *h, const upb_fielddef *f, - upb_startfield_handlerfunc *func, - const upb_handlerattr *attr); -bool upb_handlers_setendsubmsg(upb_handlers *h, const upb_fielddef *f, - upb_endfield_handlerfunc *func, - const upb_handlerattr *attr); -bool upb_handlers_setendseq(upb_handlers *h, const upb_fielddef *f, - upb_endfield_handlerfunc *func, - const upb_handlerattr *attr); - -/* Read-only accessors. */ -const upb_handlers *upb_handlers_getsubhandlers(const upb_handlers *h, - const upb_fielddef *f); -const upb_handlers *upb_handlers_getsubhandlers_sel(const upb_handlers *h, - upb_selector_t sel); -upb_func *upb_handlers_gethandler(const upb_handlers *h, upb_selector_t s, - const void **handler_data); -bool upb_handlers_getattr(const upb_handlers *h, upb_selector_t s, - upb_handlerattr *attr); - -/* "Static" methods */ -upb_handlertype_t upb_handlers_getprimitivehandlertype(const upb_fielddef *f); -bool upb_handlers_getselector(const upb_fielddef *f, upb_handlertype_t type, - upb_selector_t *s); -UPB_INLINE upb_selector_t upb_handlers_getendselector(upb_selector_t start) { - return start + 1; -} - -#ifdef __cplusplus -} /* extern "C" */ - -namespace upb { -typedef upb_handlers Handlers; -} - -/* Convenience macros for creating a Handler object that is wrapped with a - * type-safe wrapper function that converts the "void*" parameters/returns - * of the underlying C API into nice C++ function. - * - * Sample usage: - * void OnValue1(MyClosure* c, const MyHandlerData* d, int32_t val) { - * // do stuff ... - * } - * - * // Handler that doesn't need any data bound to it. - * void OnValue2(MyClosure* c, int32_t val) { - * // do stuff ... - * } - * - * // Handler that returns bool so it can return failure if necessary. - * bool OnValue3(MyClosure* c, int32_t val) { - * // do stuff ... - * return ok; - * } - * - * // Member function handler. - * class MyClosure { - * public: - * void OnValue(int32_t val) { - * // do stuff ... - * } - * }; - * - * // Takes ownership of the MyHandlerData. - * handlers->SetInt32Handler(f1, UpbBind(OnValue1, new MyHandlerData(...))); - * handlers->SetInt32Handler(f2, UpbMakeHandler(OnValue2)); - * handlers->SetInt32Handler(f1, UpbMakeHandler(OnValue3)); - * handlers->SetInt32Handler(f2, UpbMakeHandler(&MyClosure::OnValue)); - */ - -/* In C++11, the "template" disambiguator can appear even outside templates, - * so all calls can safely use this pair of macros. */ - -#define UpbMakeHandler(f) upb::MatchFunc(f).template GetFunc() - -/* We have to be careful to only evaluate "d" once. */ -#define UpbBind(f, d) upb::MatchFunc(f).template GetFunc((d)) - -/* Handler: a struct that contains the (handler, data, deleter) tuple that is - * used to register all handlers. Users can Make() these directly but it's - * more convenient to use the UpbMakeHandler/UpbBind macros above. */ -template class upb::Handler { - public: - /* The underlying, handler function signature that upb uses internally. */ - typedef T FuncPtr; - - /* Intentionally implicit. */ - template Handler(F func); - ~Handler() { UPB_ASSERT(registered_); } - - void AddCleanup(upb_handlers* h) const; - FuncPtr handler() const { return handler_; } - const upb_handlerattr& attr() const { return attr_; } - - private: - Handler(const Handler&) = delete; - Handler& operator=(const Handler&) = delete; - - FuncPtr handler_; - mutable upb_handlerattr attr_; - mutable bool registered_; - void *cleanup_data_; - upb_handlerfree *cleanup_func_; -}; - -/* A upb::Handlers object represents the set of handlers associated with a - * message in the graph of messages. You can think of it as a big virtual - * table with functions corresponding to all the events that can fire while - * parsing or visiting a message of a specific type. - * - * Any handlers that are not set behave as if they had successfully consumed - * the value. Any unset Start* handlers will propagate their closure to the - * inner frame. - * - * The easiest way to create the *Handler objects needed by the Set* methods is - * with the UpbBind() and UpbMakeHandler() macros; see below. */ -class upb::HandlersPtr { - public: - HandlersPtr(upb_handlers* ptr) : ptr_(ptr) {} - - upb_handlers* ptr() const { return ptr_; } - - typedef upb_selector_t Selector; - typedef upb_handlertype_t Type; - - typedef Handler StartFieldHandler; - typedef Handler EndFieldHandler; - typedef Handler StartMessageHandler; - typedef Handler - EndMessageHandler; - typedef Handler StartStringHandler; - typedef Handler - StringHandler; - - template struct ValueHandler { - typedef Handler H; - }; - - typedef ValueHandler::H Int32Handler; - typedef ValueHandler::H Int64Handler; - typedef ValueHandler::H UInt32Handler; - typedef ValueHandler::H UInt64Handler; - typedef ValueHandler::H FloatHandler; - typedef ValueHandler::H DoubleHandler; - typedef ValueHandler::H BoolHandler; - - /* Any function pointer can be converted to this and converted back to its - * correct type. */ - typedef void GenericFunction(); - - typedef void HandlersCallback(const void *closure, upb_handlers *h); - - /* Returns the msgdef associated with this handlers object. */ - MessageDefPtr message_def() const { - return MessageDefPtr(upb_handlers_msgdef(ptr())); - } - - /* Adds the given pointer and function to the list of cleanup functions that - * will be run when these handlers are freed. If this pointer has previously - * been registered, the function returns false and does nothing. */ - bool AddCleanup(void *ptr, upb_handlerfree *cleanup) { - return upb_handlers_addcleanup(ptr_, ptr, cleanup); - } - - /* Sets the startmsg handler for the message, which is defined as follows: - * - * bool startmsg(MyType* closure) { - * // Called when the message begins. Returns true if processing should - * // continue. - * return true; - * } - */ - bool SetStartMessageHandler(const StartMessageHandler &h) { - h.AddCleanup(ptr()); - return upb_handlers_setstartmsg(ptr(), h.handler(), &h.attr()); - } - - /* Sets the endmsg handler for the message, which is defined as follows: - * - * bool endmsg(MyType* closure, upb_status *status) { - * // Called when processing of this message ends, whether in success or - * // failure. "status" indicates the final status of processing, and - * // can also be modified in-place to update the final status. - * } - */ - bool SetEndMessageHandler(const EndMessageHandler& h) { - h.AddCleanup(ptr()); - return upb_handlers_setendmsg(ptr(), h.handler(), &h.attr()); - } - - /* Sets the value handler for the given field, which is defined as follows - * (this is for an int32 field; other field types will pass their native - * C/C++ type for "val"): - * - * bool OnValue(MyClosure* c, const MyHandlerData* d, int32_t val) { - * // Called when the field's value is encountered. "d" contains - * // whatever data was bound to this field when it was registered. - * // Returns true if processing should continue. - * return true; - * } - * - * handers->SetInt32Handler(f, UpbBind(OnValue, new MyHandlerData(...))); - * - * The value type must exactly match f->type(). - * For example, a handler that takes an int32_t parameter may only be used for - * fields of type UPB_TYPE_INT32 and UPB_TYPE_ENUM. - * - * Returns false if the handler failed to register; in this case the cleanup - * handler (if any) will be called immediately. - */ - bool SetInt32Handler(FieldDefPtr f, const Int32Handler &h) { - h.AddCleanup(ptr()); - return upb_handlers_setint32(ptr(), f.ptr(), h.handler(), &h.attr()); - } - - bool SetInt64Handler (FieldDefPtr f, const Int64Handler& h) { - h.AddCleanup(ptr()); - return upb_handlers_setint64(ptr(), f.ptr(), h.handler(), &h.attr()); - } - - bool SetUInt32Handler(FieldDefPtr f, const UInt32Handler& h) { - h.AddCleanup(ptr()); - return upb_handlers_setuint32(ptr(), f.ptr(), h.handler(), &h.attr()); - } - - bool SetUInt64Handler(FieldDefPtr f, const UInt64Handler& h) { - h.AddCleanup(ptr()); - return upb_handlers_setuint64(ptr(), f.ptr(), h.handler(), &h.attr()); - } - - bool SetFloatHandler (FieldDefPtr f, const FloatHandler& h) { - h.AddCleanup(ptr()); - return upb_handlers_setfloat(ptr(), f.ptr(), h.handler(), &h.attr()); - } - - bool SetDoubleHandler(FieldDefPtr f, const DoubleHandler& h) { - h.AddCleanup(ptr()); - return upb_handlers_setdouble(ptr(), f.ptr(), h.handler(), &h.attr()); - } - - bool SetBoolHandler(FieldDefPtr f, const BoolHandler &h) { - h.AddCleanup(ptr()); - return upb_handlers_setbool(ptr(), f.ptr(), h.handler(), &h.attr()); - } - - /* Like the previous, but templated on the type on the value (ie. int32). - * This is mostly useful to call from other templates. To call this you must - * specify the template parameter explicitly, ie: - * h->SetValueHandler(f, UpbBind(MyHandler, MyData)); */ - template - bool SetValueHandler( - FieldDefPtr f, - const typename ValueHandler::Type>::H &handler); - - /* Sets handlers for a string field, which are defined as follows: - * - * MySubClosure* startstr(MyClosure* c, const MyHandlerData* d, - * size_t size_hint) { - * // Called when a string value begins. The return value indicates the - * // closure for the string. "size_hint" indicates the size of the - * // string if it is known, however if the string is length-delimited - * // and the end-of-string is not available size_hint will be zero. - * // This case is indistinguishable from the case where the size is - * // known to be zero. - * // - * // TODO(haberman): is it important to distinguish these cases? - * // If we had ssize_t as a type we could make -1 "unknown", but - * // ssize_t is POSIX (not ANSI) and therefore less portable. - * // In practice I suspect it won't be important to distinguish. - * return closure; - * } - * - * size_t str(MyClosure* closure, const MyHandlerData* d, - * const char *str, size_t len) { - * // Called for each buffer of string data; the multiple physical buffers - * // are all part of the same logical string. The return value indicates - * // how many bytes were consumed. If this number is less than "len", - * // this will also indicate that processing should be halted for now, - * // like returning false or UPB_BREAK from any other callback. If - * // number is greater than "len", the excess bytes will be skipped over - * // and not passed to the callback. - * return len; - * } - * - * bool endstr(MyClosure* c, const MyHandlerData* d) { - * // Called when a string value ends. Return value indicates whether - * // processing should continue. - * return true; - * } - */ - bool SetStartStringHandler(FieldDefPtr f, const StartStringHandler &h) { - h.AddCleanup(ptr()); - return upb_handlers_setstartstr(ptr(), f.ptr(), h.handler(), &h.attr()); - } - - bool SetStringHandler(FieldDefPtr f, const StringHandler& h) { - h.AddCleanup(ptr()); - return upb_handlers_setstring(ptr(), f.ptr(), h.handler(), &h.attr()); - } - - bool SetEndStringHandler(FieldDefPtr f, const EndFieldHandler& h) { - h.AddCleanup(ptr()); - return upb_handlers_setendstr(ptr(), f.ptr(), h.handler(), &h.attr()); - } - - /* Sets the startseq handler, which is defined as follows: - * - * MySubClosure *startseq(MyClosure* c, const MyHandlerData* d) { - * // Called when a sequence (repeated field) begins. The returned - * // pointer indicates the closure for the sequence (or UPB_BREAK - * // to interrupt processing). - * return closure; - * } - * - * h->SetStartSequenceHandler(f, UpbBind(startseq, new MyHandlerData(...))); - * - * Returns "false" if "f" does not belong to this message or is not a - * repeated field. - */ - bool SetStartSequenceHandler(FieldDefPtr f, const StartFieldHandler &h) { - h.AddCleanup(ptr()); - return upb_handlers_setstartseq(ptr(), f.ptr(), h.handler(), &h.attr()); - } - - /* Sets the startsubmsg handler for the given field, which is defined as - * follows: - * - * MySubClosure* startsubmsg(MyClosure* c, const MyHandlerData* d) { - * // Called when a submessage begins. The returned pointer indicates the - * // closure for the sequence (or UPB_BREAK to interrupt processing). - * return closure; - * } - * - * h->SetStartSubMessageHandler(f, UpbBind(startsubmsg, - * new MyHandlerData(...))); - * - * Returns "false" if "f" does not belong to this message or is not a - * submessage/group field. - */ - bool SetStartSubMessageHandler(FieldDefPtr f, const StartFieldHandler& h) { - h.AddCleanup(ptr()); - return upb_handlers_setstartsubmsg(ptr(), f.ptr(), h.handler(), &h.attr()); - } - - /* Sets the endsubmsg handler for the given field, which is defined as - * follows: - * - * bool endsubmsg(MyClosure* c, const MyHandlerData* d) { - * // Called when a submessage ends. Returns true to continue processing. - * return true; - * } - * - * Returns "false" if "f" does not belong to this message or is not a - * submessage/group field. - */ - bool SetEndSubMessageHandler(FieldDefPtr f, const EndFieldHandler &h) { - h.AddCleanup(ptr()); - return upb_handlers_setendsubmsg(ptr(), f.ptr(), h.handler(), &h.attr()); - } - - /* Starts the endsubseq handler for the given field, which is defined as - * follows: - * - * bool endseq(MyClosure* c, const MyHandlerData* d) { - * // Called when a sequence ends. Returns true continue processing. - * return true; - * } - * - * Returns "false" if "f" does not belong to this message or is not a - * repeated field. - */ - bool SetEndSequenceHandler(FieldDefPtr f, const EndFieldHandler &h) { - h.AddCleanup(ptr()); - return upb_handlers_setendseq(ptr(), f.ptr(), h.handler(), &h.attr()); - } - - private: - upb_handlers* ptr_; -}; - -#endif /* __cplusplus */ - -/* upb_handlercache ***********************************************************/ - -/* A upb_handlercache lazily builds and caches upb_handlers. You pass it a - * function (with optional closure) that can build handlers for a given - * message on-demand, and the cache maintains a map of msgdef->handlers. */ - -#ifdef __cplusplus -extern "C" { -#endif - -struct upb_handlercache; -typedef struct upb_handlercache upb_handlercache; - -typedef void upb_handlers_callback(const void *closure, upb_handlers *h); - -upb_handlercache *upb_handlercache_new(upb_handlers_callback *callback, - const void *closure); -void upb_handlercache_free(upb_handlercache *cache); -const upb_handlers *upb_handlercache_get(upb_handlercache *cache, - const upb_msgdef *md); -bool upb_handlercache_addcleanup(upb_handlercache *h, void *p, - upb_handlerfree *hfree); - -#ifdef __cplusplus -} /* extern "C" */ - -class upb::HandlerCache { - public: - HandlerCache(upb_handlers_callback *callback, const void *closure) - : ptr_(upb_handlercache_new(callback, closure), upb_handlercache_free) {} - HandlerCache(HandlerCache&&) = default; - HandlerCache& operator=(HandlerCache&&) = default; - HandlerCache(upb_handlercache* c) : ptr_(c, upb_handlercache_free) {} - - upb_handlercache* ptr() { return ptr_.get(); } - - const upb_handlers *Get(MessageDefPtr md) { - return upb_handlercache_get(ptr_.get(), md.ptr()); - } - - private: - std::unique_ptr ptr_; -}; - -#endif /* __cplusplus */ - -/* upb_byteshandler ***********************************************************/ - -typedef struct { - upb_func *func; - - /* It is wasteful to include the entire attributes here: - * - * * Some of the information is redundant (like storing the closure type - * separately for each handler that must match). - * * Some of the info is only needed prior to freeze() (like closure types). - * * alignment padding wastes a lot of space for alwaysok_. - * - * If/when the size and locality of handlers is an issue, we can optimize this - * not to store the entire attr like this. We do not expose the table's - * layout to allow this optimization in the future. */ - upb_handlerattr attr; -} upb_handlers_tabent; - -#define UPB_TABENT_INIT {NULL, UPB_HANDLERATTR_INIT} - -typedef struct { - upb_handlers_tabent table[3]; -} upb_byteshandler; - -#define UPB_BYTESHANDLER_INIT \ - { \ - { UPB_TABENT_INIT, UPB_TABENT_INIT, UPB_TABENT_INIT } \ - } - -UPB_INLINE void upb_byteshandler_init(upb_byteshandler *handler) { - upb_byteshandler init = UPB_BYTESHANDLER_INIT; - *handler = init; -} - -#ifdef __cplusplus -extern "C" { -#endif - -/* Caller must ensure that "d" outlives the handlers. */ -bool upb_byteshandler_setstartstr(upb_byteshandler *h, - upb_startstr_handlerfunc *func, void *d); -bool upb_byteshandler_setstring(upb_byteshandler *h, - upb_string_handlerfunc *func, void *d); -bool upb_byteshandler_setendstr(upb_byteshandler *h, - upb_endfield_handlerfunc *func, void *d); - -#ifdef __cplusplus -} /* extern "C" */ - -namespace upb { -typedef upb_byteshandler BytesHandler; -} -#endif - -/** Message handlers ******************************************************************/ - -#ifdef __cplusplus -extern "C" { -#endif - -/* These are the handlers used internally by upb_msgfactory_getmergehandlers(). - * They write scalar data to a known offset from the message pointer. - * - * These would be trivial for anyone to implement themselves, but it's better - * to use these because some JITs will recognize and specialize these instead - * of actually calling the function. */ - -/* Sets a handler for the given primitive field that will write the data at the - * given offset. If hasbit > 0, also sets a hasbit at the given bit offset - * (addressing each byte low to high). */ -bool upb_msg_setscalarhandler(upb_handlers *h, - const upb_fielddef *f, - size_t offset, - int32_t hasbit); - -/* If the given handler is a msghandlers_primitive field, returns true and sets - * *type, *offset and *hasbit. Otherwise returns false. */ -bool upb_msg_getscalarhandlerdata(const upb_handlers *h, - upb_selector_t s, - upb_fieldtype_t *type, - size_t *offset, - int32_t *hasbit); - - - -#ifdef __cplusplus -} /* extern "C" */ -#endif - -#include "upb/port_undef.inc" - -#include "upb/handlers-inl.h" - -#endif /* UPB_HANDLERS_H */ diff --git a/upb/json/parser.h b/upb/json/parser.h deleted file mode 100644 index d323c52ef8..0000000000 --- a/upb/json/parser.h +++ /dev/null @@ -1,140 +0,0 @@ -/* -** upb::json::Parser (upb_json_parser) -** -** Parses JSON according to a specific schema. -** Support for parsing arbitrary JSON (schema-less) will be added later. -*/ - -#ifndef UPB_JSON_PARSER_H_ -#define UPB_JSON_PARSER_H_ - -#include "upb/sink.h" - -#ifdef __cplusplus -namespace upb { -namespace json { -class CodeCache; -class ParserPtr; -class ParserMethodPtr; -} /* namespace json */ -} /* namespace upb */ -#endif - -/* upb_json_parsermethod ******************************************************/ - -struct upb_json_parsermethod; -typedef struct upb_json_parsermethod upb_json_parsermethod; - -#ifdef __cplusplus -extern "C" { -#endif - -const upb_byteshandler* upb_json_parsermethod_inputhandler( - const upb_json_parsermethod* m); - -#ifdef __cplusplus -} /* extern "C" */ - -class upb::json::ParserMethodPtr { - public: - ParserMethodPtr() : ptr_(nullptr) {} - ParserMethodPtr(const upb_json_parsermethod* ptr) : ptr_(ptr) {} - - const upb_json_parsermethod* ptr() const { return ptr_; } - - const BytesHandler* input_handler() const { - return upb_json_parsermethod_inputhandler(ptr()); - } - - private: - const upb_json_parsermethod* ptr_; -}; - -#endif /* __cplusplus */ - -/* upb_json_parser ************************************************************/ - -/* Preallocation hint: parser won't allocate more bytes than this when first - * constructed. This hint may be an overestimate for some build configurations. - * But if the parser library is upgraded without recompiling the application, - * it may be an underestimate. */ -#define UPB_JSON_PARSER_SIZE 5712 - -struct upb_json_parser; -typedef struct upb_json_parser upb_json_parser; - -#ifdef __cplusplus -extern "C" { -#endif - -upb_json_parser* upb_json_parser_create(upb_arena* a, - const upb_json_parsermethod* m, - const upb_symtab* symtab, - upb_sink output, - upb_status *status, - bool ignore_json_unknown); -upb_bytessink upb_json_parser_input(upb_json_parser* p); - -#ifdef __cplusplus -} /* extern "C" */ - -/* Parses an incoming BytesStream, pushing the results to the destination - * sink. */ -class upb::json::ParserPtr { - public: - ParserPtr(upb_json_parser* ptr) : ptr_(ptr) {} - - static ParserPtr Create(Arena* arena, ParserMethodPtr method, - SymbolTable* symtab, Sink output, Status* status, - bool ignore_json_unknown) { - upb_symtab* symtab_ptr = symtab ? symtab->ptr() : nullptr; - return ParserPtr(upb_json_parser_create( - arena->ptr(), method.ptr(), symtab_ptr, output.sink(), status->ptr(), - ignore_json_unknown)); - } - - BytesSink input() { return upb_json_parser_input(ptr_); } - - private: - upb_json_parser* ptr_; -}; - -#endif /* __cplusplus */ - -/* upb_json_codecache *********************************************************/ - -/* Lazily builds and caches decoder methods that will push data to the given - * handlers. The upb_symtab object(s) must outlive this object. */ - -struct upb_json_codecache; -typedef struct upb_json_codecache upb_json_codecache; - -#ifdef __cplusplus -extern "C" { -#endif - -upb_json_codecache *upb_json_codecache_new(void); -void upb_json_codecache_free(upb_json_codecache *cache); -const upb_json_parsermethod* upb_json_codecache_get(upb_json_codecache* cache, - const upb_msgdef* md); - -#ifdef __cplusplus -} /* extern "C" */ - -class upb::json::CodeCache { - public: - CodeCache() : ptr_(upb_json_codecache_new(), upb_json_codecache_free) {} - - /* Returns a DecoderMethod that can push data to the given handlers. - * If a suitable method already exists, it will be returned from the cache. */ - ParserMethodPtr Get(MessageDefPtr md) { - return upb_json_codecache_get(ptr_.get(), md.ptr()); - } - - private: - std::unique_ptr ptr_; -}; - -#endif - -#endif /* UPB_JSON_PARSER_H_ */ diff --git a/upb/json/parser.rl b/upb/json/parser.rl deleted file mode 100644 index e6a701a542..0000000000 --- a/upb/json/parser.rl +++ /dev/null @@ -1,2998 +0,0 @@ -/* -** upb::json::Parser (upb_json_parser) -** -** A parser that uses the Ragel State Machine Compiler to generate -** the finite automata. -** -** Ragel only natively handles regular languages, but we can manually -** program it a bit to handle context-free languages like JSON, by using -** the "fcall" and "fret" constructs. -** -** This parser can handle the basics, but needs several things to be fleshed -** out: -** -** - handling of unicode escape sequences (including high surrogate pairs). -** - properly check and report errors for unknown fields, stack overflow, -** improper array nesting (or lack of nesting). -** - handling of base64 sequences with padding characters. -** - handling of push-back (non-success returns from sink functions). -** - handling of keys/escape-sequences/etc that span input buffers. -*/ - -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#include "upb/json/parser.h" -#include "upb/pb/encoder.h" - -#include "upb/port_def.inc" - -#define UPB_JSON_MAX_DEPTH 64 - -/* Type of value message */ -enum { - VALUE_NULLVALUE = 0, - VALUE_NUMBERVALUE = 1, - VALUE_STRINGVALUE = 2, - VALUE_BOOLVALUE = 3, - VALUE_STRUCTVALUE = 4, - VALUE_LISTVALUE = 5 -}; - -/* Forward declare */ -static bool is_top_level(upb_json_parser *p); -static bool is_wellknown_msg(upb_json_parser *p, upb_wellknowntype_t type); -static bool is_wellknown_field(upb_json_parser *p, upb_wellknowntype_t type); - -static bool is_number_wrapper_object(upb_json_parser *p); -static bool does_number_wrapper_start(upb_json_parser *p); -static bool does_number_wrapper_end(upb_json_parser *p); - -static bool is_string_wrapper_object(upb_json_parser *p); -static bool does_string_wrapper_start(upb_json_parser *p); -static bool does_string_wrapper_end(upb_json_parser *p); - -static bool does_fieldmask_start(upb_json_parser *p); -static bool does_fieldmask_end(upb_json_parser *p); -static void start_fieldmask_object(upb_json_parser *p); -static void end_fieldmask_object(upb_json_parser *p); - -static void start_wrapper_object(upb_json_parser *p); -static void end_wrapper_object(upb_json_parser *p); - -static void start_value_object(upb_json_parser *p, int value_type); -static void end_value_object(upb_json_parser *p); - -static void start_listvalue_object(upb_json_parser *p); -static void end_listvalue_object(upb_json_parser *p); - -static void start_structvalue_object(upb_json_parser *p); -static void end_structvalue_object(upb_json_parser *p); - -static void start_object(upb_json_parser *p); -static void end_object(upb_json_parser *p); - -static void start_any_object(upb_json_parser *p, const char *ptr); -static bool end_any_object(upb_json_parser *p, const char *ptr); - -static bool start_subobject(upb_json_parser *p); -static void end_subobject(upb_json_parser *p); - -static void start_member(upb_json_parser *p); -static void end_member(upb_json_parser *p); -static bool end_membername(upb_json_parser *p); - -static void start_any_member(upb_json_parser *p, const char *ptr); -static void end_any_member(upb_json_parser *p, const char *ptr); -static bool end_any_membername(upb_json_parser *p); - -size_t parse(void *closure, const void *hd, const char *buf, size_t size, - const upb_bufhandle *handle); -static bool end(void *closure, const void *hd); - -static const char eof_ch = 'e'; - -/* stringsink */ -typedef struct { - upb_byteshandler handler; - upb_bytessink sink; - char *ptr; - size_t len, size; -} upb_stringsink; - - -static void *stringsink_start(void *_sink, const void *hd, size_t size_hint) { - upb_stringsink *sink = _sink; - sink->len = 0; - UPB_UNUSED(hd); - UPB_UNUSED(size_hint); - return sink; -} - -static size_t stringsink_string(void *_sink, const void *hd, const char *ptr, - size_t len, const upb_bufhandle *handle) { - upb_stringsink *sink = _sink; - size_t new_size = sink->size; - - UPB_UNUSED(hd); - UPB_UNUSED(handle); - - while (sink->len + len > new_size) { - new_size *= 2; - } - - if (new_size != sink->size) { - sink->ptr = realloc(sink->ptr, new_size); - sink->size = new_size; - } - - memcpy(sink->ptr + sink->len, ptr, len); - sink->len += len; - - return len; -} - -void upb_stringsink_init(upb_stringsink *sink) { - upb_byteshandler_init(&sink->handler); - upb_byteshandler_setstartstr(&sink->handler, stringsink_start, NULL); - upb_byteshandler_setstring(&sink->handler, stringsink_string, NULL); - - upb_bytessink_reset(&sink->sink, &sink->handler, sink); - - sink->size = 32; - sink->ptr = malloc(sink->size); - sink->len = 0; -} - -void upb_stringsink_uninit(upb_stringsink *sink) { free(sink->ptr); } - -typedef struct { - /* For encoding Any value field in binary format. */ - upb_handlercache *encoder_handlercache; - upb_stringsink stringsink; - - /* For decoding Any value field in json format. */ - upb_json_codecache *parser_codecache; - upb_sink sink; - upb_json_parser *parser; - - /* Mark the range of uninterpreted values in json input before type url. */ - const char *before_type_url_start; - const char *before_type_url_end; - - /* Mark the range of uninterpreted values in json input after type url. */ - const char *after_type_url_start; -} upb_jsonparser_any_frame; - -typedef struct { - upb_sink sink; - - /* The current message in which we're parsing, and the field whose value we're - * expecting next. */ - const upb_msgdef *m; - const upb_fielddef *f; - - /* The table mapping json name to fielddef for this message. */ - const upb_strtable *name_table; - - /* We are in a repeated-field context. We need this flag to decide whether to - * handle the array as a normal repeated field or a - * google.protobuf.ListValue/google.protobuf.Value. */ - bool is_repeated; - - /* We are in a repeated-field context, ready to emit mapentries as - * submessages. This flag alters the start-of-object (open-brace) behavior to - * begin a sequence of mapentry messages rather than a single submessage. */ - bool is_map; - - /* We are in a map-entry message context. This flag is set when parsing the - * value field of a single map entry and indicates to all value-field parsers - * (subobjects, strings, numbers, and bools) that the map-entry submessage - * should end as soon as the value is parsed. */ - bool is_mapentry; - - /* If |is_map| or |is_mapentry| is true, |mapfield| refers to the parent - * message's map field that we're currently parsing. This differs from |f| - * because |f| is the field in the *current* message (i.e., the map-entry - * message itself), not the parent's field that leads to this map. */ - const upb_fielddef *mapfield; - - /* We are in an Any message context. This flag is set when parsing the Any - * message and indicates to all field parsers (subobjects, strings, numbers, - * and bools) that the parsed field should be serialized as binary data or - * cached (type url not found yet). */ - bool is_any; - - /* The type of packed message in Any. */ - upb_jsonparser_any_frame *any_frame; - - /* True if the field to be parsed is unknown. */ - bool is_unknown_field; -} upb_jsonparser_frame; - -static void init_frame(upb_jsonparser_frame* frame) { - frame->m = NULL; - frame->f = NULL; - frame->name_table = NULL; - frame->is_repeated = false; - frame->is_map = false; - frame->is_mapentry = false; - frame->mapfield = NULL; - frame->is_any = false; - frame->any_frame = NULL; - frame->is_unknown_field = false; -} - -struct upb_json_parser { - upb_arena *arena; - const upb_json_parsermethod *method; - upb_bytessink input_; - - /* Stack to track the JSON scopes we are in. */ - upb_jsonparser_frame stack[UPB_JSON_MAX_DEPTH]; - upb_jsonparser_frame *top; - upb_jsonparser_frame *limit; - - upb_status *status; - - /* Ragel's internal parsing stack for the parsing state machine. */ - int current_state; - int parser_stack[UPB_JSON_MAX_DEPTH]; - int parser_top; - - /* The handle for the current buffer. */ - const upb_bufhandle *handle; - - /* Accumulate buffer. See details in parser.rl. */ - const char *accumulated; - size_t accumulated_len; - char *accumulate_buf; - size_t accumulate_buf_size; - - /* Multi-part text data. See details in parser.rl. */ - int multipart_state; - upb_selector_t string_selector; - - /* Input capture. See details in parser.rl. */ - const char *capture; - - /* Intermediate result of parsing a unicode escape sequence. */ - uint32_t digit; - - /* For resolve type url in Any. */ - const upb_symtab *symtab; - - /* Whether to proceed if unknown field is met. */ - bool ignore_json_unknown; - - /* Cache for parsing timestamp due to base and zone are handled in different - * handlers. */ - struct tm tm; -}; - -static upb_jsonparser_frame* start_jsonparser_frame(upb_json_parser *p) { - upb_jsonparser_frame *inner; - inner = p->top + 1; - init_frame(inner); - return inner; -} - -struct upb_json_codecache { - upb_arena *arena; - upb_inttable methods; /* upb_msgdef* -> upb_json_parsermethod* */ -}; - -struct upb_json_parsermethod { - const upb_json_codecache *cache; - upb_byteshandler input_handler_; - - /* Maps json_name -> fielddef */ - upb_strtable name_table; -}; - -#define PARSER_CHECK_RETURN(x) if (!(x)) return false - -static upb_jsonparser_any_frame *json_parser_any_frame_new( - upb_json_parser *p) { - upb_jsonparser_any_frame *frame; - - frame = upb_arena_malloc(p->arena, sizeof(upb_jsonparser_any_frame)); - - frame->encoder_handlercache = upb_pb_encoder_newcache(); - frame->parser_codecache = upb_json_codecache_new(); - frame->parser = NULL; - frame->before_type_url_start = NULL; - frame->before_type_url_end = NULL; - frame->after_type_url_start = NULL; - - upb_stringsink_init(&frame->stringsink); - - return frame; -} - -static void json_parser_any_frame_set_payload_type( - upb_json_parser *p, - upb_jsonparser_any_frame *frame, - const upb_msgdef *payload_type) { - const upb_handlers *h; - const upb_json_parsermethod *parser_method; - upb_pb_encoder *encoder; - - /* Initialize encoder. */ - h = upb_handlercache_get(frame->encoder_handlercache, payload_type); - encoder = upb_pb_encoder_create(p->arena, h, frame->stringsink.sink); - - /* Initialize parser. */ - parser_method = upb_json_codecache_get(frame->parser_codecache, payload_type); - upb_sink_reset(&frame->sink, h, encoder); - frame->parser = - upb_json_parser_create(p->arena, parser_method, p->symtab, frame->sink, - p->status, p->ignore_json_unknown); -} - -static void json_parser_any_frame_free(upb_jsonparser_any_frame *frame) { - upb_handlercache_free(frame->encoder_handlercache); - upb_json_codecache_free(frame->parser_codecache); - upb_stringsink_uninit(&frame->stringsink); -} - -static bool json_parser_any_frame_has_type_url( - upb_jsonparser_any_frame *frame) { - return frame->parser != NULL; -} - -static bool json_parser_any_frame_has_value_before_type_url( - upb_jsonparser_any_frame *frame) { - return frame->before_type_url_start != frame->before_type_url_end; -} - -static bool json_parser_any_frame_has_value_after_type_url( - upb_jsonparser_any_frame *frame) { - return frame->after_type_url_start != NULL; -} - -static bool json_parser_any_frame_has_value( - upb_jsonparser_any_frame *frame) { - return json_parser_any_frame_has_value_before_type_url(frame) || - json_parser_any_frame_has_value_after_type_url(frame); -} - -static void json_parser_any_frame_set_before_type_url_end( - upb_jsonparser_any_frame *frame, - const char *ptr) { - if (frame->parser == NULL) { - frame->before_type_url_end = ptr; - } -} - -static void json_parser_any_frame_set_after_type_url_start_once( - upb_jsonparser_any_frame *frame, - const char *ptr) { - if (json_parser_any_frame_has_type_url(frame) && - frame->after_type_url_start == NULL) { - frame->after_type_url_start = ptr; - } -} - -/* Used to signal that a capture has been suspended. */ -static char suspend_capture; - -static upb_selector_t getsel_for_handlertype(upb_json_parser *p, - upb_handlertype_t type) { - upb_selector_t sel; - bool ok = upb_handlers_getselector(p->top->f, type, &sel); - UPB_ASSUME(ok); - return sel; -} - -static upb_selector_t parser_getsel(upb_json_parser *p) { - return getsel_for_handlertype( - p, upb_handlers_getprimitivehandlertype(p->top->f)); -} - -static bool check_stack(upb_json_parser *p) { - if ((p->top + 1) == p->limit) { - upb_status_seterrmsg(p->status, "Nesting too deep"); - return false; - } - - return true; -} - -static void set_name_table(upb_json_parser *p, upb_jsonparser_frame *frame) { - upb_value v; - const upb_json_codecache *cache = p->method->cache; - bool ok; - const upb_json_parsermethod *method; - - ok = upb_inttable_lookupptr(&cache->methods, frame->m, &v); - UPB_ASSUME(ok); - method = upb_value_getconstptr(v); - - frame->name_table = &method->name_table; -} - -/* There are GCC/Clang built-ins for overflow checking which we could start - * using if there was any performance benefit to it. */ - -static bool checked_add(size_t a, size_t b, size_t *c) { - if (SIZE_MAX - a < b) return false; - *c = a + b; - return true; -} - -static size_t saturating_multiply(size_t a, size_t b) { - /* size_t is unsigned, so this is defined behavior even on overflow. */ - size_t ret = a * b; - if (b != 0 && ret / b != a) { - ret = SIZE_MAX; - } - return ret; -} - - -/* Base64 decoding ************************************************************/ - -/* TODO(haberman): make this streaming. */ - -static const signed char b64table[] = { - -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, 62/*+*/, -1, -1, -1, 63/*/ */, - 52/*0*/, 53/*1*/, 54/*2*/, 55/*3*/, 56/*4*/, 57/*5*/, 58/*6*/, 59/*7*/, - 60/*8*/, 61/*9*/, -1, -1, -1, -1, -1, -1, - -1, 0/*A*/, 1/*B*/, 2/*C*/, 3/*D*/, 4/*E*/, 5/*F*/, 6/*G*/, - 07/*H*/, 8/*I*/, 9/*J*/, 10/*K*/, 11/*L*/, 12/*M*/, 13/*N*/, 14/*O*/, - 15/*P*/, 16/*Q*/, 17/*R*/, 18/*S*/, 19/*T*/, 20/*U*/, 21/*V*/, 22/*W*/, - 23/*X*/, 24/*Y*/, 25/*Z*/, -1, -1, -1, -1, -1, - -1, 26/*a*/, 27/*b*/, 28/*c*/, 29/*d*/, 30/*e*/, 31/*f*/, 32/*g*/, - 33/*h*/, 34/*i*/, 35/*j*/, 36/*k*/, 37/*l*/, 38/*m*/, 39/*n*/, 40/*o*/, - 41/*p*/, 42/*q*/, 43/*r*/, 44/*s*/, 45/*t*/, 46/*u*/, 47/*v*/, 48/*w*/, - 49/*x*/, 50/*y*/, 51/*z*/, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1 -}; - -/* Returns the table value sign-extended to 32 bits. Knowing that the upper - * bits will be 1 for unrecognized characters makes it easier to check for - * this error condition later (see below). */ -int32_t b64lookup(unsigned char ch) { return b64table[ch]; } - -/* Returns true if the given character is not a valid base64 character or - * padding. */ -bool nonbase64(unsigned char ch) { return b64lookup(ch) == -1 && ch != '='; } - -static bool base64_push(upb_json_parser *p, upb_selector_t sel, const char *ptr, - size_t len) { - const char *limit = ptr + len; - for (; ptr < limit; ptr += 4) { - uint32_t val; - char output[3]; - - if (limit - ptr < 4) { - upb_status_seterrf(p->status, - "Base64 input for bytes field not a multiple of 4: %s", - upb_fielddef_name(p->top->f)); - return false; - } - - val = b64lookup(ptr[0]) << 18 | - b64lookup(ptr[1]) << 12 | - b64lookup(ptr[2]) << 6 | - b64lookup(ptr[3]); - - /* Test the upper bit; returns true if any of the characters returned -1. */ - if (val & 0x80000000) { - goto otherchar; - } - - output[0] = val >> 16; - output[1] = (val >> 8) & 0xff; - output[2] = val & 0xff; - upb_sink_putstring(p->top->sink, sel, output, 3, NULL); - } - return true; - -otherchar: - if (nonbase64(ptr[0]) || nonbase64(ptr[1]) || nonbase64(ptr[2]) || - nonbase64(ptr[3]) ) { - upb_status_seterrf(p->status, - "Non-base64 characters in bytes field: %s", - upb_fielddef_name(p->top->f)); - return false; - } if (ptr[2] == '=') { - uint32_t val; - char output; - - /* Last group contains only two input bytes, one output byte. */ - if (ptr[0] == '=' || ptr[1] == '=' || ptr[3] != '=') { - goto badpadding; - } - - val = b64lookup(ptr[0]) << 18 | - b64lookup(ptr[1]) << 12; - - UPB_ASSERT(!(val & 0x80000000)); - output = val >> 16; - upb_sink_putstring(p->top->sink, sel, &output, 1, NULL); - return true; - } else { - uint32_t val; - char output[2]; - - /* Last group contains only three input bytes, two output bytes. */ - if (ptr[0] == '=' || ptr[1] == '=' || ptr[2] == '=') { - goto badpadding; - } - - val = b64lookup(ptr[0]) << 18 | - b64lookup(ptr[1]) << 12 | - b64lookup(ptr[2]) << 6; - - output[0] = val >> 16; - output[1] = (val >> 8) & 0xff; - upb_sink_putstring(p->top->sink, sel, output, 2, NULL); - return true; - } - -badpadding: - upb_status_seterrf(p->status, - "Incorrect base64 padding for field: %s (%.*s)", - upb_fielddef_name(p->top->f), - 4, ptr); - return false; -} - - -/* Accumulate buffer **********************************************************/ - -/* Functionality for accumulating a buffer. - * - * Some parts of the parser need an entire value as a contiguous string. For - * example, to look up a member name in a hash table, or to turn a string into - * a number, the relevant library routines need the input string to be in - * contiguous memory, even if the value spanned two or more buffers in the - * input. These routines handle that. - * - * In the common case we can just point to the input buffer to get this - * contiguous string and avoid any actual copy. So we optimistically begin - * this way. But there are a few cases where we must instead copy into a - * separate buffer: - * - * 1. The string was not contiguous in the input (it spanned buffers). - * - * 2. The string included escape sequences that need to be interpreted to get - * the true value in a contiguous buffer. */ - -static void assert_accumulate_empty(upb_json_parser *p) { - UPB_ASSERT(p->accumulated == NULL); - UPB_ASSERT(p->accumulated_len == 0); -} - -static void accumulate_clear(upb_json_parser *p) { - p->accumulated = NULL; - p->accumulated_len = 0; -} - -/* Used internally by accumulate_append(). */ -static bool accumulate_realloc(upb_json_parser *p, size_t need) { - void *mem; - size_t old_size = p->accumulate_buf_size; - size_t new_size = UPB_MAX(old_size, 128); - while (new_size < need) { - new_size = saturating_multiply(new_size, 2); - } - - mem = upb_arena_realloc(p->arena, p->accumulate_buf, old_size, new_size); - if (!mem) { - upb_status_seterrmsg(p->status, "Out of memory allocating buffer."); - return false; - } - - p->accumulate_buf = mem; - p->accumulate_buf_size = new_size; - return true; -} - -/* Logically appends the given data to the append buffer. - * If "can_alias" is true, we will try to avoid actually copying, but the buffer - * must be valid until the next accumulate_append() call (if any). */ -static bool accumulate_append(upb_json_parser *p, const char *buf, size_t len, - bool can_alias) { - size_t need; - - if (!p->accumulated && can_alias) { - p->accumulated = buf; - p->accumulated_len = len; - return true; - } - - if (!checked_add(p->accumulated_len, len, &need)) { - upb_status_seterrmsg(p->status, "Integer overflow."); - return false; - } - - if (need > p->accumulate_buf_size && !accumulate_realloc(p, need)) { - return false; - } - - if (p->accumulated != p->accumulate_buf) { - if (p->accumulated_len) { - memcpy(p->accumulate_buf, p->accumulated, p->accumulated_len); - } - p->accumulated = p->accumulate_buf; - } - - memcpy(p->accumulate_buf + p->accumulated_len, buf, len); - p->accumulated_len += len; - return true; -} - -/* Returns a pointer to the data accumulated since the last accumulate_clear() - * call, and writes the length to *len. This with point either to the input - * buffer or a temporary accumulate buffer. */ -static const char *accumulate_getptr(upb_json_parser *p, size_t *len) { - UPB_ASSERT(p->accumulated); - *len = p->accumulated_len; - return p->accumulated; -} - - -/* Mult-part text data ********************************************************/ - -/* When we have text data in the input, it can often come in multiple segments. - * For example, there may be some raw string data followed by an escape - * sequence. The two segments are processed with different logic. Also buffer - * seams in the input can cause multiple segments. - * - * As we see segments, there are two main cases for how we want to process them: - * - * 1. we want to push the captured input directly to string handlers. - * - * 2. we need to accumulate all the parts into a contiguous buffer for further - * processing (field name lookup, string->number conversion, etc). */ - -/* This is the set of states for p->multipart_state. */ -enum { - /* We are not currently processing multipart data. */ - MULTIPART_INACTIVE = 0, - - /* We are processing multipart data by accumulating it into a contiguous - * buffer. */ - MULTIPART_ACCUMULATE = 1, - - /* We are processing multipart data by pushing each part directly to the - * current string handlers. */ - MULTIPART_PUSHEAGERLY = 2 -}; - -/* Start a multi-part text value where we accumulate the data for processing at - * the end. */ -static void multipart_startaccum(upb_json_parser *p) { - assert_accumulate_empty(p); - UPB_ASSERT(p->multipart_state == MULTIPART_INACTIVE); - p->multipart_state = MULTIPART_ACCUMULATE; -} - -/* Start a multi-part text value where we immediately push text data to a string - * value with the given selector. */ -static void multipart_start(upb_json_parser *p, upb_selector_t sel) { - assert_accumulate_empty(p); - UPB_ASSERT(p->multipart_state == MULTIPART_INACTIVE); - p->multipart_state = MULTIPART_PUSHEAGERLY; - p->string_selector = sel; -} - -static bool multipart_text(upb_json_parser *p, const char *buf, size_t len, - bool can_alias) { - switch (p->multipart_state) { - case MULTIPART_INACTIVE: - upb_status_seterrmsg( - p->status, "Internal error: unexpected state MULTIPART_INACTIVE"); - return false; - - case MULTIPART_ACCUMULATE: - if (!accumulate_append(p, buf, len, can_alias)) { - return false; - } - break; - - case MULTIPART_PUSHEAGERLY: { - const upb_bufhandle *handle = can_alias ? p->handle : NULL; - upb_sink_putstring(p->top->sink, p->string_selector, buf, len, handle); - break; - } - } - - return true; -} - -/* Note: this invalidates the accumulate buffer! Call only after reading its - * contents. */ -static void multipart_end(upb_json_parser *p) { - /* This is false sometimes. Probably a bug of some sort, but this code is - * intended for deletion soon. */ - /* UPB_ASSERT(p->multipart_state != MULTIPART_INACTIVE); */ - p->multipart_state = MULTIPART_INACTIVE; - accumulate_clear(p); -} - - -/* Input capture **************************************************************/ - -/* Functionality for capturing a region of the input as text. Gracefully - * handles the case where a buffer seam occurs in the middle of the captured - * region. */ - -static void capture_begin(upb_json_parser *p, const char *ptr) { - UPB_ASSERT(p->multipart_state != MULTIPART_INACTIVE); - UPB_ASSERT(p->capture == NULL); - p->capture = ptr; -} - -static bool capture_end(upb_json_parser *p, const char *ptr) { - UPB_ASSERT(p->capture); - if (multipart_text(p, p->capture, ptr - p->capture, true)) { - p->capture = NULL; - return true; - } else { - return false; - } -} - -/* This is called at the end of each input buffer (ie. when we have hit a - * buffer seam). If we are in the middle of capturing the input, this - * processes the unprocessed capture region. */ -static void capture_suspend(upb_json_parser *p, const char **ptr) { - if (!p->capture) return; - - if (multipart_text(p, p->capture, *ptr - p->capture, false)) { - /* We use this as a signal that we were in the middle of capturing, and - * that capturing should resume at the beginning of the next buffer. - * - * We can't use *ptr here, because we have no guarantee that this pointer - * will be valid when we resume (if the underlying memory is freed, then - * using the pointer at all, even to compare to NULL, is likely undefined - * behavior). */ - p->capture = &suspend_capture; - } else { - /* Need to back up the pointer to the beginning of the capture, since - * we were not able to actually preserve it. */ - *ptr = p->capture; - } -} - -static void capture_resume(upb_json_parser *p, const char *ptr) { - if (p->capture) { - UPB_ASSERT(p->capture == &suspend_capture); - p->capture = ptr; - } -} - - -/* Callbacks from the parser **************************************************/ - -/* These are the functions called directly from the parser itself. - * We define these in the same order as their declarations in the parser. */ - -static char escape_char(char in) { - switch (in) { - case 'r': return '\r'; - case 't': return '\t'; - case 'n': return '\n'; - case 'f': return '\f'; - case 'b': return '\b'; - case '/': return '/'; - case '"': return '"'; - case '\\': return '\\'; - default: - UPB_ASSERT(0); - return 'x'; - } -} - -static bool escape(upb_json_parser *p, const char *ptr) { - char ch = escape_char(*ptr); - return multipart_text(p, &ch, 1, false); -} - -static void start_hex(upb_json_parser *p) { - p->digit = 0; -} - -static void hexdigit(upb_json_parser *p, const char *ptr) { - char ch = *ptr; - - p->digit <<= 4; - - if (ch >= '0' && ch <= '9') { - p->digit += (ch - '0'); - } else if (ch >= 'a' && ch <= 'f') { - p->digit += ((ch - 'a') + 10); - } else { - UPB_ASSERT(ch >= 'A' && ch <= 'F'); - p->digit += ((ch - 'A') + 10); - } -} - -static bool end_hex(upb_json_parser *p) { - uint32_t codepoint = p->digit; - - /* emit the codepoint as UTF-8. */ - char utf8[3]; /* support \u0000 -- \uFFFF -- need only three bytes. */ - int length = 0; - if (codepoint <= 0x7F) { - utf8[0] = codepoint; - length = 1; - } else if (codepoint <= 0x07FF) { - utf8[1] = (codepoint & 0x3F) | 0x80; - codepoint >>= 6; - utf8[0] = (codepoint & 0x1F) | 0xC0; - length = 2; - } else /* codepoint <= 0xFFFF */ { - utf8[2] = (codepoint & 0x3F) | 0x80; - codepoint >>= 6; - utf8[1] = (codepoint & 0x3F) | 0x80; - codepoint >>= 6; - utf8[0] = (codepoint & 0x0F) | 0xE0; - length = 3; - } - /* TODO(haberman): Handle high surrogates: if codepoint is a high surrogate - * we have to wait for the next escape to get the full code point). */ - - return multipart_text(p, utf8, length, false); -} - -static void start_text(upb_json_parser *p, const char *ptr) { - capture_begin(p, ptr); -} - -static bool end_text(upb_json_parser *p, const char *ptr) { - return capture_end(p, ptr); -} - -static bool start_number(upb_json_parser *p, const char *ptr) { - if (is_top_level(p)) { - if (is_number_wrapper_object(p)) { - start_wrapper_object(p); - } else if (is_wellknown_msg(p, UPB_WELLKNOWN_VALUE)) { - start_value_object(p, VALUE_NUMBERVALUE); - } else { - return false; - } - } else if (does_number_wrapper_start(p)) { - if (!start_subobject(p)) { - return false; - } - start_wrapper_object(p); - } else if (is_wellknown_field(p, UPB_WELLKNOWN_VALUE)) { - if (!start_subobject(p)) { - return false; - } - start_value_object(p, VALUE_NUMBERVALUE); - } - - multipart_startaccum(p); - capture_begin(p, ptr); - return true; -} - -static bool parse_number(upb_json_parser *p, bool is_quoted); - -static bool end_number_nontop(upb_json_parser *p, const char *ptr) { - if (!capture_end(p, ptr)) { - return false; - } - - if (p->top->f == NULL) { - multipart_end(p); - return true; - } - - return parse_number(p, false); -} - -static bool end_number(upb_json_parser *p, const char *ptr) { - if (!end_number_nontop(p, ptr)) { - return false; - } - - if (does_number_wrapper_end(p)) { - end_wrapper_object(p); - if (!is_top_level(p)) { - end_subobject(p); - } - return true; - } - - if (is_wellknown_msg(p, UPB_WELLKNOWN_VALUE)) { - end_value_object(p); - if (!is_top_level(p)) { - end_subobject(p); - } - return true; - } - - return true; -} - -/* |buf| is NULL-terminated. |buf| itself will never include quotes; - * |is_quoted| tells us whether this text originally appeared inside quotes. */ -static bool parse_number_from_buffer(upb_json_parser *p, const char *buf, - bool is_quoted) { - size_t len = strlen(buf); - const char *bufend = buf + len; - char *end; - upb_fieldtype_t type = upb_fielddef_type(p->top->f); - double val; - double dummy; - double inf = INFINITY; - - errno = 0; - - if (len == 0 || buf[0] == ' ') { - return false; - } - - /* For integer types, first try parsing with integer-specific routines. - * If these succeed, they will be more accurate for int64/uint64 than - * strtod(). - */ - switch (type) { - case UPB_TYPE_ENUM: - case UPB_TYPE_INT32: { - long val = strtol(buf, &end, 0); - if (errno == ERANGE || end != bufend) { - break; - } else if (val > INT32_MAX || val < INT32_MIN) { - return false; - } else { - upb_sink_putint32(p->top->sink, parser_getsel(p), (int32_t)val); - return true; - } - UPB_UNREACHABLE(); - } - case UPB_TYPE_UINT32: { - unsigned long val = strtoul(buf, &end, 0); - if (end != bufend) { - break; - } else if (val > UINT32_MAX || errno == ERANGE) { - return false; - } else { - upb_sink_putuint32(p->top->sink, parser_getsel(p), (uint32_t)val); - return true; - } - UPB_UNREACHABLE(); - } - /* XXX: We can't handle [u]int64 properly on 32-bit machines because - * strto[u]ll isn't in C89. */ - case UPB_TYPE_INT64: { - long val = strtol(buf, &end, 0); - if (errno == ERANGE || end != bufend) { - break; - } else { - upb_sink_putint64(p->top->sink, parser_getsel(p), val); - return true; - } - UPB_UNREACHABLE(); - } - case UPB_TYPE_UINT64: { - unsigned long val = strtoul(p->accumulated, &end, 0); - if (end != bufend) { - break; - } else if (errno == ERANGE) { - return false; - } else { - upb_sink_putuint64(p->top->sink, parser_getsel(p), val); - return true; - } - UPB_UNREACHABLE(); - } - default: - break; - } - - if (type != UPB_TYPE_DOUBLE && type != UPB_TYPE_FLOAT && is_quoted) { - /* Quoted numbers for integer types are not allowed to be in double form. */ - return false; - } - - if (len == strlen("Infinity") && strcmp(buf, "Infinity") == 0) { - /* C89 does not have an INFINITY macro. */ - val = inf; - } else if (len == strlen("-Infinity") && strcmp(buf, "-Infinity") == 0) { - val = -inf; - } else { - val = strtod(buf, &end); - if (errno == ERANGE || end != bufend) { - return false; - } - } - - switch (type) { -#define CASE(capitaltype, smalltype, ctype, min, max) \ - case UPB_TYPE_ ## capitaltype: { \ - if (modf(val, &dummy) != 0 || val > max || val < min) { \ - return false; \ - } else { \ - upb_sink_put ## smalltype(p->top->sink, parser_getsel(p), \ - (ctype)val); \ - return true; \ - } \ - break; \ - } - case UPB_TYPE_ENUM: - CASE(INT32, int32, int32_t, INT32_MIN, INT32_MAX); - CASE(INT64, int64, int64_t, INT64_MIN, INT64_MAX); - CASE(UINT32, uint32, uint32_t, 0, UINT32_MAX); - CASE(UINT64, uint64, uint64_t, 0, UINT64_MAX); -#undef CASE - - case UPB_TYPE_DOUBLE: - upb_sink_putdouble(p->top->sink, parser_getsel(p), val); - return true; - case UPB_TYPE_FLOAT: - if ((val > FLT_MAX || val < -FLT_MAX) && val != inf && val != -inf) { - return false; - } else { - upb_sink_putfloat(p->top->sink, parser_getsel(p), val); - return true; - } - default: - return false; - } -} - -static bool parse_number(upb_json_parser *p, bool is_quoted) { - size_t len; - const char *buf; - - /* strtol() and friends unfortunately do not support specifying the length of - * the input string, so we need to force a copy into a NULL-terminated buffer. */ - if (!multipart_text(p, "\0", 1, false)) { - return false; - } - - buf = accumulate_getptr(p, &len); - - if (parse_number_from_buffer(p, buf, is_quoted)) { - multipart_end(p); - return true; - } else { - upb_status_seterrf(p->status, "error parsing number: %s", buf); - multipart_end(p); - return false; - } -} - -static bool parser_putbool(upb_json_parser *p, bool val) { - bool ok; - - if (p->top->f == NULL) { - return true; - } - - if (upb_fielddef_type(p->top->f) != UPB_TYPE_BOOL) { - upb_status_seterrf(p->status, - "Boolean value specified for non-bool field: %s", - upb_fielddef_name(p->top->f)); - return false; - } - - ok = upb_sink_putbool(p->top->sink, parser_getsel(p), val); - UPB_ASSERT(ok); - - return true; -} - -static bool end_bool(upb_json_parser *p, bool val) { - if (is_top_level(p)) { - if (is_wellknown_msg(p, UPB_WELLKNOWN_BOOLVALUE)) { - start_wrapper_object(p); - } else if (is_wellknown_msg(p, UPB_WELLKNOWN_VALUE)) { - start_value_object(p, VALUE_BOOLVALUE); - } else { - return false; - } - } else if (is_wellknown_field(p, UPB_WELLKNOWN_BOOLVALUE)) { - if (!start_subobject(p)) { - return false; - } - start_wrapper_object(p); - } else if (is_wellknown_field(p, UPB_WELLKNOWN_VALUE)) { - if (!start_subobject(p)) { - return false; - } - start_value_object(p, VALUE_BOOLVALUE); - } - - if (p->top->is_unknown_field) { - return true; - } - - if (!parser_putbool(p, val)) { - return false; - } - - if (is_wellknown_msg(p, UPB_WELLKNOWN_BOOLVALUE)) { - end_wrapper_object(p); - if (!is_top_level(p)) { - end_subobject(p); - } - return true; - } - - if (is_wellknown_msg(p, UPB_WELLKNOWN_VALUE)) { - end_value_object(p); - if (!is_top_level(p)) { - end_subobject(p); - } - return true; - } - - return true; -} - -static bool end_null(upb_json_parser *p) { - const char *zero_ptr = "0"; - - if (is_top_level(p)) { - if (is_wellknown_msg(p, UPB_WELLKNOWN_VALUE)) { - start_value_object(p, VALUE_NULLVALUE); - } else { - return true; - } - } else if (is_wellknown_field(p, UPB_WELLKNOWN_VALUE)) { - if (!start_subobject(p)) { - return false; - } - start_value_object(p, VALUE_NULLVALUE); - } else { - return true; - } - - /* Fill null_value field. */ - multipart_startaccum(p); - capture_begin(p, zero_ptr); - capture_end(p, zero_ptr + 1); - parse_number(p, false); - - end_value_object(p); - if (!is_top_level(p)) { - end_subobject(p); - } - - return true; -} - -static bool start_any_stringval(upb_json_parser *p) { - multipart_startaccum(p); - return true; -} - -static bool start_stringval(upb_json_parser *p) { - if (is_top_level(p)) { - if (is_string_wrapper_object(p) || - is_number_wrapper_object(p)) { - start_wrapper_object(p); - } else if (is_wellknown_msg(p, UPB_WELLKNOWN_FIELDMASK)) { - start_fieldmask_object(p); - return true; - } else if (is_wellknown_msg(p, UPB_WELLKNOWN_TIMESTAMP) || - is_wellknown_msg(p, UPB_WELLKNOWN_DURATION)) { - start_object(p); - } else if (is_wellknown_msg(p, UPB_WELLKNOWN_VALUE)) { - start_value_object(p, VALUE_STRINGVALUE); - } else { - return false; - } - } else if (does_string_wrapper_start(p) || - does_number_wrapper_start(p)) { - if (!start_subobject(p)) { - return false; - } - start_wrapper_object(p); - } else if (does_fieldmask_start(p)) { - if (!start_subobject(p)) { - return false; - } - start_fieldmask_object(p); - return true; - } else if (is_wellknown_field(p, UPB_WELLKNOWN_TIMESTAMP) || - is_wellknown_field(p, UPB_WELLKNOWN_DURATION)) { - if (!start_subobject(p)) { - return false; - } - start_object(p); - } else if (is_wellknown_field(p, UPB_WELLKNOWN_VALUE)) { - if (!start_subobject(p)) { - return false; - } - start_value_object(p, VALUE_STRINGVALUE); - } - - if (p->top->f == NULL) { - multipart_startaccum(p); - return true; - } - - if (p->top->is_any) { - return start_any_stringval(p); - } - - if (upb_fielddef_isstring(p->top->f)) { - upb_jsonparser_frame *inner; - upb_selector_t sel; - - if (!check_stack(p)) return false; - - /* Start a new parser frame: parser frames correspond one-to-one with - * handler frames, and string events occur in a sub-frame. */ - inner = start_jsonparser_frame(p); - sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSTR); - upb_sink_startstr(p->top->sink, sel, 0, &inner->sink); - inner->m = p->top->m; - inner->f = p->top->f; - p->top = inner; - - if (upb_fielddef_type(p->top->f) == UPB_TYPE_STRING) { - /* For STRING fields we push data directly to the handlers as it is - * parsed. We don't do this yet for BYTES fields, because our base64 - * decoder is not streaming. - * - * TODO(haberman): make base64 decoding streaming also. */ - multipart_start(p, getsel_for_handlertype(p, UPB_HANDLER_STRING)); - return true; - } else { - multipart_startaccum(p); - return true; - } - } else if (upb_fielddef_type(p->top->f) != UPB_TYPE_BOOL && - upb_fielddef_type(p->top->f) != UPB_TYPE_MESSAGE) { - /* No need to push a frame -- numeric values in quotes remain in the - * current parser frame. These values must accmulate so we can convert - * them all at once at the end. */ - multipart_startaccum(p); - return true; - } else { - upb_status_seterrf(p->status, - "String specified for bool or submessage field: %s", - upb_fielddef_name(p->top->f)); - return false; - } -} - -static bool end_any_stringval(upb_json_parser *p) { - size_t len; - const char *buf = accumulate_getptr(p, &len); - - /* Set type_url */ - upb_selector_t sel; - upb_jsonparser_frame *inner; - if (!check_stack(p)) return false; - inner = p->top + 1; - - sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSTR); - upb_sink_startstr(p->top->sink, sel, 0, &inner->sink); - sel = getsel_for_handlertype(p, UPB_HANDLER_STRING); - upb_sink_putstring(inner->sink, sel, buf, len, NULL); - sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSTR); - upb_sink_endstr(inner->sink, sel); - - multipart_end(p); - - /* Resolve type url */ - if (strncmp(buf, "type.googleapis.com/", 20) == 0 && len > 20) { - const upb_msgdef *payload_type = NULL; - buf += 20; - len -= 20; - - payload_type = upb_symtab_lookupmsg2(p->symtab, buf, len); - if (payload_type == NULL) { - upb_status_seterrf( - p->status, "Cannot find packed type: %.*s\n", (int)len, buf); - return false; - } - - json_parser_any_frame_set_payload_type(p, p->top->any_frame, payload_type); - - return true; - } else { - upb_status_seterrf( - p->status, "Invalid type url: %.*s\n", (int)len, buf); - return false; - } -} - -static bool end_stringval_nontop(upb_json_parser *p) { - bool ok = true; - - if (is_wellknown_msg(p, UPB_WELLKNOWN_TIMESTAMP) || - is_wellknown_msg(p, UPB_WELLKNOWN_DURATION)) { - multipart_end(p); - return true; - } - - if (p->top->f == NULL) { - multipart_end(p); - return true; - } - - if (p->top->is_any) { - return end_any_stringval(p); - } - - switch (upb_fielddef_type(p->top->f)) { - case UPB_TYPE_BYTES: - if (!base64_push(p, getsel_for_handlertype(p, UPB_HANDLER_STRING), - p->accumulated, p->accumulated_len)) { - return false; - } - /* Fall through. */ - - case UPB_TYPE_STRING: { - upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSTR); - upb_sink_endstr(p->top->sink, sel); - p->top--; - break; - } - - case UPB_TYPE_ENUM: { - /* Resolve enum symbolic name to integer value. */ - const upb_enumdef *enumdef = upb_fielddef_enumsubdef(p->top->f); - - size_t len; - const char *buf = accumulate_getptr(p, &len); - - int32_t int_val = 0; - ok = upb_enumdef_ntoi(enumdef, buf, len, &int_val); - - if (ok) { - upb_selector_t sel = parser_getsel(p); - upb_sink_putint32(p->top->sink, sel, int_val); - } else { - if (p->ignore_json_unknown) { - ok = true; - /* TODO(teboring): Should also clean this field. */ - } else { - upb_status_seterrf(p->status, "Enum value unknown: '%.*s'", (int)len, - buf); - } - } - - break; - } - - case UPB_TYPE_INT32: - case UPB_TYPE_INT64: - case UPB_TYPE_UINT32: - case UPB_TYPE_UINT64: - case UPB_TYPE_DOUBLE: - case UPB_TYPE_FLOAT: - ok = parse_number(p, true); - break; - - default: - UPB_ASSERT(false); - upb_status_seterrmsg(p->status, "Internal error in JSON decoder"); - ok = false; - break; - } - - multipart_end(p); - - return ok; -} - -static bool end_stringval(upb_json_parser *p) { - /* FieldMask's stringvals have been ended when handling them. Only need to - * close FieldMask here.*/ - if (does_fieldmask_end(p)) { - end_fieldmask_object(p); - if (!is_top_level(p)) { - end_subobject(p); - } - return true; - } - - if (!end_stringval_nontop(p)) { - return false; - } - - if (does_string_wrapper_end(p) || - does_number_wrapper_end(p)) { - end_wrapper_object(p); - if (!is_top_level(p)) { - end_subobject(p); - } - return true; - } - - if (is_wellknown_msg(p, UPB_WELLKNOWN_VALUE)) { - end_value_object(p); - if (!is_top_level(p)) { - end_subobject(p); - } - return true; - } - - if (is_wellknown_msg(p, UPB_WELLKNOWN_TIMESTAMP) || - is_wellknown_msg(p, UPB_WELLKNOWN_DURATION) || - is_wellknown_msg(p, UPB_WELLKNOWN_FIELDMASK)) { - end_object(p); - if (!is_top_level(p)) { - end_subobject(p); - } - return true; - } - - return true; -} - -static void start_duration_base(upb_json_parser *p, const char *ptr) { - capture_begin(p, ptr); -} - -static bool end_duration_base(upb_json_parser *p, const char *ptr) { - size_t len; - const char *buf; - char seconds_buf[14]; - char nanos_buf[12]; - char *end; - int64_t seconds = 0; - int32_t nanos = 0; - double val = 0.0; - const char *seconds_membername = "seconds"; - const char *nanos_membername = "nanos"; - size_t fraction_start; - - if (!capture_end(p, ptr)) { - return false; - } - - buf = accumulate_getptr(p, &len); - - memset(seconds_buf, 0, 14); - memset(nanos_buf, 0, 12); - - /* Find out base end. The maximus duration is 315576000000, which cannot be - * represented by double without losing precision. Thus, we need to handle - * fraction and base separately. */ - for (fraction_start = 0; fraction_start < len && buf[fraction_start] != '.'; - fraction_start++); - - /* Parse base */ - memcpy(seconds_buf, buf, fraction_start); - seconds = strtol(seconds_buf, &end, 10); - if (errno == ERANGE || end != seconds_buf + fraction_start) { - upb_status_seterrf(p->status, "error parsing duration: %s", - seconds_buf); - return false; - } - - if (seconds > 315576000000) { - upb_status_seterrf(p->status, "error parsing duration: " - "maximum acceptable value is " - "315576000000"); - return false; - } - - if (seconds < -315576000000) { - upb_status_seterrf(p->status, "error parsing duration: " - "minimum acceptable value is " - "-315576000000"); - return false; - } - - /* Parse fraction */ - nanos_buf[0] = '0'; - memcpy(nanos_buf + 1, buf + fraction_start, len - fraction_start); - val = strtod(nanos_buf, &end); - if (errno == ERANGE || end != nanos_buf + len - fraction_start + 1) { - upb_status_seterrf(p->status, "error parsing duration: %s", - nanos_buf); - return false; - } - - nanos = val * 1000000000; - if (seconds < 0) nanos = -nanos; - - /* Clean up buffer */ - multipart_end(p); - - /* Set seconds */ - start_member(p); - capture_begin(p, seconds_membername); - capture_end(p, seconds_membername + 7); - end_membername(p); - upb_sink_putint64(p->top->sink, parser_getsel(p), seconds); - end_member(p); - - /* Set nanos */ - start_member(p); - capture_begin(p, nanos_membername); - capture_end(p, nanos_membername + 5); - end_membername(p); - upb_sink_putint32(p->top->sink, parser_getsel(p), nanos); - end_member(p); - - /* Continue previous arena */ - multipart_startaccum(p); - - return true; -} - -static int parse_timestamp_number(upb_json_parser *p) { - size_t len; - const char *buf; - int val; - - /* atoi() and friends unfortunately do not support specifying the length of - * the input string, so we need to force a copy into a NULL-terminated buffer. */ - multipart_text(p, "\0", 1, false); - - buf = accumulate_getptr(p, &len); - val = atoi(buf); - multipart_end(p); - multipart_startaccum(p); - - return val; -} - -static void start_year(upb_json_parser *p, const char *ptr) { - capture_begin(p, ptr); -} - -static bool end_year(upb_json_parser *p, const char *ptr) { - if (!capture_end(p, ptr)) { - return false; - } - p->tm.tm_year = parse_timestamp_number(p) - 1900; - return true; -} - -static void start_month(upb_json_parser *p, const char *ptr) { - capture_begin(p, ptr); -} - -static bool end_month(upb_json_parser *p, const char *ptr) { - if (!capture_end(p, ptr)) { - return false; - } - p->tm.tm_mon = parse_timestamp_number(p) - 1; - return true; -} - -static void start_day(upb_json_parser *p, const char *ptr) { - capture_begin(p, ptr); -} - -static bool end_day(upb_json_parser *p, const char *ptr) { - if (!capture_end(p, ptr)) { - return false; - } - p->tm.tm_mday = parse_timestamp_number(p); - return true; -} - -static void start_hour(upb_json_parser *p, const char *ptr) { - capture_begin(p, ptr); -} - -static bool end_hour(upb_json_parser *p, const char *ptr) { - if (!capture_end(p, ptr)) { - return false; - } - p->tm.tm_hour = parse_timestamp_number(p); - return true; -} - -static void start_minute(upb_json_parser *p, const char *ptr) { - capture_begin(p, ptr); -} - -static bool end_minute(upb_json_parser *p, const char *ptr) { - if (!capture_end(p, ptr)) { - return false; - } - p->tm.tm_min = parse_timestamp_number(p); - return true; -} - -static void start_second(upb_json_parser *p, const char *ptr) { - capture_begin(p, ptr); -} - -static bool end_second(upb_json_parser *p, const char *ptr) { - if (!capture_end(p, ptr)) { - return false; - } - p->tm.tm_sec = parse_timestamp_number(p); - return true; -} - -static void start_timestamp_base(upb_json_parser *p) { - memset(&p->tm, 0, sizeof(struct tm)); -} - -static void start_timestamp_fraction(upb_json_parser *p, const char *ptr) { - capture_begin(p, ptr); -} - -static bool end_timestamp_fraction(upb_json_parser *p, const char *ptr) { - size_t len; - const char *buf; - char nanos_buf[12]; - char *end; - double val = 0.0; - int32_t nanos; - const char *nanos_membername = "nanos"; - - memset(nanos_buf, 0, 12); - - if (!capture_end(p, ptr)) { - return false; - } - - buf = accumulate_getptr(p, &len); - - if (len > 10) { - upb_status_seterrf(p->status, - "error parsing timestamp: at most 9-digit fraction."); - return false; - } - - /* Parse nanos */ - nanos_buf[0] = '0'; - memcpy(nanos_buf + 1, buf, len); - val = strtod(nanos_buf, &end); - - if (errno == ERANGE || end != nanos_buf + len + 1) { - upb_status_seterrf(p->status, "error parsing timestamp nanos: %s", - nanos_buf); - return false; - } - - nanos = val * 1000000000; - - /* Clean up previous environment */ - multipart_end(p); - - /* Set nanos */ - start_member(p); - capture_begin(p, nanos_membername); - capture_end(p, nanos_membername + 5); - end_membername(p); - upb_sink_putint32(p->top->sink, parser_getsel(p), nanos); - end_member(p); - - /* Continue previous environment */ - multipart_startaccum(p); - - return true; -} - -static void start_timestamp_zone(upb_json_parser *p, const char *ptr) { - capture_begin(p, ptr); -} - -/* epoch_days(1970, 1, 1) == 1970-01-01 == 0. */ -static int epoch_days(int year, int month, int day) { - static const uint16_t month_yday[12] = {0, 31, 59, 90, 120, 151, - 181, 212, 243, 273, 304, 334}; - uint32_t year_adj = year + 4800; /* Ensure positive year, multiple of 400. */ - uint32_t febs = year_adj - (month <= 2 ? 1 : 0); /* Februaries since base. */ - uint32_t leap_days = 1 + (febs / 4) - (febs / 100) + (febs / 400); - uint32_t days = 365 * year_adj + leap_days + month_yday[month - 1] + day - 1; - return days - 2472692; /* Adjust to Unix epoch. */ -} - -static int64_t upb_timegm(const struct tm *tp) { - int64_t ret = epoch_days(tp->tm_year + 1900, tp->tm_mon + 1, tp->tm_mday); - ret = (ret * 24) + tp->tm_hour; - ret = (ret * 60) + tp->tm_min; - ret = (ret * 60) + tp->tm_sec; - return ret; -} - -static bool end_timestamp_zone(upb_json_parser *p, const char *ptr) { - size_t len; - const char *buf; - int hours; - int64_t seconds; - const char *seconds_membername = "seconds"; - - if (!capture_end(p, ptr)) { - return false; - } - - buf = accumulate_getptr(p, &len); - - if (buf[0] != 'Z') { - if (sscanf(buf + 1, "%2d:00", &hours) != 1) { - upb_status_seterrf(p->status, "error parsing timestamp offset"); - return false; - } - - if (buf[0] == '+') { - hours = -hours; - } - - p->tm.tm_hour += hours; - } - - /* Normalize tm */ - seconds = upb_timegm(&p->tm); - - /* Check timestamp boundary */ - if (seconds < -62135596800) { - upb_status_seterrf(p->status, "error parsing timestamp: " - "minimum acceptable value is " - "0001-01-01T00:00:00Z"); - return false; - } - - /* Clean up previous environment */ - multipart_end(p); - - /* Set seconds */ - start_member(p); - capture_begin(p, seconds_membername); - capture_end(p, seconds_membername + 7); - end_membername(p); - upb_sink_putint64(p->top->sink, parser_getsel(p), seconds); - end_member(p); - - /* Continue previous environment */ - multipart_startaccum(p); - - return true; -} - -static void start_fieldmask_path_text(upb_json_parser *p, const char *ptr) { - capture_begin(p, ptr); -} - -static bool end_fieldmask_path_text(upb_json_parser *p, const char *ptr) { - return capture_end(p, ptr); -} - -static bool start_fieldmask_path(upb_json_parser *p) { - upb_jsonparser_frame *inner; - upb_selector_t sel; - - if (!check_stack(p)) return false; - - /* Start a new parser frame: parser frames correspond one-to-one with - * handler frames, and string events occur in a sub-frame. */ - inner = start_jsonparser_frame(p); - sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSTR); - upb_sink_startstr(p->top->sink, sel, 0, &inner->sink); - inner->m = p->top->m; - inner->f = p->top->f; - p->top = inner; - - multipart_startaccum(p); - return true; -} - -static bool lower_camel_push( - upb_json_parser *p, upb_selector_t sel, const char *ptr, size_t len) { - const char *limit = ptr + len; - bool first = true; - for (;ptr < limit; ptr++) { - if (*ptr >= 'A' && *ptr <= 'Z' && !first) { - char lower = tolower(*ptr); - upb_sink_putstring(p->top->sink, sel, "_", 1, NULL); - upb_sink_putstring(p->top->sink, sel, &lower, 1, NULL); - } else { - upb_sink_putstring(p->top->sink, sel, ptr, 1, NULL); - } - first = false; - } - return true; -} - -static bool end_fieldmask_path(upb_json_parser *p) { - upb_selector_t sel; - - if (!lower_camel_push( - p, getsel_for_handlertype(p, UPB_HANDLER_STRING), - p->accumulated, p->accumulated_len)) { - return false; - } - - sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSTR); - upb_sink_endstr(p->top->sink, sel); - p->top--; - - multipart_end(p); - return true; -} - -static void start_member(upb_json_parser *p) { - UPB_ASSERT(!p->top->f); - multipart_startaccum(p); -} - -/* Helper: invoked during parse_mapentry() to emit the mapentry message's key - * field based on the current contents of the accumulate buffer. */ -static bool parse_mapentry_key(upb_json_parser *p) { - - size_t len; - const char *buf = accumulate_getptr(p, &len); - - /* Emit the key field. We do a bit of ad-hoc parsing here because the - * parser state machine has already decided that this is a string field - * name, and we are reinterpreting it as some arbitrary key type. In - * particular, integer and bool keys are quoted, so we need to parse the - * quoted string contents here. */ - - p->top->f = upb_msgdef_itof(p->top->m, UPB_MAPENTRY_KEY); - if (p->top->f == NULL) { - upb_status_seterrmsg(p->status, "mapentry message has no key"); - return false; - } - switch (upb_fielddef_type(p->top->f)) { - case UPB_TYPE_INT32: - case UPB_TYPE_INT64: - case UPB_TYPE_UINT32: - case UPB_TYPE_UINT64: - /* Invoke end_number. The accum buffer has the number's text already. */ - if (!parse_number(p, true)) { - return false; - } - break; - case UPB_TYPE_BOOL: - if (len == 4 && !strncmp(buf, "true", 4)) { - if (!parser_putbool(p, true)) { - return false; - } - } else if (len == 5 && !strncmp(buf, "false", 5)) { - if (!parser_putbool(p, false)) { - return false; - } - } else { - upb_status_seterrmsg(p->status, - "Map bool key not 'true' or 'false'"); - return false; - } - multipart_end(p); - break; - case UPB_TYPE_STRING: - case UPB_TYPE_BYTES: { - upb_sink subsink; - upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSTR); - upb_sink_startstr(p->top->sink, sel, len, &subsink); - sel = getsel_for_handlertype(p, UPB_HANDLER_STRING); - upb_sink_putstring(subsink, sel, buf, len, NULL); - sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSTR); - upb_sink_endstr(subsink, sel); - multipart_end(p); - break; - } - default: - upb_status_seterrmsg(p->status, "Invalid field type for map key"); - return false; - } - - return true; -} - -/* Helper: emit one map entry (as a submessage in the map field sequence). This - * is invoked from end_membername(), at the end of the map entry's key string, - * with the map key in the accumulate buffer. It parses the key from that - * buffer, emits the handler calls to start the mapentry submessage (setting up - * its subframe in the process), and sets up state in the subframe so that the - * value parser (invoked next) will emit the mapentry's value field and then - * end the mapentry message. */ - -static bool handle_mapentry(upb_json_parser *p) { - const upb_fielddef *mapfield; - const upb_msgdef *mapentrymsg; - upb_jsonparser_frame *inner; - upb_selector_t sel; - - /* Map entry: p->top->sink is the seq frame, so we need to start a frame - * for the mapentry itself, and then set |f| in that frame so that the map - * value field is parsed, and also set a flag to end the frame after the - * map-entry value is parsed. */ - if (!check_stack(p)) return false; - - mapfield = p->top->mapfield; - mapentrymsg = upb_fielddef_msgsubdef(mapfield); - - inner = start_jsonparser_frame(p); - p->top->f = mapfield; - sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSUBMSG); - upb_sink_startsubmsg(p->top->sink, sel, &inner->sink); - inner->m = mapentrymsg; - inner->mapfield = mapfield; - - /* Don't set this to true *yet* -- we reuse parsing handlers below to push - * the key field value to the sink, and these handlers will pop the frame - * if they see is_mapentry (when invoked by the parser state machine, they - * would have just seen the map-entry value, not key). */ - inner->is_mapentry = false; - p->top = inner; - - /* send STARTMSG in submsg frame. */ - upb_sink_startmsg(p->top->sink); - - parse_mapentry_key(p); - - /* Set up the value field to receive the map-entry value. */ - p->top->f = upb_msgdef_itof(p->top->m, UPB_MAPENTRY_VALUE); - p->top->is_mapentry = true; /* set up to pop frame after value is parsed. */ - p->top->mapfield = mapfield; - if (p->top->f == NULL) { - upb_status_seterrmsg(p->status, "mapentry message has no value"); - return false; - } - - return true; -} - -static bool end_membername(upb_json_parser *p) { - UPB_ASSERT(!p->top->f); - - if (!p->top->m) { - p->top->is_unknown_field = true; - multipart_end(p); - return true; - } - - if (p->top->is_any) { - return end_any_membername(p); - } else if (p->top->is_map) { - return handle_mapentry(p); - } else { - size_t len; - const char *buf = accumulate_getptr(p, &len); - upb_value v; - - if (upb_strtable_lookup2(p->top->name_table, buf, len, &v)) { - p->top->f = upb_value_getconstptr(v); - multipart_end(p); - - return true; - } else if (p->ignore_json_unknown) { - p->top->is_unknown_field = true; - multipart_end(p); - return true; - } else { - upb_status_seterrf(p->status, "No such field: %.*s\n", (int)len, buf); - return false; - } - } -} - -static bool end_any_membername(upb_json_parser *p) { - size_t len; - const char *buf = accumulate_getptr(p, &len); - upb_value v; - - if (len == 5 && strncmp(buf, "@type", len) == 0) { - upb_strtable_lookup2(p->top->name_table, "type_url", 8, &v); - p->top->f = upb_value_getconstptr(v); - multipart_end(p); - return true; - } else { - p->top->is_unknown_field = true; - multipart_end(p); - return true; - } -} - -static void end_member(upb_json_parser *p) { - /* If we just parsed a map-entry value, end that frame too. */ - if (p->top->is_mapentry) { - upb_selector_t sel; - bool ok; - const upb_fielddef *mapfield; - - UPB_ASSERT(p->top > p->stack); - /* send ENDMSG on submsg. */ - upb_sink_endmsg(p->top->sink, p->status); - mapfield = p->top->mapfield; - - /* send ENDSUBMSG in repeated-field-of-mapentries frame. */ - p->top--; - ok = upb_handlers_getselector(mapfield, UPB_HANDLER_ENDSUBMSG, &sel); - UPB_ASSUME(ok); - upb_sink_endsubmsg(p->top->sink, (p->top + 1)->sink, sel); - } - - p->top->f = NULL; - p->top->is_unknown_field = false; -} - -static void start_any_member(upb_json_parser *p, const char *ptr) { - start_member(p); - json_parser_any_frame_set_after_type_url_start_once(p->top->any_frame, ptr); -} - -static void end_any_member(upb_json_parser *p, const char *ptr) { - json_parser_any_frame_set_before_type_url_end(p->top->any_frame, ptr); - end_member(p); -} - -static bool start_subobject(upb_json_parser *p) { - if (p->top->is_unknown_field) { - if (!check_stack(p)) return false; - - p->top = start_jsonparser_frame(p); - return true; - } - - if (upb_fielddef_ismap(p->top->f)) { - upb_jsonparser_frame *inner; - upb_selector_t sel; - - /* Beginning of a map. Start a new parser frame in a repeated-field - * context. */ - if (!check_stack(p)) return false; - - inner = start_jsonparser_frame(p); - sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSEQ); - upb_sink_startseq(p->top->sink, sel, &inner->sink); - inner->m = upb_fielddef_msgsubdef(p->top->f); - inner->mapfield = p->top->f; - inner->is_map = true; - p->top = inner; - - return true; - } else if (upb_fielddef_issubmsg(p->top->f)) { - upb_jsonparser_frame *inner; - upb_selector_t sel; - - /* Beginning of a subobject. Start a new parser frame in the submsg - * context. */ - if (!check_stack(p)) return false; - - inner = start_jsonparser_frame(p); - sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSUBMSG); - upb_sink_startsubmsg(p->top->sink, sel, &inner->sink); - inner->m = upb_fielddef_msgsubdef(p->top->f); - set_name_table(p, inner); - p->top = inner; - - if (is_wellknown_msg(p, UPB_WELLKNOWN_ANY)) { - p->top->is_any = true; - p->top->any_frame = json_parser_any_frame_new(p); - } else { - p->top->is_any = false; - p->top->any_frame = NULL; - } - - return true; - } else { - upb_status_seterrf(p->status, - "Object specified for non-message/group field: %s", - upb_fielddef_name(p->top->f)); - return false; - } -} - -static bool start_subobject_full(upb_json_parser *p) { - if (is_top_level(p)) { - if (is_wellknown_msg(p, UPB_WELLKNOWN_VALUE)) { - start_value_object(p, VALUE_STRUCTVALUE); - if (!start_subobject(p)) return false; - start_structvalue_object(p); - } else if (is_wellknown_msg(p, UPB_WELLKNOWN_STRUCT)) { - start_structvalue_object(p); - } else { - return true; - } - } else if (is_wellknown_field(p, UPB_WELLKNOWN_STRUCT)) { - if (!start_subobject(p)) return false; - start_structvalue_object(p); - } else if (is_wellknown_field(p, UPB_WELLKNOWN_VALUE)) { - if (!start_subobject(p)) return false; - start_value_object(p, VALUE_STRUCTVALUE); - if (!start_subobject(p)) return false; - start_structvalue_object(p); - } - - return start_subobject(p); -} - -static void end_subobject(upb_json_parser *p) { - if (is_top_level(p)) { - return; - } - - if (p->top->is_map) { - upb_selector_t sel; - p->top--; - sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSEQ); - upb_sink_endseq(p->top->sink, sel); - } else { - upb_selector_t sel; - bool is_unknown = p->top->m == NULL; - p->top--; - if (!is_unknown) { - sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSUBMSG); - upb_sink_endsubmsg(p->top->sink, (p->top + 1)->sink, sel); - } - } -} - -static void end_subobject_full(upb_json_parser *p) { - end_subobject(p); - - if (is_wellknown_msg(p, UPB_WELLKNOWN_STRUCT)) { - end_structvalue_object(p); - if (!is_top_level(p)) { - end_subobject(p); - } - } - - if (is_wellknown_msg(p, UPB_WELLKNOWN_VALUE)) { - end_value_object(p); - if (!is_top_level(p)) { - end_subobject(p); - } - } -} - -static bool start_array(upb_json_parser *p) { - upb_jsonparser_frame *inner; - upb_selector_t sel; - - if (is_top_level(p)) { - if (is_wellknown_msg(p, UPB_WELLKNOWN_VALUE)) { - start_value_object(p, VALUE_LISTVALUE); - if (!start_subobject(p)) return false; - start_listvalue_object(p); - } else if (is_wellknown_msg(p, UPB_WELLKNOWN_LISTVALUE)) { - start_listvalue_object(p); - } else { - return false; - } - } else if (is_wellknown_field(p, UPB_WELLKNOWN_LISTVALUE) && - (!upb_fielddef_isseq(p->top->f) || - p->top->is_repeated)) { - if (!start_subobject(p)) return false; - start_listvalue_object(p); - } else if (is_wellknown_field(p, UPB_WELLKNOWN_VALUE) && - (!upb_fielddef_isseq(p->top->f) || - p->top->is_repeated)) { - if (!start_subobject(p)) return false; - start_value_object(p, VALUE_LISTVALUE); - if (!start_subobject(p)) return false; - start_listvalue_object(p); - } - - if (p->top->is_unknown_field) { - inner = start_jsonparser_frame(p); - inner->is_unknown_field = true; - p->top = inner; - - return true; - } - - if (!upb_fielddef_isseq(p->top->f)) { - upb_status_seterrf(p->status, - "Array specified for non-repeated field: %s", - upb_fielddef_name(p->top->f)); - return false; - } - - if (!check_stack(p)) return false; - - inner = start_jsonparser_frame(p); - sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSEQ); - upb_sink_startseq(p->top->sink, sel, &inner->sink); - inner->m = p->top->m; - inner->f = p->top->f; - inner->is_repeated = true; - p->top = inner; - - return true; -} - -static void end_array(upb_json_parser *p) { - upb_selector_t sel; - - UPB_ASSERT(p->top > p->stack); - - p->top--; - - if (p->top->is_unknown_field) { - return; - } - - sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSEQ); - upb_sink_endseq(p->top->sink, sel); - - if (is_wellknown_msg(p, UPB_WELLKNOWN_LISTVALUE)) { - end_listvalue_object(p); - if (!is_top_level(p)) { - end_subobject(p); - } - } - - if (is_wellknown_msg(p, UPB_WELLKNOWN_VALUE)) { - end_value_object(p); - if (!is_top_level(p)) { - end_subobject(p); - } - } -} - -static void start_object(upb_json_parser *p) { - if (!p->top->is_map && p->top->m != NULL) { - upb_sink_startmsg(p->top->sink); - } -} - -static void end_object(upb_json_parser *p) { - if (!p->top->is_map && p->top->m != NULL) { - upb_sink_endmsg(p->top->sink, p->status); - } -} - -static void start_any_object(upb_json_parser *p, const char *ptr) { - start_object(p); - p->top->any_frame->before_type_url_start = ptr; - p->top->any_frame->before_type_url_end = ptr; -} - -static bool end_any_object(upb_json_parser *p, const char *ptr) { - const char *value_membername = "value"; - bool is_well_known_packed = false; - const char *packed_end = ptr + 1; - upb_selector_t sel; - upb_jsonparser_frame *inner; - - if (json_parser_any_frame_has_value(p->top->any_frame) && - !json_parser_any_frame_has_type_url(p->top->any_frame)) { - upb_status_seterrmsg(p->status, "No valid type url"); - return false; - } - - /* Well known types data is represented as value field. */ - if (upb_msgdef_wellknowntype(p->top->any_frame->parser->top->m) != - UPB_WELLKNOWN_UNSPECIFIED) { - is_well_known_packed = true; - - if (json_parser_any_frame_has_value_before_type_url(p->top->any_frame)) { - p->top->any_frame->before_type_url_start = - memchr(p->top->any_frame->before_type_url_start, ':', - p->top->any_frame->before_type_url_end - - p->top->any_frame->before_type_url_start); - if (p->top->any_frame->before_type_url_start == NULL) { - upb_status_seterrmsg(p->status, "invalid data for well known type."); - return false; - } - p->top->any_frame->before_type_url_start++; - } - - if (json_parser_any_frame_has_value_after_type_url(p->top->any_frame)) { - p->top->any_frame->after_type_url_start = - memchr(p->top->any_frame->after_type_url_start, ':', - (ptr + 1) - - p->top->any_frame->after_type_url_start); - if (p->top->any_frame->after_type_url_start == NULL) { - upb_status_seterrmsg(p->status, "Invalid data for well known type."); - return false; - } - p->top->any_frame->after_type_url_start++; - packed_end = ptr; - } - } - - if (json_parser_any_frame_has_value_before_type_url(p->top->any_frame)) { - if (!parse(p->top->any_frame->parser, NULL, - p->top->any_frame->before_type_url_start, - p->top->any_frame->before_type_url_end - - p->top->any_frame->before_type_url_start, NULL)) { - return false; - } - } else { - if (!is_well_known_packed) { - if (!parse(p->top->any_frame->parser, NULL, "{", 1, NULL)) { - return false; - } - } - } - - if (json_parser_any_frame_has_value_before_type_url(p->top->any_frame) && - json_parser_any_frame_has_value_after_type_url(p->top->any_frame)) { - if (!parse(p->top->any_frame->parser, NULL, ",", 1, NULL)) { - return false; - } - } - - if (json_parser_any_frame_has_value_after_type_url(p->top->any_frame)) { - if (!parse(p->top->any_frame->parser, NULL, - p->top->any_frame->after_type_url_start, - packed_end - p->top->any_frame->after_type_url_start, NULL)) { - return false; - } - } else { - if (!is_well_known_packed) { - if (!parse(p->top->any_frame->parser, NULL, "}", 1, NULL)) { - return false; - } - } - } - - if (!end(p->top->any_frame->parser, NULL)) { - return false; - } - - p->top->is_any = false; - - /* Set value */ - start_member(p); - capture_begin(p, value_membername); - capture_end(p, value_membername + 5); - end_membername(p); - - if (!check_stack(p)) return false; - inner = p->top + 1; - - sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSTR); - upb_sink_startstr(p->top->sink, sel, 0, &inner->sink); - sel = getsel_for_handlertype(p, UPB_HANDLER_STRING); - upb_sink_putstring(inner->sink, sel, p->top->any_frame->stringsink.ptr, - p->top->any_frame->stringsink.len, NULL); - sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSTR); - upb_sink_endstr(inner->sink, sel); - - end_member(p); - - end_object(p); - - /* Deallocate any parse frame. */ - json_parser_any_frame_free(p->top->any_frame); - - return true; -} - -static bool is_string_wrapper(const upb_msgdef *m) { - upb_wellknowntype_t type = upb_msgdef_wellknowntype(m); - return type == UPB_WELLKNOWN_STRINGVALUE || - type == UPB_WELLKNOWN_BYTESVALUE; -} - -static bool is_fieldmask(const upb_msgdef *m) { - upb_wellknowntype_t type = upb_msgdef_wellknowntype(m); - return type == UPB_WELLKNOWN_FIELDMASK; -} - -static void start_fieldmask_object(upb_json_parser *p) { - const char *membername = "paths"; - - start_object(p); - - /* Set up context for parsing value */ - start_member(p); - capture_begin(p, membername); - capture_end(p, membername + 5); - end_membername(p); - - start_array(p); -} - -static void end_fieldmask_object(upb_json_parser *p) { - end_array(p); - end_member(p); - end_object(p); -} - -static void start_wrapper_object(upb_json_parser *p) { - const char *membername = "value"; - - start_object(p); - - /* Set up context for parsing value */ - start_member(p); - capture_begin(p, membername); - capture_end(p, membername + 5); - end_membername(p); -} - -static void end_wrapper_object(upb_json_parser *p) { - end_member(p); - end_object(p); -} - -static void start_value_object(upb_json_parser *p, int value_type) { - const char *nullmember = "null_value"; - const char *numbermember = "number_value"; - const char *stringmember = "string_value"; - const char *boolmember = "bool_value"; - const char *structmember = "struct_value"; - const char *listmember = "list_value"; - const char *membername = ""; - - switch (value_type) { - case VALUE_NULLVALUE: - membername = nullmember; - break; - case VALUE_NUMBERVALUE: - membername = numbermember; - break; - case VALUE_STRINGVALUE: - membername = stringmember; - break; - case VALUE_BOOLVALUE: - membername = boolmember; - break; - case VALUE_STRUCTVALUE: - membername = structmember; - break; - case VALUE_LISTVALUE: - membername = listmember; - break; - } - - start_object(p); - - /* Set up context for parsing value */ - start_member(p); - capture_begin(p, membername); - capture_end(p, membername + strlen(membername)); - end_membername(p); -} - -static void end_value_object(upb_json_parser *p) { - end_member(p); - end_object(p); -} - -static void start_listvalue_object(upb_json_parser *p) { - const char *membername = "values"; - - start_object(p); - - /* Set up context for parsing value */ - start_member(p); - capture_begin(p, membername); - capture_end(p, membername + strlen(membername)); - end_membername(p); -} - -static void end_listvalue_object(upb_json_parser *p) { - end_member(p); - end_object(p); -} - -static void start_structvalue_object(upb_json_parser *p) { - const char *membername = "fields"; - - start_object(p); - - /* Set up context for parsing value */ - start_member(p); - capture_begin(p, membername); - capture_end(p, membername + strlen(membername)); - end_membername(p); -} - -static void end_structvalue_object(upb_json_parser *p) { - end_member(p); - end_object(p); -} - -static bool is_top_level(upb_json_parser *p) { - return p->top == p->stack && p->top->f == NULL && !p->top->is_unknown_field; -} - -static bool is_wellknown_msg(upb_json_parser *p, upb_wellknowntype_t type) { - return p->top->m != NULL && upb_msgdef_wellknowntype(p->top->m) == type; -} - -static bool is_wellknown_field(upb_json_parser *p, upb_wellknowntype_t type) { - return p->top->f != NULL && - upb_fielddef_issubmsg(p->top->f) && - (upb_msgdef_wellknowntype(upb_fielddef_msgsubdef(p->top->f)) - == type); -} - -static bool does_number_wrapper_start(upb_json_parser *p) { - return p->top->f != NULL && - upb_fielddef_issubmsg(p->top->f) && - upb_msgdef_isnumberwrapper(upb_fielddef_msgsubdef(p->top->f)); -} - -static bool does_number_wrapper_end(upb_json_parser *p) { - return p->top->m != NULL && upb_msgdef_isnumberwrapper(p->top->m); -} - -static bool is_number_wrapper_object(upb_json_parser *p) { - return p->top->m != NULL && upb_msgdef_isnumberwrapper(p->top->m); -} - -static bool does_string_wrapper_start(upb_json_parser *p) { - return p->top->f != NULL && - upb_fielddef_issubmsg(p->top->f) && - is_string_wrapper(upb_fielddef_msgsubdef(p->top->f)); -} - -static bool does_string_wrapper_end(upb_json_parser *p) { - return p->top->m != NULL && is_string_wrapper(p->top->m); -} - -static bool is_string_wrapper_object(upb_json_parser *p) { - return p->top->m != NULL && is_string_wrapper(p->top->m); -} - -static bool does_fieldmask_start(upb_json_parser *p) { - return p->top->f != NULL && - upb_fielddef_issubmsg(p->top->f) && - is_fieldmask(upb_fielddef_msgsubdef(p->top->f)); -} - -static bool does_fieldmask_end(upb_json_parser *p) { - return p->top->m != NULL && is_fieldmask(p->top->m); -} - -#define CHECK_RETURN_TOP(x) if (!(x)) goto error - - -/* The actual parser **********************************************************/ - -/* What follows is the Ragel parser itself. The language is specified in Ragel - * and the actions call our C functions above. - * - * Ragel has an extensive set of functionality, and we use only a small part of - * it. There are many action types but we only use a few: - * - * ">" -- transition into a machine - * "%" -- transition out of a machine - * "@" -- transition into a final state of a machine. - * - * "@" transitions are tricky because a machine can transition into a final - * state repeatedly. But in some cases we know this can't happen, for example - * a string which is delimited by a final '"' can only transition into its - * final state once, when the closing '"' is seen. */ - -%%{ - machine json; - - ws = space*; - - integer = "0" | /[1-9]/ /[0-9]/*; - decimal = "." /[0-9]/+; - exponent = /[eE]/ /[+\-]/? /[0-9]/+; - - number_machine := - ("-"? integer decimal? exponent?) - %/{ fhold; fret; } - <: any - >{ fhold; fret; } - ; - number = /[0-9\-]/ >{ fhold; fcall number_machine; }; - - text = - /[^\\"]/+ - >{ start_text(parser, p); } - %{ CHECK_RETURN_TOP(end_text(parser, p)); } - ; - - unicode_char = - "\\u" - /[0-9A-Fa-f]/{4} - >{ start_hex(parser); } - ${ hexdigit(parser, p); } - %{ CHECK_RETURN_TOP(end_hex(parser)); } - ; - - escape_char = - "\\" - /[rtbfn"\/\\]/ - >{ CHECK_RETURN_TOP(escape(parser, p)); } - ; - - string_machine := - (text | unicode_char | escape_char)** - '"' - @{ fhold; fret; } - ; - - year = - (digit digit digit digit) - >{ start_year(parser, p); } - %{ CHECK_RETURN_TOP(end_year(parser, p)); } - ; - month = - (digit digit) - >{ start_month(parser, p); } - %{ CHECK_RETURN_TOP(end_month(parser, p)); } - ; - day = - (digit digit) - >{ start_day(parser, p); } - %{ CHECK_RETURN_TOP(end_day(parser, p)); } - ; - hour = - (digit digit) - >{ start_hour(parser, p); } - %{ CHECK_RETURN_TOP(end_hour(parser, p)); } - ; - minute = - (digit digit) - >{ start_minute(parser, p); } - %{ CHECK_RETURN_TOP(end_minute(parser, p)); } - ; - second = - (digit digit) - >{ start_second(parser, p); } - %{ CHECK_RETURN_TOP(end_second(parser, p)); } - ; - - duration_machine := - ("-"? integer decimal?) - >{ start_duration_base(parser, p); } - %{ CHECK_RETURN_TOP(end_duration_base(parser, p)); } - 's"' - @{ fhold; fret; } - ; - - timestamp_machine := - (year "-" month "-" day "T" hour ":" minute ":" second) - >{ start_timestamp_base(parser); } - ("." digit+)? - >{ start_timestamp_fraction(parser, p); } - %{ CHECK_RETURN_TOP(end_timestamp_fraction(parser, p)); } - ([+\-] digit digit ":00" | "Z") - >{ start_timestamp_zone(parser, p); } - %{ CHECK_RETURN_TOP(end_timestamp_zone(parser, p)); } - '"' - @{ fhold; fret; } - ; - - fieldmask_path_text = - /[^",]/+ - >{ start_fieldmask_path_text(parser, p); } - %{ end_fieldmask_path_text(parser, p); } - ; - - fieldmask_path = - fieldmask_path_text - >{ start_fieldmask_path(parser); } - %{ end_fieldmask_path(parser); } - ; - - fieldmask_machine := - (fieldmask_path ("," fieldmask_path)*)? - '"' - @{ fhold; fret; } - ; - - string = - '"' - @{ - if (is_wellknown_msg(parser, UPB_WELLKNOWN_TIMESTAMP)) { - fcall timestamp_machine; - } else if (is_wellknown_msg(parser, UPB_WELLKNOWN_DURATION)) { - fcall duration_machine; - } else if (is_wellknown_msg(parser, UPB_WELLKNOWN_FIELDMASK)) { - fcall fieldmask_machine; - } else { - fcall string_machine; - } - } - '"'; - - value2 = ^(space | "]" | "}") >{ fhold; fcall value_machine; } ; - - member = - ws - string - >{ - if (is_wellknown_msg(parser, UPB_WELLKNOWN_ANY)) { - start_any_member(parser, p); - } else { - start_member(parser); - } - } - @{ CHECK_RETURN_TOP(end_membername(parser)); } - ws ":" ws - value2 - %{ - if (is_wellknown_msg(parser, UPB_WELLKNOWN_ANY)) { - end_any_member(parser, p); - } else { - end_member(parser); - } - } - ws; - - object = - ("{" ws) - >{ - if (is_wellknown_msg(parser, UPB_WELLKNOWN_ANY)) { - start_any_object(parser, p); - } else { - start_object(parser); - } - } - (member ("," member)*)? - "}" - >{ - if (is_wellknown_msg(parser, UPB_WELLKNOWN_ANY)) { - CHECK_RETURN_TOP(end_any_object(parser, p)); - } else { - end_object(parser); - } - } - ; - - element = ws value2 ws; - array = - "[" - >{ CHECK_RETURN_TOP(start_array(parser)); } - ws - (element ("," element)*)? - "]" - >{ end_array(parser); } - ; - - value = - number - >{ CHECK_RETURN_TOP(start_number(parser, p)); } - %{ CHECK_RETURN_TOP(end_number(parser, p)); } - | string - >{ CHECK_RETURN_TOP(start_stringval(parser)); } - @{ CHECK_RETURN_TOP(end_stringval(parser)); } - | "true" - %{ CHECK_RETURN_TOP(end_bool(parser, true)); } - | "false" - %{ CHECK_RETURN_TOP(end_bool(parser, false)); } - | "null" - %{ CHECK_RETURN_TOP(end_null(parser)); } - | object - >{ CHECK_RETURN_TOP(start_subobject_full(parser)); } - %{ end_subobject_full(parser); } - | array; - - value_machine := - value - <: any >{ fhold; fret; } ; - - main := ws value ws; -}%% - -%% write data noerror nofinal; - -size_t parse(void *closure, const void *hd, const char *buf, size_t size, - const upb_bufhandle *handle) { - upb_json_parser *parser = closure; - - /* Variables used by Ragel's generated code. */ - int cs = parser->current_state; - int *stack = parser->parser_stack; - int top = parser->parser_top; - - const char *p = buf; - const char *pe = buf + size; - const char *eof = &eof_ch; - - parser->handle = handle; - - UPB_UNUSED(hd); - UPB_UNUSED(handle); - - capture_resume(parser, buf); - - %% write exec; - - if (p != pe) { - upb_status_seterrf(parser->status, "Parse error at '%.*s'\n", (int)(pe - p), - p); - } else { - capture_suspend(parser, &p); - } - -error: - /* Save parsing state back to parser. */ - parser->current_state = cs; - parser->parser_top = top; - - return p - buf; -} - -static bool end(void *closure, const void *hd) { - upb_json_parser *parser = closure; - - /* Prevent compile warning on unused static constants. */ - UPB_UNUSED(json_start); - UPB_UNUSED(json_en_duration_machine); - UPB_UNUSED(json_en_fieldmask_machine); - UPB_UNUSED(json_en_number_machine); - UPB_UNUSED(json_en_string_machine); - UPB_UNUSED(json_en_timestamp_machine); - UPB_UNUSED(json_en_value_machine); - UPB_UNUSED(json_en_main); - - parse(parser, hd, &eof_ch, 0, NULL); - - return parser->current_state >= %%{ write first_final; }%%; -} - -static void json_parser_reset(upb_json_parser *p) { - int cs; - int top; - - p->top = p->stack; - init_frame(p->top); - - /* Emit Ragel initialization of the parser. */ - %% write init; - p->current_state = cs; - p->parser_top = top; - accumulate_clear(p); - p->multipart_state = MULTIPART_INACTIVE; - p->capture = NULL; - p->accumulated = NULL; -} - -static upb_json_parsermethod *parsermethod_new(upb_json_codecache *c, - const upb_msgdef *md) { - int i, n; - upb_alloc *alloc = upb_arena_alloc(c->arena); - - upb_json_parsermethod *m = upb_malloc(alloc, sizeof(*m)); - - m->cache = c; - - upb_byteshandler_init(&m->input_handler_); - upb_byteshandler_setstring(&m->input_handler_, parse, m); - upb_byteshandler_setendstr(&m->input_handler_, end, m); - - upb_strtable_init2(&m->name_table, UPB_CTYPE_CONSTPTR, 4, alloc); - - /* Build name_table */ - - n = upb_msgdef_fieldcount(md); - for(i = 0; i < n; i++) { - const upb_fielddef *f = upb_msgdef_field(md, i); - upb_value v = upb_value_constptr(f); - const char *name; - - /* Add an entry for the JSON name. */ - name = upb_fielddef_jsonname(f); - upb_strtable_insert3(&m->name_table, name, strlen(name), v, alloc); - - if (strcmp(name, upb_fielddef_name(f)) != 0) { - /* Since the JSON name is different from the regular field name, add an - * entry for the raw name (compliant proto3 JSON parsers must accept - * both). */ - const char *name = upb_fielddef_name(f); - upb_strtable_insert3(&m->name_table, name, strlen(name), v, alloc); - } - } - - return m; -} - -/* Public API *****************************************************************/ - -upb_json_parser *upb_json_parser_create(upb_arena *arena, - const upb_json_parsermethod *method, - const upb_symtab* symtab, - upb_sink output, - upb_status *status, - bool ignore_json_unknown) { - upb_json_parser *p = upb_arena_malloc(arena, sizeof(upb_json_parser)); - if (!p) return false; - - p->arena = arena; - p->method = method; - p->status = status; - p->limit = p->stack + UPB_JSON_MAX_DEPTH; - p->accumulate_buf = NULL; - p->accumulate_buf_size = 0; - upb_bytessink_reset(&p->input_, &method->input_handler_, p); - - json_parser_reset(p); - p->top->sink = output; - p->top->m = upb_handlers_msgdef(output.handlers); - if (is_wellknown_msg(p, UPB_WELLKNOWN_ANY)) { - p->top->is_any = true; - p->top->any_frame = json_parser_any_frame_new(p); - } else { - p->top->is_any = false; - p->top->any_frame = NULL; - } - set_name_table(p, p->top); - p->symtab = symtab; - - p->ignore_json_unknown = ignore_json_unknown; - - return p; -} - -upb_bytessink upb_json_parser_input(upb_json_parser *p) { - return p->input_; -} - -const upb_byteshandler *upb_json_parsermethod_inputhandler( - const upb_json_parsermethod *m) { - return &m->input_handler_; -} - -upb_json_codecache *upb_json_codecache_new(void) { - upb_alloc *alloc; - upb_json_codecache *c; - - c = upb_gmalloc(sizeof(*c)); - - c->arena = upb_arena_new(); - alloc = upb_arena_alloc(c->arena); - - upb_inttable_init2(&c->methods, UPB_CTYPE_CONSTPTR, alloc); - - return c; -} - -void upb_json_codecache_free(upb_json_codecache *c) { - upb_arena_free(c->arena); - upb_gfree(c); -} - -const upb_json_parsermethod *upb_json_codecache_get(upb_json_codecache *c, - const upb_msgdef *md) { - upb_json_parsermethod *m; - upb_value v; - int i, n; - upb_alloc *alloc = upb_arena_alloc(c->arena); - - if (upb_inttable_lookupptr(&c->methods, md, &v)) { - return upb_value_getconstptr(v); - } - - m = parsermethod_new(c, md); - v = upb_value_constptr(m); - - if (!m) return NULL; - if (!upb_inttable_insertptr2(&c->methods, md, v, alloc)) return NULL; - - /* Populate parser methods for all submessages, so the name tables will - * be available during parsing. */ - n = upb_msgdef_fieldcount(md); - for(i = 0; i < n; i++) { - const upb_fielddef *f = upb_msgdef_field(md, i); - - if (upb_fielddef_issubmsg(f)) { - const upb_msgdef *subdef = upb_fielddef_msgsubdef(f); - const upb_json_parsermethod *sub_method = - upb_json_codecache_get(c, subdef); - - if (!sub_method) return NULL; - } - } - - return m; -} diff --git a/upb/json/printer.c b/upb/json/printer.c deleted file mode 100644 index 2f0c8fb6c4..0000000000 --- a/upb/json/printer.c +++ /dev/null @@ -1,1396 +0,0 @@ -/* -** This currently uses snprintf() to format primitives, and could be optimized -** further. -*/ - -#include "upb/json/printer.h" - -#include -#include -#include -#include -#include -#include -#include - -#include "upb/port_def.inc" - -struct upb_json_printer { - upb_sink input_; - /* BytesSink closure. */ - void *subc_; - upb_bytessink output_; - - /* We track the depth so that we know when to emit startstr/endstr on the - * output. */ - int depth_; - - /* Have we emitted the first element? This state is necessary to emit commas - * without leaving a trailing comma in arrays/maps. We keep this state per - * frame depth. - * - * Why max_depth * 2? UPB_MAX_HANDLER_DEPTH counts depth as nested messages. - * We count frames (contexts in which we separate elements by commas) as both - * repeated fields and messages (maps), and the worst case is a - * message->repeated field->submessage->repeated field->... nesting. */ - bool first_elem_[UPB_MAX_HANDLER_DEPTH * 2]; - - /* To print timestamp, printer needs to cache its seconds and nanos values - * and convert them when ending timestamp message. See comments of - * printer_sethandlers_timestamp for more detail. */ - int64_t seconds; - int32_t nanos; -}; - -/* StringPiece; a pointer plus a length. */ -typedef struct { - char *ptr; - size_t len; -} strpc; - -void freestrpc(void *ptr) { - strpc *pc = ptr; - upb_gfree(pc->ptr); - upb_gfree(pc); -} - -typedef struct { - bool preserve_fieldnames; -} upb_json_printercache; - -/* Convert fielddef name to JSON name and return as a string piece. */ -strpc *newstrpc(upb_handlers *h, const upb_fielddef *f, - bool preserve_fieldnames) { - /* TODO(haberman): handle malloc failure. */ - strpc *ret = upb_gmalloc(sizeof(*ret)); - if (preserve_fieldnames) { - ret->ptr = upb_gstrdup(upb_fielddef_name(f)); - ret->len = strlen(ret->ptr); - } else { - ret->ptr = upb_gstrdup(upb_fielddef_jsonname(f)); - ret->len = strlen(ret->ptr); - } - - upb_handlers_addcleanup(h, ret, freestrpc); - return ret; -} - -/* Convert a null-terminated const char* to a string piece. */ -strpc *newstrpc_str(upb_handlers *h, const char * str) { - strpc * ret = upb_gmalloc(sizeof(*ret)); - ret->ptr = upb_gstrdup(str); - ret->len = strlen(str); - upb_handlers_addcleanup(h, ret, freestrpc); - return ret; -} - -/* ------------ JSON string printing: values, maps, arrays ------------------ */ - -static void print_data( - upb_json_printer *p, const char *buf, size_t len) { - /* TODO: Will need to change if we support pushback from the sink. */ - size_t n = upb_bytessink_putbuf(p->output_, p->subc_, buf, len, NULL); - UPB_ASSERT(n == len); -} - -static void print_comma(upb_json_printer *p) { - if (!p->first_elem_[p->depth_]) { - print_data(p, ",", 1); - } - p->first_elem_[p->depth_] = false; -} - -/* Helpers that print properly formatted elements to the JSON output stream. */ - -/* Used for escaping control chars in strings. */ -static const char kControlCharLimit = 0x20; - -UPB_INLINE bool is_json_escaped(char c) { - /* See RFC 4627. */ - unsigned char uc = (unsigned char)c; - return uc < kControlCharLimit || uc == '"' || uc == '\\'; -} - -UPB_INLINE const char* json_nice_escape(char c) { - switch (c) { - case '"': return "\\\""; - case '\\': return "\\\\"; - case '\b': return "\\b"; - case '\f': return "\\f"; - case '\n': return "\\n"; - case '\r': return "\\r"; - case '\t': return "\\t"; - default: return NULL; - } -} - -/* Write a properly escaped string chunk. The surrounding quotes are *not* - * printed; this is so that the caller has the option of emitting the string - * content in chunks. */ -static void putstring(upb_json_printer *p, const char *buf, size_t len) { - const char* unescaped_run = NULL; - unsigned int i; - for (i = 0; i < len; i++) { - char c = buf[i]; - /* Handle escaping. */ - if (is_json_escaped(c)) { - /* Use a "nice" escape, like \n, if one exists for this character. */ - const char* escape = json_nice_escape(c); - /* If we don't have a specific 'nice' escape code, use a \uXXXX-style - * escape. */ - char escape_buf[8]; - if (!escape) { - unsigned char byte = (unsigned char)c; - snprintf(escape_buf, sizeof(escape_buf), "\\u%04x", (int)byte); - escape = escape_buf; - } - - /* N.B. that we assume that the input encoding is equal to the output - * encoding (both UTF-8 for now), so for chars >= 0x20 and != \, ", we - * can simply pass the bytes through. */ - - /* If there's a current run of unescaped chars, print that run first. */ - if (unescaped_run) { - print_data(p, unescaped_run, &buf[i] - unescaped_run); - unescaped_run = NULL; - } - /* Then print the escape code. */ - print_data(p, escape, strlen(escape)); - } else { - /* Add to the current unescaped run of characters. */ - if (unescaped_run == NULL) { - unescaped_run = &buf[i]; - } - } - } - - /* If the string ended in a run of unescaped characters, print that last run. */ - if (unescaped_run) { - print_data(p, unescaped_run, &buf[len] - unescaped_run); - } -} - -#define CHKLENGTH(x) if (!(x)) return -1; - -/* Helpers that format floating point values according to our custom formats. - * Right now we use %.8g and %.17g for float/double, respectively, to match - * proto2::util::JsonFormat's defaults. May want to change this later. */ - -const char neginf[] = "\"-Infinity\""; -const char inf[] = "\"Infinity\""; - -static size_t fmt_double(double val, char* buf, size_t length) { - if (val == INFINITY) { - CHKLENGTH(length >= strlen(inf)); - strcpy(buf, inf); - return strlen(inf); - } else if (val == -INFINITY) { - CHKLENGTH(length >= strlen(neginf)); - strcpy(buf, neginf); - return strlen(neginf); - } else { - size_t n = snprintf(buf, length, "%.17g", val); - CHKLENGTH(n > 0 && n < length); - return n; - } -} - -static size_t fmt_float(float val, char* buf, size_t length) { - size_t n = snprintf(buf, length, "%.8g", val); - CHKLENGTH(n > 0 && n < length); - return n; -} - -static size_t fmt_bool(bool val, char* buf, size_t length) { - size_t n = snprintf(buf, length, "%s", (val ? "true" : "false")); - CHKLENGTH(n > 0 && n < length); - return n; -} - -static size_t fmt_int64_as_number(int64_t val, char* buf, size_t length) { - size_t n = snprintf(buf, length, "%" PRId64, val); - CHKLENGTH(n > 0 && n < length); - return n; -} - -static size_t fmt_uint64_as_number(uint64_t val, char* buf, size_t length) { - size_t n = snprintf(buf, length, "%" PRIu64, val); - CHKLENGTH(n > 0 && n < length); - return n; -} - -static size_t fmt_int64_as_string(int64_t val, char* buf, size_t length) { - size_t n = snprintf(buf, length, "\"%" PRId64 "\"", val); - CHKLENGTH(n > 0 && n < length); - return n; -} - -static size_t fmt_uint64_as_string(uint64_t val, char* buf, size_t length) { - size_t n = snprintf(buf, length, "\"%" PRIu64 "\"", val); - CHKLENGTH(n > 0 && n < length); - return n; -} - -/* Print a map key given a field name. Called by scalar field handlers and by - * startseq for repeated fields. */ -static bool putkey(void *closure, const void *handler_data) { - upb_json_printer *p = closure; - const strpc *key = handler_data; - print_comma(p); - print_data(p, "\"", 1); - putstring(p, key->ptr, key->len); - print_data(p, "\":", 2); - return true; -} - -#define CHKFMT(val) if ((val) == (size_t)-1) return false; -#define CHK(val) if (!(val)) return false; - -#define TYPE_HANDLERS(type, fmt_func) \ - static bool put##type(void *closure, const void *handler_data, type val) { \ - upb_json_printer *p = closure; \ - char data[64]; \ - size_t length = fmt_func(val, data, sizeof(data)); \ - UPB_UNUSED(handler_data); \ - CHKFMT(length); \ - print_data(p, data, length); \ - return true; \ - } \ - static bool scalar_##type(void *closure, const void *handler_data, \ - type val) { \ - CHK(putkey(closure, handler_data)); \ - CHK(put##type(closure, handler_data, val)); \ - return true; \ - } \ - static bool repeated_##type(void *closure, const void *handler_data, \ - type val) { \ - upb_json_printer *p = closure; \ - print_comma(p); \ - CHK(put##type(closure, handler_data, val)); \ - return true; \ - } - -#define TYPE_HANDLERS_MAPKEY(type, fmt_func) \ - static bool putmapkey_##type(void *closure, const void *handler_data, \ - type val) { \ - upb_json_printer *p = closure; \ - char data[64]; \ - size_t length = fmt_func(val, data, sizeof(data)); \ - UPB_UNUSED(handler_data); \ - print_data(p, "\"", 1); \ - print_data(p, data, length); \ - print_data(p, "\":", 2); \ - return true; \ - } - -TYPE_HANDLERS(double, fmt_double) -TYPE_HANDLERS(float, fmt_float) -TYPE_HANDLERS(bool, fmt_bool) -TYPE_HANDLERS(int32_t, fmt_int64_as_number) -TYPE_HANDLERS(uint32_t, fmt_int64_as_number) -TYPE_HANDLERS(int64_t, fmt_int64_as_string) -TYPE_HANDLERS(uint64_t, fmt_uint64_as_string) - -/* double and float are not allowed to be map keys. */ -TYPE_HANDLERS_MAPKEY(bool, fmt_bool) -TYPE_HANDLERS_MAPKEY(int32_t, fmt_int64_as_number) -TYPE_HANDLERS_MAPKEY(uint32_t, fmt_int64_as_number) -TYPE_HANDLERS_MAPKEY(int64_t, fmt_int64_as_number) -TYPE_HANDLERS_MAPKEY(uint64_t, fmt_uint64_as_number) - -#undef TYPE_HANDLERS -#undef TYPE_HANDLERS_MAPKEY - -typedef struct { - void *keyname; - const upb_enumdef *enumdef; -} EnumHandlerData; - -static bool scalar_enum(void *closure, const void *handler_data, - int32_t val) { - const EnumHandlerData *hd = handler_data; - upb_json_printer *p = closure; - const char *symbolic_name; - - CHK(putkey(closure, hd->keyname)); - - symbolic_name = upb_enumdef_iton(hd->enumdef, val); - if (symbolic_name) { - print_data(p, "\"", 1); - putstring(p, symbolic_name, strlen(symbolic_name)); - print_data(p, "\"", 1); - } else { - putint32_t(closure, NULL, val); - } - - return true; -} - -static void print_enum_symbolic_name(upb_json_printer *p, - const upb_enumdef *def, - int32_t val) { - const char *symbolic_name = upb_enumdef_iton(def, val); - if (symbolic_name) { - print_data(p, "\"", 1); - putstring(p, symbolic_name, strlen(symbolic_name)); - print_data(p, "\"", 1); - } else { - putint32_t(p, NULL, val); - } -} - -static bool repeated_enum(void *closure, const void *handler_data, - int32_t val) { - const EnumHandlerData *hd = handler_data; - upb_json_printer *p = closure; - print_comma(p); - - print_enum_symbolic_name(p, hd->enumdef, val); - - return true; -} - -static bool mapvalue_enum(void *closure, const void *handler_data, - int32_t val) { - const EnumHandlerData *hd = handler_data; - upb_json_printer *p = closure; - - print_enum_symbolic_name(p, hd->enumdef, val); - - return true; -} - -static void *scalar_startsubmsg(void *closure, const void *handler_data) { - return putkey(closure, handler_data) ? closure : UPB_BREAK; -} - -static void *repeated_startsubmsg(void *closure, const void *handler_data) { - upb_json_printer *p = closure; - UPB_UNUSED(handler_data); - print_comma(p); - return closure; -} - -static void start_frame(upb_json_printer *p) { - p->depth_++; - p->first_elem_[p->depth_] = true; - print_data(p, "{", 1); -} - -static void end_frame(upb_json_printer *p) { - print_data(p, "}", 1); - p->depth_--; -} - -static bool printer_startmsg(void *closure, const void *handler_data) { - upb_json_printer *p = closure; - UPB_UNUSED(handler_data); - if (p->depth_ == 0) { - upb_bytessink_start(p->output_, 0, &p->subc_); - } - start_frame(p); - return true; -} - -static bool printer_endmsg(void *closure, const void *handler_data, upb_status *s) { - upb_json_printer *p = closure; - UPB_UNUSED(handler_data); - UPB_UNUSED(s); - end_frame(p); - if (p->depth_ == 0) { - upb_bytessink_end(p->output_); - } - return true; -} - -static void *startseq(void *closure, const void *handler_data) { - upb_json_printer *p = closure; - CHK(putkey(closure, handler_data)); - p->depth_++; - p->first_elem_[p->depth_] = true; - print_data(p, "[", 1); - return closure; -} - -static bool endseq(void *closure, const void *handler_data) { - upb_json_printer *p = closure; - UPB_UNUSED(handler_data); - print_data(p, "]", 1); - p->depth_--; - return true; -} - -static void *startmap(void *closure, const void *handler_data) { - upb_json_printer *p = closure; - CHK(putkey(closure, handler_data)); - p->depth_++; - p->first_elem_[p->depth_] = true; - print_data(p, "{", 1); - return closure; -} - -static bool endmap(void *closure, const void *handler_data) { - upb_json_printer *p = closure; - UPB_UNUSED(handler_data); - print_data(p, "}", 1); - p->depth_--; - return true; -} - -static size_t putstr(void *closure, const void *handler_data, const char *str, - size_t len, const upb_bufhandle *handle) { - upb_json_printer *p = closure; - UPB_UNUSED(handler_data); - UPB_UNUSED(handle); - putstring(p, str, len); - return len; -} - -/* This has to Base64 encode the bytes, because JSON has no "bytes" type. */ -static size_t putbytes(void *closure, const void *handler_data, const char *str, - size_t len, const upb_bufhandle *handle) { - upb_json_printer *p = closure; - - /* This is the regular base64, not the "web-safe" version. */ - static const char base64[] = - "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; - - /* Base64-encode. */ - char data[16000]; - const char *limit = data + sizeof(data); - const unsigned char *from = (const unsigned char*)str; - char *to = data; - size_t remaining = len; - size_t bytes; - - UPB_UNUSED(handler_data); - UPB_UNUSED(handle); - - print_data(p, "\"", 1); - - while (remaining > 2) { - if (limit - to < 4) { - bytes = to - data; - putstring(p, data, bytes); - to = data; - } - - to[0] = base64[from[0] >> 2]; - to[1] = base64[((from[0] & 0x3) << 4) | (from[1] >> 4)]; - to[2] = base64[((from[1] & 0xf) << 2) | (from[2] >> 6)]; - to[3] = base64[from[2] & 0x3f]; - - remaining -= 3; - to += 4; - from += 3; - } - - switch (remaining) { - case 2: - to[0] = base64[from[0] >> 2]; - to[1] = base64[((from[0] & 0x3) << 4) | (from[1] >> 4)]; - to[2] = base64[(from[1] & 0xf) << 2]; - to[3] = '='; - to += 4; - from += 2; - break; - case 1: - to[0] = base64[from[0] >> 2]; - to[1] = base64[((from[0] & 0x3) << 4)]; - to[2] = '='; - to[3] = '='; - to += 4; - from += 1; - break; - } - - bytes = to - data; - putstring(p, data, bytes); - print_data(p, "\"", 1); - return len; -} - -static void *scalar_startstr(void *closure, const void *handler_data, - size_t size_hint) { - upb_json_printer *p = closure; - UPB_UNUSED(handler_data); - UPB_UNUSED(size_hint); - CHK(putkey(closure, handler_data)); - print_data(p, "\"", 1); - return p; -} - -static size_t scalar_str(void *closure, const void *handler_data, - const char *str, size_t len, - const upb_bufhandle *handle) { - CHK(putstr(closure, handler_data, str, len, handle)); - return len; -} - -static bool scalar_endstr(void *closure, const void *handler_data) { - upb_json_printer *p = closure; - UPB_UNUSED(handler_data); - print_data(p, "\"", 1); - return true; -} - -static void *repeated_startstr(void *closure, const void *handler_data, - size_t size_hint) { - upb_json_printer *p = closure; - UPB_UNUSED(handler_data); - UPB_UNUSED(size_hint); - print_comma(p); - print_data(p, "\"", 1); - return p; -} - -static size_t repeated_str(void *closure, const void *handler_data, - const char *str, size_t len, - const upb_bufhandle *handle) { - CHK(putstr(closure, handler_data, str, len, handle)); - return len; -} - -static bool repeated_endstr(void *closure, const void *handler_data) { - upb_json_printer *p = closure; - UPB_UNUSED(handler_data); - print_data(p, "\"", 1); - return true; -} - -static void *mapkeyval_startstr(void *closure, const void *handler_data, - size_t size_hint) { - upb_json_printer *p = closure; - UPB_UNUSED(handler_data); - UPB_UNUSED(size_hint); - print_data(p, "\"", 1); - return p; -} - -static size_t mapkey_str(void *closure, const void *handler_data, - const char *str, size_t len, - const upb_bufhandle *handle) { - CHK(putstr(closure, handler_data, str, len, handle)); - return len; -} - -static bool mapkey_endstr(void *closure, const void *handler_data) { - upb_json_printer *p = closure; - UPB_UNUSED(handler_data); - print_data(p, "\":", 2); - return true; -} - -static bool mapvalue_endstr(void *closure, const void *handler_data) { - upb_json_printer *p = closure; - UPB_UNUSED(handler_data); - print_data(p, "\"", 1); - return true; -} - -static size_t scalar_bytes(void *closure, const void *handler_data, - const char *str, size_t len, - const upb_bufhandle *handle) { - CHK(putkey(closure, handler_data)); - CHK(putbytes(closure, handler_data, str, len, handle)); - return len; -} - -static size_t repeated_bytes(void *closure, const void *handler_data, - const char *str, size_t len, - const upb_bufhandle *handle) { - upb_json_printer *p = closure; - print_comma(p); - CHK(putbytes(closure, handler_data, str, len, handle)); - return len; -} - -static size_t mapkey_bytes(void *closure, const void *handler_data, - const char *str, size_t len, - const upb_bufhandle *handle) { - upb_json_printer *p = closure; - CHK(putbytes(closure, handler_data, str, len, handle)); - print_data(p, ":", 1); - return len; -} - -static void set_enum_hd(upb_handlers *h, - const upb_fielddef *f, - bool preserve_fieldnames, - upb_handlerattr *attr) { - EnumHandlerData *hd = upb_gmalloc(sizeof(EnumHandlerData)); - hd->enumdef = upb_fielddef_enumsubdef(f); - hd->keyname = newstrpc(h, f, preserve_fieldnames); - upb_handlers_addcleanup(h, hd, upb_gfree); - attr->handler_data = hd; -} - -/* Set up handlers for a mapentry submessage (i.e., an individual key/value pair - * in a map). - * - * TODO: Handle missing key, missing value, out-of-order key/value, or repeated - * key or value cases properly. The right way to do this is to allocate a - * temporary structure at the start of a mapentry submessage, store key and - * value data in it as key and value handlers are called, and then print the - * key/value pair once at the end of the submessage. If we don't do this, we - * should at least detect the case and throw an error. However, so far all of - * our sources that emit mapentry messages do so canonically (with one key - * field, and then one value field), so this is not a pressing concern at the - * moment. */ -void printer_sethandlers_mapentry(const void *closure, bool preserve_fieldnames, - upb_handlers *h) { - const upb_msgdef *md = upb_handlers_msgdef(h); - - /* A mapentry message is printed simply as '"key": value'. Rather than - * special-case key and value for every type below, we just handle both - * fields explicitly here. */ - const upb_fielddef* key_field = upb_msgdef_itof(md, UPB_MAPENTRY_KEY); - const upb_fielddef* value_field = upb_msgdef_itof(md, UPB_MAPENTRY_VALUE); - - upb_handlerattr empty_attr = UPB_HANDLERATTR_INIT; - - UPB_UNUSED(closure); - - switch (upb_fielddef_type(key_field)) { - case UPB_TYPE_INT32: - upb_handlers_setint32(h, key_field, putmapkey_int32_t, &empty_attr); - break; - case UPB_TYPE_INT64: - upb_handlers_setint64(h, key_field, putmapkey_int64_t, &empty_attr); - break; - case UPB_TYPE_UINT32: - upb_handlers_setuint32(h, key_field, putmapkey_uint32_t, &empty_attr); - break; - case UPB_TYPE_UINT64: - upb_handlers_setuint64(h, key_field, putmapkey_uint64_t, &empty_attr); - break; - case UPB_TYPE_BOOL: - upb_handlers_setbool(h, key_field, putmapkey_bool, &empty_attr); - break; - case UPB_TYPE_STRING: - upb_handlers_setstartstr(h, key_field, mapkeyval_startstr, &empty_attr); - upb_handlers_setstring(h, key_field, mapkey_str, &empty_attr); - upb_handlers_setendstr(h, key_field, mapkey_endstr, &empty_attr); - break; - case UPB_TYPE_BYTES: - upb_handlers_setstring(h, key_field, mapkey_bytes, &empty_attr); - break; - default: - UPB_ASSERT(false); - break; - } - - switch (upb_fielddef_type(value_field)) { - case UPB_TYPE_INT32: - upb_handlers_setint32(h, value_field, putint32_t, &empty_attr); - break; - case UPB_TYPE_INT64: - upb_handlers_setint64(h, value_field, putint64_t, &empty_attr); - break; - case UPB_TYPE_UINT32: - upb_handlers_setuint32(h, value_field, putuint32_t, &empty_attr); - break; - case UPB_TYPE_UINT64: - upb_handlers_setuint64(h, value_field, putuint64_t, &empty_attr); - break; - case UPB_TYPE_BOOL: - upb_handlers_setbool(h, value_field, putbool, &empty_attr); - break; - case UPB_TYPE_FLOAT: - upb_handlers_setfloat(h, value_field, putfloat, &empty_attr); - break; - case UPB_TYPE_DOUBLE: - upb_handlers_setdouble(h, value_field, putdouble, &empty_attr); - break; - case UPB_TYPE_STRING: - upb_handlers_setstartstr(h, value_field, mapkeyval_startstr, &empty_attr); - upb_handlers_setstring(h, value_field, putstr, &empty_attr); - upb_handlers_setendstr(h, value_field, mapvalue_endstr, &empty_attr); - break; - case UPB_TYPE_BYTES: - upb_handlers_setstring(h, value_field, putbytes, &empty_attr); - break; - case UPB_TYPE_ENUM: { - upb_handlerattr enum_attr = UPB_HANDLERATTR_INIT; - set_enum_hd(h, value_field, preserve_fieldnames, &enum_attr); - upb_handlers_setint32(h, value_field, mapvalue_enum, &enum_attr); - break; - } - case UPB_TYPE_MESSAGE: - /* No handler necessary -- the submsg handlers will print the message - * as appropriate. */ - break; - } -} - -static bool putseconds(void *closure, const void *handler_data, - int64_t seconds) { - upb_json_printer *p = closure; - p->seconds = seconds; - UPB_UNUSED(handler_data); - return true; -} - -static bool putnanos(void *closure, const void *handler_data, - int32_t nanos) { - upb_json_printer *p = closure; - p->nanos = nanos; - UPB_UNUSED(handler_data); - return true; -} - -static void *scalar_startstr_nokey(void *closure, const void *handler_data, - size_t size_hint) { - upb_json_printer *p = closure; - UPB_UNUSED(handler_data); - UPB_UNUSED(size_hint); - print_data(p, "\"", 1); - return p; -} - -static size_t putstr_nokey(void *closure, const void *handler_data, - const char *str, size_t len, - const upb_bufhandle *handle) { - upb_json_printer *p = closure; - UPB_UNUSED(handler_data); - UPB_UNUSED(handle); - print_data(p, "\"", 1); - putstring(p, str, len); - print_data(p, "\"", 1); - return len + 2; -} - -static void *startseq_nokey(void *closure, const void *handler_data) { - upb_json_printer *p = closure; - UPB_UNUSED(handler_data); - p->depth_++; - p->first_elem_[p->depth_] = true; - print_data(p, "[", 1); - return closure; -} - -static void *startseq_fieldmask(void *closure, const void *handler_data) { - upb_json_printer *p = closure; - UPB_UNUSED(handler_data); - p->depth_++; - p->first_elem_[p->depth_] = true; - return closure; -} - -static bool endseq_fieldmask(void *closure, const void *handler_data) { - upb_json_printer *p = closure; - UPB_UNUSED(handler_data); - p->depth_--; - return true; -} - -static void *repeated_startstr_fieldmask( - void *closure, const void *handler_data, - size_t size_hint) { - upb_json_printer *p = closure; - UPB_UNUSED(handler_data); - UPB_UNUSED(size_hint); - print_comma(p); - return p; -} - -static size_t repeated_str_fieldmask( - void *closure, const void *handler_data, - const char *str, size_t len, - const upb_bufhandle *handle) { - const char* limit = str + len; - bool upper = false; - size_t result_len = 0; - for (; str < limit; str++) { - if (*str == '_') { - upper = true; - continue; - } - if (upper && *str >= 'a' && *str <= 'z') { - char upper_char = toupper(*str); - CHK(putstr(closure, handler_data, &upper_char, 1, handle)); - } else { - CHK(putstr(closure, handler_data, str, 1, handle)); - } - upper = false; - result_len++; - } - return result_len; -} - -static void *startmap_nokey(void *closure, const void *handler_data) { - upb_json_printer *p = closure; - UPB_UNUSED(handler_data); - p->depth_++; - p->first_elem_[p->depth_] = true; - print_data(p, "{", 1); - return closure; -} - -static bool putnull(void *closure, const void *handler_data, - int32_t null) { - upb_json_printer *p = closure; - print_data(p, "null", 4); - UPB_UNUSED(handler_data); - UPB_UNUSED(null); - return true; -} - -static bool printer_startdurationmsg(void *closure, const void *handler_data) { - upb_json_printer *p = closure; - UPB_UNUSED(handler_data); - if (p->depth_ == 0) { - upb_bytessink_start(p->output_, 0, &p->subc_); - } - return true; -} - -#define UPB_DURATION_MAX_JSON_LEN 23 -#define UPB_DURATION_MAX_NANO_LEN 9 - -static bool printer_enddurationmsg(void *closure, const void *handler_data, - upb_status *s) { - upb_json_printer *p = closure; - char buffer[UPB_DURATION_MAX_JSON_LEN]; - size_t base_len; - size_t curr; - size_t i; - - memset(buffer, 0, UPB_DURATION_MAX_JSON_LEN); - - if (p->seconds < -315576000000) { - upb_status_seterrf(s, "error parsing duration: " - "minimum acceptable value is " - "-315576000000"); - return false; - } - - if (p->seconds > 315576000000) { - upb_status_seterrf(s, "error serializing duration: " - "maximum acceptable value is " - "315576000000"); - return false; - } - - snprintf(buffer, sizeof(buffer), "%ld", (long)p->seconds); - base_len = strlen(buffer); - - if (p->nanos != 0) { - char nanos_buffer[UPB_DURATION_MAX_NANO_LEN + 3]; - snprintf(nanos_buffer, sizeof(nanos_buffer), "%.9f", - p->nanos / 1000000000.0); - /* Remove trailing 0. */ - for (i = UPB_DURATION_MAX_NANO_LEN + 2; - nanos_buffer[i] == '0'; i--) { - nanos_buffer[i] = 0; - } - strcpy(buffer + base_len, nanos_buffer + 1); - } - - curr = strlen(buffer); - strcpy(buffer + curr, "s"); - - p->seconds = 0; - p->nanos = 0; - - print_data(p, "\"", 1); - print_data(p, buffer, strlen(buffer)); - print_data(p, "\"", 1); - - if (p->depth_ == 0) { - upb_bytessink_end(p->output_); - } - - UPB_UNUSED(handler_data); - return true; -} - -static bool printer_starttimestampmsg(void *closure, const void *handler_data) { - upb_json_printer *p = closure; - UPB_UNUSED(handler_data); - if (p->depth_ == 0) { - upb_bytessink_start(p->output_, 0, &p->subc_); - } - return true; -} - -#define UPB_TIMESTAMP_MAX_JSON_LEN 31 -#define UPB_TIMESTAMP_BEFORE_NANO_LEN 19 -#define UPB_TIMESTAMP_MAX_NANO_LEN 9 - -static bool printer_endtimestampmsg(void *closure, const void *handler_data, - upb_status *s) { - upb_json_printer *p = closure; - char buffer[UPB_TIMESTAMP_MAX_JSON_LEN]; - time_t time = p->seconds; - size_t curr; - size_t i; - size_t year_length = - strftime(buffer, UPB_TIMESTAMP_MAX_JSON_LEN, "%Y", gmtime(&time)); - - if (p->seconds < -62135596800) { - upb_status_seterrf(s, "error parsing timestamp: " - "minimum acceptable value is " - "0001-01-01T00:00:00Z"); - return false; - } - - if (p->seconds > 253402300799) { - upb_status_seterrf(s, "error parsing timestamp: " - "maximum acceptable value is " - "9999-12-31T23:59:59Z"); - return false; - } - - /* strftime doesn't guarantee 4 digits for year. Prepend 0 by ourselves. */ - for (i = 0; i < 4 - year_length; i++) { - buffer[i] = '0'; - } - - strftime(buffer + (4 - year_length), UPB_TIMESTAMP_MAX_JSON_LEN, - "%Y-%m-%dT%H:%M:%S", gmtime(&time)); - if (p->nanos != 0) { - char nanos_buffer[UPB_TIMESTAMP_MAX_NANO_LEN + 3]; - snprintf(nanos_buffer, sizeof(nanos_buffer), "%.9f", - p->nanos / 1000000000.0); - /* Remove trailing 0. */ - for (i = UPB_TIMESTAMP_MAX_NANO_LEN + 2; - nanos_buffer[i] == '0'; i--) { - nanos_buffer[i] = 0; - } - strcpy(buffer + UPB_TIMESTAMP_BEFORE_NANO_LEN, nanos_buffer + 1); - } - - curr = strlen(buffer); - strcpy(buffer + curr, "Z"); - - p->seconds = 0; - p->nanos = 0; - - print_data(p, "\"", 1); - print_data(p, buffer, strlen(buffer)); - print_data(p, "\"", 1); - - if (p->depth_ == 0) { - upb_bytessink_end(p->output_); - } - - UPB_UNUSED(handler_data); - UPB_UNUSED(s); - return true; -} - -static bool printer_startmsg_noframe(void *closure, const void *handler_data) { - upb_json_printer *p = closure; - UPB_UNUSED(handler_data); - if (p->depth_ == 0) { - upb_bytessink_start(p->output_, 0, &p->subc_); - } - return true; -} - -static bool printer_endmsg_noframe( - void *closure, const void *handler_data, upb_status *s) { - upb_json_printer *p = closure; - UPB_UNUSED(handler_data); - UPB_UNUSED(s); - if (p->depth_ == 0) { - upb_bytessink_end(p->output_); - } - return true; -} - -static bool printer_startmsg_fieldmask( - void *closure, const void *handler_data) { - upb_json_printer *p = closure; - UPB_UNUSED(handler_data); - if (p->depth_ == 0) { - upb_bytessink_start(p->output_, 0, &p->subc_); - } - print_data(p, "\"", 1); - return true; -} - -static bool printer_endmsg_fieldmask( - void *closure, const void *handler_data, upb_status *s) { - upb_json_printer *p = closure; - UPB_UNUSED(handler_data); - UPB_UNUSED(s); - print_data(p, "\"", 1); - if (p->depth_ == 0) { - upb_bytessink_end(p->output_); - } - return true; -} - -static void *scalar_startstr_onlykey( - void *closure, const void *handler_data, size_t size_hint) { - upb_json_printer *p = closure; - UPB_UNUSED(size_hint); - CHK(putkey(closure, handler_data)); - return p; -} - -/* Set up handlers for an Any submessage. */ -void printer_sethandlers_any(const void *closure, upb_handlers *h) { - const upb_msgdef *md = upb_handlers_msgdef(h); - - const upb_fielddef* type_field = upb_msgdef_itof(md, UPB_ANY_TYPE); - const upb_fielddef* value_field = upb_msgdef_itof(md, UPB_ANY_VALUE); - - upb_handlerattr empty_attr = UPB_HANDLERATTR_INIT; - - /* type_url's json name is "@type" */ - upb_handlerattr type_name_attr = UPB_HANDLERATTR_INIT; - upb_handlerattr value_name_attr = UPB_HANDLERATTR_INIT; - strpc *type_url_json_name = newstrpc_str(h, "@type"); - strpc *value_json_name = newstrpc_str(h, "value"); - - type_name_attr.handler_data = type_url_json_name; - value_name_attr.handler_data = value_json_name; - - /* Set up handlers. */ - upb_handlers_setstartmsg(h, printer_startmsg, &empty_attr); - upb_handlers_setendmsg(h, printer_endmsg, &empty_attr); - - upb_handlers_setstartstr(h, type_field, scalar_startstr, &type_name_attr); - upb_handlers_setstring(h, type_field, scalar_str, &empty_attr); - upb_handlers_setendstr(h, type_field, scalar_endstr, &empty_attr); - - /* This is not the full and correct JSON encoding for the Any value field. It - * requires further processing by the wrapper code based on the type URL. - */ - upb_handlers_setstartstr(h, value_field, scalar_startstr_onlykey, - &value_name_attr); - - UPB_UNUSED(closure); -} - -/* Set up handlers for a fieldmask submessage. */ -void printer_sethandlers_fieldmask(const void *closure, upb_handlers *h) { - const upb_msgdef *md = upb_handlers_msgdef(h); - const upb_fielddef* f = upb_msgdef_itof(md, 1); - - upb_handlerattr empty_attr = UPB_HANDLERATTR_INIT; - - upb_handlers_setstartseq(h, f, startseq_fieldmask, &empty_attr); - upb_handlers_setendseq(h, f, endseq_fieldmask, &empty_attr); - - upb_handlers_setstartmsg(h, printer_startmsg_fieldmask, &empty_attr); - upb_handlers_setendmsg(h, printer_endmsg_fieldmask, &empty_attr); - - upb_handlers_setstartstr(h, f, repeated_startstr_fieldmask, &empty_attr); - upb_handlers_setstring(h, f, repeated_str_fieldmask, &empty_attr); - - UPB_UNUSED(closure); -} - -/* Set up handlers for a duration submessage. */ -void printer_sethandlers_duration(const void *closure, upb_handlers *h) { - const upb_msgdef *md = upb_handlers_msgdef(h); - - const upb_fielddef* seconds_field = - upb_msgdef_itof(md, UPB_DURATION_SECONDS); - const upb_fielddef* nanos_field = - upb_msgdef_itof(md, UPB_DURATION_NANOS); - - upb_handlerattr empty_attr = UPB_HANDLERATTR_INIT; - - upb_handlers_setstartmsg(h, printer_startdurationmsg, &empty_attr); - upb_handlers_setint64(h, seconds_field, putseconds, &empty_attr); - upb_handlers_setint32(h, nanos_field, putnanos, &empty_attr); - upb_handlers_setendmsg(h, printer_enddurationmsg, &empty_attr); - - UPB_UNUSED(closure); -} - -/* Set up handlers for a timestamp submessage. Instead of printing fields - * separately, the json representation of timestamp follows RFC 3339 */ -void printer_sethandlers_timestamp(const void *closure, upb_handlers *h) { - const upb_msgdef *md = upb_handlers_msgdef(h); - - const upb_fielddef* seconds_field = - upb_msgdef_itof(md, UPB_TIMESTAMP_SECONDS); - const upb_fielddef* nanos_field = - upb_msgdef_itof(md, UPB_TIMESTAMP_NANOS); - - upb_handlerattr empty_attr = UPB_HANDLERATTR_INIT; - - upb_handlers_setstartmsg(h, printer_starttimestampmsg, &empty_attr); - upb_handlers_setint64(h, seconds_field, putseconds, &empty_attr); - upb_handlers_setint32(h, nanos_field, putnanos, &empty_attr); - upb_handlers_setendmsg(h, printer_endtimestampmsg, &empty_attr); - - UPB_UNUSED(closure); -} - -void printer_sethandlers_value(const void *closure, upb_handlers *h) { - const upb_msgdef *md = upb_handlers_msgdef(h); - int i, n; - - upb_handlerattr empty_attr = UPB_HANDLERATTR_INIT; - - upb_handlers_setstartmsg(h, printer_startmsg_noframe, &empty_attr); - upb_handlers_setendmsg(h, printer_endmsg_noframe, &empty_attr); - - n = upb_msgdef_fieldcount(md); - for (i = 0; i < n; i++) { - const upb_fielddef *f = upb_msgdef_field(md, i); - - switch (upb_fielddef_type(f)) { - case UPB_TYPE_ENUM: - upb_handlers_setint32(h, f, putnull, &empty_attr); - break; - case UPB_TYPE_DOUBLE: - upb_handlers_setdouble(h, f, putdouble, &empty_attr); - break; - case UPB_TYPE_STRING: - upb_handlers_setstartstr(h, f, scalar_startstr_nokey, &empty_attr); - upb_handlers_setstring(h, f, scalar_str, &empty_attr); - upb_handlers_setendstr(h, f, scalar_endstr, &empty_attr); - break; - case UPB_TYPE_BOOL: - upb_handlers_setbool(h, f, putbool, &empty_attr); - break; - case UPB_TYPE_MESSAGE: - break; - default: - UPB_ASSERT(false); - break; - } - } - - UPB_UNUSED(closure); -} - -#define WRAPPER_SETHANDLERS(wrapper, type, putmethod) \ -void printer_sethandlers_##wrapper(const void *closure, upb_handlers *h) { \ - const upb_msgdef *md = upb_handlers_msgdef(h); \ - const upb_fielddef* f = upb_msgdef_itof(md, 1); \ - upb_handlerattr empty_attr = UPB_HANDLERATTR_INIT; \ - upb_handlers_setstartmsg(h, printer_startmsg_noframe, &empty_attr); \ - upb_handlers_setendmsg(h, printer_endmsg_noframe, &empty_attr); \ - upb_handlers_set##type(h, f, putmethod, &empty_attr); \ - UPB_UNUSED(closure); \ -} - -WRAPPER_SETHANDLERS(doublevalue, double, putdouble) -WRAPPER_SETHANDLERS(floatvalue, float, putfloat) -WRAPPER_SETHANDLERS(int64value, int64, putint64_t) -WRAPPER_SETHANDLERS(uint64value, uint64, putuint64_t) -WRAPPER_SETHANDLERS(int32value, int32, putint32_t) -WRAPPER_SETHANDLERS(uint32value, uint32, putuint32_t) -WRAPPER_SETHANDLERS(boolvalue, bool, putbool) -WRAPPER_SETHANDLERS(stringvalue, string, putstr_nokey) -WRAPPER_SETHANDLERS(bytesvalue, string, putbytes) - -#undef WRAPPER_SETHANDLERS - -void printer_sethandlers_listvalue(const void *closure, upb_handlers *h) { - const upb_msgdef *md = upb_handlers_msgdef(h); - const upb_fielddef* f = upb_msgdef_itof(md, 1); - - upb_handlerattr empty_attr = UPB_HANDLERATTR_INIT; - - upb_handlers_setstartseq(h, f, startseq_nokey, &empty_attr); - upb_handlers_setendseq(h, f, endseq, &empty_attr); - - upb_handlers_setstartmsg(h, printer_startmsg_noframe, &empty_attr); - upb_handlers_setendmsg(h, printer_endmsg_noframe, &empty_attr); - - upb_handlers_setstartsubmsg(h, f, repeated_startsubmsg, &empty_attr); - - UPB_UNUSED(closure); -} - -void printer_sethandlers_structvalue(const void *closure, upb_handlers *h) { - const upb_msgdef *md = upb_handlers_msgdef(h); - const upb_fielddef* f = upb_msgdef_itof(md, 1); - - upb_handlerattr empty_attr = UPB_HANDLERATTR_INIT; - - upb_handlers_setstartseq(h, f, startmap_nokey, &empty_attr); - upb_handlers_setendseq(h, f, endmap, &empty_attr); - - upb_handlers_setstartmsg(h, printer_startmsg_noframe, &empty_attr); - upb_handlers_setendmsg(h, printer_endmsg_noframe, &empty_attr); - - upb_handlers_setstartsubmsg(h, f, repeated_startsubmsg, &empty_attr); - - UPB_UNUSED(closure); -} - -void printer_sethandlers(const void *closure, upb_handlers *h) { - const upb_msgdef *md = upb_handlers_msgdef(h); - bool is_mapentry = upb_msgdef_mapentry(md); - upb_handlerattr empty_attr = UPB_HANDLERATTR_INIT; - int i, n; - const upb_json_printercache *cache = closure; - const bool preserve_fieldnames = cache->preserve_fieldnames; - - if (is_mapentry) { - /* mapentry messages are sufficiently different that we handle them - * separately. */ - printer_sethandlers_mapentry(closure, preserve_fieldnames, h); - return; - } - - switch (upb_msgdef_wellknowntype(md)) { - case UPB_WELLKNOWN_UNSPECIFIED: - break; - case UPB_WELLKNOWN_ANY: - printer_sethandlers_any(closure, h); - return; - case UPB_WELLKNOWN_FIELDMASK: - printer_sethandlers_fieldmask(closure, h); - return; - case UPB_WELLKNOWN_DURATION: - printer_sethandlers_duration(closure, h); - return; - case UPB_WELLKNOWN_TIMESTAMP: - printer_sethandlers_timestamp(closure, h); - return; - case UPB_WELLKNOWN_VALUE: - printer_sethandlers_value(closure, h); - return; - case UPB_WELLKNOWN_LISTVALUE: - printer_sethandlers_listvalue(closure, h); - return; - case UPB_WELLKNOWN_STRUCT: - printer_sethandlers_structvalue(closure, h); - return; -#define WRAPPER(wellknowntype, name) \ - case wellknowntype: \ - printer_sethandlers_##name(closure, h); \ - return; \ - - WRAPPER(UPB_WELLKNOWN_DOUBLEVALUE, doublevalue); - WRAPPER(UPB_WELLKNOWN_FLOATVALUE, floatvalue); - WRAPPER(UPB_WELLKNOWN_INT64VALUE, int64value); - WRAPPER(UPB_WELLKNOWN_UINT64VALUE, uint64value); - WRAPPER(UPB_WELLKNOWN_INT32VALUE, int32value); - WRAPPER(UPB_WELLKNOWN_UINT32VALUE, uint32value); - WRAPPER(UPB_WELLKNOWN_BOOLVALUE, boolvalue); - WRAPPER(UPB_WELLKNOWN_STRINGVALUE, stringvalue); - WRAPPER(UPB_WELLKNOWN_BYTESVALUE, bytesvalue); - -#undef WRAPPER - } - - upb_handlers_setstartmsg(h, printer_startmsg, &empty_attr); - upb_handlers_setendmsg(h, printer_endmsg, &empty_attr); - -#define TYPE(type, name, ctype) \ - case type: \ - if (upb_fielddef_isseq(f)) { \ - upb_handlers_set##name(h, f, repeated_##ctype, &empty_attr); \ - } else { \ - upb_handlers_set##name(h, f, scalar_##ctype, &name_attr); \ - } \ - break; - - n = upb_msgdef_fieldcount(md); - for (i = 0; i < n; i++) { - const upb_fielddef *f = upb_msgdef_field(md, i); - - upb_handlerattr name_attr = UPB_HANDLERATTR_INIT; - name_attr.handler_data = newstrpc(h, f, preserve_fieldnames); - - if (upb_fielddef_ismap(f)) { - upb_handlers_setstartseq(h, f, startmap, &name_attr); - upb_handlers_setendseq(h, f, endmap, &name_attr); - } else if (upb_fielddef_isseq(f)) { - upb_handlers_setstartseq(h, f, startseq, &name_attr); - upb_handlers_setendseq(h, f, endseq, &empty_attr); - } - - switch (upb_fielddef_type(f)) { - TYPE(UPB_TYPE_FLOAT, float, float); - TYPE(UPB_TYPE_DOUBLE, double, double); - TYPE(UPB_TYPE_BOOL, bool, bool); - TYPE(UPB_TYPE_INT32, int32, int32_t); - TYPE(UPB_TYPE_UINT32, uint32, uint32_t); - TYPE(UPB_TYPE_INT64, int64, int64_t); - TYPE(UPB_TYPE_UINT64, uint64, uint64_t); - case UPB_TYPE_ENUM: { - /* For now, we always emit symbolic names for enums. We may want an - * option later to control this behavior, but we will wait for a real - * need first. */ - upb_handlerattr enum_attr = UPB_HANDLERATTR_INIT; - set_enum_hd(h, f, preserve_fieldnames, &enum_attr); - - if (upb_fielddef_isseq(f)) { - upb_handlers_setint32(h, f, repeated_enum, &enum_attr); - } else { - upb_handlers_setint32(h, f, scalar_enum, &enum_attr); - } - - break; - } - case UPB_TYPE_STRING: - if (upb_fielddef_isseq(f)) { - upb_handlers_setstartstr(h, f, repeated_startstr, &empty_attr); - upb_handlers_setstring(h, f, repeated_str, &empty_attr); - upb_handlers_setendstr(h, f, repeated_endstr, &empty_attr); - } else { - upb_handlers_setstartstr(h, f, scalar_startstr, &name_attr); - upb_handlers_setstring(h, f, scalar_str, &empty_attr); - upb_handlers_setendstr(h, f, scalar_endstr, &empty_attr); - } - break; - case UPB_TYPE_BYTES: - /* XXX: this doesn't support strings that span buffers yet. The base64 - * encoder will need to be made resumable for this to work properly. */ - if (upb_fielddef_isseq(f)) { - upb_handlers_setstring(h, f, repeated_bytes, &empty_attr); - } else { - upb_handlers_setstring(h, f, scalar_bytes, &name_attr); - } - break; - case UPB_TYPE_MESSAGE: - if (upb_fielddef_isseq(f)) { - upb_handlers_setstartsubmsg(h, f, repeated_startsubmsg, &name_attr); - } else { - upb_handlers_setstartsubmsg(h, f, scalar_startsubmsg, &name_attr); - } - break; - } - } - -#undef TYPE -} - -static void json_printer_reset(upb_json_printer *p) { - p->depth_ = 0; -} - - -/* Public API *****************************************************************/ - -upb_json_printer *upb_json_printer_create(upb_arena *a, const upb_handlers *h, - upb_bytessink output) { - upb_json_printer *p = upb_arena_malloc(a, sizeof(upb_json_printer)); - if (!p) return NULL; - - p->output_ = output; - json_printer_reset(p); - upb_sink_reset(&p->input_, h, p); - p->seconds = 0; - p->nanos = 0; - - return p; -} - -upb_sink upb_json_printer_input(upb_json_printer *p) { - return p->input_; -} - -upb_handlercache *upb_json_printer_newcache(bool preserve_proto_fieldnames) { - upb_json_printercache *cache = upb_gmalloc(sizeof(*cache)); - upb_handlercache *ret = upb_handlercache_new(printer_sethandlers, cache); - - cache->preserve_fieldnames = preserve_proto_fieldnames; - upb_handlercache_addcleanup(ret, cache, upb_gfree); - - return ret; -} diff --git a/upb/json/printer.h b/upb/json/printer.h deleted file mode 100644 index 85b9b128f9..0000000000 --- a/upb/json/printer.h +++ /dev/null @@ -1,72 +0,0 @@ -/* -** upb::json::Printer -** -** Handlers that emit JSON according to a specific protobuf schema. -*/ - -#ifndef UPB_JSON_TYPED_PRINTER_H_ -#define UPB_JSON_TYPED_PRINTER_H_ - -#include "upb/sink.h" - -#ifdef __cplusplus -namespace upb { -namespace json { -class PrinterPtr; -} /* namespace json */ -} /* namespace upb */ -#endif - -/* upb_json_printer ***********************************************************/ - -#define UPB_JSON_PRINTER_SIZE 192 - -struct upb_json_printer; -typedef struct upb_json_printer upb_json_printer; - -#ifdef __cplusplus -extern "C" { -#endif - -/* Native C API. */ -upb_json_printer *upb_json_printer_create(upb_arena *a, const upb_handlers *h, - upb_bytessink output); -upb_sink upb_json_printer_input(upb_json_printer *p); -const upb_handlers *upb_json_printer_newhandlers(const upb_msgdef *md, - bool preserve_fieldnames, - const void *owner); - -/* Lazily builds and caches handlers that will push encoded data to a bytessink. - * Any msgdef objects used with this object must outlive it. */ -upb_handlercache *upb_json_printer_newcache(bool preserve_proto_fieldnames); - -#ifdef __cplusplus -} /* extern "C" */ - -/* Prints an incoming stream of data to a BytesSink in JSON format. */ -class upb::json::PrinterPtr { - public: - PrinterPtr(upb_json_printer* ptr) : ptr_(ptr) {} - - static PrinterPtr Create(Arena *arena, const upb::Handlers *handlers, - BytesSink output) { - return PrinterPtr( - upb_json_printer_create(arena->ptr(), handlers, output.sink())); - } - - /* The input to the printer. */ - Sink input() { return upb_json_printer_input(ptr_); } - - static const size_t kSize = UPB_JSON_PRINTER_SIZE; - - static HandlerCache NewCache(bool preserve_proto_fieldnames) { - return upb_json_printer_newcache(preserve_proto_fieldnames); - } - - private: - upb_json_printer* ptr_; -}; - -#endif /* __cplusplus */ - -#endif /* UPB_JSON_TYPED_PRINTER_H_ */ diff --git a/upb/pb/compile_decoder.c b/upb/pb/compile_decoder.c deleted file mode 100644 index 3c73f1a93d..0000000000 --- a/upb/pb/compile_decoder.c +++ /dev/null @@ -1,919 +0,0 @@ -/* -** protobuf decoder bytecode compiler -** -** Code to compile a upb::Handlers into bytecode for decoding a protobuf -** according to that specific schema and destination handlers. -** -** Bytecode definition is in decoder.int.h. -*/ - -#include -#include "upb/pb/decoder.int.h" -#include "upb/pb/varint.int.h" - -#ifdef UPB_DUMP_BYTECODE -#include -#endif - -#include "upb/port_def.inc" - -#define MAXLABEL 5 -#define EMPTYLABEL -1 - -/* upb_pbdecodermethod ********************************************************/ - -static void freemethod(upb_pbdecodermethod *method) { - upb_inttable_uninit(&method->dispatch); - upb_gfree(method); -} - -static upb_pbdecodermethod *newmethod(const upb_handlers *dest_handlers, - mgroup *group) { - upb_pbdecodermethod *ret = upb_gmalloc(sizeof(*ret)); - upb_byteshandler_init(&ret->input_handler_); - - ret->group = group; - ret->dest_handlers_ = dest_handlers; - upb_inttable_init(&ret->dispatch, UPB_CTYPE_UINT64); - - return ret; -} - -const upb_handlers *upb_pbdecodermethod_desthandlers( - const upb_pbdecodermethod *m) { - return m->dest_handlers_; -} - -const upb_byteshandler *upb_pbdecodermethod_inputhandler( - const upb_pbdecodermethod *m) { - return &m->input_handler_; -} - -bool upb_pbdecodermethod_isnative(const upb_pbdecodermethod *m) { - return m->is_native_; -} - - -/* mgroup *********************************************************************/ - -static void freegroup(mgroup *g) { - upb_inttable_iter i; - - upb_inttable_begin(&i, &g->methods); - for(; !upb_inttable_done(&i); upb_inttable_next(&i)) { - freemethod(upb_value_getptr(upb_inttable_iter_value(&i))); - } - - upb_inttable_uninit(&g->methods); - upb_gfree(g->bytecode); - upb_gfree(g); -} - -mgroup *newgroup(void) { - mgroup *g = upb_gmalloc(sizeof(*g)); - upb_inttable_init(&g->methods, UPB_CTYPE_PTR); - g->bytecode = NULL; - g->bytecode_end = NULL; - return g; -} - - -/* bytecode compiler **********************************************************/ - -/* Data used only at compilation time. */ -typedef struct { - mgroup *group; - - uint32_t *pc; - int fwd_labels[MAXLABEL]; - int back_labels[MAXLABEL]; - - /* For fields marked "lazy", parse them lazily or eagerly? */ - bool lazy; -} compiler; - -static compiler *newcompiler(mgroup *group, bool lazy) { - compiler *ret = upb_gmalloc(sizeof(*ret)); - int i; - - ret->group = group; - ret->lazy = lazy; - for (i = 0; i < MAXLABEL; i++) { - ret->fwd_labels[i] = EMPTYLABEL; - ret->back_labels[i] = EMPTYLABEL; - } - return ret; -} - -static void freecompiler(compiler *c) { - upb_gfree(c); -} - -const size_t ptr_words = sizeof(void*) / sizeof(uint32_t); - -/* How many words an instruction is. */ -static int instruction_len(uint32_t instr) { - switch (getop(instr)) { - case OP_SETDISPATCH: return 1 + ptr_words; - case OP_TAGN: return 3; - case OP_SETBIGGROUPNUM: return 2; - default: return 1; - } -} - -bool op_has_longofs(int32_t instruction) { - switch (getop(instruction)) { - case OP_CALL: - case OP_BRANCH: - case OP_CHECKDELIM: - return true; - /* The "tag" instructions only have 8 bytes available for the jump target, - * but that is ok because these opcodes only require short jumps. */ - case OP_TAG1: - case OP_TAG2: - case OP_TAGN: - return false; - default: - UPB_ASSERT(false); - return false; - } -} - -static int32_t getofs(uint32_t instruction) { - if (op_has_longofs(instruction)) { - return (int32_t)instruction >> 8; - } else { - return (int8_t)(instruction >> 8); - } -} - -static void setofs(uint32_t *instruction, int32_t ofs) { - if (op_has_longofs(*instruction)) { - *instruction = getop(*instruction) | (uint32_t)ofs << 8; - } else { - *instruction = (*instruction & ~0xff00) | ((ofs & 0xff) << 8); - } - UPB_ASSERT(getofs(*instruction) == ofs); /* Would fail in cases of overflow. */ -} - -static uint32_t pcofs(compiler *c) { - return (uint32_t)(c->pc - c->group->bytecode); -} - -/* Defines a local label at the current PC location. All previous forward - * references are updated to point to this location. The location is noted - * for any future backward references. */ -static void label(compiler *c, unsigned int label) { - int val; - uint32_t *codep; - - UPB_ASSERT(label < MAXLABEL); - val = c->fwd_labels[label]; - codep = (val == EMPTYLABEL) ? NULL : c->group->bytecode + val; - while (codep) { - int ofs = getofs(*codep); - setofs(codep, (int32_t)(c->pc - codep - instruction_len(*codep))); - codep = ofs ? codep + ofs : NULL; - } - c->fwd_labels[label] = EMPTYLABEL; - c->back_labels[label] = pcofs(c); -} - -/* Creates a reference to a numbered label; either a forward reference - * (positive arg) or backward reference (negative arg). For forward references - * the value returned now is actually a "next" pointer into a linked list of all - * instructions that use this label and will be patched later when the label is - * defined with label(). - * - * The returned value is the offset that should be written into the instruction. - */ -static int32_t labelref(compiler *c, int label) { - UPB_ASSERT(label < MAXLABEL); - if (label == LABEL_DISPATCH) { - /* No resolving required. */ - return 0; - } else if (label < 0) { - /* Backward local label. Relative to the next instruction. */ - uint32_t from = (uint32_t)((c->pc + 1) - c->group->bytecode); - return c->back_labels[-label] - from; - } else { - /* Forward local label: prepend to (possibly-empty) linked list. */ - int *lptr = &c->fwd_labels[label]; - int32_t ret = (*lptr == EMPTYLABEL) ? 0 : *lptr - pcofs(c); - *lptr = pcofs(c); - return ret; - } -} - -static void put32(compiler *c, uint32_t v) { - mgroup *g = c->group; - if (c->pc == g->bytecode_end) { - int ofs = pcofs(c); - size_t oldsize = g->bytecode_end - g->bytecode; - size_t newsize = UPB_MAX(oldsize * 2, 64); - /* TODO(haberman): handle OOM. */ - g->bytecode = upb_grealloc(g->bytecode, oldsize * sizeof(uint32_t), - newsize * sizeof(uint32_t)); - g->bytecode_end = g->bytecode + newsize; - c->pc = g->bytecode + ofs; - } - *c->pc++ = v; -} - -static void putop(compiler *c, int op, ...) { - va_list ap; - va_start(ap, op); - - switch (op) { - case OP_SETDISPATCH: { - uintptr_t ptr = (uintptr_t)va_arg(ap, void*); - put32(c, OP_SETDISPATCH); - put32(c, (uint32_t)ptr); - if (sizeof(uintptr_t) > sizeof(uint32_t)) - put32(c, (uint64_t)ptr >> 32); - break; - } - case OP_STARTMSG: - case OP_ENDMSG: - case OP_PUSHLENDELIM: - case OP_POP: - case OP_SETDELIM: - case OP_HALT: - case OP_RET: - case OP_DISPATCH: - put32(c, op); - break; - case OP_PARSE_DOUBLE: - case OP_PARSE_FLOAT: - case OP_PARSE_INT64: - case OP_PARSE_UINT64: - case OP_PARSE_INT32: - case OP_PARSE_FIXED64: - case OP_PARSE_FIXED32: - case OP_PARSE_BOOL: - case OP_PARSE_UINT32: - case OP_PARSE_SFIXED32: - case OP_PARSE_SFIXED64: - case OP_PARSE_SINT32: - case OP_PARSE_SINT64: - case OP_STARTSEQ: - case OP_ENDSEQ: - case OP_STARTSUBMSG: - case OP_ENDSUBMSG: - case OP_STARTSTR: - case OP_STRING: - case OP_ENDSTR: - case OP_PUSHTAGDELIM: - put32(c, op | va_arg(ap, upb_selector_t) << 8); - break; - case OP_SETBIGGROUPNUM: - put32(c, op); - put32(c, va_arg(ap, int)); - break; - case OP_CALL: { - const upb_pbdecodermethod *method = va_arg(ap, upb_pbdecodermethod *); - put32(c, op | (method->code_base.ofs - (pcofs(c) + 1)) << 8); - break; - } - case OP_CHECKDELIM: - case OP_BRANCH: { - uint32_t instruction = op; - int label = va_arg(ap, int); - setofs(&instruction, labelref(c, label)); - put32(c, instruction); - break; - } - case OP_TAG1: - case OP_TAG2: { - int label = va_arg(ap, int); - uint64_t tag = va_arg(ap, uint64_t); - uint32_t instruction = (uint32_t)(op | (tag << 16)); - UPB_ASSERT(tag <= 0xffff); - setofs(&instruction, labelref(c, label)); - put32(c, instruction); - break; - } - case OP_TAGN: { - int label = va_arg(ap, int); - uint64_t tag = va_arg(ap, uint64_t); - uint32_t instruction = op | (upb_value_size(tag) << 16); - setofs(&instruction, labelref(c, label)); - put32(c, instruction); - put32(c, (uint32_t)tag); - put32(c, tag >> 32); - break; - } - } - - va_end(ap); -} - -#if defined(UPB_DUMP_BYTECODE) - -const char *upb_pbdecoder_getopname(unsigned int op) { -#define QUOTE(x) #x -#define EXPAND_AND_QUOTE(x) QUOTE(x) -#define OPNAME(x) OP_##x -#define OP(x) case OPNAME(x): return EXPAND_AND_QUOTE(OPNAME(x)); -#define T(x) OP(PARSE_##x) - /* Keep in sync with list in decoder.int.h. */ - switch ((opcode)op) { - T(DOUBLE) T(FLOAT) T(INT64) T(UINT64) T(INT32) T(FIXED64) T(FIXED32) - T(BOOL) T(UINT32) T(SFIXED32) T(SFIXED64) T(SINT32) T(SINT64) - OP(STARTMSG) OP(ENDMSG) OP(STARTSEQ) OP(ENDSEQ) OP(STARTSUBMSG) - OP(ENDSUBMSG) OP(STARTSTR) OP(STRING) OP(ENDSTR) OP(CALL) OP(RET) - OP(PUSHLENDELIM) OP(PUSHTAGDELIM) OP(SETDELIM) OP(CHECKDELIM) - OP(BRANCH) OP(TAG1) OP(TAG2) OP(TAGN) OP(SETDISPATCH) OP(POP) - OP(SETBIGGROUPNUM) OP(DISPATCH) OP(HALT) - } - return ""; -#undef OP -#undef T -} - -#endif - -#ifdef UPB_DUMP_BYTECODE - -static void dumpbc(uint32_t *p, uint32_t *end, FILE *f) { - - uint32_t *begin = p; - - while (p < end) { - fprintf(f, "%p %8tx", p, p - begin); - uint32_t instr = *p++; - uint8_t op = getop(instr); - fprintf(f, " %s", upb_pbdecoder_getopname(op)); - switch ((opcode)op) { - case OP_SETDISPATCH: { - const upb_inttable *dispatch; - memcpy(&dispatch, p, sizeof(void*)); - p += ptr_words; - const upb_pbdecodermethod *method = - (void *)((char *)dispatch - - offsetof(upb_pbdecodermethod, dispatch)); - fprintf(f, " %s", upb_msgdef_fullname( - upb_handlers_msgdef(method->dest_handlers_))); - break; - } - case OP_DISPATCH: - case OP_STARTMSG: - case OP_ENDMSG: - case OP_PUSHLENDELIM: - case OP_POP: - case OP_SETDELIM: - case OP_HALT: - case OP_RET: - break; - case OP_PARSE_DOUBLE: - case OP_PARSE_FLOAT: - case OP_PARSE_INT64: - case OP_PARSE_UINT64: - case OP_PARSE_INT32: - case OP_PARSE_FIXED64: - case OP_PARSE_FIXED32: - case OP_PARSE_BOOL: - case OP_PARSE_UINT32: - case OP_PARSE_SFIXED32: - case OP_PARSE_SFIXED64: - case OP_PARSE_SINT32: - case OP_PARSE_SINT64: - case OP_STARTSEQ: - case OP_ENDSEQ: - case OP_STARTSUBMSG: - case OP_ENDSUBMSG: - case OP_STARTSTR: - case OP_STRING: - case OP_ENDSTR: - case OP_PUSHTAGDELIM: - fprintf(f, " %d", instr >> 8); - break; - case OP_SETBIGGROUPNUM: - fprintf(f, " %d", *p++); - break; - case OP_CHECKDELIM: - case OP_CALL: - case OP_BRANCH: - fprintf(f, " =>0x%tx", p + getofs(instr) - begin); - break; - case OP_TAG1: - case OP_TAG2: { - fprintf(f, " tag:0x%x", instr >> 16); - if (getofs(instr)) { - fprintf(f, " =>0x%tx", p + getofs(instr) - begin); - } - break; - } - case OP_TAGN: { - uint64_t tag = *p++; - tag |= (uint64_t)*p++ << 32; - fprintf(f, " tag:0x%llx", (long long)tag); - fprintf(f, " n:%d", instr >> 16); - if (getofs(instr)) { - fprintf(f, " =>0x%tx", p + getofs(instr) - begin); - } - break; - } - } - fputs("\n", f); - } -} - -#endif - -static uint64_t get_encoded_tag(const upb_fielddef *f, int wire_type) { - uint32_t tag = (upb_fielddef_number(f) << 3) | wire_type; - uint64_t encoded_tag = upb_vencode32(tag); - /* No tag should be greater than 5 bytes. */ - UPB_ASSERT(encoded_tag <= 0xffffffffff); - return encoded_tag; -} - -static void putchecktag(compiler *c, const upb_fielddef *f, - int wire_type, int dest) { - uint64_t tag = get_encoded_tag(f, wire_type); - switch (upb_value_size(tag)) { - case 1: - putop(c, OP_TAG1, dest, tag); - break; - case 2: - putop(c, OP_TAG2, dest, tag); - break; - default: - putop(c, OP_TAGN, dest, tag); - break; - } -} - -static upb_selector_t getsel(const upb_fielddef *f, upb_handlertype_t type) { - upb_selector_t selector; - bool ok = upb_handlers_getselector(f, type, &selector); - UPB_ASSERT(ok); - return selector; -} - -/* Takes an existing, primary dispatch table entry and repacks it with a - * different alternate wire type. Called when we are inserting a secondary - * dispatch table entry for an alternate wire type. */ -static uint64_t repack(uint64_t dispatch, int new_wt2) { - uint64_t ofs; - uint8_t wt1; - uint8_t old_wt2; - upb_pbdecoder_unpackdispatch(dispatch, &ofs, &wt1, &old_wt2); - UPB_ASSERT(old_wt2 == NO_WIRE_TYPE); /* wt2 should not be set yet. */ - return upb_pbdecoder_packdispatch(ofs, wt1, new_wt2); -} - -/* Marks the current bytecode position as the dispatch target for this message, - * field, and wire type. */ -static void dispatchtarget(compiler *c, upb_pbdecodermethod *method, - const upb_fielddef *f, int wire_type) { - /* Offset is relative to msg base. */ - uint64_t ofs = pcofs(c) - method->code_base.ofs; - uint32_t fn = upb_fielddef_number(f); - upb_inttable *d = &method->dispatch; - upb_value v; - if (upb_inttable_remove(d, fn, &v)) { - /* TODO: prioritize based on packed setting in .proto file. */ - uint64_t repacked = repack(upb_value_getuint64(v), wire_type); - upb_inttable_insert(d, fn, upb_value_uint64(repacked)); - upb_inttable_insert(d, fn + UPB_MAX_FIELDNUMBER, upb_value_uint64(ofs)); - } else { - uint64_t val = upb_pbdecoder_packdispatch(ofs, wire_type, NO_WIRE_TYPE); - upb_inttable_insert(d, fn, upb_value_uint64(val)); - } -} - -static void putpush(compiler *c, const upb_fielddef *f) { - if (upb_fielddef_descriptortype(f) == UPB_DESCRIPTOR_TYPE_MESSAGE) { - putop(c, OP_PUSHLENDELIM); - } else { - uint32_t fn = upb_fielddef_number(f); - if (fn >= 1 << 24) { - putop(c, OP_PUSHTAGDELIM, 0); - putop(c, OP_SETBIGGROUPNUM, fn); - } else { - putop(c, OP_PUSHTAGDELIM, fn); - } - } -} - -static upb_pbdecodermethod *find_submethod(const compiler *c, - const upb_pbdecodermethod *method, - const upb_fielddef *f) { - const upb_handlers *sub = - upb_handlers_getsubhandlers(method->dest_handlers_, f); - upb_value v; - return upb_inttable_lookupptr(&c->group->methods, sub, &v) - ? upb_value_getptr(v) - : NULL; -} - -static void putsel(compiler *c, opcode op, upb_selector_t sel, - const upb_handlers *h) { - if (upb_handlers_gethandler(h, sel, NULL)) { - putop(c, op, sel); - } -} - -/* Puts an opcode to call a callback, but only if a callback actually exists for - * this field and handler type. */ -static void maybeput(compiler *c, opcode op, const upb_handlers *h, - const upb_fielddef *f, upb_handlertype_t type) { - putsel(c, op, getsel(f, type), h); -} - -static bool haslazyhandlers(const upb_handlers *h, const upb_fielddef *f) { - if (!upb_fielddef_lazy(f)) - return false; - - return upb_handlers_gethandler(h, getsel(f, UPB_HANDLER_STARTSTR), NULL) || - upb_handlers_gethandler(h, getsel(f, UPB_HANDLER_STRING), NULL) || - upb_handlers_gethandler(h, getsel(f, UPB_HANDLER_ENDSTR), NULL); -} - - -/* bytecode compiler code generation ******************************************/ - -/* Symbolic names for our local labels. */ -#define LABEL_LOOPSTART 1 /* Top of a repeated field loop. */ -#define LABEL_LOOPBREAK 2 /* To jump out of a repeated loop */ -#define LABEL_FIELD 3 /* Jump backward to find the most recent field. */ -#define LABEL_ENDMSG 4 /* To reach the OP_ENDMSG instr for this msg. */ - -/* Generates bytecode to parse a single non-lazy message field. */ -static void generate_msgfield(compiler *c, const upb_fielddef *f, - upb_pbdecodermethod *method) { - const upb_handlers *h = upb_pbdecodermethod_desthandlers(method); - const upb_pbdecodermethod *sub_m = find_submethod(c, method, f); - int wire_type; - - if (!sub_m) { - /* Don't emit any code for this field at all; it will be parsed as an - * unknown field. - * - * TODO(haberman): we should change this to parse it as a string field - * instead. It will probably be faster, but more importantly, once we - * start vending unknown fields, a field shouldn't be treated as unknown - * just because it doesn't have subhandlers registered. */ - return; - } - - label(c, LABEL_FIELD); - - wire_type = - (upb_fielddef_descriptortype(f) == UPB_DESCRIPTOR_TYPE_MESSAGE) - ? UPB_WIRE_TYPE_DELIMITED - : UPB_WIRE_TYPE_START_GROUP; - - if (upb_fielddef_isseq(f)) { - putop(c, OP_CHECKDELIM, LABEL_ENDMSG); - putchecktag(c, f, wire_type, LABEL_DISPATCH); - dispatchtarget(c, method, f, wire_type); - putop(c, OP_PUSHTAGDELIM, 0); - putop(c, OP_STARTSEQ, getsel(f, UPB_HANDLER_STARTSEQ)); - label(c, LABEL_LOOPSTART); - putpush(c, f); - putop(c, OP_STARTSUBMSG, getsel(f, UPB_HANDLER_STARTSUBMSG)); - putop(c, OP_CALL, sub_m); - putop(c, OP_POP); - maybeput(c, OP_ENDSUBMSG, h, f, UPB_HANDLER_ENDSUBMSG); - if (wire_type == UPB_WIRE_TYPE_DELIMITED) { - putop(c, OP_SETDELIM); - } - putop(c, OP_CHECKDELIM, LABEL_LOOPBREAK); - putchecktag(c, f, wire_type, LABEL_LOOPBREAK); - putop(c, OP_BRANCH, -LABEL_LOOPSTART); - label(c, LABEL_LOOPBREAK); - putop(c, OP_POP); - maybeput(c, OP_ENDSEQ, h, f, UPB_HANDLER_ENDSEQ); - } else { - putop(c, OP_CHECKDELIM, LABEL_ENDMSG); - putchecktag(c, f, wire_type, LABEL_DISPATCH); - dispatchtarget(c, method, f, wire_type); - putpush(c, f); - putop(c, OP_STARTSUBMSG, getsel(f, UPB_HANDLER_STARTSUBMSG)); - putop(c, OP_CALL, sub_m); - putop(c, OP_POP); - maybeput(c, OP_ENDSUBMSG, h, f, UPB_HANDLER_ENDSUBMSG); - if (wire_type == UPB_WIRE_TYPE_DELIMITED) { - putop(c, OP_SETDELIM); - } - } -} - -/* Generates bytecode to parse a single string or lazy submessage field. */ -static void generate_delimfield(compiler *c, const upb_fielddef *f, - upb_pbdecodermethod *method) { - const upb_handlers *h = upb_pbdecodermethod_desthandlers(method); - - label(c, LABEL_FIELD); - if (upb_fielddef_isseq(f)) { - putop(c, OP_CHECKDELIM, LABEL_ENDMSG); - putchecktag(c, f, UPB_WIRE_TYPE_DELIMITED, LABEL_DISPATCH); - dispatchtarget(c, method, f, UPB_WIRE_TYPE_DELIMITED); - putop(c, OP_PUSHTAGDELIM, 0); - putop(c, OP_STARTSEQ, getsel(f, UPB_HANDLER_STARTSEQ)); - label(c, LABEL_LOOPSTART); - putop(c, OP_PUSHLENDELIM); - putop(c, OP_STARTSTR, getsel(f, UPB_HANDLER_STARTSTR)); - /* Need to emit even if no handler to skip past the string. */ - putop(c, OP_STRING, getsel(f, UPB_HANDLER_STRING)); - maybeput(c, OP_ENDSTR, h, f, UPB_HANDLER_ENDSTR); - putop(c, OP_POP); - putop(c, OP_SETDELIM); - putop(c, OP_CHECKDELIM, LABEL_LOOPBREAK); - putchecktag(c, f, UPB_WIRE_TYPE_DELIMITED, LABEL_LOOPBREAK); - putop(c, OP_BRANCH, -LABEL_LOOPSTART); - label(c, LABEL_LOOPBREAK); - putop(c, OP_POP); - maybeput(c, OP_ENDSEQ, h, f, UPB_HANDLER_ENDSEQ); - } else { - putop(c, OP_CHECKDELIM, LABEL_ENDMSG); - putchecktag(c, f, UPB_WIRE_TYPE_DELIMITED, LABEL_DISPATCH); - dispatchtarget(c, method, f, UPB_WIRE_TYPE_DELIMITED); - putop(c, OP_PUSHLENDELIM); - putop(c, OP_STARTSTR, getsel(f, UPB_HANDLER_STARTSTR)); - putop(c, OP_STRING, getsel(f, UPB_HANDLER_STRING)); - maybeput(c, OP_ENDSTR, h, f, UPB_HANDLER_ENDSTR); - putop(c, OP_POP); - putop(c, OP_SETDELIM); - } -} - -/* Generates bytecode to parse a single primitive field. */ -static void generate_primitivefield(compiler *c, const upb_fielddef *f, - upb_pbdecodermethod *method) { - const upb_handlers *h = upb_pbdecodermethod_desthandlers(method); - upb_descriptortype_t descriptor_type = upb_fielddef_descriptortype(f); - opcode parse_type; - upb_selector_t sel; - int wire_type; - - label(c, LABEL_FIELD); - - /* From a decoding perspective, ENUM is the same as INT32. */ - if (descriptor_type == UPB_DESCRIPTOR_TYPE_ENUM) - descriptor_type = UPB_DESCRIPTOR_TYPE_INT32; - - parse_type = (opcode)descriptor_type; - - /* TODO(haberman): generate packed or non-packed first depending on "packed" - * setting in the fielddef. This will favor (in speed) whichever was - * specified. */ - - UPB_ASSERT((int)parse_type >= 0 && parse_type <= OP_MAX); - sel = getsel(f, upb_handlers_getprimitivehandlertype(f)); - wire_type = upb_pb_native_wire_types[upb_fielddef_descriptortype(f)]; - if (upb_fielddef_isseq(f)) { - putop(c, OP_CHECKDELIM, LABEL_ENDMSG); - putchecktag(c, f, UPB_WIRE_TYPE_DELIMITED, LABEL_DISPATCH); - dispatchtarget(c, method, f, UPB_WIRE_TYPE_DELIMITED); - putop(c, OP_PUSHLENDELIM); - putop(c, OP_STARTSEQ, getsel(f, UPB_HANDLER_STARTSEQ)); /* Packed */ - label(c, LABEL_LOOPSTART); - putop(c, parse_type, sel); - putop(c, OP_CHECKDELIM, LABEL_LOOPBREAK); - putop(c, OP_BRANCH, -LABEL_LOOPSTART); - dispatchtarget(c, method, f, wire_type); - putop(c, OP_PUSHTAGDELIM, 0); - putop(c, OP_STARTSEQ, getsel(f, UPB_HANDLER_STARTSEQ)); /* Non-packed */ - label(c, LABEL_LOOPSTART); - putop(c, parse_type, sel); - putop(c, OP_CHECKDELIM, LABEL_LOOPBREAK); - putchecktag(c, f, wire_type, LABEL_LOOPBREAK); - putop(c, OP_BRANCH, -LABEL_LOOPSTART); - label(c, LABEL_LOOPBREAK); - putop(c, OP_POP); /* Packed and non-packed join. */ - maybeput(c, OP_ENDSEQ, h, f, UPB_HANDLER_ENDSEQ); - putop(c, OP_SETDELIM); /* Could remove for non-packed by dup ENDSEQ. */ - } else { - putop(c, OP_CHECKDELIM, LABEL_ENDMSG); - putchecktag(c, f, wire_type, LABEL_DISPATCH); - dispatchtarget(c, method, f, wire_type); - putop(c, parse_type, sel); - } -} - -/* Adds bytecode for parsing the given message to the given decoderplan, - * while adding all dispatch targets to this message's dispatch table. */ -static void compile_method(compiler *c, upb_pbdecodermethod *method) { - const upb_handlers *h; - const upb_msgdef *md; - uint32_t* start_pc; - int i, n; - upb_value val; - - UPB_ASSERT(method); - - /* Clear all entries in the dispatch table. */ - upb_inttable_uninit(&method->dispatch); - upb_inttable_init(&method->dispatch, UPB_CTYPE_UINT64); - - h = upb_pbdecodermethod_desthandlers(method); - md = upb_handlers_msgdef(h); - - method->code_base.ofs = pcofs(c); - putop(c, OP_SETDISPATCH, &method->dispatch); - putsel(c, OP_STARTMSG, UPB_STARTMSG_SELECTOR, h); - label(c, LABEL_FIELD); - start_pc = c->pc; - n = upb_msgdef_fieldcount(md); - for(i = 0; i < n; i++) { - const upb_fielddef *f = upb_msgdef_field(md, i); - upb_fieldtype_t type = upb_fielddef_type(f); - - if (type == UPB_TYPE_MESSAGE && !(haslazyhandlers(h, f) && c->lazy)) { - generate_msgfield(c, f, method); - } else if (type == UPB_TYPE_STRING || type == UPB_TYPE_BYTES || - type == UPB_TYPE_MESSAGE) { - generate_delimfield(c, f, method); - } else { - generate_primitivefield(c, f, method); - } - } - - /* If there were no fields, or if no handlers were defined, we need to - * generate a non-empty loop body so that we can at least dispatch for unknown - * fields and check for the end of the message. */ - if (c->pc == start_pc) { - /* Check for end-of-message. */ - putop(c, OP_CHECKDELIM, LABEL_ENDMSG); - /* Unconditionally dispatch. */ - putop(c, OP_DISPATCH, 0); - } - - /* For now we just loop back to the last field of the message (or if none, - * the DISPATCH opcode for the message). */ - putop(c, OP_BRANCH, -LABEL_FIELD); - - /* Insert both a label and a dispatch table entry for this end-of-msg. */ - label(c, LABEL_ENDMSG); - val = upb_value_uint64(pcofs(c) - method->code_base.ofs); - upb_inttable_insert(&method->dispatch, DISPATCH_ENDMSG, val); - - putsel(c, OP_ENDMSG, UPB_ENDMSG_SELECTOR, h); - putop(c, OP_RET); - - upb_inttable_compact(&method->dispatch); -} - -/* Populate "methods" with new upb_pbdecodermethod objects reachable from "h". - * Returns the method for these handlers. - * - * Generates a new method for every destination handlers reachable from "h". */ -static void find_methods(compiler *c, const upb_handlers *h) { - upb_value v; - int i, n; - const upb_msgdef *md; - upb_pbdecodermethod *method; - - if (upb_inttable_lookupptr(&c->group->methods, h, &v)) - return; - - method = newmethod(h, c->group); - upb_inttable_insertptr(&c->group->methods, h, upb_value_ptr(method)); - - /* Find submethods. */ - md = upb_handlers_msgdef(h); - n = upb_msgdef_fieldcount(md); - for (i = 0; i < n; i++) { - const upb_fielddef *f = upb_msgdef_field(md, i); - const upb_handlers *sub_h; - if (upb_fielddef_type(f) == UPB_TYPE_MESSAGE && - (sub_h = upb_handlers_getsubhandlers(h, f)) != NULL) { - /* We only generate a decoder method for submessages with handlers. - * Others will be parsed as unknown fields. */ - find_methods(c, sub_h); - } - } -} - -/* (Re-)compile bytecode for all messages in "msgs." - * Overwrites any existing bytecode in "c". */ -static void compile_methods(compiler *c) { - upb_inttable_iter i; - - /* Start over at the beginning of the bytecode. */ - c->pc = c->group->bytecode; - - upb_inttable_begin(&i, &c->group->methods); - for(; !upb_inttable_done(&i); upb_inttable_next(&i)) { - upb_pbdecodermethod *method = upb_value_getptr(upb_inttable_iter_value(&i)); - compile_method(c, method); - } -} - -static void set_bytecode_handlers(mgroup *g) { - upb_inttable_iter i; - upb_inttable_begin(&i, &g->methods); - for(; !upb_inttable_done(&i); upb_inttable_next(&i)) { - upb_pbdecodermethod *m = upb_value_getptr(upb_inttable_iter_value(&i)); - upb_byteshandler *h = &m->input_handler_; - - m->code_base.ptr = g->bytecode + m->code_base.ofs; - - upb_byteshandler_setstartstr(h, upb_pbdecoder_startbc, m->code_base.ptr); - upb_byteshandler_setstring(h, upb_pbdecoder_decode, g); - upb_byteshandler_setendstr(h, upb_pbdecoder_end, m); - } -} - - -/* TODO(haberman): allow this to be constructed for an arbitrary set of dest - * handlers and other mgroups (but verify we have a transitive closure). */ -const mgroup *mgroup_new(const upb_handlers *dest, bool lazy) { - mgroup *g; - compiler *c; - - g = newgroup(); - c = newcompiler(g, lazy); - find_methods(c, dest); - - /* We compile in two passes: - * 1. all messages are assigned relative offsets from the beginning of the - * bytecode (saved in method->code_base). - * 2. forwards OP_CALL instructions can be correctly linked since message - * offsets have been previously assigned. - * - * Could avoid the second pass by linking OP_CALL instructions somehow. */ - compile_methods(c); - compile_methods(c); - g->bytecode_end = c->pc; - freecompiler(c); - -#ifdef UPB_DUMP_BYTECODE - { - FILE *f = fopen("/tmp/upb-bytecode", "w"); - UPB_ASSERT(f); - dumpbc(g->bytecode, g->bytecode_end, stderr); - dumpbc(g->bytecode, g->bytecode_end, f); - fclose(f); - - f = fopen("/tmp/upb-bytecode.bin", "wb"); - UPB_ASSERT(f); - fwrite(g->bytecode, 1, g->bytecode_end - g->bytecode, f); - fclose(f); - } -#endif - - set_bytecode_handlers(g); - return g; -} - - -/* upb_pbcodecache ************************************************************/ - -upb_pbcodecache *upb_pbcodecache_new(upb_handlercache *dest) { - upb_pbcodecache *c = upb_gmalloc(sizeof(*c)); - - if (!c) return NULL; - - c->dest = dest; - c->lazy = false; - - c->arena = upb_arena_new(); - if (!upb_inttable_init(&c->groups, UPB_CTYPE_CONSTPTR)) return NULL; - - return c; -} - -void upb_pbcodecache_free(upb_pbcodecache *c) { - upb_inttable_iter i; - - upb_inttable_begin(&i, &c->groups); - for(; !upb_inttable_done(&i); upb_inttable_next(&i)) { - upb_value val = upb_inttable_iter_value(&i); - freegroup((void*)upb_value_getconstptr(val)); - } - - upb_inttable_uninit(&c->groups); - upb_arena_free(c->arena); - upb_gfree(c); -} - -void upb_pbdecodermethodopts_setlazy(upb_pbcodecache *c, bool lazy) { - UPB_ASSERT(upb_inttable_count(&c->groups) == 0); - c->lazy = lazy; -} - -const upb_pbdecodermethod *upb_pbcodecache_get(upb_pbcodecache *c, - const upb_msgdef *md) { - upb_value v; - bool ok; - const upb_handlers *h; - const mgroup *g; - - h = upb_handlercache_get(c->dest, md); - if (upb_inttable_lookupptr(&c->groups, md, &v)) { - g = upb_value_getconstptr(v); - } else { - g = mgroup_new(h, c->lazy); - ok = upb_inttable_insertptr(&c->groups, md, upb_value_constptr(g)); - UPB_ASSUME(ok); - } - - ok = upb_inttable_lookupptr(&g->methods, h, &v); - UPB_ASSUME(ok); - return upb_value_getptr(v); -} diff --git a/upb/pb/decoder.c b/upb/pb/decoder.c deleted file mode 100644 index 4f40eed4be..0000000000 --- a/upb/pb/decoder.c +++ /dev/null @@ -1,1047 +0,0 @@ -/* -** upb::Decoder (Bytecode Decoder VM) -** -** Bytecode must previously have been generated using the bytecode compiler in -** compile_decoder.c. This decoder then walks through the bytecode op-by-op to -** parse the input. -** -** Decoding is fully resumable; we just keep a pointer to the current bytecode -** instruction and resume from there. A fair amount of the logic here is to -** handle the fact that values can span buffer seams and we have to be able to -** be capable of suspending/resuming from any byte in the stream. This -** sometimes requires keeping a few trailing bytes from the last buffer around -** in the "residual" buffer. -*/ - -#include -#include -#include "upb/pb/decoder.int.h" -#include "upb/pb/varint.int.h" - -#ifdef UPB_DUMP_BYTECODE -#include -#endif - -#include "upb/port_def.inc" - -#define CHECK_SUSPEND(x) if (!(x)) return upb_pbdecoder_suspend(d); - -/* Error messages that are shared between the bytecode and JIT decoders. */ -const char *kPbDecoderStackOverflow = "Nesting too deep."; -const char *kPbDecoderSubmessageTooLong = - "Submessage end extends past enclosing submessage."; - -/* Error messages shared within this file. */ -static const char *kUnterminatedVarint = "Unterminated varint."; - -/* upb_pbdecoder **************************************************************/ - -static opcode halt = OP_HALT; - -/* A dummy character we can point to when the user passes us a NULL buffer. - * We need this because in C (NULL + 0) and (NULL - NULL) are undefined - * behavior, which would invalidate functions like curbufleft(). */ -static const char dummy_char; - -/* Whether an op consumes any of the input buffer. */ -static bool consumes_input(opcode op) { - switch (op) { - case OP_SETDISPATCH: - case OP_STARTMSG: - case OP_ENDMSG: - case OP_STARTSEQ: - case OP_ENDSEQ: - case OP_STARTSUBMSG: - case OP_ENDSUBMSG: - case OP_STARTSTR: - case OP_ENDSTR: - case OP_PUSHTAGDELIM: - case OP_POP: - case OP_SETDELIM: - case OP_SETBIGGROUPNUM: - case OP_CHECKDELIM: - case OP_CALL: - case OP_RET: - case OP_BRANCH: - return false; - default: - return true; - } -} - -static size_t stacksize(upb_pbdecoder *d, size_t entries) { - UPB_UNUSED(d); - return entries * sizeof(upb_pbdecoder_frame); -} - -static size_t callstacksize(upb_pbdecoder *d, size_t entries) { - UPB_UNUSED(d); - - return entries * sizeof(uint32_t*); -} - - -static bool in_residual_buf(const upb_pbdecoder *d, const char *p); - -/* It's unfortunate that we have to micro-manage the compiler with - * UPB_FORCEINLINE and UPB_NOINLINE, especially since this tuning is necessarily - * specific to one hardware configuration. But empirically on a Core i7, - * performance increases 30-50% with these annotations. Every instance where - * these appear, gcc 4.2.1 made the wrong decision and degraded performance in - * benchmarks. */ - -static void seterr(upb_pbdecoder *d, const char *msg) { - upb_status_seterrmsg(d->status, msg); -} - -void upb_pbdecoder_seterr(upb_pbdecoder *d, const char *msg) { - seterr(d, msg); -} - - -/* Buffering ******************************************************************/ - -/* We operate on one buffer at a time, which is either the user's buffer passed - * to our "decode" callback or some residual bytes from the previous buffer. */ - -/* How many bytes can be safely read from d->ptr without reading past end-of-buf - * or past the current delimited end. */ -static size_t curbufleft(const upb_pbdecoder *d) { - UPB_ASSERT(d->data_end >= d->ptr); - return d->data_end - d->ptr; -} - -/* How many bytes are available before end-of-buffer. */ -static size_t bufleft(const upb_pbdecoder *d) { - return d->end - d->ptr; -} - -/* Overall stream offset of d->ptr. */ -uint64_t offset(const upb_pbdecoder *d) { - return d->bufstart_ofs + (d->ptr - d->buf); -} - -/* How many bytes are available before the end of this delimited region. */ -size_t delim_remaining(const upb_pbdecoder *d) { - return d->top->end_ofs - offset(d); -} - -/* Advances d->ptr. */ -static void advance(upb_pbdecoder *d, size_t len) { - UPB_ASSERT(curbufleft(d) >= len); - d->ptr += len; -} - -static bool in_buf(const char *p, const char *buf, const char *end) { - return p >= buf && p <= end; -} - -static bool in_residual_buf(const upb_pbdecoder *d, const char *p) { - return in_buf(p, d->residual, d->residual_end); -} - -/* Calculates the delim_end value, which is affected by both the current buffer - * and the parsing stack, so must be called whenever either is updated. */ -static void set_delim_end(upb_pbdecoder *d) { - size_t delim_ofs = d->top->end_ofs - d->bufstart_ofs; - if (delim_ofs <= (size_t)(d->end - d->buf)) { - d->delim_end = d->buf + delim_ofs; - d->data_end = d->delim_end; - } else { - d->data_end = d->end; - d->delim_end = NULL; - } -} - -static void switchtobuf(upb_pbdecoder *d, const char *buf, const char *end) { - d->ptr = buf; - d->buf = buf; - d->end = end; - set_delim_end(d); -} - -static void advancetobuf(upb_pbdecoder *d, const char *buf, size_t len) { - UPB_ASSERT(curbufleft(d) == 0); - d->bufstart_ofs += (d->end - d->buf); - switchtobuf(d, buf, buf + len); -} - -static void checkpoint(upb_pbdecoder *d) { - /* The assertion here is in the interests of efficiency, not correctness. - * We are trying to ensure that we don't checkpoint() more often than - * necessary. */ - UPB_ASSERT(d->checkpoint != d->ptr); - d->checkpoint = d->ptr; -} - -/* Skips "bytes" bytes in the stream, which may be more than available. If we - * skip more bytes than are available, we return a long read count to the caller - * indicating how many bytes can be skipped over before passing actual data - * again. Skipped bytes can pass a NULL buffer and the decoder guarantees they - * won't actually be read. - */ -static int32_t skip(upb_pbdecoder *d, size_t bytes) { - UPB_ASSERT(!in_residual_buf(d, d->ptr) || d->size_param == 0); - UPB_ASSERT(d->skip == 0); - if (bytes > delim_remaining(d)) { - seterr(d, "Skipped value extended beyond enclosing submessage."); - return (int32_t)upb_pbdecoder_suspend(d); - } else if (bufleft(d) >= bytes) { - /* Skipped data is all in current buffer, and more is still available. */ - advance(d, bytes); - d->skip = 0; - return DECODE_OK; - } else { - /* Skipped data extends beyond currently available buffers. */ - d->pc = d->last; - d->skip = bytes - curbufleft(d); - d->bufstart_ofs += (d->end - d->buf); - d->residual_end = d->residual; - switchtobuf(d, d->residual, d->residual_end); - return (int32_t)(d->size_param + d->skip); - } -} - - -/* Resumes the decoder from an initial state or from a previous suspend. */ -int32_t upb_pbdecoder_resume(upb_pbdecoder *d, void *p, const char *buf, - size_t size, const upb_bufhandle *handle) { - UPB_UNUSED(p); /* Useless; just for the benefit of the JIT. */ - - /* d->skip and d->residual_end could probably elegantly be represented - * as a single variable, to more easily represent this invariant. */ - UPB_ASSERT(!(d->skip && d->residual_end > d->residual)); - - /* We need to remember the original size_param, so that the value we return - * is relative to it, even if we do some skipping first. */ - d->size_param = size; - d->handle = handle; - - /* Have to handle this case specially (ie. not with skip()) because the user - * is allowed to pass a NULL buffer here, which won't allow us to safely - * calculate a d->end or use our normal functions like curbufleft(). */ - if (d->skip && d->skip >= size) { - d->skip -= size; - d->bufstart_ofs += size; - buf = &dummy_char; - size = 0; - - /* We can't just return now, because we might need to execute some ops - * like CHECKDELIM, which could call some callbacks and pop the stack. */ - } - - /* We need to pretend that this was the actual buffer param, since some of the - * calculations assume that d->ptr/d->buf is relative to this. */ - d->buf_param = buf; - - if (!buf) { - /* NULL buf is ok if its entire span is covered by the "skip" above, but - * by this point we know that "skip" doesn't cover the buffer. */ - seterr(d, "Passed NULL buffer over non-skippable region."); - return (int32_t)upb_pbdecoder_suspend(d); - } - - if (d->residual_end > d->residual) { - /* We have residual bytes from the last buffer. */ - UPB_ASSERT(d->ptr == d->residual); - } else { - switchtobuf(d, buf, buf + size); - } - - d->checkpoint = d->ptr; - - /* Handle skips that don't cover the whole buffer (as above). */ - if (d->skip) { - size_t skip_bytes = d->skip; - d->skip = 0; - CHECK_RETURN(skip(d, skip_bytes)); - checkpoint(d); - } - - /* If we're inside an unknown group, continue to parse unknown values. */ - if (d->top->groupnum < 0) { - CHECK_RETURN(upb_pbdecoder_skipunknown(d, -1, 0)); - checkpoint(d); - } - - return DECODE_OK; -} - -/* Suspends the decoder at the last checkpoint, without saving any residual - * bytes. If there are any unconsumed bytes, returns a short byte count. */ -size_t upb_pbdecoder_suspend(upb_pbdecoder *d) { - d->pc = d->last; - if (d->checkpoint == d->residual) { - /* Checkpoint was in residual buf; no user bytes were consumed. */ - d->ptr = d->residual; - return 0; - } else { - size_t ret = d->size_param - (d->end - d->checkpoint); - UPB_ASSERT(!in_residual_buf(d, d->checkpoint)); - UPB_ASSERT(d->buf == d->buf_param || d->buf == &dummy_char); - - d->bufstart_ofs += (d->checkpoint - d->buf); - d->residual_end = d->residual; - switchtobuf(d, d->residual, d->residual_end); - return ret; - } -} - -/* Suspends the decoder at the last checkpoint, and saves any unconsumed - * bytes in our residual buffer. This is necessary if we need more user - * bytes to form a complete value, which might not be contiguous in the - * user's buffers. Always consumes all user bytes. */ -static size_t suspend_save(upb_pbdecoder *d) { - /* We hit end-of-buffer before we could parse a full value. - * Save any unconsumed bytes (if any) to the residual buffer. */ - d->pc = d->last; - - if (d->checkpoint == d->residual) { - /* Checkpoint was in residual buf; append user byte(s) to residual buf. */ - UPB_ASSERT((d->residual_end - d->residual) + d->size_param <= - sizeof(d->residual)); - if (!in_residual_buf(d, d->ptr)) { - d->bufstart_ofs -= (d->residual_end - d->residual); - } - memcpy(d->residual_end, d->buf_param, d->size_param); - d->residual_end += d->size_param; - } else { - /* Checkpoint was in user buf; old residual bytes not needed. */ - size_t save; - UPB_ASSERT(!in_residual_buf(d, d->checkpoint)); - - d->ptr = d->checkpoint; - save = curbufleft(d); - UPB_ASSERT(save <= sizeof(d->residual)); - memcpy(d->residual, d->ptr, save); - d->residual_end = d->residual + save; - d->bufstart_ofs = offset(d); - } - - switchtobuf(d, d->residual, d->residual_end); - return d->size_param; -} - -/* Copies the next "bytes" bytes into "buf" and advances the stream. - * Requires that this many bytes are available in the current buffer. */ -UPB_FORCEINLINE static void consumebytes(upb_pbdecoder *d, void *buf, - size_t bytes) { - UPB_ASSERT(bytes <= curbufleft(d)); - memcpy(buf, d->ptr, bytes); - advance(d, bytes); -} - -/* Slow path for getting the next "bytes" bytes, regardless of whether they are - * available in the current buffer or not. Returns a status code as described - * in decoder.int.h. */ -UPB_NOINLINE static int32_t getbytes_slow(upb_pbdecoder *d, void *buf, - size_t bytes) { - const size_t avail = curbufleft(d); - consumebytes(d, buf, avail); - bytes -= avail; - UPB_ASSERT(bytes > 0); - if (in_residual_buf(d, d->ptr)) { - advancetobuf(d, d->buf_param, d->size_param); - } - if (curbufleft(d) >= bytes) { - consumebytes(d, (char *)buf + avail, bytes); - return DECODE_OK; - } else if (d->data_end == d->delim_end) { - seterr(d, "Submessage ended in the middle of a value or group"); - return (int32_t)upb_pbdecoder_suspend(d); - } else { - return (int32_t)suspend_save(d); - } -} - -/* Gets the next "bytes" bytes, regardless of whether they are available in the - * current buffer or not. Returns a status code as described in decoder.int.h. - */ -UPB_FORCEINLINE static int32_t getbytes(upb_pbdecoder *d, void *buf, - size_t bytes) { - if (curbufleft(d) >= bytes) { - /* Buffer has enough data to satisfy. */ - consumebytes(d, buf, bytes); - return DECODE_OK; - } else { - return getbytes_slow(d, buf, bytes); - } -} - -UPB_NOINLINE static size_t peekbytes_slow(upb_pbdecoder *d, void *buf, - size_t bytes) { - size_t ret = curbufleft(d); - memcpy(buf, d->ptr, ret); - if (in_residual_buf(d, d->ptr)) { - size_t copy = UPB_MIN(bytes - ret, d->size_param); - memcpy((char *)buf + ret, d->buf_param, copy); - ret += copy; - } - return ret; -} - -UPB_FORCEINLINE static size_t peekbytes(upb_pbdecoder *d, void *buf, - size_t bytes) { - if (curbufleft(d) >= bytes) { - memcpy(buf, d->ptr, bytes); - return bytes; - } else { - return peekbytes_slow(d, buf, bytes); - } -} - - -/* Decoding of wire types *****************************************************/ - -/* Slow path for decoding a varint from the current buffer position. - * Returns a status code as described in decoder.int.h. */ -UPB_NOINLINE int32_t upb_pbdecoder_decode_varint_slow(upb_pbdecoder *d, - uint64_t *u64) { - uint8_t byte = 0x80; - int bitpos; - *u64 = 0; - for(bitpos = 0; bitpos < 70 && (byte & 0x80); bitpos += 7) { - CHECK_RETURN(getbytes(d, &byte, 1)); - *u64 |= (uint64_t)(byte & 0x7F) << bitpos; - } - if(bitpos == 70 && (byte & 0x80)) { - seterr(d, kUnterminatedVarint); - return (int32_t)upb_pbdecoder_suspend(d); - } - return DECODE_OK; -} - -/* Decodes a varint from the current buffer position. - * Returns a status code as described in decoder.int.h. */ -UPB_FORCEINLINE static int32_t decode_varint(upb_pbdecoder *d, uint64_t *u64) { - if (curbufleft(d) > 0 && !(*d->ptr & 0x80)) { - *u64 = *d->ptr; - advance(d, 1); - return DECODE_OK; - } else if (curbufleft(d) >= 10) { - /* Fast case. */ - upb_decoderet r = upb_vdecode_fast(d->ptr); - if (r.p == NULL) { - seterr(d, kUnterminatedVarint); - return (int32_t)upb_pbdecoder_suspend(d); - } - advance(d, r.p - d->ptr); - *u64 = r.val; - return DECODE_OK; - } else { - /* Slow case -- varint spans buffer seam. */ - return upb_pbdecoder_decode_varint_slow(d, u64); - } -} - -/* Decodes a 32-bit varint from the current buffer position. - * Returns a status code as described in decoder.int.h. */ -UPB_FORCEINLINE static int32_t decode_v32(upb_pbdecoder *d, uint32_t *u32) { - uint64_t u64; - int32_t ret = decode_varint(d, &u64); - if (ret >= 0) return ret; - if (u64 > UINT32_MAX) { - seterr(d, "Unterminated 32-bit varint"); - /* TODO(haberman) guarantee that this function return is >= 0 somehow, - * so we know this path will always be treated as error by our caller. - * Right now the size_t -> int32_t can overflow and produce negative values. - */ - *u32 = 0; - return (int32_t)upb_pbdecoder_suspend(d); - } - *u32 = (uint32_t)u64; - return DECODE_OK; -} - -/* Decodes a fixed32 from the current buffer position. - * Returns a status code as described in decoder.int.h. - * TODO: proper byte swapping for big-endian machines. */ -UPB_FORCEINLINE static int32_t decode_fixed32(upb_pbdecoder *d, uint32_t *u32) { - return getbytes(d, u32, 4); -} - -/* Decodes a fixed64 from the current buffer position. - * Returns a status code as described in decoder.int.h. - * TODO: proper byte swapping for big-endian machines. */ -UPB_FORCEINLINE static int32_t decode_fixed64(upb_pbdecoder *d, uint64_t *u64) { - return getbytes(d, u64, 8); -} - -/* Non-static versions of the above functions. - * These are called by the JIT for fallback paths. */ -int32_t upb_pbdecoder_decode_f32(upb_pbdecoder *d, uint32_t *u32) { - return decode_fixed32(d, u32); -} - -int32_t upb_pbdecoder_decode_f64(upb_pbdecoder *d, uint64_t *u64) { - return decode_fixed64(d, u64); -} - -static double as_double(uint64_t n) { double d; memcpy(&d, &n, 8); return d; } -static float as_float(uint32_t n) { float f; memcpy(&f, &n, 4); return f; } - -/* Pushes a frame onto the decoder stack. */ -static bool decoder_push(upb_pbdecoder *d, uint64_t end) { - upb_pbdecoder_frame *fr = d->top; - - if (end > fr->end_ofs) { - seterr(d, kPbDecoderSubmessageTooLong); - return false; - } else if (fr == d->limit) { - seterr(d, kPbDecoderStackOverflow); - return false; - } - - fr++; - fr->end_ofs = end; - fr->dispatch = NULL; - fr->groupnum = 0; - d->top = fr; - return true; -} - -static bool pushtagdelim(upb_pbdecoder *d, uint32_t arg) { - /* While we expect to see an "end" tag (either ENDGROUP or a non-sequence - * field number) prior to hitting any enclosing submessage end, pushing our - * existing delim end prevents us from continuing to parse values from a - * corrupt proto that doesn't give us an END tag in time. */ - if (!decoder_push(d, d->top->end_ofs)) - return false; - d->top->groupnum = arg; - return true; -} - -/* Pops a frame from the decoder stack. */ -static void decoder_pop(upb_pbdecoder *d) { d->top--; } - -UPB_NOINLINE int32_t upb_pbdecoder_checktag_slow(upb_pbdecoder *d, - uint64_t expected) { - uint64_t data = 0; - size_t bytes = upb_value_size(expected); - size_t read = peekbytes(d, &data, bytes); - if (read == bytes && data == expected) { - /* Advance past matched bytes. */ - int32_t ok = getbytes(d, &data, read); - UPB_ASSERT(ok < 0); - return DECODE_OK; - } else if (read < bytes && memcmp(&data, &expected, read) == 0) { - return (int32_t)suspend_save(d); - } else { - return DECODE_MISMATCH; - } -} - -int32_t upb_pbdecoder_skipunknown(upb_pbdecoder *d, int32_t fieldnum, - uint8_t wire_type) { - if (fieldnum >= 0) - goto have_tag; - - while (true) { - uint32_t tag; - CHECK_RETURN(decode_v32(d, &tag)); - wire_type = tag & 0x7; - fieldnum = tag >> 3; - -have_tag: - if (fieldnum == 0) { - seterr(d, "Saw invalid field number (0)"); - return (int32_t)upb_pbdecoder_suspend(d); - } - - switch (wire_type) { - case UPB_WIRE_TYPE_32BIT: - CHECK_RETURN(skip(d, 4)); - break; - case UPB_WIRE_TYPE_64BIT: - CHECK_RETURN(skip(d, 8)); - break; - case UPB_WIRE_TYPE_VARINT: { - uint64_t u64; - CHECK_RETURN(decode_varint(d, &u64)); - break; - } - case UPB_WIRE_TYPE_DELIMITED: { - uint32_t len; - CHECK_RETURN(decode_v32(d, &len)); - CHECK_RETURN(skip(d, len)); - break; - } - case UPB_WIRE_TYPE_START_GROUP: - if (!pushtagdelim(d, -fieldnum)) { - return (int32_t)upb_pbdecoder_suspend(d); - } - break; - case UPB_WIRE_TYPE_END_GROUP: - if (fieldnum == -d->top->groupnum) { - decoder_pop(d); - } else if (fieldnum == d->top->groupnum) { - return DECODE_ENDGROUP; - } else { - seterr(d, "Unmatched ENDGROUP tag."); - return (int32_t)upb_pbdecoder_suspend(d); - } - break; - default: - seterr(d, "Invalid wire type"); - return (int32_t)upb_pbdecoder_suspend(d); - } - - if (d->top->groupnum >= 0) { - /* TODO: More code needed for handling unknown groups. */ - upb_sink_putunknown(d->top->sink, d->checkpoint, d->ptr - d->checkpoint); - return DECODE_OK; - } - - /* Unknown group -- continue looping over unknown fields. */ - checkpoint(d); - } -} - -static void goto_endmsg(upb_pbdecoder *d) { - upb_value v; - bool found = upb_inttable_lookup32(d->top->dispatch, DISPATCH_ENDMSG, &v); - UPB_ASSERT(found); - d->pc = d->top->base + upb_value_getuint64(v); -} - -/* Parses a tag and jumps to the corresponding bytecode instruction for this - * field. - * - * If the tag is unknown (or the wire type doesn't match), parses the field as - * unknown. If the tag is a valid ENDGROUP tag, jumps to the bytecode - * instruction for the end of message. */ -static int32_t dispatch(upb_pbdecoder *d) { - upb_inttable *dispatch = d->top->dispatch; - uint32_t tag; - uint8_t wire_type; - uint32_t fieldnum; - upb_value val; - int32_t retval; - - /* Decode tag. */ - CHECK_RETURN(decode_v32(d, &tag)); - wire_type = tag & 0x7; - fieldnum = tag >> 3; - - /* Lookup tag. Because of packed/non-packed compatibility, we have to - * check the wire type against two possibilities. */ - if (fieldnum != DISPATCH_ENDMSG && - upb_inttable_lookup32(dispatch, fieldnum, &val)) { - uint64_t v = upb_value_getuint64(val); - if (wire_type == (v & 0xff)) { - d->pc = d->top->base + (v >> 16); - return DECODE_OK; - } else if (wire_type == ((v >> 8) & 0xff)) { - bool found = - upb_inttable_lookup(dispatch, fieldnum + UPB_MAX_FIELDNUMBER, &val); - UPB_ASSERT(found); - d->pc = d->top->base + upb_value_getuint64(val); - return DECODE_OK; - } - } - - /* We have some unknown fields (or ENDGROUP) to parse. The DISPATCH or TAG - * bytecode that triggered this is preceded by a CHECKDELIM bytecode which - * we need to back up to, so that when we're done skipping unknown data we - * can re-check the delimited end. */ - d->last--; /* Necessary if we get suspended */ - d->pc = d->last; - UPB_ASSERT(getop(*d->last) == OP_CHECKDELIM); - - /* Unknown field or ENDGROUP. */ - retval = upb_pbdecoder_skipunknown(d, fieldnum, wire_type); - - CHECK_RETURN(retval); - - if (retval == DECODE_ENDGROUP) { - goto_endmsg(d); - return DECODE_OK; - } - - return DECODE_OK; -} - -/* Callers know that the stack is more than one deep because the opcodes that - * call this only occur after PUSH operations. */ -upb_pbdecoder_frame *outer_frame(upb_pbdecoder *d) { - UPB_ASSERT(d->top != d->stack); - return d->top - 1; -} - - -/* The main decoding loop *****************************************************/ - -/* The main decoder VM function. Uses traditional bytecode dispatch loop with a - * switch() statement. */ -size_t run_decoder_vm(upb_pbdecoder *d, const mgroup *group, - const upb_bufhandle* handle) { - -#define VMCASE(op, code) \ - case op: { code; if (consumes_input(op)) checkpoint(d); break; } -#define PRIMITIVE_OP(type, wt, name, convfunc, ctype) \ - VMCASE(OP_PARSE_ ## type, { \ - ctype val; \ - CHECK_RETURN(decode_ ## wt(d, &val)); \ - upb_sink_put ## name(d->top->sink, arg, (convfunc)(val)); \ - }) - - while(1) { - int32_t instruction; - opcode op; - uint32_t arg; - int32_t longofs; - - d->last = d->pc; - instruction = *d->pc++; - op = getop(instruction); - arg = instruction >> 8; - longofs = arg; - UPB_ASSERT(d->ptr != d->residual_end); - UPB_UNUSED(group); -#ifdef UPB_DUMP_BYTECODE - fprintf(stderr, "s_ofs=%d buf_ofs=%d data_rem=%d buf_rem=%d delim_rem=%d " - "%x %s (%d)\n", - (int)offset(d), - (int)(d->ptr - d->buf), - (int)(d->data_end - d->ptr), - (int)(d->end - d->ptr), - (int)((d->top->end_ofs - d->bufstart_ofs) - (d->ptr - d->buf)), - (int)(d->pc - 1 - group->bytecode), - upb_pbdecoder_getopname(op), - arg); -#endif - switch (op) { - /* Technically, we are losing data if we see a 32-bit varint that is not - * properly sign-extended. We could detect this and error about the data - * loss, but proto2 does not do this, so we pass. */ - PRIMITIVE_OP(INT32, varint, int32, int32_t, uint64_t) - PRIMITIVE_OP(INT64, varint, int64, int64_t, uint64_t) - PRIMITIVE_OP(UINT32, varint, uint32, uint32_t, uint64_t) - PRIMITIVE_OP(UINT64, varint, uint64, uint64_t, uint64_t) - PRIMITIVE_OP(FIXED32, fixed32, uint32, uint32_t, uint32_t) - PRIMITIVE_OP(FIXED64, fixed64, uint64, uint64_t, uint64_t) - PRIMITIVE_OP(SFIXED32, fixed32, int32, int32_t, uint32_t) - PRIMITIVE_OP(SFIXED64, fixed64, int64, int64_t, uint64_t) - PRIMITIVE_OP(BOOL, varint, bool, bool, uint64_t) - PRIMITIVE_OP(DOUBLE, fixed64, double, as_double, uint64_t) - PRIMITIVE_OP(FLOAT, fixed32, float, as_float, uint32_t) - PRIMITIVE_OP(SINT32, varint, int32, upb_zzdec_32, uint64_t) - PRIMITIVE_OP(SINT64, varint, int64, upb_zzdec_64, uint64_t) - - VMCASE(OP_SETDISPATCH, - d->top->base = d->pc - 1; - memcpy(&d->top->dispatch, d->pc, sizeof(void*)); - d->pc += sizeof(void*) / sizeof(uint32_t); - ) - VMCASE(OP_STARTMSG, - CHECK_SUSPEND(upb_sink_startmsg(d->top->sink)); - ) - VMCASE(OP_ENDMSG, - CHECK_SUSPEND(upb_sink_endmsg(d->top->sink, d->status)); - ) - VMCASE(OP_STARTSEQ, - upb_pbdecoder_frame *outer = outer_frame(d); - CHECK_SUSPEND(upb_sink_startseq(outer->sink, arg, &d->top->sink)); - ) - VMCASE(OP_ENDSEQ, - CHECK_SUSPEND(upb_sink_endseq(d->top->sink, arg)); - ) - VMCASE(OP_STARTSUBMSG, - upb_pbdecoder_frame *outer = outer_frame(d); - CHECK_SUSPEND(upb_sink_startsubmsg(outer->sink, arg, &d->top->sink)); - ) - VMCASE(OP_ENDSUBMSG, - upb_sink subsink = (d->top + 1)->sink; - CHECK_SUSPEND(upb_sink_endsubmsg(d->top->sink, subsink, arg)); - ) - VMCASE(OP_STARTSTR, - uint32_t len = (uint32_t)delim_remaining(d); - upb_pbdecoder_frame *outer = outer_frame(d); - CHECK_SUSPEND(upb_sink_startstr(outer->sink, arg, len, &d->top->sink)); - if (len == 0) { - d->pc++; /* Skip OP_STRING. */ - } - ) - VMCASE(OP_STRING, - uint32_t len = (uint32_t)curbufleft(d); - size_t n = upb_sink_putstring(d->top->sink, arg, d->ptr, len, handle); - if (n > len) { - if (n > delim_remaining(d)) { - seterr(d, "Tried to skip past end of string."); - return upb_pbdecoder_suspend(d); - } else { - int32_t ret = skip(d, n); - /* This shouldn't return DECODE_OK, because n > len. */ - UPB_ASSERT(ret >= 0); - return ret; - } - } - advance(d, n); - if (n < len || d->delim_end == NULL) { - /* We aren't finished with this string yet. */ - d->pc--; /* Repeat OP_STRING. */ - if (n > 0) checkpoint(d); - return upb_pbdecoder_suspend(d); - } - ) - VMCASE(OP_ENDSTR, - CHECK_SUSPEND(upb_sink_endstr(d->top->sink, arg)); - ) - VMCASE(OP_PUSHTAGDELIM, - CHECK_SUSPEND(pushtagdelim(d, arg)); - ) - VMCASE(OP_SETBIGGROUPNUM, - d->top->groupnum = *d->pc++; - ) - VMCASE(OP_POP, - UPB_ASSERT(d->top > d->stack); - decoder_pop(d); - ) - VMCASE(OP_PUSHLENDELIM, - uint32_t len; - CHECK_RETURN(decode_v32(d, &len)); - CHECK_SUSPEND(decoder_push(d, offset(d) + len)); - set_delim_end(d); - ) - VMCASE(OP_SETDELIM, - set_delim_end(d); - ) - VMCASE(OP_CHECKDELIM, - /* We are guaranteed of this assert because we never allow ourselves to - * consume bytes beyond data_end, which covers delim_end when non-NULL. - */ - UPB_ASSERT(!(d->delim_end && d->ptr > d->delim_end)); - if (d->ptr == d->delim_end) - d->pc += longofs; - ) - VMCASE(OP_CALL, - d->callstack[d->call_len++] = d->pc; - d->pc += longofs; - ) - VMCASE(OP_RET, - UPB_ASSERT(d->call_len > 0); - d->pc = d->callstack[--d->call_len]; - ) - VMCASE(OP_BRANCH, - d->pc += longofs; - ) - VMCASE(OP_TAG1, - uint8_t expected; - CHECK_SUSPEND(curbufleft(d) > 0); - expected = (arg >> 8) & 0xff; - if (*d->ptr == expected) { - advance(d, 1); - } else { - int8_t shortofs; - badtag: - shortofs = arg; - if (shortofs == LABEL_DISPATCH) { - CHECK_RETURN(dispatch(d)); - } else { - d->pc += shortofs; - break; /* Avoid checkpoint(). */ - } - } - ) - VMCASE(OP_TAG2, - uint16_t expected; - CHECK_SUSPEND(curbufleft(d) > 0); - expected = (arg >> 8) & 0xffff; - if (curbufleft(d) >= 2) { - uint16_t actual; - memcpy(&actual, d->ptr, 2); - if (expected == actual) { - advance(d, 2); - } else { - goto badtag; - } - } else { - int32_t result = upb_pbdecoder_checktag_slow(d, expected); - if (result == DECODE_MISMATCH) goto badtag; - if (result >= 0) return result; - } - ) - VMCASE(OP_TAGN, { - uint64_t expected; - int32_t result; - memcpy(&expected, d->pc, 8); - d->pc += 2; - result = upb_pbdecoder_checktag_slow(d, expected); - if (result == DECODE_MISMATCH) goto badtag; - if (result >= 0) return result; - }) - VMCASE(OP_DISPATCH, { - CHECK_RETURN(dispatch(d)); - }) - VMCASE(OP_HALT, { - return d->size_param; - }) - } - } -} - - -/* BytesHandler handlers ******************************************************/ - -void *upb_pbdecoder_startbc(void *closure, const void *pc, size_t size_hint) { - upb_pbdecoder *d = closure; - UPB_UNUSED(size_hint); - d->top->end_ofs = UINT64_MAX; - d->bufstart_ofs = 0; - d->call_len = 1; - d->callstack[0] = &halt; - d->pc = pc; - d->skip = 0; - return d; -} - -bool upb_pbdecoder_end(void *closure, const void *handler_data) { - upb_pbdecoder *d = closure; - const upb_pbdecodermethod *method = handler_data; - uint64_t end; - char dummy; - - if (d->residual_end > d->residual) { - seterr(d, "Unexpected EOF: decoder still has buffered unparsed data"); - return false; - } - - if (d->skip) { - seterr(d, "Unexpected EOF inside skipped data"); - return false; - } - - if (d->top->end_ofs != UINT64_MAX) { - seterr(d, "Unexpected EOF inside delimited string"); - return false; - } - - /* The user's end() call indicates that the message ends here. */ - end = offset(d); - d->top->end_ofs = end; - - { - const uint32_t *p = d->pc; - d->stack->end_ofs = end; - /* Check the previous bytecode, but guard against beginning. */ - if (p != method->code_base.ptr) p--; - if (getop(*p) == OP_CHECKDELIM) { - /* Rewind from OP_TAG* to OP_CHECKDELIM. */ - UPB_ASSERT(getop(*d->pc) == OP_TAG1 || - getop(*d->pc) == OP_TAG2 || - getop(*d->pc) == OP_TAGN || - getop(*d->pc) == OP_DISPATCH); - d->pc = p; - } - upb_pbdecoder_decode(closure, handler_data, &dummy, 0, NULL); - } - - if (d->call_len != 0) { - seterr(d, "Unexpected EOF inside submessage or group"); - return false; - } - - return true; -} - -size_t upb_pbdecoder_decode(void *decoder, const void *group, const char *buf, - size_t size, const upb_bufhandle *handle) { - int32_t result = upb_pbdecoder_resume(decoder, NULL, buf, size, handle); - - if (result == DECODE_ENDGROUP) goto_endmsg(decoder); - CHECK_RETURN(result); - - return run_decoder_vm(decoder, group, handle); -} - - -/* Public API *****************************************************************/ - -void upb_pbdecoder_reset(upb_pbdecoder *d) { - d->top = d->stack; - d->top->groupnum = 0; - d->ptr = d->residual; - d->buf = d->residual; - d->end = d->residual; - d->residual_end = d->residual; -} - -upb_pbdecoder *upb_pbdecoder_create(upb_arena *a, const upb_pbdecodermethod *m, - upb_sink sink, upb_status *status) { - const size_t default_max_nesting = 64; - - upb_pbdecoder *d = upb_arena_malloc(a, sizeof(upb_pbdecoder)); - if (!d) return NULL; - - d->method_ = m; - d->callstack = upb_arena_malloc(a, callstacksize(d, default_max_nesting)); - d->stack = upb_arena_malloc(a, stacksize(d, default_max_nesting)); - if (!d->stack || !d->callstack) { - return NULL; - } - - d->arena = a; - d->limit = d->stack + default_max_nesting - 1; - d->stack_size = default_max_nesting; - d->status = status; - - upb_pbdecoder_reset(d); - upb_bytessink_reset(&d->input_, &m->input_handler_, d); - - if (d->method_->dest_handlers_) { - if (sink.handlers != d->method_->dest_handlers_) - return NULL; - } - d->top->sink = sink; - - return d; -} - -uint64_t upb_pbdecoder_bytesparsed(const upb_pbdecoder *d) { - return offset(d); -} - -const upb_pbdecodermethod *upb_pbdecoder_method(const upb_pbdecoder *d) { - return d->method_; -} - -upb_bytessink upb_pbdecoder_input(upb_pbdecoder *d) { - return d->input_; -} - -size_t upb_pbdecoder_maxnesting(const upb_pbdecoder *d) { - return d->stack_size; -} - -bool upb_pbdecoder_setmaxnesting(upb_pbdecoder *d, size_t max) { - UPB_ASSERT(d->top >= d->stack); - - if (max < (size_t)(d->top - d->stack)) { - /* Can't set a limit smaller than what we are currently at. */ - return false; - } - - if (max > d->stack_size) { - /* Need to reallocate stack and callstack to accommodate. */ - size_t old_size = stacksize(d, d->stack_size); - size_t new_size = stacksize(d, max); - void *p = upb_arena_realloc(d->arena, d->stack, old_size, new_size); - if (!p) { - return false; - } - d->stack = p; - - old_size = callstacksize(d, d->stack_size); - new_size = callstacksize(d, max); - p = upb_arena_realloc(d->arena, d->callstack, old_size, new_size); - if (!p) { - return false; - } - d->callstack = p; - - d->stack_size = max; - } - - d->limit = d->stack + max - 1; - return true; -} diff --git a/upb/pb/decoder.h b/upb/pb/decoder.h deleted file mode 100644 index 8c4d8bf5d5..0000000000 --- a/upb/pb/decoder.h +++ /dev/null @@ -1,242 +0,0 @@ -/* -** upb::pb::Decoder -** -** A high performance, streaming, resumable decoder for the binary protobuf -** format. -** -** This interface works the same regardless of what decoder backend is being -** used. A client of this class does not need to know whether decoding is using -** a JITted decoder (DynASM, LLVM, etc) or an interpreted decoder. By default, -** it will always use the fastest available decoder. However, you can call -** set_allow_jit(false) to disable any JIT decoder that might be available. -** This is primarily useful for testing purposes. -*/ - -#ifndef UPB_DECODER_H_ -#define UPB_DECODER_H_ - -#include "upb/sink.h" - -#ifdef __cplusplus -namespace upb { -namespace pb { -class CodeCache; -class DecoderPtr; -class DecoderMethodPtr; -class DecoderMethodOptions; -} /* namespace pb */ -} /* namespace upb */ -#endif - -/* The maximum number of bytes we are required to buffer internally between - * calls to the decoder. The value is 14: a 5 byte unknown tag plus ten-byte - * varint, less one because we are buffering an incomplete value. - * - * Should only be used by unit tests. */ -#define UPB_DECODER_MAX_RESIDUAL_BYTES 14 - -/* upb_pbdecodermethod ********************************************************/ - -struct upb_pbdecodermethod; -typedef struct upb_pbdecodermethod upb_pbdecodermethod; - -#ifdef __cplusplus -extern "C" { -#endif - -const upb_handlers *upb_pbdecodermethod_desthandlers( - const upb_pbdecodermethod *m); -const upb_byteshandler *upb_pbdecodermethod_inputhandler( - const upb_pbdecodermethod *m); -bool upb_pbdecodermethod_isnative(const upb_pbdecodermethod *m); - -#ifdef __cplusplus -} /* extern "C" */ - -/* Represents the code to parse a protobuf according to a destination - * Handlers. */ -class upb::pb::DecoderMethodPtr { - public: - DecoderMethodPtr() : ptr_(nullptr) {} - DecoderMethodPtr(const upb_pbdecodermethod* ptr) : ptr_(ptr) {} - - const upb_pbdecodermethod* ptr() { return ptr_; } - - /* The destination handlers that are statically bound to this method. - * This method is only capable of outputting to a sink that uses these - * handlers. */ - const Handlers *dest_handlers() const { - return upb_pbdecodermethod_desthandlers(ptr_); - } - - /* The input handlers for this decoder method. */ - const BytesHandler* input_handler() const { - return upb_pbdecodermethod_inputhandler(ptr_); - } - - /* Whether this method is native. */ - bool is_native() const { - return upb_pbdecodermethod_isnative(ptr_); - } - - private: - const upb_pbdecodermethod* ptr_; -}; - -#endif - -/* upb_pbdecoder **************************************************************/ - -/* Preallocation hint: decoder won't allocate more bytes than this when first - * constructed. This hint may be an overestimate for some build configurations. - * But if the decoder library is upgraded without recompiling the application, - * it may be an underestimate. */ -#define UPB_PB_DECODER_SIZE 4416 - -struct upb_pbdecoder; -typedef struct upb_pbdecoder upb_pbdecoder; - -#ifdef __cplusplus -extern "C" { -#endif - -upb_pbdecoder *upb_pbdecoder_create(upb_arena *arena, - const upb_pbdecodermethod *method, - upb_sink output, upb_status *status); -const upb_pbdecodermethod *upb_pbdecoder_method(const upb_pbdecoder *d); -upb_bytessink upb_pbdecoder_input(upb_pbdecoder *d); -uint64_t upb_pbdecoder_bytesparsed(const upb_pbdecoder *d); -size_t upb_pbdecoder_maxnesting(const upb_pbdecoder *d); -bool upb_pbdecoder_setmaxnesting(upb_pbdecoder *d, size_t max); -void upb_pbdecoder_reset(upb_pbdecoder *d); - -#ifdef __cplusplus -} /* extern "C" */ - -/* A Decoder receives binary protobuf data on its input sink and pushes the - * decoded data to its output sink. */ -class upb::pb::DecoderPtr { - public: - DecoderPtr() : ptr_(nullptr) {} - DecoderPtr(upb_pbdecoder* ptr) : ptr_(ptr) {} - - upb_pbdecoder* ptr() { return ptr_; } - - /* Constructs a decoder instance for the given method, which must outlive this - * decoder. Any errors during parsing will be set on the given status, which - * must also outlive this decoder. - * - * The sink must match the given method. */ - static DecoderPtr Create(Arena *arena, DecoderMethodPtr method, - upb::Sink output, Status *status) { - return DecoderPtr(upb_pbdecoder_create(arena->ptr(), method.ptr(), - output.sink(), status->ptr())); - } - - /* Returns the DecoderMethod this decoder is parsing from. */ - const DecoderMethodPtr method() const { - return DecoderMethodPtr(upb_pbdecoder_method(ptr_)); - } - - /* The sink on which this decoder receives input. */ - BytesSink input() { return BytesSink(upb_pbdecoder_input(ptr())); } - - /* Returns number of bytes successfully parsed. - * - * This can be useful for determining the stream position where an error - * occurred. - * - * This value may not be up-to-date when called from inside a parsing - * callback. */ - uint64_t BytesParsed() { return upb_pbdecoder_bytesparsed(ptr()); } - - /* Gets/sets the parsing nexting limit. If the total number of nested - * submessages and repeated fields hits this limit, parsing will fail. This - * is a resource limit that controls the amount of memory used by the parsing - * stack. - * - * Setting the limit will fail if the parser is currently suspended at a depth - * greater than this, or if memory allocation of the stack fails. */ - size_t max_nesting() { return upb_pbdecoder_maxnesting(ptr()); } - bool set_max_nesting(size_t max) { - return upb_pbdecoder_setmaxnesting(ptr(), max); - } - - void Reset() { upb_pbdecoder_reset(ptr()); } - - static const size_t kSize = UPB_PB_DECODER_SIZE; - - private: - upb_pbdecoder *ptr_; -}; - -#endif /* __cplusplus */ - -/* upb_pbcodecache ************************************************************/ - -/* Lazily builds and caches decoder methods that will push data to the given - * handlers. The destination handlercache must outlive this object. */ - -struct upb_pbcodecache; -typedef struct upb_pbcodecache upb_pbcodecache; - -#ifdef __cplusplus -extern "C" { -#endif - -upb_pbcodecache *upb_pbcodecache_new(upb_handlercache *dest); -void upb_pbcodecache_free(upb_pbcodecache *c); -bool upb_pbcodecache_allowjit(const upb_pbcodecache *c); -void upb_pbcodecache_setallowjit(upb_pbcodecache *c, bool allow); -void upb_pbcodecache_setlazy(upb_pbcodecache *c, bool lazy); -const upb_pbdecodermethod *upb_pbcodecache_get(upb_pbcodecache *c, - const upb_msgdef *md); - -#ifdef __cplusplus -} /* extern "C" */ - -/* A class for caching protobuf processing code, whether bytecode for the - * interpreted decoder or machine code for the JIT. - * - * This class is not thread-safe. */ -class upb::pb::CodeCache { - public: - CodeCache(upb::HandlerCache *dest) - : ptr_(upb_pbcodecache_new(dest->ptr()), upb_pbcodecache_free) {} - CodeCache(CodeCache&&) = default; - CodeCache& operator=(CodeCache&&) = default; - - upb_pbcodecache* ptr() { return ptr_.get(); } - const upb_pbcodecache* ptr() const { return ptr_.get(); } - - /* Whether the cache is allowed to generate machine code. Defaults to true. - * There is no real reason to turn it off except for testing or if you are - * having a specific problem with the JIT. - * - * Note that allow_jit = true does not *guarantee* that the code will be JIT - * compiled. If this platform is not supported or the JIT was not compiled - * in, the code may still be interpreted. */ - bool allow_jit() const { return upb_pbcodecache_allowjit(ptr()); } - - /* This may only be called when the object is first constructed, and prior to - * any code generation. */ - void set_allow_jit(bool allow) { upb_pbcodecache_setallowjit(ptr(), allow); } - - /* Should the decoder push submessages to lazy handlers for fields that have - * them? The caller should set this iff the lazy handlers expect data that is - * in protobuf binary format and the caller wishes to lazy parse it. */ - void set_lazy(bool lazy) { upb_pbcodecache_setlazy(ptr(), lazy); } - - /* Returns a DecoderMethod that can push data to the given handlers. - * If a suitable method already exists, it will be returned from the cache. */ - const DecoderMethodPtr Get(MessageDefPtr md) { - return DecoderMethodPtr(upb_pbcodecache_get(ptr(), md.ptr())); - } - - private: - std::unique_ptr ptr_; -}; - -#endif /* __cplusplus */ - -#endif /* UPB_DECODER_H_ */ diff --git a/upb/pb/decoder.int.h b/upb/pb/decoder.int.h deleted file mode 100644 index 9d5f5839bc..0000000000 --- a/upb/pb/decoder.int.h +++ /dev/null @@ -1,288 +0,0 @@ -/* -** Internal-only definitions for the decoder. -*/ - -#ifndef UPB_DECODER_INT_H_ -#define UPB_DECODER_INT_H_ - -#include "upb/def.h" -#include "upb/handlers.h" -#include "upb/pb/decoder.h" -#include "upb/sink.h" -#include "upb/table.int.h" - -#include "upb/port_def.inc" - -/* Opcode definitions. The canonical meaning of each opcode is its - * implementation in the interpreter (the JIT is written to match this). - * - * All instructions have the opcode in the low byte. - * Instruction format for most instructions is: - * - * +-------------------+--------+ - * | arg (24) | op (8) | - * +-------------------+--------+ - * - * Exceptions are indicated below. A few opcodes are multi-word. */ -typedef enum { - /* Opcodes 1-8, 13, 15-18 parse their respective descriptor types. - * Arg for all of these is the upb selector for this field. */ -#define T(type) OP_PARSE_ ## type = UPB_DESCRIPTOR_TYPE_ ## type - T(DOUBLE), T(FLOAT), T(INT64), T(UINT64), T(INT32), T(FIXED64), T(FIXED32), - T(BOOL), T(UINT32), T(SFIXED32), T(SFIXED64), T(SINT32), T(SINT64), -#undef T - OP_STARTMSG = 9, /* No arg. */ - OP_ENDMSG = 10, /* No arg. */ - OP_STARTSEQ = 11, - OP_ENDSEQ = 12, - OP_STARTSUBMSG = 14, - OP_ENDSUBMSG = 19, - OP_STARTSTR = 20, - OP_STRING = 21, - OP_ENDSTR = 22, - - OP_PUSHTAGDELIM = 23, /* No arg. */ - OP_PUSHLENDELIM = 24, /* No arg. */ - OP_POP = 25, /* No arg. */ - OP_SETDELIM = 26, /* No arg. */ - OP_SETBIGGROUPNUM = 27, /* two words: - * | unused (24) | opc (8) | - * | groupnum (32) | */ - OP_CHECKDELIM = 28, - OP_CALL = 29, - OP_RET = 30, - OP_BRANCH = 31, - - /* Different opcodes depending on how many bytes expected. */ - OP_TAG1 = 32, /* | match tag (16) | jump target (8) | opc (8) | */ - OP_TAG2 = 33, /* | match tag (16) | jump target (8) | opc (8) | */ - OP_TAGN = 34, /* three words: */ - /* | unused (16) | jump target(8) | opc (8) | */ - /* | match tag 1 (32) | */ - /* | match tag 2 (32) | */ - - OP_SETDISPATCH = 35, /* N words: */ - /* | unused (24) | opc | */ - /* | upb_inttable* (32 or 64) | */ - - OP_DISPATCH = 36, /* No arg. */ - - OP_HALT = 37 /* No arg. */ -} opcode; - -#define OP_MAX OP_HALT - -UPB_INLINE opcode getop(uint32_t instr) { return (opcode)(instr & 0xff); } - -struct upb_pbcodecache { - upb_arena *arena; - upb_handlercache *dest; - bool allow_jit; - bool lazy; - - /* Map of upb_msgdef -> mgroup. */ - upb_inttable groups; -}; - -/* Method group; represents a set of decoder methods that had their code - * emitted together. Immutable once created. */ -typedef struct { - /* Maps upb_msgdef/upb_handlers -> upb_pbdecodermethod. Owned by us. - * - * Ideally this would be on pbcodecache (if we were actually caching code). - * Right now we don't actually cache anything, which is wasteful. */ - upb_inttable methods; - - /* The bytecode for our methods, if any exists. Owned by us. */ - uint32_t *bytecode; - uint32_t *bytecode_end; -} mgroup; - -/* The maximum that any submessages can be nested. Matches proto2's limit. - * This specifies the size of the decoder's statically-sized array and therefore - * setting it high will cause the upb::pb::Decoder object to be larger. - * - * If necessary we can add a runtime-settable property to Decoder that allow - * this to be larger than the compile-time setting, but this would add - * complexity, particularly since we would have to decide how/if to give users - * the ability to set a custom memory allocation function. */ -#define UPB_DECODER_MAX_NESTING 64 - -/* Internal-only struct used by the decoder. */ -typedef struct { - /* Space optimization note: we store two pointers here that the JIT - * doesn't need at all; the upb_handlers* inside the sink and - * the dispatch table pointer. We can optimze so that the JIT uses - * smaller stack frames than the interpreter. The only thing we need - * to guarantee is that the fallback routines can find end_ofs. */ - upb_sink sink; - - /* The absolute stream offset of the end-of-frame delimiter. - * Non-delimited frames (groups and non-packed repeated fields) reuse the - * delimiter of their parent, even though the frame may not end there. - * - * NOTE: the JIT stores a slightly different value here for non-top frames. - * It stores the value relative to the end of the enclosed message. But the - * top frame is still stored the same way, which is important for ensuring - * that calls from the JIT into C work correctly. */ - uint64_t end_ofs; - const uint32_t *base; - - /* 0 indicates a length-delimited field. - * A positive number indicates a known group. - * A negative number indicates an unknown group. */ - int32_t groupnum; - upb_inttable *dispatch; /* Not used by the JIT. */ -} upb_pbdecoder_frame; - -struct upb_pbdecodermethod { - /* While compiling, the base is relative in "ofs", after compiling it is - * absolute in "ptr". */ - union { - uint32_t ofs; /* PC offset of method. */ - void *ptr; /* Pointer to bytecode or machine code for this method. */ - } code_base; - - /* The decoder method group to which this method belongs. */ - const mgroup *group; - - /* Whether this method is native code or bytecode. */ - bool is_native_; - - /* The handler one calls to invoke this method. */ - upb_byteshandler input_handler_; - - /* The destination handlers this method is bound to. We own a ref. */ - const upb_handlers *dest_handlers_; - - /* Dispatch table -- used by both bytecode decoder and JIT when encountering a - * field number that wasn't the one we were expecting to see. See - * decoder.int.h for the layout of this table. */ - upb_inttable dispatch; -}; - -struct upb_pbdecoder { - upb_arena *arena; - - /* Our input sink. */ - upb_bytessink input_; - - /* The decoder method we are parsing with (owned). */ - const upb_pbdecodermethod *method_; - - size_t call_len; - const uint32_t *pc, *last; - - /* Current input buffer and its stream offset. */ - const char *buf, *ptr, *end, *checkpoint; - - /* End of the delimited region, relative to ptr, NULL if not in this buf. */ - const char *delim_end; - - /* End of the delimited region, relative to ptr, end if not in this buf. */ - const char *data_end; - - /* Overall stream offset of "buf." */ - uint64_t bufstart_ofs; - - /* Buffer for residual bytes not parsed from the previous buffer. */ - char residual[UPB_DECODER_MAX_RESIDUAL_BYTES]; - char *residual_end; - - /* Bytes of data that should be discarded from the input beore we start - * parsing again. We set this when we internally determine that we can - * safely skip the next N bytes, but this region extends past the current - * user buffer. */ - size_t skip; - - /* Stores the user buffer passed to our decode function. */ - const char *buf_param; - size_t size_param; - const upb_bufhandle *handle; - - /* Our internal stack. */ - upb_pbdecoder_frame *stack, *top, *limit; - const uint32_t **callstack; - size_t stack_size; - - upb_status *status; -}; - -/* Decoder entry points; used as handlers. */ -void *upb_pbdecoder_startbc(void *closure, const void *pc, size_t size_hint); -size_t upb_pbdecoder_decode(void *closure, const void *hd, const char *buf, - size_t size, const upb_bufhandle *handle); -bool upb_pbdecoder_end(void *closure, const void *handler_data); - -/* Decoder-internal functions that the JIT calls to handle fallback paths. */ -int32_t upb_pbdecoder_resume(upb_pbdecoder *d, void *p, const char *buf, - size_t size, const upb_bufhandle *handle); -size_t upb_pbdecoder_suspend(upb_pbdecoder *d); -int32_t upb_pbdecoder_skipunknown(upb_pbdecoder *d, int32_t fieldnum, - uint8_t wire_type); -int32_t upb_pbdecoder_checktag_slow(upb_pbdecoder *d, uint64_t expected); -int32_t upb_pbdecoder_decode_varint_slow(upb_pbdecoder *d, uint64_t *u64); -int32_t upb_pbdecoder_decode_f32(upb_pbdecoder *d, uint32_t *u32); -int32_t upb_pbdecoder_decode_f64(upb_pbdecoder *d, uint64_t *u64); -void upb_pbdecoder_seterr(upb_pbdecoder *d, const char *msg); - -/* Error messages that are shared between the bytecode and JIT decoders. */ -extern const char *kPbDecoderStackOverflow; -extern const char *kPbDecoderSubmessageTooLong; - -/* Access to decoderplan members needed by the decoder. */ -const char *upb_pbdecoder_getopname(unsigned int op); - -/* A special label that means "do field dispatch for this message and branch to - * wherever that takes you." */ -#define LABEL_DISPATCH 0 - -/* A special slot in the dispatch table that stores the epilogue (ENDMSG and/or - * RET) for branching to when we find an appropriate ENDGROUP tag. */ -#define DISPATCH_ENDMSG 0 - -/* It's important to use this invalid wire type instead of 0 (which is a valid - * wire type). */ -#define NO_WIRE_TYPE 0xff - -/* The dispatch table layout is: - * [field number] -> [ 48-bit offset ][ 8-bit wt2 ][ 8-bit wt1 ] - * - * If wt1 matches, jump to the 48-bit offset. If wt2 matches, lookup - * (UPB_MAX_FIELDNUMBER + fieldnum) and jump there. - * - * We need two wire types because of packed/non-packed compatibility. A - * primitive repeated field can use either wire type and be valid. While we - * could key the table on fieldnum+wiretype, the table would be 8x sparser. - * - * Storing two wire types in the primary value allows us to quickly rule out - * the second wire type without needing to do a separate lookup (this case is - * less common than an unknown field). */ -UPB_INLINE uint64_t upb_pbdecoder_packdispatch(uint64_t ofs, uint8_t wt1, - uint8_t wt2) { - return (ofs << 16) | (wt2 << 8) | wt1; -} - -UPB_INLINE void upb_pbdecoder_unpackdispatch(uint64_t dispatch, uint64_t *ofs, - uint8_t *wt1, uint8_t *wt2) { - *wt1 = (uint8_t)dispatch; - *wt2 = (uint8_t)(dispatch >> 8); - *ofs = dispatch >> 16; -} - -/* All of the functions in decoder.c that return int32_t return values according - * to the following scheme: - * 1. negative values indicate a return code from the following list. - * 2. positive values indicate that error or end of buffer was hit, and - * that the decode function should immediately return the given value - * (the decoder state has already been suspended and is ready to be - * resumed). */ -#define DECODE_OK -1 -#define DECODE_MISMATCH -2 /* Used only from checktag_slow(). */ -#define DECODE_ENDGROUP -3 /* Used only from checkunknown(). */ - -#define CHECK_RETURN(x) { int32_t ret = x; if (ret >= 0) return ret; } - -#include "upb/port_undef.inc" - -#endif /* UPB_DECODER_INT_H_ */ diff --git a/upb/pb/encoder.c b/upb/pb/encoder.c deleted file mode 100644 index 0c47b0dea6..0000000000 --- a/upb/pb/encoder.c +++ /dev/null @@ -1,563 +0,0 @@ -/* -** upb::Encoder -** -** Since we are implementing pure handlers (ie. without any out-of-band access -** to pre-computed lengths), we have to buffer all submessages before we can -** emit even their first byte. -** -** Not knowing the size of submessages also means we can't write a perfect -** zero-copy implementation, even with buffering. Lengths are stored as -** varints, which means that we don't know how many bytes to reserve for the -** length until we know what the length is. -** -** This leaves us with three main choices: -** -** 1. buffer all submessage data in a temporary buffer, then copy it exactly -** once into the output buffer. -** -** 2. attempt to buffer data directly into the output buffer, estimating how -** many bytes each length will take. When our guesses are wrong, use -** memmove() to grow or shrink the allotted space. -** -** 3. buffer directly into the output buffer, allocating a max length -** ahead-of-time for each submessage length. If we overallocated, we waste -** space, but no memcpy() or memmove() is required. This approach requires -** defining a maximum size for submessages and rejecting submessages that -** exceed that size. -** -** (2) and (3) have the potential to have better performance, but they are more -** complicated and subtle to implement: -** -** (3) requires making an arbitrary choice of the maximum message size; it -** wastes space when submessages are shorter than this and fails -** completely when they are longer. This makes it more finicky and -** requires configuration based on the input. It also makes it impossible -** to perfectly match the output of reference encoders that always use the -** optimal amount of space for each length. -** -** (2) requires guessing the the size upfront, and if multiple lengths are -** guessed wrong the minimum required number of memmove() operations may -** be complicated to compute correctly. Implemented properly, it may have -** a useful amortized or average cost, but more investigation is required -** to determine this and what the optimal algorithm is to achieve it. -** -** (1) makes you always pay for exactly one copy, but its implementation is -** the simplest and its performance is predictable. -** -** So for now, we implement (1) only. If we wish to optimize later, we should -** be able to do it without affecting users. -** -** The strategy is to buffer the segments of data that do *not* depend on -** unknown lengths in one buffer, and keep a separate buffer of segment pointers -** and lengths. When the top-level submessage ends, we can go beginning to end, -** alternating the writing of lengths with memcpy() of the rest of the data. -** At the top level though, no buffering is required. -*/ - -#include "upb/pb/encoder.h" -#include "upb/pb/varint.int.h" - -#include "upb/port_def.inc" - -/* The output buffer is divided into segments; a segment is a string of data - * that is "ready to go" -- it does not need any varint lengths inserted into - * the middle. The seams between segments are where varints will be inserted - * once they are known. - * - * We also use the concept of a "run", which is a range of encoded bytes that - * occur at a single submessage level. Every segment contains one or more runs. - * - * A segment can span messages. Consider: - * - * .--Submessage lengths---------. - * | | | - * | V V - * V | |--------------- | |----------------- - * Submessages: | |----------------------------------------------- - * Top-level msg: ------------------------------------------------------------ - * - * Segments: ----- ------------------- ----------------- - * Runs: *---- *--------------*--- *---------------- - * (* marks the start) - * - * Note that the top-level menssage is not in any segment because it does not - * have any length preceding it. - * - * A segment is only interrupted when another length needs to be inserted. So - * observe how the second segment spans both the inner submessage and part of - * the next enclosing message. */ -typedef struct { - uint32_t msglen; /* The length to varint-encode before this segment. */ - uint32_t seglen; /* Length of the segment. */ -} upb_pb_encoder_segment; - -struct upb_pb_encoder { - upb_arena *arena; - - /* Our input and output. */ - upb_sink input_; - upb_bytessink output_; - - /* The "subclosure" -- used as the inner closure as part of the bytessink - * protocol. */ - void *subc; - - /* The output buffer and limit, and our current write position. "buf" - * initially points to "initbuf", but is dynamically allocated if we need to - * grow beyond the initial size. */ - char *buf, *ptr, *limit; - - /* The beginning of the current run, or undefined if we are at the top - * level. */ - char *runbegin; - - /* The list of segments we are accumulating. */ - upb_pb_encoder_segment *segbuf, *segptr, *seglimit; - - /* The stack of enclosing submessages. Each entry in the stack points to the - * segment where this submessage's length is being accumulated. */ - int *stack, *top, *stacklimit; - - /* Depth of startmsg/endmsg calls. */ - int depth; -}; - -/* low-level buffering ********************************************************/ - -/* Low-level functions for interacting with the output buffer. */ - -/* TODO(haberman): handle pushback */ -static void putbuf(upb_pb_encoder *e, const char *buf, size_t len) { - size_t n = upb_bytessink_putbuf(e->output_, e->subc, buf, len, NULL); - UPB_ASSERT(n == len); -} - -static upb_pb_encoder_segment *top(upb_pb_encoder *e) { - return &e->segbuf[*e->top]; -} - -/* Call to ensure that at least "bytes" bytes are available for writing at - * e->ptr. Returns false if the bytes could not be allocated. */ -static bool reserve(upb_pb_encoder *e, size_t bytes) { - if ((size_t)(e->limit - e->ptr) < bytes) { - /* Grow buffer. */ - char *new_buf; - size_t needed = bytes + (e->ptr - e->buf); - size_t old_size = e->limit - e->buf; - - size_t new_size = old_size; - - while (new_size < needed) { - new_size *= 2; - } - - new_buf = upb_arena_realloc(e->arena, e->buf, old_size, new_size); - - if (new_buf == NULL) { - return false; - } - - e->ptr = new_buf + (e->ptr - e->buf); - e->runbegin = new_buf + (e->runbegin - e->buf); - e->limit = new_buf + new_size; - e->buf = new_buf; - } - - return true; -} - -/* Call when "bytes" bytes have been writte at e->ptr. The caller *must* have - * previously called reserve() with at least this many bytes. */ -static void encoder_advance(upb_pb_encoder *e, size_t bytes) { - UPB_ASSERT((size_t)(e->limit - e->ptr) >= bytes); - e->ptr += bytes; -} - -/* Call when all of the bytes for a handler have been written. Flushes the - * bytes if possible and necessary, returning false if this failed. */ -static bool commit(upb_pb_encoder *e) { - if (!e->top) { - /* We aren't inside a delimited region. Flush our accumulated bytes to - * the output. - * - * TODO(haberman): in the future we may want to delay flushing for - * efficiency reasons. */ - putbuf(e, e->buf, e->ptr - e->buf); - e->ptr = e->buf; - } - - return true; -} - -/* Writes the given bytes to the buffer, handling reserve/advance. */ -static bool encode_bytesval(upb_pb_encoder *e, const void *data, size_t len) { - if (!reserve(e, len)) { - return false; - } - - memcpy(e->ptr, data, len); - encoder_advance(e, len); - return true; -} - -/* Finish the current run by adding the run totals to the segment and message - * length. */ -static void accumulate(upb_pb_encoder *e) { - size_t run_len; - UPB_ASSERT(e->ptr >= e->runbegin); - run_len = e->ptr - e->runbegin; - e->segptr->seglen += run_len; - top(e)->msglen += run_len; - e->runbegin = e->ptr; -} - -/* Call to indicate the start of delimited region for which the full length is - * not yet known. All data will be buffered until the length is known. - * Delimited regions may be nested; their lengths will all be tracked properly. */ -static bool start_delim(upb_pb_encoder *e) { - if (e->top) { - /* We are already buffering, advance to the next segment and push it on the - * stack. */ - accumulate(e); - - if (++e->top == e->stacklimit) { - /* TODO(haberman): grow stack? */ - return false; - } - - if (++e->segptr == e->seglimit) { - /* Grow segment buffer. */ - size_t old_size = - (e->seglimit - e->segbuf) * sizeof(upb_pb_encoder_segment); - size_t new_size = old_size * 2; - upb_pb_encoder_segment *new_buf = - upb_arena_realloc(e->arena, e->segbuf, old_size, new_size); - - if (new_buf == NULL) { - return false; - } - - e->segptr = new_buf + (e->segptr - e->segbuf); - e->seglimit = new_buf + (new_size / sizeof(upb_pb_encoder_segment)); - e->segbuf = new_buf; - } - } else { - /* We were previously at the top level, start buffering. */ - e->segptr = e->segbuf; - e->top = e->stack; - e->runbegin = e->ptr; - } - - *e->top = (int)(e->segptr - e->segbuf); - e->segptr->seglen = 0; - e->segptr->msglen = 0; - - return true; -} - -/* Call to indicate the end of a delimited region. We now know the length of - * the delimited region. If we are not nested inside any other delimited - * regions, we can now emit all of the buffered data we accumulated. */ -static bool end_delim(upb_pb_encoder *e) { - size_t msglen; - accumulate(e); - msglen = top(e)->msglen; - - if (e->top == e->stack) { - /* All lengths are now available, emit all buffered data. */ - char buf[UPB_PB_VARINT_MAX_LEN]; - upb_pb_encoder_segment *s; - const char *ptr = e->buf; - for (s = e->segbuf; s <= e->segptr; s++) { - size_t lenbytes = upb_vencode64(s->msglen, buf); - putbuf(e, buf, lenbytes); - putbuf(e, ptr, s->seglen); - ptr += s->seglen; - } - - e->ptr = e->buf; - e->top = NULL; - } else { - /* Need to keep buffering; propagate length info into enclosing - * submessages. */ - --e->top; - top(e)->msglen += msglen + upb_varint_size(msglen); - } - - return true; -} - - -/* tag_t **********************************************************************/ - -/* A precomputed (pre-encoded) tag and length. */ - -typedef struct { - uint8_t bytes; - char tag[7]; -} tag_t; - -/* Allocates a new tag for this field, and sets it in these handlerattr. */ -static void new_tag(upb_handlers *h, const upb_fielddef *f, upb_wiretype_t wt, - upb_handlerattr *attr) { - uint32_t n = upb_fielddef_number(f); - - tag_t *tag = upb_gmalloc(sizeof(tag_t)); - tag->bytes = upb_vencode64((n << 3) | wt, tag->tag); - - attr->handler_data = tag; - upb_handlers_addcleanup(h, tag, upb_gfree); -} - -static bool encode_tagval(upb_pb_encoder *e, const tag_t *tag) { - return encode_bytesval(e, tag->tag, tag->bytes); -} - - -/* encoding of wire types *****************************************************/ - -static bool doencode_fixed64(upb_pb_encoder *e, uint64_t val) { - /* TODO(haberman): byte-swap for big endian. */ - return encode_bytesval(e, &val, sizeof(uint64_t)); -} - -static bool doencode_fixed32(upb_pb_encoder *e, uint32_t val) { - /* TODO(haberman): byte-swap for big endian. */ - return encode_bytesval(e, &val, sizeof(uint32_t)); -} - -static bool doencode_varint(upb_pb_encoder *e, uint64_t val) { - if (!reserve(e, UPB_PB_VARINT_MAX_LEN)) { - return false; - } - - encoder_advance(e, upb_vencode64(val, e->ptr)); - return true; -} - -static uint64_t dbl2uint64(double d) { - uint64_t ret; - memcpy(&ret, &d, sizeof(uint64_t)); - return ret; -} - -static uint32_t flt2uint32(float d) { - uint32_t ret; - memcpy(&ret, &d, sizeof(uint32_t)); - return ret; -} - - -/* encoding of proto types ****************************************************/ - -static bool startmsg(void *c, const void *hd) { - upb_pb_encoder *e = c; - UPB_UNUSED(hd); - if (e->depth++ == 0) { - upb_bytessink_start(e->output_, 0, &e->subc); - } - return true; -} - -static bool endmsg(void *c, const void *hd, upb_status *status) { - upb_pb_encoder *e = c; - UPB_UNUSED(hd); - UPB_UNUSED(status); - if (--e->depth == 0) { - upb_bytessink_end(e->output_); - } - return true; -} - -static void *encode_startdelimfield(void *c, const void *hd) { - bool ok = encode_tagval(c, hd) && commit(c) && start_delim(c); - return ok ? c : UPB_BREAK; -} - -static bool encode_unknown(void *c, const void *hd, const char *buf, - size_t len) { - UPB_UNUSED(hd); - return encode_bytesval(c, buf, len) && commit(c); -} - -static bool encode_enddelimfield(void *c, const void *hd) { - UPB_UNUSED(hd); - return end_delim(c); -} - -static void *encode_startgroup(void *c, const void *hd) { - return (encode_tagval(c, hd) && commit(c)) ? c : UPB_BREAK; -} - -static bool encode_endgroup(void *c, const void *hd) { - return encode_tagval(c, hd) && commit(c); -} - -static void *encode_startstr(void *c, const void *hd, size_t size_hint) { - UPB_UNUSED(size_hint); - return encode_startdelimfield(c, hd); -} - -static size_t encode_strbuf(void *c, const void *hd, const char *buf, - size_t len, const upb_bufhandle *h) { - UPB_UNUSED(hd); - UPB_UNUSED(h); - return encode_bytesval(c, buf, len) ? len : 0; -} - -#define T(type, ctype, convert, encode) \ - static bool encode_scalar_##type(void *e, const void *hd, ctype val) { \ - return encode_tagval(e, hd) && encode(e, (convert)(val)) && commit(e); \ - } \ - static bool encode_packed_##type(void *e, const void *hd, ctype val) { \ - UPB_UNUSED(hd); \ - return encode(e, (convert)(val)); \ - } - -T(double, double, dbl2uint64, doencode_fixed64) -T(float, float, flt2uint32, doencode_fixed32) -T(int64, int64_t, uint64_t, doencode_varint) -T(int32, int32_t, int64_t, doencode_varint) -T(fixed64, uint64_t, uint64_t, doencode_fixed64) -T(fixed32, uint32_t, uint32_t, doencode_fixed32) -T(bool, bool, bool, doencode_varint) -T(uint32, uint32_t, uint32_t, doencode_varint) -T(uint64, uint64_t, uint64_t, doencode_varint) -T(enum, int32_t, uint32_t, doencode_varint) -T(sfixed32, int32_t, uint32_t, doencode_fixed32) -T(sfixed64, int64_t, uint64_t, doencode_fixed64) -T(sint32, int32_t, upb_zzenc_32, doencode_varint) -T(sint64, int64_t, upb_zzenc_64, doencode_varint) - -#undef T - - -/* code to build the handlers *************************************************/ - -#include -static void newhandlers_callback(const void *closure, upb_handlers *h) { - const upb_msgdef *m; - int i, n; - - UPB_UNUSED(closure); - - upb_handlers_setstartmsg(h, startmsg, NULL); - upb_handlers_setendmsg(h, endmsg, NULL); - upb_handlers_setunknown(h, encode_unknown, NULL); - - m = upb_handlers_msgdef(h); - n = upb_msgdef_fieldcount(m); - for(i = 0; i < n; i++) { - const upb_fielddef *f = upb_msgdef_field(m, i); - bool packed = upb_fielddef_isseq(f) && upb_fielddef_isprimitive(f) && - upb_fielddef_packed(f); - upb_handlerattr attr = UPB_HANDLERATTR_INIT; - upb_wiretype_t wt = - packed ? UPB_WIRE_TYPE_DELIMITED - : upb_pb_native_wire_types[upb_fielddef_descriptortype(f)]; - - /* Pre-encode the tag for this field. */ - new_tag(h, f, wt, &attr); - - if (packed) { - upb_handlers_setstartseq(h, f, encode_startdelimfield, &attr); - upb_handlers_setendseq(h, f, encode_enddelimfield, &attr); - } - -#define T(upper, lower, upbtype) \ - case UPB_DESCRIPTOR_TYPE_##upper: \ - if (packed) { \ - upb_handlers_set##upbtype(h, f, encode_packed_##lower, &attr); \ - } else { \ - upb_handlers_set##upbtype(h, f, encode_scalar_##lower, &attr); \ - } \ - break; - - switch (upb_fielddef_descriptortype(f)) { - T(DOUBLE, double, double); - T(FLOAT, float, float); - T(INT64, int64, int64); - T(INT32, int32, int32); - T(FIXED64, fixed64, uint64); - T(FIXED32, fixed32, uint32); - T(BOOL, bool, bool); - T(UINT32, uint32, uint32); - T(UINT64, uint64, uint64); - T(ENUM, enum, int32); - T(SFIXED32, sfixed32, int32); - T(SFIXED64, sfixed64, int64); - T(SINT32, sint32, int32); - T(SINT64, sint64, int64); - case UPB_DESCRIPTOR_TYPE_STRING: - case UPB_DESCRIPTOR_TYPE_BYTES: - upb_handlers_setstartstr(h, f, encode_startstr, &attr); - upb_handlers_setendstr(h, f, encode_enddelimfield, &attr); - upb_handlers_setstring(h, f, encode_strbuf, &attr); - break; - case UPB_DESCRIPTOR_TYPE_MESSAGE: - upb_handlers_setstartsubmsg(h, f, encode_startdelimfield, &attr); - upb_handlers_setendsubmsg(h, f, encode_enddelimfield, &attr); - break; - case UPB_DESCRIPTOR_TYPE_GROUP: { - /* Endgroup takes a different tag (wire_type = END_GROUP). */ - upb_handlerattr attr2 = UPB_HANDLERATTR_INIT; - new_tag(h, f, UPB_WIRE_TYPE_END_GROUP, &attr2); - - upb_handlers_setstartsubmsg(h, f, encode_startgroup, &attr); - upb_handlers_setendsubmsg(h, f, encode_endgroup, &attr2); - - break; - } - } - -#undef T - } -} - -void upb_pb_encoder_reset(upb_pb_encoder *e) { - e->segptr = NULL; - e->top = NULL; - e->depth = 0; -} - - -/* public API *****************************************************************/ - -upb_handlercache *upb_pb_encoder_newcache(void) { - return upb_handlercache_new(newhandlers_callback, NULL); -} - -upb_pb_encoder *upb_pb_encoder_create(upb_arena *arena, const upb_handlers *h, - upb_bytessink output) { - const size_t initial_bufsize = 256; - const size_t initial_segbufsize = 16; - /* TODO(haberman): make this configurable. */ - const size_t stack_size = 64; - - upb_pb_encoder *e = upb_arena_malloc(arena, sizeof(upb_pb_encoder)); - if (!e) return NULL; - - e->buf = upb_arena_malloc(arena, initial_bufsize); - e->segbuf = upb_arena_malloc(arena, initial_segbufsize * sizeof(*e->segbuf)); - e->stack = upb_arena_malloc(arena, stack_size * sizeof(*e->stack)); - - if (!e->buf || !e->segbuf || !e->stack) { - return NULL; - } - - e->limit = e->buf + initial_bufsize; - e->seglimit = e->segbuf + initial_segbufsize; - e->stacklimit = e->stack + stack_size; - - upb_pb_encoder_reset(e); - upb_sink_reset(&e->input_, h, e); - - e->arena = arena; - e->output_ = output; - e->subc = output.closure; - e->ptr = e->buf; - - return e; -} - -upb_sink upb_pb_encoder_input(upb_pb_encoder *e) { return e->input_; } diff --git a/upb/pb/encoder.h b/upb/pb/encoder.h deleted file mode 100644 index f125b37218..0000000000 --- a/upb/pb/encoder.h +++ /dev/null @@ -1,83 +0,0 @@ -/* -** upb::pb::Encoder (upb_pb_encoder) -** -** Implements a set of upb_handlers that write protobuf data to the binary wire -** format. -** -** This encoder implementation does not have any access to any out-of-band or -** precomputed lengths for submessages, so it must buffer submessages internally -** before it can emit the first byte. -*/ - -#ifndef UPB_ENCODER_H_ -#define UPB_ENCODER_H_ - -#include "upb/sink.h" - -#ifdef __cplusplus -namespace upb { -namespace pb { -class EncoderPtr; -} /* namespace pb */ -} /* namespace upb */ -#endif - -#define UPB_PBENCODER_MAX_NESTING 100 - -/* upb_pb_encoder *************************************************************/ - -/* Preallocation hint: decoder won't allocate more bytes than this when first - * constructed. This hint may be an overestimate for some build configurations. - * But if the decoder library is upgraded without recompiling the application, - * it may be an underestimate. */ -#define UPB_PB_ENCODER_SIZE 784 - -struct upb_pb_encoder; -typedef struct upb_pb_encoder upb_pb_encoder; - -#ifdef __cplusplus -extern "C" { -#endif - -upb_sink upb_pb_encoder_input(upb_pb_encoder *p); -upb_pb_encoder* upb_pb_encoder_create(upb_arena* a, const upb_handlers* h, - upb_bytessink output); - -/* Lazily builds and caches handlers that will push encoded data to a bytessink. - * Any msgdef objects used with this object must outlive it. */ -upb_handlercache *upb_pb_encoder_newcache(void); - -#ifdef __cplusplus -} /* extern "C" { */ - -class upb::pb::EncoderPtr { - public: - EncoderPtr(upb_pb_encoder* ptr) : ptr_(ptr) {} - - upb_pb_encoder* ptr() { return ptr_; } - - /* Creates a new encoder in the given environment. The Handlers must have - * come from NewHandlers() below. */ - static EncoderPtr Create(Arena* arena, const Handlers* handlers, - BytesSink output) { - return EncoderPtr( - upb_pb_encoder_create(arena->ptr(), handlers, output.sink())); - } - - /* The input to the encoder. */ - upb::Sink input() { return upb_pb_encoder_input(ptr()); } - - /* Creates a new set of handlers for this MessageDef. */ - static HandlerCache NewCache() { - return HandlerCache(upb_pb_encoder_newcache()); - } - - static const size_t kSize = UPB_PB_ENCODER_SIZE; - - private: - upb_pb_encoder* ptr_; -}; - -#endif /* __cplusplus */ - -#endif /* UPB_ENCODER_H_ */ diff --git a/upb/pb/make-gdb-script.rb b/upb/pb/make-gdb-script.rb deleted file mode 100755 index 3895597887..0000000000 --- a/upb/pb/make-gdb-script.rb +++ /dev/null @@ -1,36 +0,0 @@ -#!/usr/bin/ruby - -puts "set width 0 -set height 0 -set verbose off\n\n" - -IO.popen("nm -S /tmp/upb-jit-code.so").each_line { |line| - # Input lines look like this: - # 000000000000575a T X.0x10.OP_CHECKDELIM - # - # For each one we want to emit a command that looks like: - # b X.0x10.OP_CHECKDELIM - # commands - # silent - # printf "buf_ofs=%d data_rem=%d delim_rem=%d X.0x10.OP_CHECKDELIM\n", $rbx - (long)((upb_pbdecoder*)($r15))->buf, $r12 - $rbx, $rbp - $rbx - # continue - # end - - parts = line.split - next if parts[1] != "T" - sym = parts[2] - next if sym !~ /X\./; - if sym =~ /OP_/ then - printcmd = "printf \"buf_ofs=%d data_rem=%d delim_rem=%d #{sym}\\n\", $rbx - (long)((upb_pbdecoder*)($r15))->buf, $r12 - $rbx, $rbp - $rbx" - elsif sym =~ /enterjit/ then - printcmd = "printf \"#{sym} bytes=%d\\n\", $rcx" - else - printcmd = "printf \"#{sym}\\n\"" - end - puts "b #{sym} -commands - silent - #{printcmd} - continue -end\n\n" -} diff --git a/upb/pb/textprinter.c b/upb/pb/textprinter.c deleted file mode 100644 index 1331268a06..0000000000 --- a/upb/pb/textprinter.c +++ /dev/null @@ -1,339 +0,0 @@ -/* - * upb::pb::TextPrinter - * - * OPT: This is not optimized at all. It uses printf() which parses the format - * string every time, and it allocates memory for every put. - */ - -#include "upb/pb/textprinter.h" - -#include -#include -#include -#include -#include -#include - -#include "upb/sink.h" - -#include "upb/port_def.inc" - -struct upb_textprinter { - upb_sink input_; - upb_bytessink output_; - int indent_depth_; - bool single_line_; - void *subc; -}; - -#define CHECK(x) if ((x) < 0) goto err; - -static const char *shortname(const char *longname) { - const char *last = strrchr(longname, '.'); - return last ? last + 1 : longname; -} - -static int indent(upb_textprinter *p) { - int i; - if (!p->single_line_) - for (i = 0; i < p->indent_depth_; i++) - upb_bytessink_putbuf(p->output_, p->subc, " ", 2, NULL); - return 0; -} - -static int endfield(upb_textprinter *p) { - const char ch = (p->single_line_ ? ' ' : '\n'); - upb_bytessink_putbuf(p->output_, p->subc, &ch, 1, NULL); - return 0; -} - -static int putescaped(upb_textprinter *p, const char *buf, size_t len, - bool preserve_utf8) { - /* Based on CEscapeInternal() from Google's protobuf release. */ - char dstbuf[4096], *dst = dstbuf, *dstend = dstbuf + sizeof(dstbuf); - const char *end = buf + len; - - /* I think hex is prettier and more useful, but proto2 uses octal; should - * investigate whether it can parse hex also. */ - const bool use_hex = false; - bool last_hex_escape = false; /* true if last output char was \xNN */ - - for (; buf < end; buf++) { - bool is_hex_escape; - - if (dstend - dst < 4) { - upb_bytessink_putbuf(p->output_, p->subc, dstbuf, dst - dstbuf, NULL); - dst = dstbuf; - } - - is_hex_escape = false; - switch (*buf) { - case '\n': *(dst++) = '\\'; *(dst++) = 'n'; break; - case '\r': *(dst++) = '\\'; *(dst++) = 'r'; break; - case '\t': *(dst++) = '\\'; *(dst++) = 't'; break; - case '\"': *(dst++) = '\\'; *(dst++) = '\"'; break; - case '\'': *(dst++) = '\\'; *(dst++) = '\''; break; - case '\\': *(dst++) = '\\'; *(dst++) = '\\'; break; - default: - /* Note that if we emit \xNN and the buf character after that is a hex - * digit then that digit must be escaped too to prevent it being - * interpreted as part of the character code by C. */ - if ((!preserve_utf8 || (uint8_t)*buf < 0x80) && - (!isprint(*buf) || (last_hex_escape && isxdigit(*buf)))) { - sprintf(dst, (use_hex ? "\\x%02x" : "\\%03o"), (uint8_t)*buf); - is_hex_escape = use_hex; - dst += 4; - } else { - *(dst++) = *buf; break; - } - } - last_hex_escape = is_hex_escape; - } - /* Flush remaining data. */ - upb_bytessink_putbuf(p->output_, p->subc, dstbuf, dst - dstbuf, NULL); - return 0; -} - -bool putf(upb_textprinter *p, const char *fmt, ...) { - va_list args; - va_list args_copy; - char *str; - int written; - int len; - bool ok; - - va_start(args, fmt); - - /* Run once to get the length of the string. */ - va_copy(args_copy, args); - len = vsnprintf(NULL, 0, fmt, args_copy); - va_end(args_copy); - - /* + 1 for NULL terminator (vsprintf() requires it even if we don't). */ - str = upb_gmalloc(len + 1); - if (!str) return false; - written = vsprintf(str, fmt, args); - va_end(args); - UPB_ASSERT(written == len); - - ok = upb_bytessink_putbuf(p->output_, p->subc, str, len, NULL); - upb_gfree(str); - return ok; -} - - -/* handlers *******************************************************************/ - -static bool textprinter_startmsg(void *c, const void *hd) { - upb_textprinter *p = c; - UPB_UNUSED(hd); - if (p->indent_depth_ == 0) { - upb_bytessink_start(p->output_, 0, &p->subc); - } - return true; -} - -static bool textprinter_endmsg(void *c, const void *hd, upb_status *s) { - upb_textprinter *p = c; - UPB_UNUSED(hd); - UPB_UNUSED(s); - if (p->indent_depth_ == 0) { - upb_bytessink_end(p->output_); - } - return true; -} - -#define TYPE(name, ctype, fmt) \ - static bool textprinter_put ## name(void *closure, const void *handler_data, \ - ctype val) { \ - upb_textprinter *p = closure; \ - const upb_fielddef *f = handler_data; \ - CHECK(indent(p)); \ - putf(p, "%s: " fmt, upb_fielddef_name(f), val); \ - CHECK(endfield(p)); \ - return true; \ - err: \ - return false; \ -} - -static bool textprinter_putbool(void *closure, const void *handler_data, - bool val) { - upb_textprinter *p = closure; - const upb_fielddef *f = handler_data; - CHECK(indent(p)); - putf(p, "%s: %s", upb_fielddef_name(f), val ? "true" : "false"); - CHECK(endfield(p)); - return true; -err: - return false; -} - -#define STRINGIFY_HELPER(x) #x -#define STRINGIFY_MACROVAL(x) STRINGIFY_HELPER(x) - -TYPE(int32, int32_t, "%" PRId32) -TYPE(int64, int64_t, "%" PRId64) -TYPE(uint32, uint32_t, "%" PRIu32) -TYPE(uint64, uint64_t, "%" PRIu64) -TYPE(float, float, "%." STRINGIFY_MACROVAL(FLT_DIG) "g") -TYPE(double, double, "%." STRINGIFY_MACROVAL(DBL_DIG) "g") - -#undef TYPE - -/* Output a symbolic value from the enum if found, else just print as int32. */ -static bool textprinter_putenum(void *closure, const void *handler_data, - int32_t val) { - upb_textprinter *p = closure; - const upb_fielddef *f = handler_data; - const upb_enumdef *enum_def = upb_fielddef_enumsubdef(f); - const char *label = upb_enumdef_iton(enum_def, val); - if (label) { - indent(p); - putf(p, "%s: %s", upb_fielddef_name(f), label); - endfield(p); - } else { - if (!textprinter_putint32(closure, handler_data, val)) - return false; - } - return true; -} - -static void *textprinter_startstr(void *closure, const void *handler_data, - size_t size_hint) { - upb_textprinter *p = closure; - const upb_fielddef *f = handler_data; - UPB_UNUSED(size_hint); - indent(p); - putf(p, "%s: \"", upb_fielddef_name(f)); - return p; -} - -static bool textprinter_endstr(void *closure, const void *handler_data) { - upb_textprinter *p = closure; - UPB_UNUSED(handler_data); - putf(p, "\""); - endfield(p); - return true; -} - -static size_t textprinter_putstr(void *closure, const void *hd, const char *buf, - size_t len, const upb_bufhandle *handle) { - upb_textprinter *p = closure; - const upb_fielddef *f = hd; - UPB_UNUSED(handle); - CHECK(putescaped(p, buf, len, upb_fielddef_type(f) == UPB_TYPE_STRING)); - return len; -err: - return 0; -} - -static void *textprinter_startsubmsg(void *closure, const void *handler_data) { - upb_textprinter *p = closure; - const char *name = handler_data; - CHECK(indent(p)); - putf(p, "%s {%c", name, p->single_line_ ? ' ' : '\n'); - p->indent_depth_++; - return p; -err: - return UPB_BREAK; -} - -static bool textprinter_endsubmsg(void *closure, const void *handler_data) { - upb_textprinter *p = closure; - UPB_UNUSED(handler_data); - p->indent_depth_--; - CHECK(indent(p)); - upb_bytessink_putbuf(p->output_, p->subc, "}", 1, NULL); - CHECK(endfield(p)); - return true; -err: - return false; -} - -static void onmreg(const void *c, upb_handlers *h) { - const upb_msgdef *m = upb_handlers_msgdef(h); - int i, n; - UPB_UNUSED(c); - - upb_handlers_setstartmsg(h, textprinter_startmsg, NULL); - upb_handlers_setendmsg(h, textprinter_endmsg, NULL); - - n = upb_msgdef_fieldcount(m); - for(i = 0; i < n; i++) { - const upb_fielddef *f = upb_msgdef_field(m, i); - upb_handlerattr attr = UPB_HANDLERATTR_INIT; - attr.handler_data = f; - switch (upb_fielddef_type(f)) { - case UPB_TYPE_INT32: - upb_handlers_setint32(h, f, textprinter_putint32, &attr); - break; - case UPB_TYPE_INT64: - upb_handlers_setint64(h, f, textprinter_putint64, &attr); - break; - case UPB_TYPE_UINT32: - upb_handlers_setuint32(h, f, textprinter_putuint32, &attr); - break; - case UPB_TYPE_UINT64: - upb_handlers_setuint64(h, f, textprinter_putuint64, &attr); - break; - case UPB_TYPE_FLOAT: - upb_handlers_setfloat(h, f, textprinter_putfloat, &attr); - break; - case UPB_TYPE_DOUBLE: - upb_handlers_setdouble(h, f, textprinter_putdouble, &attr); - break; - case UPB_TYPE_BOOL: - upb_handlers_setbool(h, f, textprinter_putbool, &attr); - break; - case UPB_TYPE_STRING: - case UPB_TYPE_BYTES: - upb_handlers_setstartstr(h, f, textprinter_startstr, &attr); - upb_handlers_setstring(h, f, textprinter_putstr, &attr); - upb_handlers_setendstr(h, f, textprinter_endstr, &attr); - break; - case UPB_TYPE_MESSAGE: { - const char *name = - upb_fielddef_descriptortype(f) == UPB_DESCRIPTOR_TYPE_GROUP - ? shortname(upb_msgdef_fullname(upb_fielddef_msgsubdef(f))) - : upb_fielddef_name(f); - attr.handler_data = name; - upb_handlers_setstartsubmsg(h, f, textprinter_startsubmsg, &attr); - upb_handlers_setendsubmsg(h, f, textprinter_endsubmsg, &attr); - break; - } - case UPB_TYPE_ENUM: - upb_handlers_setint32(h, f, textprinter_putenum, &attr); - break; - } - } -} - -static void textprinter_reset(upb_textprinter *p, bool single_line) { - p->single_line_ = single_line; - p->indent_depth_ = 0; -} - - -/* Public API *****************************************************************/ - -upb_textprinter *upb_textprinter_create(upb_arena *arena, const upb_handlers *h, - upb_bytessink output) { - upb_textprinter *p = upb_arena_malloc(arena, sizeof(upb_textprinter)); - if (!p) return NULL; - - p->output_ = output; - upb_sink_reset(&p->input_, h, p); - textprinter_reset(p, false); - - return p; -} - -upb_handlercache *upb_textprinter_newcache(void) { - return upb_handlercache_new(&onmreg, NULL); -} - -upb_sink upb_textprinter_input(upb_textprinter *p) { return p->input_; } - -void upb_textprinter_setsingleline(upb_textprinter *p, bool single_line) { - p->single_line_ = single_line; -} diff --git a/upb/pb/textprinter.h b/upb/pb/textprinter.h deleted file mode 100644 index 7e20d7521b..0000000000 --- a/upb/pb/textprinter.h +++ /dev/null @@ -1,69 +0,0 @@ -/* -** upb::pb::TextPrinter (upb_textprinter) -** -** Handlers for writing to protobuf text format. -*/ - -#ifndef UPB_TEXT_H_ -#define UPB_TEXT_H_ - -#include "upb/sink.h" - -#ifdef __cplusplus -namespace upb { -namespace pb { -class TextPrinterPtr; -} /* namespace pb */ -} /* namespace upb */ -#endif - -/* upb_textprinter ************************************************************/ - -struct upb_textprinter; -typedef struct upb_textprinter upb_textprinter; - -#ifdef __cplusplus -extern "C" { -#endif - -/* C API. */ -upb_textprinter *upb_textprinter_create(upb_arena *arena, const upb_handlers *h, - upb_bytessink output); -void upb_textprinter_setsingleline(upb_textprinter *p, bool single_line); -upb_sink upb_textprinter_input(upb_textprinter *p); -upb_handlercache *upb_textprinter_newcache(void); - -#ifdef __cplusplus -} /* extern "C" */ - -class upb::pb::TextPrinterPtr { - public: - TextPrinterPtr(upb_textprinter* ptr) : ptr_(ptr) {} - - /* The given handlers must have come from NewHandlers(). It must outlive the - * TextPrinter. */ - static TextPrinterPtr Create(Arena *arena, upb::HandlersPtr *handlers, - BytesSink output) { - return TextPrinterPtr( - upb_textprinter_create(arena->ptr(), handlers->ptr(), output.sink())); - } - - void SetSingleLineMode(bool single_line) { - upb_textprinter_setsingleline(ptr_, single_line); - } - - Sink input() { return upb_textprinter_input(ptr_); } - - /* If handler caching becomes a requirement we can add a code cache as in - * decoder.h */ - static HandlerCache NewCache() { - return HandlerCache(upb_textprinter_newcache()); - } - - private: - upb_textprinter* ptr_; -}; - -#endif - -#endif /* UPB_TEXT_H_ */ diff --git a/upb/pb/varint.c b/upb/pb/varint.c deleted file mode 100644 index 90f58a138f..0000000000 --- a/upb/pb/varint.c +++ /dev/null @@ -1,74 +0,0 @@ - -#include "upb/pb/varint.int.h" - -/* Index is descriptor type. */ -const uint8_t upb_pb_native_wire_types[] = { - UPB_WIRE_TYPE_END_GROUP, /* ENDGROUP */ - UPB_WIRE_TYPE_64BIT, /* DOUBLE */ - UPB_WIRE_TYPE_32BIT, /* FLOAT */ - UPB_WIRE_TYPE_VARINT, /* INT64 */ - UPB_WIRE_TYPE_VARINT, /* UINT64 */ - UPB_WIRE_TYPE_VARINT, /* INT32 */ - UPB_WIRE_TYPE_64BIT, /* FIXED64 */ - UPB_WIRE_TYPE_32BIT, /* FIXED32 */ - UPB_WIRE_TYPE_VARINT, /* BOOL */ - UPB_WIRE_TYPE_DELIMITED, /* STRING */ - UPB_WIRE_TYPE_START_GROUP, /* GROUP */ - UPB_WIRE_TYPE_DELIMITED, /* MESSAGE */ - UPB_WIRE_TYPE_DELIMITED, /* BYTES */ - UPB_WIRE_TYPE_VARINT, /* UINT32 */ - UPB_WIRE_TYPE_VARINT, /* ENUM */ - UPB_WIRE_TYPE_32BIT, /* SFIXED32 */ - UPB_WIRE_TYPE_64BIT, /* SFIXED64 */ - UPB_WIRE_TYPE_VARINT, /* SINT32 */ - UPB_WIRE_TYPE_VARINT, /* SINT64 */ -}; - -/* A basic branch-based decoder, uses 32-bit values to get good performance - * on 32-bit architectures (but performs well on 64-bits also). - * This scheme comes from the original Google Protobuf implementation - * (proto2). */ -upb_decoderet upb_vdecode_max8_branch32(upb_decoderet r) { - upb_decoderet err = {NULL, 0}; - const char *p = r.p; - uint32_t low = (uint32_t)r.val; - uint32_t high = 0; - uint32_t b; - b = *(p++); low |= (b & 0x7fU) << 14; if (!(b & 0x80)) goto done; - b = *(p++); low |= (b & 0x7fU) << 21; if (!(b & 0x80)) goto done; - b = *(p++); low |= (b & 0x7fU) << 28; - high = (b & 0x7fU) >> 4; if (!(b & 0x80)) goto done; - b = *(p++); high |= (b & 0x7fU) << 3; if (!(b & 0x80)) goto done; - b = *(p++); high |= (b & 0x7fU) << 10; if (!(b & 0x80)) goto done; - b = *(p++); high |= (b & 0x7fU) << 17; if (!(b & 0x80)) goto done; - b = *(p++); high |= (b & 0x7fU) << 24; if (!(b & 0x80)) goto done; - b = *(p++); high |= (b & 0x7fU) << 31; if (!(b & 0x80)) goto done; - return err; - -done: - r.val = ((uint64_t)high << 32) | low; - r.p = p; - return r; -} - -/* Like the previous, but uses 64-bit values. */ -upb_decoderet upb_vdecode_max8_branch64(upb_decoderet r) { - const char *p = r.p; - uint64_t val = r.val; - uint64_t b; - upb_decoderet err = {NULL, 0}; - b = *(p++); val |= (b & 0x7fU) << 14; if (!(b & 0x80)) goto done; - b = *(p++); val |= (b & 0x7fU) << 21; if (!(b & 0x80)) goto done; - b = *(p++); val |= (b & 0x7fU) << 28; if (!(b & 0x80)) goto done; - b = *(p++); val |= (b & 0x7fU) << 35; if (!(b & 0x80)) goto done; - b = *(p++); val |= (b & 0x7fU) << 42; if (!(b & 0x80)) goto done; - b = *(p++); val |= (b & 0x7fU) << 49; if (!(b & 0x80)) goto done; - b = *(p++); val |= (b & 0x7fU) << 56; if (!(b & 0x80)) goto done; - b = *(p++); val |= (b & 0x7fU) << 63; if (!(b & 0x80)) goto done; - return err; - -done: - r.val = val; - r.p = p; - return r; -} diff --git a/upb/pb/varint.int.h b/upb/pb/varint.int.h deleted file mode 100644 index 9b98a81c76..0000000000 --- a/upb/pb/varint.int.h +++ /dev/null @@ -1,164 +0,0 @@ -/* -** A number of routines for varint manipulation (we keep them all around to -** have multiple approaches available for benchmarking). -*/ - -#ifndef UPB_VARINT_DECODER_H_ -#define UPB_VARINT_DECODER_H_ - -#include -#include -#include -#include "upb/upb.h" - -#include "upb/port_def.inc" - -#ifdef __cplusplus -extern "C" { -#endif - -#define UPB_MAX_WIRE_TYPE 5 - -/* The maximum number of bytes that it takes to encode a 64-bit varint. */ -#define UPB_PB_VARINT_MAX_LEN 10 - -/* Array of the "native" (ie. non-packed-repeated) wire type for the given a - * descriptor type (upb_descriptortype_t). */ -extern const uint8_t upb_pb_native_wire_types[]; - -UPB_INLINE uint64_t byteswap64(uint64_t val) { - uint64_t byte = 0xff; - return (val & (byte << 56) >> 56) - | (val & (byte << 48) >> 40) - | (val & (byte << 40) >> 24) - | (val & (byte << 32) >> 8) - | (val & (byte << 24) << 8) - | (val & (byte << 16) << 24) - | (val & (byte << 8) << 40) - | (val & (byte << 0) << 56); -} - -/* Zig-zag encoding/decoding **************************************************/ - -UPB_INLINE int32_t upb_zzdec_32(uint64_t _n) { - uint32_t n = (uint32_t)_n; - return (n >> 1) ^ -(int32_t)(n & 1); -} -UPB_INLINE int64_t upb_zzdec_64(uint64_t n) { - return (n >> 1) ^ -(int64_t)(n & 1); -} -UPB_INLINE uint32_t upb_zzenc_32(int32_t n) { - return ((uint32_t)n << 1) ^ (n >> 31); -} -UPB_INLINE uint64_t upb_zzenc_64(int64_t n) { - return ((uint64_t)n << 1) ^ (n >> 63); -} - -/* Decoding *******************************************************************/ - -/* All decoding functions return this struct by value. */ -typedef struct { - const char *p; /* NULL if the varint was unterminated. */ - uint64_t val; -} upb_decoderet; - -UPB_INLINE upb_decoderet upb_decoderet_make(const char *p, uint64_t val) { - upb_decoderet ret; - ret.p = p; - ret.val = val; - return ret; -} - -upb_decoderet upb_vdecode_max8_branch32(upb_decoderet r); -upb_decoderet upb_vdecode_max8_branch64(upb_decoderet r); - -/* Template for a function that checks the first two bytes with branching - * and dispatches 2-10 bytes with a separate function. Note that this may read - * up to 10 bytes, so it must not be used unless there are at least ten bytes - * left in the buffer! */ -#define UPB_VARINT_DECODER_CHECK2(name, decode_max8_function) \ -UPB_INLINE upb_decoderet upb_vdecode_check2_ ## name(const char *_p) { \ - uint8_t *p = (uint8_t*)_p; \ - upb_decoderet r; \ - if ((*p & 0x80) == 0) { \ - /* Common case: one-byte varint. */ \ - return upb_decoderet_make(_p + 1, *p & 0x7fU); \ - } \ - r = upb_decoderet_make(_p + 2, (*p & 0x7fU) | ((*(p + 1) & 0x7fU) << 7)); \ - if ((*(p + 1) & 0x80) == 0) { \ - /* Two-byte varint. */ \ - return r; \ - } \ - /* Longer varint, fallback to out-of-line function. */ \ - return decode_max8_function(r); \ -} - -UPB_VARINT_DECODER_CHECK2(branch32, upb_vdecode_max8_branch32) -UPB_VARINT_DECODER_CHECK2(branch64, upb_vdecode_max8_branch64) -#undef UPB_VARINT_DECODER_CHECK2 - -/* Our canonical functions for decoding varints, based on the currently - * favored best-performing implementations. */ -UPB_INLINE upb_decoderet upb_vdecode_fast(const char *p) { - if (sizeof(long) == 8) - return upb_vdecode_check2_branch64(p); - else - return upb_vdecode_check2_branch32(p); -} - - -/* Encoding *******************************************************************/ - -UPB_INLINE int upb_value_size(uint64_t val) { -#ifdef __GNUC__ - /* 0-based, undef if val == 0. */ - int high_bit = val ? 63 - __builtin_clzll(val) : 0; -#else - int high_bit = 0; - uint64_t tmp = val; - while(tmp >>= 1) high_bit++; -#endif - return val == 0 ? 1 : high_bit / 8 + 1; -} - -/* Encodes a 64-bit varint into buf (which must be >=UPB_PB_VARINT_MAX_LEN - * bytes long), returning how many bytes were used. - * - * TODO: benchmark and optimize if necessary. */ -UPB_INLINE size_t upb_vencode64(uint64_t val, char *buf) { - size_t i; - if (val == 0) { buf[0] = 0; return 1; } - i = 0; - while (val) { - uint8_t byte = val & 0x7fU; - val >>= 7; - if (val) byte |= 0x80U; - buf[i++] = byte; - } - return i; -} - -UPB_INLINE size_t upb_varint_size(uint64_t val) { - char buf[UPB_PB_VARINT_MAX_LEN]; - return upb_vencode64(val, buf); -} - -/* Encodes a 32-bit varint, *not* sign-extended. */ -UPB_INLINE uint64_t upb_vencode32(uint32_t val) { - char buf[UPB_PB_VARINT_MAX_LEN]; - size_t bytes = upb_vencode64(val, buf); - uint64_t ret = 0; - UPB_ASSERT(bytes <= 5); - memcpy(&ret, buf, bytes); - ret = _upb_be_swap64(ret); - UPB_ASSERT(ret <= 0xffffffffffU); - return ret; -} - -#ifdef __cplusplus -} /* extern "C" */ -#endif - -#include "upb/port_undef.inc" - -#endif /* UPB_VARINT_DECODER_H_ */ diff --git a/upb/sink.c b/upb/sink.c deleted file mode 100644 index d55d258b23..0000000000 --- a/upb/sink.c +++ /dev/null @@ -1,17 +0,0 @@ - -#include "upb/sink.h" - -bool upb_bufsrc_putbuf(const char *buf, size_t len, upb_bytessink sink) { - void *subc; - bool ret; - upb_bufhandle handle = UPB_BUFHANDLE_INIT; - handle.buf = buf; - ret = upb_bytessink_start(sink, len, &subc); - if (ret && len != 0) { - ret = (upb_bytessink_putbuf(sink, subc, buf, len, &handle) >= len); - } - if (ret) { - ret = upb_bytessink_end(sink); - } - return ret; -} diff --git a/upb/sink.h b/upb/sink.h deleted file mode 100644 index 871a8f57a7..0000000000 --- a/upb/sink.h +++ /dev/null @@ -1,517 +0,0 @@ -/* -** upb::Sink (upb_sink) -** upb::BytesSink (upb_bytessink) -** -** A upb_sink is an object that binds a upb_handlers object to some runtime -** state. It is the object that can actually receive data via the upb_handlers -** interface. -** -** Unlike upb_def and upb_handlers, upb_sink is never frozen, immutable, or -** thread-safe. You can create as many of them as you want, but each one may -** only be used in a single thread at a time. -** -** If we compare with class-based OOP, a you can think of a upb_def as an -** abstract base class, a upb_handlers as a concrete derived class, and a -** upb_sink as an object (class instance). -*/ - -#ifndef UPB_SINK_H -#define UPB_SINK_H - -#include "upb/handlers.h" - -#include "upb/port_def.inc" - -#ifdef __cplusplus -namespace upb { -class BytesSink; -class Sink; -} -#endif - -/* upb_sink *******************************************************************/ - -#ifdef __cplusplus -extern "C" { -#endif - -typedef struct { - const upb_handlers *handlers; - void *closure; -} upb_sink; - -#define PUTVAL(type, ctype) \ - UPB_INLINE bool upb_sink_put##type(upb_sink s, upb_selector_t sel, \ - ctype val) { \ - typedef upb_##type##_handlerfunc functype; \ - functype *func; \ - const void *hd; \ - if (!s.handlers) return true; \ - func = (functype *)upb_handlers_gethandler(s.handlers, sel, &hd); \ - if (!func) return true; \ - return func(s.closure, hd, val); \ - } - -PUTVAL(int32, int32_t) -PUTVAL(int64, int64_t) -PUTVAL(uint32, uint32_t) -PUTVAL(uint64, uint64_t) -PUTVAL(float, float) -PUTVAL(double, double) -PUTVAL(bool, bool) -#undef PUTVAL - -UPB_INLINE void upb_sink_reset(upb_sink *s, const upb_handlers *h, void *c) { - s->handlers = h; - s->closure = c; -} - -UPB_INLINE size_t upb_sink_putstring(upb_sink s, upb_selector_t sel, - const char *buf, size_t n, - const upb_bufhandle *handle) { - typedef upb_string_handlerfunc func; - func *handler; - const void *hd; - if (!s.handlers) return n; - handler = (func *)upb_handlers_gethandler(s.handlers, sel, &hd); - - if (!handler) return n; - return handler(s.closure, hd, buf, n, handle); -} - -UPB_INLINE bool upb_sink_putunknown(upb_sink s, const char *buf, size_t n) { - typedef upb_unknown_handlerfunc func; - func *handler; - const void *hd; - if (!s.handlers) return true; - handler = - (func *)upb_handlers_gethandler(s.handlers, UPB_UNKNOWN_SELECTOR, &hd); - - if (!handler) return n; - return handler(s.closure, hd, buf, n); -} - -UPB_INLINE bool upb_sink_startmsg(upb_sink s) { - typedef upb_startmsg_handlerfunc func; - func *startmsg; - const void *hd; - if (!s.handlers) return true; - startmsg = - (func *)upb_handlers_gethandler(s.handlers, UPB_STARTMSG_SELECTOR, &hd); - - if (!startmsg) return true; - return startmsg(s.closure, hd); -} - -UPB_INLINE bool upb_sink_endmsg(upb_sink s, upb_status *status) { - typedef upb_endmsg_handlerfunc func; - func *endmsg; - const void *hd; - if (!s.handlers) return true; - endmsg = - (func *)upb_handlers_gethandler(s.handlers, UPB_ENDMSG_SELECTOR, &hd); - - if (!endmsg) return true; - return endmsg(s.closure, hd, status); -} - -UPB_INLINE bool upb_sink_startseq(upb_sink s, upb_selector_t sel, - upb_sink *sub) { - typedef upb_startfield_handlerfunc func; - func *startseq; - const void *hd; - sub->closure = s.closure; - sub->handlers = s.handlers; - if (!s.handlers) return true; - startseq = (func*)upb_handlers_gethandler(s.handlers, sel, &hd); - - if (!startseq) return true; - sub->closure = startseq(s.closure, hd); - return sub->closure ? true : false; -} - -UPB_INLINE bool upb_sink_endseq(upb_sink s, upb_selector_t sel) { - typedef upb_endfield_handlerfunc func; - func *endseq; - const void *hd; - if (!s.handlers) return true; - endseq = (func*)upb_handlers_gethandler(s.handlers, sel, &hd); - - if (!endseq) return true; - return endseq(s.closure, hd); -} - -UPB_INLINE bool upb_sink_startstr(upb_sink s, upb_selector_t sel, - size_t size_hint, upb_sink *sub) { - typedef upb_startstr_handlerfunc func; - func *startstr; - const void *hd; - sub->closure = s.closure; - sub->handlers = s.handlers; - if (!s.handlers) return true; - startstr = (func*)upb_handlers_gethandler(s.handlers, sel, &hd); - - if (!startstr) return true; - sub->closure = startstr(s.closure, hd, size_hint); - return sub->closure ? true : false; -} - -UPB_INLINE bool upb_sink_endstr(upb_sink s, upb_selector_t sel) { - typedef upb_endfield_handlerfunc func; - func *endstr; - const void *hd; - if (!s.handlers) return true; - endstr = (func*)upb_handlers_gethandler(s.handlers, sel, &hd); - - if (!endstr) return true; - return endstr(s.closure, hd); -} - -UPB_INLINE bool upb_sink_startsubmsg(upb_sink s, upb_selector_t sel, - upb_sink *sub) { - typedef upb_startfield_handlerfunc func; - func *startsubmsg; - const void *hd; - sub->closure = s.closure; - if (!s.handlers) { - sub->handlers = NULL; - return true; - } - sub->handlers = upb_handlers_getsubhandlers_sel(s.handlers, sel); - startsubmsg = (func*)upb_handlers_gethandler(s.handlers, sel, &hd); - - if (!startsubmsg) return true; - sub->closure = startsubmsg(s.closure, hd); - return sub->closure ? true : false; -} - -UPB_INLINE bool upb_sink_endsubmsg(upb_sink s, upb_sink sub, - upb_selector_t sel) { - typedef upb_endfield_handlerfunc func; - func *endsubmsg; - const void *hd; - if (!s.handlers) return true; - endsubmsg = (func*)upb_handlers_gethandler(s.handlers, sel, &hd); - - if (!endsubmsg) return true; - return endsubmsg(sub.closure, hd); -} - -#ifdef __cplusplus -} /* extern "C" */ - -/* A upb::Sink is an object that binds a upb::Handlers object to some runtime - * state. It represents an endpoint to which data can be sent. - * - * TODO(haberman): right now all of these functions take selectors. Should they - * take selectorbase instead? - * - * ie. instead of calling: - * sink->StartString(FOO_FIELD_START_STRING, ...) - * a selector base would let you say: - * sink->StartString(FOO_FIELD, ...) - * - * This would make call sites a little nicer and require emitting fewer selector - * definitions in .h files. - * - * But the current scheme has the benefit that you can retrieve a function - * pointer for any handler with handlers->GetHandler(selector), without having - * to have a separate GetHandler() function for each handler type. The JIT - * compiler uses this. To accommodate we'd have to expose a separate - * GetHandler() for every handler type. - * - * Also to ponder: selectors right now are independent of a specific Handlers - * instance. In other words, they allocate a number to every possible handler - * that *could* be registered, without knowing anything about what handlers - * *are* registered. That means that using selectors as table offsets prohibits - * us from compacting the handler table at Freeze() time. If the table is very - * sparse, this could be wasteful. - * - * Having another selector-like thing that is specific to a Handlers instance - * would allow this compacting, but then it would be impossible to write code - * ahead-of-time that can be bound to any Handlers instance at runtime. For - * example, a .proto file parser written as straight C will not know what - * Handlers it will be bound to, so when it calls sink->StartString() what - * selector will it pass? It needs a selector like we have today, that is - * independent of any particular upb::Handlers. - * - * Is there a way then to allow Handlers table compaction? */ -class upb::Sink { - public: - /* Constructor with no initialization; must be Reset() before use. */ - Sink() {} - - Sink(const Sink&) = default; - Sink& operator=(const Sink&) = default; - - Sink(const upb_sink& sink) : sink_(sink) {} - Sink &operator=(const upb_sink &sink) { - sink_ = sink; - return *this; - } - - upb_sink sink() { return sink_; } - - /* Constructs a new sink for the given frozen handlers and closure. - * - * TODO: once the Handlers know the expected closure type, verify that T - * matches it. */ - template Sink(const upb_handlers* handlers, T* closure) { - Reset(handlers, closure); - } - - upb_sink* ptr() { return &sink_; } - - /* Resets the value of the sink. */ - template void Reset(const upb_handlers* handlers, T* closure) { - upb_sink_reset(&sink_, handlers, closure); - } - - /* Returns the top-level object that is bound to this sink. - * - * TODO: once the Handlers know the expected closure type, verify that T - * matches it. */ - template T* GetObject() const { - return static_cast(sink_.closure); - } - - /* Functions for pushing data into the sink. - * - * These return false if processing should stop (either due to error or just - * to suspend). - * - * These may not be called from within one of the same sink's handlers (in - * other words, handlers are not re-entrant). */ - - /* Should be called at the start and end of every message; both the top-level - * message and submessages. This means that submessages should use the - * following sequence: - * sink->StartSubMessage(startsubmsg_selector); - * sink->StartMessage(); - * // ... - * sink->EndMessage(&status); - * sink->EndSubMessage(endsubmsg_selector); */ - bool StartMessage() { return upb_sink_startmsg(sink_); } - bool EndMessage(upb_status *status) { - return upb_sink_endmsg(sink_, status); - } - - /* Putting of individual values. These work for both repeated and - * non-repeated fields, but for repeated fields you must wrap them in - * calls to StartSequence()/EndSequence(). */ - bool PutInt32(HandlersPtr::Selector s, int32_t val) { - return upb_sink_putint32(sink_, s, val); - } - - bool PutInt64(HandlersPtr::Selector s, int64_t val) { - return upb_sink_putint64(sink_, s, val); - } - - bool PutUInt32(HandlersPtr::Selector s, uint32_t val) { - return upb_sink_putuint32(sink_, s, val); - } - - bool PutUInt64(HandlersPtr::Selector s, uint64_t val) { - return upb_sink_putuint64(sink_, s, val); - } - - bool PutFloat(HandlersPtr::Selector s, float val) { - return upb_sink_putfloat(sink_, s, val); - } - - bool PutDouble(HandlersPtr::Selector s, double val) { - return upb_sink_putdouble(sink_, s, val); - } - - bool PutBool(HandlersPtr::Selector s, bool val) { - return upb_sink_putbool(sink_, s, val); - } - - /* Putting of string/bytes values. Each string can consist of zero or more - * non-contiguous buffers of data. - * - * For StartString(), the function will write a sink for the string to "sub." - * The sub-sink must be used for any/all PutStringBuffer() calls. */ - bool StartString(HandlersPtr::Selector s, size_t size_hint, Sink* sub) { - upb_sink sub_c; - bool ret = upb_sink_startstr(sink_, s, size_hint, &sub_c); - *sub = sub_c; - return ret; - } - - size_t PutStringBuffer(HandlersPtr::Selector s, const char *buf, size_t len, - const upb_bufhandle *handle) { - return upb_sink_putstring(sink_, s, buf, len, handle); - } - - bool EndString(HandlersPtr::Selector s) { - return upb_sink_endstr(sink_, s); - } - - /* For submessage fields. - * - * For StartSubMessage(), the function will write a sink for the string to - * "sub." The sub-sink must be used for any/all handlers called within the - * submessage. */ - bool StartSubMessage(HandlersPtr::Selector s, Sink* sub) { - upb_sink sub_c; - bool ret = upb_sink_startsubmsg(sink_, s, &sub_c); - *sub = sub_c; - return ret; - } - - bool EndSubMessage(HandlersPtr::Selector s, Sink sub) { - return upb_sink_endsubmsg(sink_, sub.sink_, s); - } - - /* For repeated fields of any type, the sequence of values must be wrapped in - * these calls. - * - * For StartSequence(), the function will write a sink for the string to - * "sub." The sub-sink must be used for any/all handlers called within the - * sequence. */ - bool StartSequence(HandlersPtr::Selector s, Sink* sub) { - upb_sink sub_c; - bool ret = upb_sink_startseq(sink_, s, &sub_c); - *sub = sub_c; - return ret; - } - - bool EndSequence(HandlersPtr::Selector s) { - return upb_sink_endseq(sink_, s); - } - - /* Copy and assign specifically allowed. - * We don't even bother making these members private because so many - * functions need them and this is mainly just a dumb data container anyway. - */ - - private: - upb_sink sink_; -}; - -#endif /* __cplusplus */ - -/* upb_bytessink **************************************************************/ - -typedef struct { - const upb_byteshandler *handler; - void *closure; -} upb_bytessink ; - -UPB_INLINE void upb_bytessink_reset(upb_bytessink* s, const upb_byteshandler *h, - void *closure) { - s->handler = h; - s->closure = closure; -} - -UPB_INLINE bool upb_bytessink_start(upb_bytessink s, size_t size_hint, - void **subc) { - typedef upb_startstr_handlerfunc func; - func *start; - *subc = s.closure; - if (!s.handler) return true; - start = (func *)s.handler->table[UPB_STARTSTR_SELECTOR].func; - - if (!start) return true; - *subc = start(s.closure, - s.handler->table[UPB_STARTSTR_SELECTOR].attr.handler_data, - size_hint); - return *subc != NULL; -} - -UPB_INLINE size_t upb_bytessink_putbuf(upb_bytessink s, void *subc, - const char *buf, size_t size, - const upb_bufhandle* handle) { - typedef upb_string_handlerfunc func; - func *putbuf; - if (!s.handler) return true; - putbuf = (func *)s.handler->table[UPB_STRING_SELECTOR].func; - - if (!putbuf) return true; - return putbuf(subc, s.handler->table[UPB_STRING_SELECTOR].attr.handler_data, - buf, size, handle); -} - -UPB_INLINE bool upb_bytessink_end(upb_bytessink s) { - typedef upb_endfield_handlerfunc func; - func *end; - if (!s.handler) return true; - end = (func *)s.handler->table[UPB_ENDSTR_SELECTOR].func; - - if (!end) return true; - return end(s.closure, - s.handler->table[UPB_ENDSTR_SELECTOR].attr.handler_data); -} - -#ifdef __cplusplus - -class upb::BytesSink { - public: - BytesSink() {} - - BytesSink(const BytesSink&) = default; - BytesSink& operator=(const BytesSink&) = default; - - BytesSink(const upb_bytessink& sink) : sink_(sink) {} - BytesSink &operator=(const upb_bytessink &sink) { - sink_ = sink; - return *this; - } - - upb_bytessink sink() { return sink_; } - - /* Constructs a new sink for the given frozen handlers and closure. - * - * TODO(haberman): once the Handlers know the expected closure type, verify - * that T matches it. */ - template BytesSink(const upb_byteshandler* handler, T* closure) { - upb_bytessink_reset(sink_, handler, closure); - } - - /* Resets the value of the sink. */ - template void Reset(const upb_byteshandler* handler, T* closure) { - upb_bytessink_reset(&sink_, handler, closure); - } - - bool Start(size_t size_hint, void **subc) { - return upb_bytessink_start(sink_, size_hint, subc); - } - - size_t PutBuffer(void *subc, const char *buf, size_t len, - const upb_bufhandle *handle) { - return upb_bytessink_putbuf(sink_, subc, buf, len, handle); - } - - bool End() { - return upb_bytessink_end(sink_); - } - - private: - upb_bytessink sink_; -}; - -#endif /* __cplusplus */ - -/* upb_bufsrc *****************************************************************/ - -#ifdef __cplusplus -extern "C" { -#endif - -bool upb_bufsrc_putbuf(const char *buf, size_t len, upb_bytessink sink); - -#ifdef __cplusplus -} /* extern "C" */ - -namespace upb { -template bool PutBuffer(const T& str, BytesSink sink) { - return upb_bufsrc_putbuf(str.data(), str.size(), sink.sink()); -} -} - -#endif /* __cplusplus */ - -#include "upb/port_undef.inc" - -#endif