From 4df91612df87d4fc96e7cfa3203f4e08a7d13e1b Mon Sep 17 00:00:00 2001 From: "Mark D. Roth" Date: Thu, 12 Mar 2020 09:09:06 -0700 Subject: [PATCH] Squashed 'third_party/upb/' changes from 97bcd5276c..4e2505edaa 4e2505edaa Merge pull request #259 from haberman/rmsubmodule 91fd76cc9c Merge pull request #258 from haberman/bazel_version d0d5339620 Removed .gitmodules, we don't use submodules any more. 6c5d5afc43 Remove compatibility code for old Bazel versions. c1357afb2e Merge pull request #230 from moroten/maybe-deps a3d693544b Merge pull request #257 from haberman/warnings2 63e673383b Fixed narrowing warnings in text_encode.c. 39bc93a527 Merge pull request #252 from haberman/jsondecode 0fdd65f223 Merge branch 'master' into jsondecode 82af3d661c Merge pull request #255 from protocolbuffers/const-array 90b3a20af0 Making sure _upb_fieldtype_to_sizelg2 is readonly 5667a7a806 Removed stray assert, causing C90 build errors. 23a5af3513 [json] fixed all remaining conformance bugs. 6ec4df82c1 Updated other parts of upb for new JSON name API. a292261aeb Added JSON decoder to conformance tests, and fixed tons of bugs. d49c1db6c2 New JSON decoder, string->msg using reflection. f593289087 Merge branch 'master' into maybe-deps 0c2046f732 Make workspace_deps.bzl overridable using maybe() git-subtree-dir: third_party/upb git-subtree-split: 4e2505edaa325bcf0ffd69eefa7cb810d6dde861 --- .gitmodules | 3 - BUILD | 2 + CMakeLists.txt | 2 + bazel/repository_defs.bzl | 15 - bazel/upb_proto_library.bzl | 33 - bazel/workspace_deps.bzl | 18 +- generated_for_cmake/upb/json/parser.c | 10 +- tests/conformance_upb.c | 46 +- upb/def.c | 168 ++- upb/def.h | 30 +- upb/json/parser.rl | 10 +- upb/json/printer.c | 8 +- upb/json_decode.c | 1405 +++++++++++++++++++++++++ upb/json_decode.h | 24 + upb/json_encode.c | 96 +- upb/msg.c | 2 +- upb/reflection.c | 22 +- upb/reflection.h | 3 + upb/text_encode.c | 2 +- 19 files changed, 1697 insertions(+), 202 deletions(-) delete mode 100644 .gitmodules delete mode 100644 bazel/repository_defs.bzl create mode 100644 upb/json_decode.c create mode 100644 upb/json_decode.h diff --git a/.gitmodules b/.gitmodules deleted file mode 100644 index 8b52c1d2165..00000000000 --- a/.gitmodules +++ /dev/null @@ -1,3 +0,0 @@ -[submodule "third_party/protobuf"] - path = third_party/protobuf - url = https://github.com/google/protobuf.git diff --git a/BUILD b/BUILD index 61601a067e1..cddda82d18c 100644 --- a/BUILD +++ b/BUILD @@ -162,9 +162,11 @@ cc_library( cc_library( name = "json", srcs = [ + "upb/json_decode.c", "upb/json_encode.c", ], hdrs = [ + "upb/json_decode.h", "upb/json_encode.h", ], deps = [ diff --git a/CMakeLists.txt b/CMakeLists.txt index 0f0b2420bb4..1f6004543fe 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -97,7 +97,9 @@ target_link_libraries(textformat port reflection) add_library(json + upb/json_decode.c upb/json_encode.c + upb/json_decode.h upb/json_encode.h) target_link_libraries(json port diff --git a/bazel/repository_defs.bzl b/bazel/repository_defs.bzl deleted file mode 100644 index 7b6e78e8b83..00000000000 --- a/bazel/repository_defs.bzl +++ /dev/null @@ -1,15 +0,0 @@ -# A hacky way to work around the fact that native.bazel_version is only -# available from WORKSPACE macros, not BUILD macros or rules. -# -# Hopefully we can remove this if/when this is fixed: -# https://github.com/bazelbuild/bazel/issues/8305 - -def _impl(repository_ctx): - s = "bazel_version = \"" + native.bazel_version + "\"" - repository_ctx.file("bazel_version.bzl", s) - repository_ctx.file("BUILD", "") - -bazel_version_repository = repository_rule( - implementation = _impl, - local = True, -) diff --git a/bazel/upb_proto_library.bzl b/bazel/upb_proto_library.bzl index 9ad578f96f6..596b9c4c41a 100644 --- a/bazel/upb_proto_library.bzl +++ b/bazel/upb_proto_library.bzl @@ -6,11 +6,6 @@ load("@bazel_skylib//lib:paths.bzl", "paths") load("@bazel_tools//tools/cpp:toolchain_utils.bzl", "find_cpp_toolchain") -# copybara:strip_for_google3_begin -load("@bazel_skylib//lib:versions.bzl", "versions") -load("@upb_bazel_version//:bazel_version.bzl", "bazel_version") -# copybara:strip_end - # Generic support code ######################################################### _is_bazel = not hasattr(native, "genmpm") @@ -79,34 +74,6 @@ def _cc_library_func(ctx, name, hdrs, srcs, dep_ccinfos): unsupported_features = ctx.disabled_features, ) - # copybara:strip_for_google3_begin - if bazel_version == "0.24.1": - # Compatibility code until gRPC is on 0.25.2 or later. - compilation_info = cc_common.compile( - ctx = ctx, - feature_configuration = feature_configuration, - cc_toolchain = toolchain, - srcs = srcs, - hdrs = hdrs, - compilation_contexts = compilation_contexts, - ) - linking_info = cc_common.link( - ctx = ctx, - feature_configuration = feature_configuration, - cc_toolchain = toolchain, - cc_compilation_outputs = compilation_info.cc_compilation_outputs, - linking_contexts = linking_contexts, - ) - return CcInfo( - compilation_context = compilation_info.compilation_context, - linking_context = linking_info.linking_context, - ) - - if not versions.is_at_least("0.25.2", bazel_version): - fail("upb requires Bazel >=0.25.2 or 0.24.1") - - # copybara:strip_end - blaze_only_args = {} if not _is_bazel: diff --git a/bazel/workspace_deps.bzl b/bazel/workspace_deps.bzl index a03a643a324..2a7f88f495f 100644 --- a/bazel/workspace_deps.bzl +++ b/bazel/workspace_deps.bzl @@ -1,33 +1,33 @@ load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive") load("@bazel_tools//tools/build_defs/repo:git.bzl", "git_repository") -load("//bazel:repository_defs.bzl", "bazel_version_repository") +load("@bazel_tools//tools/build_defs/repo:utils.bzl", "maybe") def upb_deps(): - bazel_version_repository( - name = "upb_bazel_version", - ) - - git_repository( + maybe( + git_repository, name = "com_google_absl", commit = "070f6e47b33a2909d039e620c873204f78809492", remote = "https://github.com/abseil/abseil-cpp.git", shallow_since = "1541627663 -0500", ) - git_repository( + maybe( + git_repository, name = "com_google_protobuf", remote = "https://github.com/protocolbuffers/protobuf.git", commit = "d41002663fd04325ead28439dfd5ce2822b0d6fb", ) - http_archive( + maybe( + http_archive, name = "bazel_skylib", strip_prefix = "bazel-skylib-master", urls = ["https://github.com/bazelbuild/bazel-skylib/archive/master.tar.gz"], ) - http_archive( + maybe( + http_archive, name = "zlib", build_file = "@com_google_protobuf//:third_party/zlib.BUILD", sha256 = "629380c90a77b964d896ed37163f5c3a34f6e6d897311f1df2a7016355c45eff", diff --git a/generated_for_cmake/upb/json/parser.c b/generated_for_cmake/upb/json/parser.c index 33bb442f51b..03087845d5f 100644 --- a/generated_for_cmake/upb/json/parser.c +++ b/generated_for_cmake/upb/json/parser.c @@ -3311,15 +3311,13 @@ static upb_json_parsermethod *parsermethod_new(upb_json_codecache *c, upb_msg_field_next(&i)) { const upb_fielddef *f = upb_msg_iter_field(&i); upb_value v = upb_value_constptr(f); - char *buf; + const char *name; /* Add an entry for the JSON name. */ - size_t len = upb_fielddef_getjsonname(f, NULL, 0); - buf = upb_malloc(alloc, len); - upb_fielddef_getjsonname(f, buf, len); - upb_strtable_insert3(&m->name_table, buf, strlen(buf), v, alloc); + name = upb_fielddef_jsonname(f); + upb_strtable_insert3(&m->name_table, name, strlen(name), v, alloc); - if (strcmp(buf, upb_fielddef_name(f)) != 0) { + if (strcmp(name, upb_fielddef_name(f)) != 0) { /* Since the JSON name is different from the regular field name, add an * entry for the raw name (compliant proto3 JSON parsers must accept * both). */ diff --git a/tests/conformance_upb.c b/tests/conformance_upb.c index 8285242d3ed..1d87060095f 100644 --- a/tests/conformance_upb.c +++ b/tests/conformance_upb.c @@ -15,6 +15,7 @@ #include "upb/decode.h" #include "upb/encode.h" #include "upb/reflection.h" +#include "upb/json_decode.h" #include "upb/json_encode.h" #include "upb/text_encode.h" @@ -85,9 +86,11 @@ void serialize_text(const upb_msg *msg, const upb_msgdef *m, const ctx *c) { size_t len2; int opts = 0; char *data; + if (!conformance_ConformanceRequest_print_unknown_fields(c->request)) { opts |= UPB_TXTENC_SKIPUNKNOWN; } + len = upb_text_encode(msg, m, c->symtab, opts, NULL, 0); data = upb_arena_malloc(c->arena, len + 1); len2 = upb_text_encode(msg, m, c->symtab, opts, data, len + 1); @@ -96,6 +99,33 @@ void serialize_text(const upb_msg *msg, const upb_msgdef *m, const ctx *c) { c->response, upb_strview_make(data, len)); } +bool parse_json(upb_msg *msg, const upb_msgdef *m, const ctx* c) { + upb_strview json = + conformance_ConformanceRequest_json_payload(c->request); + upb_status status; + int opts = 0; + + if (conformance_ConformanceRequest_test_category(c->request) == + conformance_JSON_IGNORE_UNKNOWN_PARSING_TEST) { + opts |= UPB_JSONDEC_IGNOREUNKNOWN; + } + + upb_status_clear(&status); + if (upb_json_decode(json.data, json.size, msg, m, c->symtab, opts, c->arena, + &status)) { + return true; + } else { + const char *inerr = upb_status_errmsg(&status); + size_t len = strlen(inerr); + char *err = upb_arena_malloc(c->arena, len + 1); + memcpy(err, inerr, strlen(inerr)); + err[len] = '\0'; + conformance_ConformanceResponse_set_parse_error(c->response, + upb_strview_makez(err)); + return false; + } +} + void serialize_json(const upb_msg *msg, const upb_msgdef *m, const ctx *c) { size_t len; size_t len2; @@ -104,16 +134,16 @@ void serialize_json(const upb_msg *msg, const upb_msgdef *m, const ctx *c) { upb_status status; upb_status_clear(&status); - if (!conformance_ConformanceRequest_print_unknown_fields(c->request)) { - opts |= UPB_TXTENC_SKIPUNKNOWN; - } - len = upb_json_encode(msg, m, c->symtab, opts, NULL, 0, &status); if (len == -1) { - static const char msg[] = "Error serializing."; - conformance_ConformanceResponse_set_serialize_error( - c->response, upb_strview_make(msg, strlen(msg))); + const char *inerr = upb_status_errmsg(&status); + size_t len = strlen(inerr); + char *err = upb_arena_malloc(c->arena, len + 1); + memcpy(err, inerr, strlen(inerr)); + err[len] = '\0'; + conformance_ConformanceResponse_set_serialize_error(c->response, + upb_strview_makez(err)); return; } @@ -128,6 +158,8 @@ bool parse_input(upb_msg *msg, const upb_msgdef *m, const ctx* c) { switch (conformance_ConformanceRequest_payload_case(c->request)) { case conformance_ConformanceRequest_payload_protobuf_payload: return parse_proto(msg, m, c); + case conformance_ConformanceRequest_payload_json_payload: + return parse_json(msg, m, c); case conformance_ConformanceRequest_payload_NOT_SET: fprintf(stderr, "conformance_upb: Request didn't have payload.\n"); return false; diff --git a/upb/def.c b/upb/def.c index 8385e9638a4..f1b477bdc90 100644 --- a/upb/def.c +++ b/upb/def.c @@ -27,6 +27,7 @@ struct upb_fielddef { const upb_filedef *file; const upb_msgdef *msgdef; const char *full_name; + const char *json_name; union { int64_t sint; uint64_t uint; @@ -117,10 +118,15 @@ struct upb_symtab { /* Inside a symtab we store tagged pointers to specific def types. */ typedef enum { - UPB_DEFTYPE_MSG = 0, - UPB_DEFTYPE_ENUM = 1, - UPB_DEFTYPE_FIELD = 2, - UPB_DEFTYPE_ONEOF = 3 + UPB_DEFTYPE_FIELD = 0, + + /* Only inside symtab table. */ + UPB_DEFTYPE_MSG = 1, + UPB_DEFTYPE_ENUM = 2, + + /* Only inside message table. */ + UPB_DEFTYPE_ONEOF = 1, + UPB_DEFTYPE_FIELD_JSONNAME = 2 } upb_deftype_t; static const void *unpack_def(upb_value v, upb_deftype_t type) { @@ -462,47 +468,12 @@ const char *upb_fielddef_name(const upb_fielddef *f) { return shortdefname(f->full_name); } -uint32_t upb_fielddef_selectorbase(const upb_fielddef *f) { - return f->selector_base; +const char *upb_fielddef_jsonname(const upb_fielddef *f) { + return f->json_name; } -size_t upb_fielddef_getjsonname(const upb_fielddef *f, char *buf, size_t len) { - const char *name = upb_fielddef_name(f); - size_t src, dst = 0; - bool ucase_next = false; - -#define WRITE(byte) \ - ++dst; \ - if (dst < len) buf[dst - 1] = byte; \ - else if (dst == len) buf[dst - 1] = '\0' - - if (!name) { - WRITE('\0'); - return 0; - } - - /* Implement the transformation as described in the spec: - * 1. upper case all letters after an underscore. - * 2. remove all underscores. - */ - for (src = 0; name[src]; src++) { - if (name[src] == '_') { - ucase_next = true; - continue; - } - - if (ucase_next) { - WRITE(toupper(name[src])); - ucase_next = false; - } else { - WRITE(name[src]); - } - } - - WRITE('\0'); - return dst; - -#undef WRITE +uint32_t upb_fielddef_selectorbase(const upb_fielddef *f) { + return f->selector_base; } const upb_msgdef *upb_fielddef_containingtype(const upb_fielddef *f) { @@ -690,18 +661,30 @@ bool upb_msgdef_lookupname(const upb_msgdef *m, const char *name, size_t len, *o = unpack_def(val, UPB_DEFTYPE_ONEOF); *f = unpack_def(val, UPB_DEFTYPE_FIELD); - UPB_ASSERT((*o != NULL) ^ (*f != NULL)); /* Exactly one of the two should be set. */ - return true; + return *o || *f; /* False if this was a JSON name. */ +} + +const upb_fielddef *upb_msgdef_lookupjsonname(const upb_msgdef *m, + const char *name, size_t len) { + upb_value val; + const upb_fielddef* f; + + if (!upb_strtable_lookup2(&m->ntof, name, len, &val)) { + return NULL; + } + + f = unpack_def(val, UPB_DEFTYPE_FIELD); + if (!f) f = unpack_def(val, UPB_DEFTYPE_FIELD_JSONNAME); + + return f; } int upb_msgdef_numfields(const upb_msgdef *m) { - /* The number table contains only fields. */ - return (int)upb_inttable_count(&m->itof); + return m->field_count; } int upb_msgdef_numoneofs(const upb_msgdef *m) { - /* The name table includes oneofs, and the number table does not. */ - return (int)(upb_strtable_count(&m->ntof) - upb_inttable_count(&m->itof)); + return m->oneof_count; } const upb_msglayout *upb_msgdef_layout(const upb_msgdef *m) { @@ -1098,6 +1081,51 @@ static const char *makefullname(const symtab_addctx *ctx, const char *prefix, } } +size_t getjsonname(const char *name, char *buf, size_t len) { + size_t src, dst = 0; + bool ucase_next = false; + +#define WRITE(byte) \ + ++dst; \ + if (dst < len) buf[dst - 1] = byte; \ + else if (dst == len) buf[dst - 1] = '\0' + + if (!name) { + WRITE('\0'); + return 0; + } + + /* Implement the transformation as described in the spec: + * 1. upper case all letters after an underscore. + * 2. remove all underscores. + */ + for (src = 0; name[src]; src++) { + if (name[src] == '_') { + ucase_next = true; + continue; + } + + if (ucase_next) { + WRITE(toupper(name[src])); + ucase_next = false; + } else { + WRITE(name[src]); + } + } + + WRITE('\0'); + return dst; + +#undef WRITE +} + +static char* makejsonname(const char* name, upb_alloc *alloc) { + size_t size = getjsonname(name, NULL, 0); + char* json_name = upb_malloc(alloc, size); + getjsonname(name, json_name, size); + return json_name; +} + static bool symtab_add(const symtab_addctx *ctx, const char *name, upb_value v) { upb_value tmp; @@ -1311,6 +1339,7 @@ static bool create_fielddef( const google_protobuf_FieldOptions *options; upb_strview name; const char *full_name; + const char *json_name; const char *shortname; uint32_t field_number; @@ -1324,6 +1353,13 @@ static bool create_fielddef( full_name = makefullname(ctx, prefix, name); shortname = shortdefname(full_name); + if (google_protobuf_FieldDescriptorProto_has_json_name(field_proto)) { + json_name = strviewdup( + ctx, google_protobuf_FieldDescriptorProto_json_name(field_proto)); + } else { + json_name = makejsonname(shortname, ctx->alloc); + } + field_number = google_protobuf_FieldDescriptorProto_number(field_proto); if (field_number == 0 || field_number > UPB_MAX_FIELDNUMBER) { @@ -1333,26 +1369,42 @@ static bool create_fielddef( if (m) { /* direct message field. */ - upb_value v, packed_v; + upb_value v, field_v, json_v; + size_t json_size; f = (upb_fielddef*)&m->fields[m->field_count++]; f->msgdef = m; f->is_extension_ = false; - packed_v = pack_def(f, UPB_DEFTYPE_FIELD); - v = upb_value_constptr(f); - - if (!upb_strtable_insert3(&m->ntof, name.data, name.size, packed_v, alloc)) { + if (upb_strtable_lookup(&m->ntof, shortname, NULL)) { upb_status_seterrf(ctx->status, "duplicate field name (%s)", shortname); return false; } - if (!upb_inttable_insert2(&m->itof, field_number, v, alloc)) { + if (upb_strtable_lookup(&m->ntof, json_name, NULL)) { + upb_status_seterrf(ctx->status, "duplicate json_name (%s)", json_name); + return false; + } + + if (upb_inttable_lookup(&m->itof, field_number, NULL)) { upb_status_seterrf(ctx->status, "duplicate field number (%u)", field_number); return false; } + field_v = pack_def(f, UPB_DEFTYPE_FIELD); + json_v = pack_def(f, UPB_DEFTYPE_FIELD_JSONNAME); + v = upb_value_constptr(f); + json_size = strlen(json_name); + + CHK_OOM( + upb_strtable_insert3(&m->ntof, name.data, name.size, field_v, alloc)); + CHK_OOM(upb_inttable_insert2(&m->itof, field_number, v, alloc)); + + if (strcmp(shortname, json_name) != 0) { + upb_strtable_insert3(&m->ntof, json_name, json_size, json_v, alloc); + } + if (ctx->layouts) { const upb_msglayout_field *fields = m->layout->fields; int count = m->layout->field_count; @@ -1369,12 +1421,13 @@ static bool create_fielddef( } } else { /* extension field. */ - f = (upb_fielddef*)&ctx->file->exts[ctx->file->ext_count]; + f = (upb_fielddef*)&ctx->file->exts[ctx->file->ext_count++]; f->is_extension_ = true; CHK_OOM(symtab_add(ctx, full_name, pack_def(f, UPB_DEFTYPE_FIELD))); } f->full_name = full_name; + f->json_name = json_name; f->file = ctx->file; f->type_ = (int)google_protobuf_FieldDescriptorProto_type(field_proto); f->label_ = (int)google_protobuf_FieldDescriptorProto_label(field_proto); @@ -1741,7 +1794,8 @@ static bool build_filedef( } else if (streql_view(syntax, "proto3")) { file->syntax = UPB_SYNTAX_PROTO3; } else { - upb_status_seterrf(ctx->status, "Invalid syntax '%s'", syntax); + upb_status_seterrf(ctx->status, "Invalid syntax '" UPB_STRVIEW_FORMAT "'", + UPB_STRVIEW_ARGS(syntax)); return false; } } else { diff --git a/upb/def.h b/upb/def.h index 5bc361e28bc..48e113dffee 100644 --- a/upb/def.h +++ b/upb/def.h @@ -99,10 +99,10 @@ upb_descriptortype_t upb_fielddef_descriptortype(const upb_fielddef *f); upb_label_t upb_fielddef_label(const upb_fielddef *f); uint32_t upb_fielddef_number(const upb_fielddef *f); const char *upb_fielddef_name(const upb_fielddef *f); +const char *upb_fielddef_jsonname(const upb_fielddef *f); bool upb_fielddef_isextension(const upb_fielddef *f); bool upb_fielddef_lazy(const upb_fielddef *f); bool upb_fielddef_packed(const upb_fielddef *f); -size_t upb_fielddef_getjsonname(const upb_fielddef *f, char *buf, size_t len); const upb_msgdef *upb_fielddef_containingtype(const upb_fielddef *f); const upb_oneofdef *upb_fielddef_containingoneof(const upb_fielddef *f); uint32_t upb_fielddef_index(const upb_fielddef *f); @@ -151,32 +151,10 @@ class upb::FieldDefPtr { Type type() const { return upb_fielddef_type(ptr_); } Label label() const { return upb_fielddef_label(ptr_); } const char* name() const { return upb_fielddef_name(ptr_); } + const char* json_name() const { return upb_fielddef_jsonname(ptr_); } uint32_t number() const { return upb_fielddef_number(ptr_); } bool is_extension() const { return upb_fielddef_isextension(ptr_); } - /* Copies the JSON name for this field into the given buffer. Returns the - * actual size of the JSON name, including the NULL terminator. If the - * return value is 0, the JSON name is unset. If the return value is - * greater than len, the JSON name was truncated. The buffer is always - * NULL-terminated if len > 0. - * - * The JSON name always defaults to a camelCased version of the regular - * name. However if the regular name is unset, the JSON name will be unset - * also. - */ - size_t GetJsonName(char *buf, size_t len) const { - return upb_fielddef_getjsonname(ptr_, buf, len); - } - - /* Convenience version of the above function which copies the JSON name - * into the given string, returning false if the name is not set. */ - template - bool GetJsonName(T* str) { - str->resize(GetJsonName(NULL, 0)); - GetJsonName(&(*str)[0], str->size()); - return str->size() > 0; - } - /* For UPB_TYPE_MESSAGE fields only where is_tag_delimited() == false, * indicates whether this field should have lazy parsing handlers that yield * the unparsed string for the submessage. @@ -455,6 +433,10 @@ UPB_INLINE bool upb_msgdef_lookupnamez(const upb_msgdef *m, const char *name, return upb_msgdef_lookupname(m, name, strlen(name), f, o); } +/* Returns a field by either JSON name or regular proto name. */ +const upb_fielddef *upb_msgdef_lookupjsonname(const upb_msgdef *m, + const char *name, size_t len); + /* Iteration over fields and oneofs. For example: * * upb_msg_field_iter i; diff --git a/upb/json/parser.rl b/upb/json/parser.rl index de381b25712..19022df298a 100644 --- a/upb/json/parser.rl +++ b/upb/json/parser.rl @@ -2874,15 +2874,13 @@ static upb_json_parsermethod *parsermethod_new(upb_json_codecache *c, upb_msg_field_next(&i)) { const upb_fielddef *f = upb_msg_iter_field(&i); upb_value v = upb_value_constptr(f); - char *buf; + const char *name; /* Add an entry for the JSON name. */ - size_t len = upb_fielddef_getjsonname(f, NULL, 0); - buf = upb_malloc(alloc, len); - upb_fielddef_getjsonname(f, buf, len); - upb_strtable_insert3(&m->name_table, buf, strlen(buf), v, alloc); + name = upb_fielddef_jsonname(f); + upb_strtable_insert3(&m->name_table, name, strlen(name), v, alloc); - if (strcmp(buf, upb_fielddef_name(f)) != 0) { + if (strcmp(name, upb_fielddef_name(f)) != 0) { /* Since the JSON name is different from the regular field name, add an * entry for the raw name (compliant proto3 JSON parsers must accept * both). */ diff --git a/upb/json/printer.c b/upb/json/printer.c index 930ee441f6f..b0030e0e525 100644 --- a/upb/json/printer.c +++ b/upb/json/printer.c @@ -65,12 +65,8 @@ strpc *newstrpc(upb_handlers *h, const upb_fielddef *f, ret->ptr = upb_gstrdup(upb_fielddef_name(f)); ret->len = strlen(ret->ptr); } else { - size_t len; - ret->len = upb_fielddef_getjsonname(f, NULL, 0); - ret->ptr = upb_gmalloc(ret->len); - len = upb_fielddef_getjsonname(f, ret->ptr, ret->len); - UPB_ASSERT(len == ret->len); - ret->len--; /* NULL */ + ret->ptr = upb_gstrdup(upb_fielddef_jsonname(f)); + ret->len = strlen(ret->ptr); } upb_handlers_addcleanup(h, ret, freestrpc); diff --git a/upb/json_decode.c b/upb/json_decode.c new file mode 100644 index 00000000000..54a55ad135c --- /dev/null +++ b/upb/json_decode.c @@ -0,0 +1,1405 @@ + +#include "upb/json_decode.h" + +#include +#include +#include +#include +#include +#include + +#include "upb/encode.h" +#include "upb/reflection.h" + +/* Special header, must be included last. */ +#include "upb/port_def.inc" + +typedef struct { + const char *ptr, *end; + upb_arena *arena; /* TODO: should we have a tmp arena for tmp data? */ + const upb_symtab *any_pool; + int depth; + upb_status *status; + jmp_buf err; + int line; + const char *line_begin; + bool is_first; + int options; + const upb_fielddef *debug_field; +} jsondec; + +enum { JD_OBJECT, JD_ARRAY, JD_STRING, JD_NUMBER, JD_TRUE, JD_FALSE, JD_NULL }; + +/* Forward declarations of mutually-recursive functions. */ +static void jsondec_wellknown(jsondec *d, upb_msg *msg, const upb_msgdef *m); +static upb_msgval jsondec_value(jsondec *d, const upb_fielddef *f); +static void jsondec_wellknownvalue(jsondec *d, upb_msg *msg, + const upb_msgdef *m); +static void jsondec_object(jsondec *d, upb_msg *msg, const upb_msgdef *m); + +static bool jsondec_streql(upb_strview str, const char *lit) { + return str.size == strlen(lit) && memcmp(str.data, lit, str.size) == 0; +} + +UPB_NORETURN static void jsondec_err(jsondec *d, const char *msg) { + upb_status_seterrmsg(d->status, msg); + longjmp(d->err, 1); +} + +UPB_NORETURN static void jsondec_errf(jsondec *d, const char *fmt, ...) { + va_list argp; + va_start(argp, fmt); + upb_status_vseterrf(d->status, fmt, argp); + va_end(argp); + longjmp(d->err, 1); +} + +static void jsondec_skipws(jsondec *d) { + while (d->ptr != d->end) { + switch (*d->ptr) { + case '\n': + d->line++; + d->line_begin = d->ptr; + /* Fallthrough. */ + case '\r': + case '\t': + case ' ': + d->ptr++; + break; + default: + return; + } + } + jsondec_err(d, "Unexpected EOF"); +} + +static bool jsondec_tryparsech(jsondec *d, char ch) { + if (d->ptr == d->end || *d->ptr != ch) return false; + d->ptr++; + return true; +} + +static void jsondec_parselit(jsondec *d, const char *lit) { + size_t len = strlen(lit); + if (d->end - d->ptr < len || memcmp(d->ptr, lit, len) != 0) { + jsondec_errf(d, "Expected: '%s'", lit); + } + d->ptr += len; +} + +static void jsondec_wsch(jsondec *d, char ch) { + jsondec_skipws(d); + if (!jsondec_tryparsech(d, ch)) { + jsondec_errf(d, "Expected: '%c'", ch); + } +} + +static void jsondec_true(jsondec *d) { jsondec_parselit(d, "true"); } +static void jsondec_false(jsondec *d) { jsondec_parselit(d, "false"); } +static void jsondec_null(jsondec *d) { jsondec_parselit(d, "null"); } + +static void jsondec_entrysep(jsondec *d) { + jsondec_skipws(d); + jsondec_parselit(d, ":"); +} + +static int jsondec_rawpeek(jsondec *d) { + switch (*d->ptr) { + case '{': + return JD_OBJECT; + case '[': + return JD_ARRAY; + case '"': + return JD_STRING; + case '-': + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + return JD_NUMBER; + case 't': + return JD_TRUE; + case 'f': + return JD_FALSE; + case 'n': + return JD_NULL; + default: + jsondec_errf(d, "Unexpected character: '%c'", *d->ptr); + } +} + +/* JSON object/array **********************************************************/ + +/* These are used like so: + * + * jsondec_objstart(d); + * while (jsondec_objnext(d)) { + * ... + * } + * jsondec_objend(d) */ + +static int jsondec_peek(jsondec *d) { + jsondec_skipws(d); + return jsondec_rawpeek(d); +} + +static void jsondec_push(jsondec *d) { + if (--d->depth < 0) { + jsondec_err(d, "Recursion limit exceeded"); + } + d->is_first = true; +} + +static bool jsondec_seqnext(jsondec *d, char end_ch) { + jsondec_skipws(d); + if (*d->ptr == end_ch) return false; + + if (d->is_first) { + d->is_first = false; + } else { + jsondec_parselit(d, ","); + } + + return true; +} + +static void jsondec_arrstart(jsondec *d) { + jsondec_push(d); + jsondec_wsch(d, '['); +} + +static void jsondec_arrend(jsondec *d) { + d->depth++; + jsondec_wsch(d, ']'); +} + +static bool jsondec_arrnext(jsondec *d) { + return jsondec_seqnext(d, ']'); +} + +static void jsondec_objstart(jsondec *d) { + jsondec_push(d); + jsondec_wsch(d, '{'); +} + +static void jsondec_objend(jsondec *d) { + d->depth++; + jsondec_wsch(d, '}'); +} + +static bool jsondec_objnext(jsondec *d) { + if (!jsondec_seqnext(d, '}')) return false; + if (jsondec_peek(d) != JD_STRING) { + jsondec_err(d, "Object must start with string"); + } + return true; +} + +/* JSON number ****************************************************************/ + +static bool jsondec_tryskipdigits(jsondec *d) { + const char *start = d->ptr; + + while (d->ptr < d->end) { + if (*d->ptr < '0' || *d->ptr > '9') { + break; + } + d->ptr++; + } + + return d->ptr != start; +} + +static void jsondec_skipdigits(jsondec *d) { + if (!jsondec_tryskipdigits(d)) { + jsondec_err(d, "Expected one or more digits"); + } +} + +static double jsondec_number(jsondec *d) { + const char *start = d->ptr; + + assert(jsondec_rawpeek(d) == JD_NUMBER); + + /* Skip over the syntax of a number, as specified by JSON. */ + if (*d->ptr == '-') d->ptr++; + + if (jsondec_tryparsech(d, '0')) { + if (jsondec_tryskipdigits(d)) { + jsondec_err(d, "number cannot have leading zero"); + } + } else { + jsondec_skipdigits(d); + } + + if (d->ptr == d->end) goto parse; + if (jsondec_tryparsech(d, '.')) { + jsondec_skipdigits(d); + } + if (d->ptr == d->end) goto parse; + + if (*d->ptr == 'e' || *d->ptr == 'E') { + d->ptr++; + if (d->ptr == d->end) { + jsondec_err(d, "Unexpected EOF in number"); + } + if (*d->ptr == '+' || *d->ptr == '-') { + d->ptr++; + } + jsondec_skipdigits(d); + } + +parse: + /* Having verified the syntax of a JSON number, use strtod() to parse + * (strtod() accepts a superset of JSON syntax). */ + errno = 0; + { + char* end; + double val = strtod(start, &end); + assert(end == d->ptr); + + /* Currently the min/max-val conformance tests fail if we check this. Does + * this mean the conformance tests are wrong or strtod() is wrong, or + * something else? Investigate further. */ + /* + if (errno == ERANGE) { + jsondec_err(d, "Number out of range"); + } + */ + + if (val > DBL_MAX || val < -DBL_MAX) { + jsondec_err(d, "Number out of range"); + } + + return val; + } +} + +/* JSON string ****************************************************************/ + +static char jsondec_escape(jsondec *d) { + switch (*d->ptr++) { + case '"': + return '\"'; + case '\\': + return '\\'; + case '/': + return '/'; + case 'b': + return '\b'; + case 'f': + return '\f'; + case 'n': + return '\n'; + case 'r': + return '\r'; + case 't': + return '\t'; + default: + jsondec_err(d, "Invalid escape char"); + } +} + +static uint32_t jsondec_codepoint(jsondec *d) { + uint32_t cp = 0; + const char *end; + + if (d->end - d->ptr < 4) { + jsondec_err(d, "EOF inside string"); + } + + end = d->ptr + 4; + while (d->ptr < end) { + char ch = *d->ptr++; + if (ch >= '0' && ch <= '9') { + ch -= '0'; + } else if (ch >= 'a' && ch <= 'f') { + ch = ch - 'a' + 10; + } else if (ch >= 'A' && ch <= 'F') { + ch = ch - 'A' + 10; + } else { + jsondec_err(d, "Invalid hex digit"); + } + cp = (cp << 4) | ch; + } + + return cp; +} + +/* Parses a \uXXXX unicode escape (possibly a surrogate pair). */ +static size_t jsondec_unicode(jsondec *d, char* out) { + uint32_t cp = jsondec_codepoint(d); + if (cp >= 0xd800 && cp <= 0xdbff) { + /* Surrogate pair: two 16-bit codepoints become a 32-bit codepoint. */ + uint32_t high = cp; + uint32_t low; + jsondec_parselit(d, "\\u"); + low = jsondec_codepoint(d); + if (low < 0xdc00 || low > 0xdfff) { + jsondec_err(d, "Invalid low surrogate"); + } + cp = (high & 0x3ff) << 10; + cp |= (low & 0x3ff); + cp += 0x10000; + } else if (cp >= 0xdc00 && cp <= 0xdfff) { + jsondec_err(d, "Unpaired low surrogate"); + } + + /* Write to UTF-8 */ + if (cp <= 0x7f) { + out[0] = cp; + return 1; + } else if (cp <= 0x07FF) { + out[0] = ((cp >> 6) & 0x1F) | 0xC0; + out[1] = ((cp >> 0) & 0x3F) | 0x80; + return 2; + } else if (cp <= 0xFFFF) { + out[0] = ((cp >> 12) & 0x0F) | 0xE0; + out[1] = ((cp >> 6) & 0x3F) | 0x80; + out[2] = ((cp >> 0) & 0x3F) | 0x80; + return 3; + } else if (cp < 0x10FFFF) { + out[0] = ((cp >> 18) & 0x07) | 0xF0; + out[1] = ((cp >> 12) & 0x3f) | 0x80; + out[2] = ((cp >> 6) & 0x3f) | 0x80; + out[3] = ((cp >> 0) & 0x3f) | 0x80; + return 4; + } else { + jsondec_err(d, "Invalid codepoint"); + } +} + +static void jsondec_resize(jsondec *d, char **buf, char **end, char **buf_end) { + size_t oldsize = *buf_end - *buf; + size_t len = *end - *buf; + size_t size = UPB_MAX(8, 2 * oldsize); + + *buf = upb_arena_realloc(d->arena, *buf, len, size); + *end = *buf + len; + *buf_end = *buf + size; +} + +static upb_strview jsondec_string(jsondec *d) { + char *buf = NULL; + char *end = NULL; + char *buf_end = NULL; + + jsondec_skipws(d); + + if (*d->ptr++ != '"') { + jsondec_err(d, "Expected string"); + } + + while (d->ptr < d->end) { + char ch = *d->ptr++; + + if (end == buf_end) { + jsondec_resize(d, &buf, &end, &buf_end); + } + + switch (ch) { + case '"': { + upb_strview ret = {buf, end - buf}; + return ret; + } + case '\\': + if (d->ptr == d->end) goto eof; + if (*d->ptr == 'u') { + d->ptr++; + if (buf_end - end < 4) { + // Allow space for maximum-sized code point (4 bytes). + jsondec_resize(d, &buf, &end, &buf_end); + } + end += jsondec_unicode(d, end); + } else { + *end++ = jsondec_escape(d); + } + break; + default: + if ((unsigned char)*d->ptr < 0x20) { + jsondec_err(d, "Invalid char in JSON string"); + } + *end++ = ch; + break; + } + } + +eof: + jsondec_err(d, "EOF inside string"); +} + +static void jsondec_skipval(jsondec *d) { + switch (jsondec_peek(d)) { + case JD_OBJECT: + jsondec_objstart(d); + while (jsondec_objnext(d)) { + jsondec_string(d); + jsondec_entrysep(d); + jsondec_skipval(d); + } + jsondec_objend(d); + break; + case JD_ARRAY: + jsondec_arrstart(d); + while (jsondec_arrnext(d)) { + jsondec_skipval(d); + } + jsondec_arrend(d); + break; + case JD_TRUE: + jsondec_true(d); + break; + case JD_FALSE: + jsondec_false(d); + break; + case JD_NULL: + jsondec_null(d); + break; + case JD_STRING: + jsondec_string(d); + break; + case JD_NUMBER: + jsondec_number(d); + break; + } +} + +/* Base64 decoding for bytes fields. ******************************************/ + +static int jsondec_base64_tablelookup(const char ch) { + /* Table includes the normal base64 chars plus the URL-safe variant. */ + const signed char table[256] = { + -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, + -1, 62 /*+*/, -1, 62 /*-*/, -1, 63 /*/ */, 52 /*0*/, + 53 /*1*/, 54 /*2*/, 55 /*3*/, 56 /*4*/, 57 /*5*/, 58 /*6*/, 59 /*7*/, + 60 /*8*/, 61 /*9*/, -1, -1, -1, -1, -1, + -1, -1, 0 /*A*/, 1 /*B*/, 2 /*C*/, 3 /*D*/, 4 /*E*/, + 5 /*F*/, 6 /*G*/, 07 /*H*/, 8 /*I*/, 9 /*J*/, 10 /*K*/, 11 /*L*/, + 12 /*M*/, 13 /*N*/, 14 /*O*/, 15 /*P*/, 16 /*Q*/, 17 /*R*/, 18 /*S*/, + 19 /*T*/, 20 /*U*/, 21 /*V*/, 22 /*W*/, 23 /*X*/, 24 /*Y*/, 25 /*Z*/, + -1, -1, -1, -1, 63 /*_*/, -1, 26 /*a*/, + 27 /*b*/, 28 /*c*/, 29 /*d*/, 30 /*e*/, 31 /*f*/, 32 /*g*/, 33 /*h*/, + 34 /*i*/, 35 /*j*/, 36 /*k*/, 37 /*l*/, 38 /*m*/, 39 /*n*/, 40 /*o*/, + 41 /*p*/, 42 /*q*/, 43 /*r*/, 44 /*s*/, 45 /*t*/, 46 /*u*/, 47 /*v*/, + 48 /*w*/, 49 /*x*/, 50 /*y*/, 51 /*z*/, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1}; + + /* Sign-extend return value so high bit will be set on any unexpected char. */ + return table[(unsigned)ch]; +} + +static char *jsondec_partialbase64(jsondec *d, const char *ptr, const char *end, + char *out) { + int32_t val = -1; + + switch (end - ptr) { + case 2: + val = jsondec_base64_tablelookup(ptr[0]) << 18 | + jsondec_base64_tablelookup(ptr[1]) << 12; + out[0] = val >> 16; + out += 1; + break; + case 3: + val = jsondec_base64_tablelookup(ptr[0]) << 18 | + jsondec_base64_tablelookup(ptr[1]) << 12 | + jsondec_base64_tablelookup(ptr[2]) << 6; + out[0] = val >> 16; + out[1] = (val >> 8) & 0xff; + out += 2; + break; + } + + if (val < 0) { + jsondec_err(d, "Corrupt base64"); + } + + return out; +} + +static size_t jsondec_base64(jsondec *d, upb_strview str) { + /* We decode in place. This is safe because this is a new buffer (not + * aliasing the input) and because base64 decoding shrinks 4 bytes into 3. */ + char *out = (char*)str.data; + const char *ptr = str.data; + const char *end = ptr + str.size; + const char *end4 = ptr + (str.size & -4); /* Round down to multiple of 4. */ + + for (; ptr < end4; ptr += 4, out += 3) { + int val = jsondec_base64_tablelookup(ptr[0]) << 18 | + jsondec_base64_tablelookup(ptr[1]) << 12 | + jsondec_base64_tablelookup(ptr[2]) << 6 | + jsondec_base64_tablelookup(ptr[3]) << 0; + + if (val < 0) { + /* Junk chars or padding. Remove trailing padding, if any. */ + if (end - ptr == 4 && ptr[3] == '=') { + if (ptr[2] == '=') { + end -= 2; + } else { + end -= 1; + } + } + break; + } + + out[0] = val >> 16; + out[1] = (val >> 8) & 0xff; + out[2] = val & 0xff; + } + + if (ptr < end) { + /* Process remaining chars. We do not require padding. */ + out = jsondec_partialbase64(d, ptr, end, out); + } + + return out - str.data; +} + +/* Low-level integer parsing **************************************************/ + +/* We use these hand-written routines instead of strto[u]l() because the "long + * long" variants aren't in c89. Also our version allows setting a ptr limit. */ + +static const char *jsondec_buftouint64(jsondec *d, const char *ptr, + const char *end, uint64_t *val) { + uint64_t u64 = 0; + while (ptr < end) { + unsigned ch = *ptr - '0'; + if (ch >= 10) break; + if (u64 > UINT64_MAX / 10 || u64 * 10 > UINT64_MAX - ch) { + jsondec_err(d, "Integer overflow"); + } + u64 *= 10; + u64 += ch; + ptr++; + } + + *val = u64; + return ptr; +} + +static const char *jsondec_buftoint64(jsondec *d, const char *ptr, + const char *end, int64_t *val) { + bool neg = false; + uint64_t u64; + + if (ptr != end && *ptr == '-') { + ptr++; + neg = true; + } + + ptr = jsondec_buftouint64(d, ptr, end, &u64); + if (u64 > (uint64_t)INT64_MAX + neg) { + jsondec_err(d, "Integer overflow"); + } + + *val = neg ? -u64 : u64; + return ptr; +} + +static uint64_t jsondec_strtouint64(jsondec *d, upb_strview str) { + const char *end = str.data + str.size; + uint64_t ret; + if (jsondec_buftouint64(d, str.data, end, &ret) != end) { + jsondec_err(d, "Non-number characters in quoted integer"); + } + return ret; +} + +static int64_t jsondec_strtoint64(jsondec *d, upb_strview str) { + const char *end = str.data + str.size; + int64_t ret; + if (jsondec_buftoint64(d, str.data, end, &ret) != end) { + jsondec_err(d, "Non-number characters in quoted integer"); + } + return ret; +} + +/* Primitive value types ******************************************************/ + +/* Parse INT32 or INT64 value. */ +static upb_msgval jsondec_int(jsondec *d, const upb_fielddef *f) { + upb_msgval val; + + switch (jsondec_peek(d)) { + case JD_NUMBER: { + double dbl = jsondec_number(d); + if (dbl > 9223372036854774784.0 || dbl < -9223372036854775808.0) { + jsondec_err(d, "JSON number is out of range."); + } + val.int64_val = dbl; /* must be guarded, overflow here is UB */ + if (val.int64_val != dbl) { + jsondec_errf(d, "JSON number was not integral (%d != %" PRId64 ")", dbl, + val.int64_val); + } + break; + } + case JD_STRING: { + upb_strview str = jsondec_string(d); + val.int64_val = jsondec_strtoint64(d, str); + break; + } + default: + jsondec_err(d, "Expected number or string"); + } + + if (upb_fielddef_type(f) == UPB_TYPE_INT32) { + if (val.int64_val > INT32_MAX || val.int64_val < INT32_MIN) { + jsondec_err(d, "Integer out of range."); + } + val.int32_val = val.int64_val; + } + + return val; +} + +/* Parse UINT32 or UINT64 value. */ +static upb_msgval jsondec_uint(jsondec *d, const upb_fielddef *f) { + upb_msgval val; + + switch (jsondec_peek(d)) { + case JD_NUMBER: { + double dbl = jsondec_number(d); + if (dbl > 18446744073709549568.0 || dbl < 0) { + jsondec_err(d, "JSON number is out of range."); + } + val.uint64_val = dbl; /* must be guarded, overflow here is UB */ + if (val.uint64_val != dbl) { + jsondec_errf(d, "JSON number was not integral (%d != %" PRIu64 ")", dbl, + val.uint64_val); + } + break; + } + case JD_STRING: { + upb_strview str = jsondec_string(d); + val.uint64_val = jsondec_strtouint64(d, str); + break; + } + default: + jsondec_err(d, "Expected number or string"); + } + + if (upb_fielddef_type(f) == UPB_TYPE_UINT32) { + if (val.uint64_val > UINT32_MAX) { + jsondec_err(d, "Integer out of range."); + } + val.uint32_val = val.uint64_val; + } + + return val; +} + +/* Parse DOUBLE or FLOAT value. */ +static upb_msgval jsondec_double(jsondec *d, const upb_fielddef *f) { + upb_strview str; + upb_msgval val; + + switch (jsondec_peek(d)) { + case JD_NUMBER: + val.double_val = jsondec_number(d); + break; + case JD_STRING: + str = jsondec_string(d); + if (jsondec_streql(str, "NaN")) { + val.double_val = 0.0 / 0.0; + } else if (jsondec_streql(str, "Infinity")) { + val.double_val = UPB_INFINITY; + } else if (jsondec_streql(str, "-Infinity")) { + val.double_val = -UPB_INFINITY; + } else { + val.double_val = strtod(str.data, NULL); + } + break; + default: + jsondec_err(d, "Expected number or string"); + } + + if (upb_fielddef_type(f) == UPB_TYPE_FLOAT) { + if (val.double_val != UPB_INFINITY && val.double_val != -UPB_INFINITY && + (val.double_val > FLT_MAX || val.double_val < -FLT_MAX)) { + jsondec_err(d, "Float out of range"); + } + val.float_val = val.double_val; + } + + return val; +} + +/* Parse STRING or BYTES value. */ +static upb_msgval jsondec_strfield(jsondec *d, const upb_fielddef *f) { + upb_msgval val; + val.str_val = jsondec_string(d); + if (upb_fielddef_type(f) == UPB_TYPE_BYTES) { + val.str_val.size = jsondec_base64(d, val.str_val); + } + return val; +} + +static upb_msgval jsondec_enum(jsondec *d, const upb_fielddef *f) { + if (jsondec_peek(d) == JD_STRING) { + const upb_enumdef *e = upb_fielddef_enumsubdef(f); + upb_strview str = jsondec_string(d); + upb_msgval val; + if (!upb_enumdef_ntoi(e, str.data, str.size, &val.int32_val)) { + jsondec_err(d, "Unknown enumerator"); + } + return val; + } else { + return jsondec_int(d, f); + } +} + +static upb_msgval jsondec_bool(jsondec *d, const upb_fielddef *f) { + bool is_map_key = upb_fielddef_number(f) == 1 && + upb_msgdef_mapentry(upb_fielddef_containingtype(f)); + upb_msgval val; + + if (is_map_key) { + upb_strview str = jsondec_string(d); + if (jsondec_streql(str, "true")) { + val.bool_val = true; + } else if (jsondec_streql(str, "false")) { + val.bool_val = false; + } else { + jsondec_err(d, "Invalid boolean map key"); + } + } else { + switch (jsondec_peek(d)) { + case JD_TRUE: + val.bool_val = true; + jsondec_true(d); + break; + case JD_FALSE: + val.bool_val = false; + jsondec_false(d); + break; + default: + jsondec_err(d, "Expected true or false"); + } + } + + return val; +} + +/* Composite types (array/message/map) ****************************************/ + +static void jsondec_array(jsondec *d, upb_msg *msg, const upb_fielddef *f) { + upb_array *arr = upb_msg_mutable(msg, f, d->arena).array; + + jsondec_arrstart(d); + while (jsondec_arrnext(d)) { + upb_msgval elem = jsondec_value(d, f); + upb_array_append(arr, elem, d->arena); + } + jsondec_arrend(d); +} + +static void jsondec_map(jsondec *d, upb_msg *msg, const upb_fielddef *f) { + upb_map *map = upb_msg_mutable(msg, f, d->arena).map; + const upb_msgdef *entry = upb_fielddef_msgsubdef(f); + const upb_fielddef *key_f = upb_msgdef_itof(entry, 1); + const upb_fielddef *val_f = upb_msgdef_itof(entry, 2); + + jsondec_objstart(d); + while (jsondec_objnext(d)) { + upb_msgval key, val; + key = jsondec_value(d, key_f); + jsondec_entrysep(d); + val = jsondec_value(d, val_f); + upb_map_set(map, key, val, d->arena); + } + jsondec_objend(d); +} + +static void jsondec_tomsg(jsondec *d, upb_msg *msg, const upb_msgdef *m) { + if (upb_msgdef_wellknowntype(m) == UPB_WELLKNOWN_UNSPECIFIED) { + jsondec_object(d, msg, m); + } else { + jsondec_wellknown(d, msg, m); + } +} + +static upb_msgval jsondec_msg(jsondec *d, const upb_fielddef *f) { + const upb_msgdef *m = upb_fielddef_msgsubdef(f); + upb_msg *msg = upb_msg_new(m, d->arena); + upb_msgval val; + + jsondec_tomsg(d, msg, m); + val.msg_val = msg; + return val; +} + +static bool jsondec_isvalue(const upb_fielddef *f) { + return upb_fielddef_type(f) == UPB_TYPE_MESSAGE && + upb_msgdef_wellknowntype(upb_fielddef_msgsubdef(f)) == + UPB_WELLKNOWN_VALUE; +} + +static void jsondec_field(jsondec *d, upb_msg *msg, const upb_msgdef *m) { + upb_strview name; + const upb_fielddef *f; + const upb_fielddef *preserved; + + name = jsondec_string(d); + jsondec_entrysep(d); + f = upb_msgdef_lookupjsonname(m, name.data, name.size); + + if (!f) { + if ((d->options & UPB_JSONDEC_IGNOREUNKNOWN) == 0) { + jsondec_err(d, "Unknown field"); + } + jsondec_skipval(d); + return; + } + + if (upb_fielddef_containingoneof(f) && + upb_msg_hasoneof(msg, upb_fielddef_containingoneof(f))) { + jsondec_err(d, "More than one field for this oneof."); + } + + if (jsondec_peek(d) == JD_NULL && !jsondec_isvalue(f)) { + /* JSON "null" indicates a default value, so no need to set anything. */ + return jsondec_null(d); + } + + preserved = d->debug_field; + d->debug_field = f; + + if (upb_fielddef_ismap(f)) { + jsondec_map(d, msg, f); + } else if (upb_fielddef_isseq(f)) { + jsondec_array(d, msg, f); + } else if (upb_fielddef_issubmsg(f)) { + upb_msg *submsg = upb_msg_mutable(msg, f, d->arena).msg; + const upb_msgdef *subm = upb_fielddef_msgsubdef(f); + jsondec_tomsg(d, submsg, subm); + } else { + upb_msgval val = jsondec_value(d, f); + upb_msg_set(msg, f, val, d->arena); + } + + d->debug_field = preserved; +} + +static void jsondec_object(jsondec *d, upb_msg *msg, const upb_msgdef *m) { + jsondec_objstart(d); + while (jsondec_objnext(d)) jsondec_field(d, msg, m); + jsondec_objend(d); +} + +static upb_msgval jsondec_value(jsondec *d, const upb_fielddef *f) { + switch (upb_fielddef_type(f)) { + case UPB_TYPE_BOOL: + return jsondec_bool(d, f); + case UPB_TYPE_FLOAT: + case UPB_TYPE_DOUBLE: + return jsondec_double(d, f); + case UPB_TYPE_UINT32: + case UPB_TYPE_UINT64: + return jsondec_uint(d, f); + case UPB_TYPE_INT32: + case UPB_TYPE_INT64: + return jsondec_int(d, f); + case UPB_TYPE_STRING: + case UPB_TYPE_BYTES: + return jsondec_strfield(d, f); + case UPB_TYPE_ENUM: + return jsondec_enum(d, f); + case UPB_TYPE_MESSAGE: + return jsondec_msg(d, f); + default: + UPB_UNREACHABLE(); + } +} + +/* Well-known types ***********************************************************/ + +static int jsondec_tsdigits(jsondec *d, const char **ptr, size_t digits, + const char *after) { + uint64_t val; + const char *p = *ptr; + const char *end = p + digits; + size_t after_len = after ? strlen(after) : 0; + + assert(digits <= 9); /* int can't overflow. */ + + if (jsondec_buftouint64(d, p, end, &val) != end || + (after_len && memcmp(end, after, after_len) != 0)) { + jsondec_err(d, "Malformed timestamp"); + } + + *ptr = end + after_len; + return val; +} + +static int jsondec_nanos(jsondec *d, const char **ptr, const char *end) { + uint64_t nanos = 0; + const char *p = *ptr; + + if (p != end && *p == '.') { + const char *nano_end = jsondec_buftouint64(d, p + 1, end, &nanos); + int digits = nano_end - p - 1; + int exp_lg10 = 9 - digits; + if (digits > 9) { + jsondec_err(d, "Too many digits for partial seconds"); + } + while (exp_lg10--) nanos *= 10; + *ptr = nano_end; + } + + return nanos; +} + +// jsondec_epochdays(1970, 1, 1) == 1970-01-01 == 0 +static int jsondec_epochdays(int y, int m, int d) { + unsigned year_base = 4800; /* Before minimum year, divisible by 100 & 400 */ + unsigned epoch = 2472632; /* Days between year_base and 1970 (Unix epoch) */ + unsigned carry = (unsigned)m - 3 > m; + unsigned m_adj = m - 3 + (carry ? 12 : 0); /* Month, counting from March */ + unsigned y_adj = y + year_base - carry; /* Year, positive and March-based */ + unsigned base_days = (365 * 4 + 1) * y_adj / 4; /* Approx days for year */ + unsigned centuries = y_adj / 100; + unsigned extra_leap_days = (3 * centuries + 3) / 4; /* base_days correction */ + unsigned year_days = (367 * (m_adj + 1)) / 12 - 30; /* Counting from March */ + return base_days - extra_leap_days + year_days + (d - 1) - epoch; +} + +static int64_t jsondec_unixtime(int y, int m, int d, int h, int min, int s) { + return (int64_t)jsondec_epochdays(y, m, d) * 86400 + h * 3600 + min * 60 + s; +} + +static void jsondec_timestamp(jsondec *d, upb_msg *msg, const upb_msgdef *m) { + upb_msgval seconds; + upb_msgval nanos; + upb_strview str = jsondec_string(d); + const char *ptr = str.data; + const char *end = ptr + str.size; + + if (str.size < 20) goto malformed; + + { + // 1972-01-01T01:00:00 + int year = jsondec_tsdigits(d, &ptr, 4, "-"); + int mon = jsondec_tsdigits(d, &ptr, 2, "-"); + int day = jsondec_tsdigits(d, &ptr, 2, "T"); + int hour = jsondec_tsdigits(d, &ptr, 2, ":"); + int min = jsondec_tsdigits(d, &ptr, 2, ":"); + int sec = jsondec_tsdigits(d, &ptr, 2, NULL); + + seconds.int64_val = jsondec_unixtime(year, mon, day, hour, min, sec); + } + + nanos.int32_val = jsondec_nanos(d, &ptr, end); + + { + // [+-]08:00 or Z + int ofs = 0; + bool neg = false; + + if (ptr == end) goto malformed; + + switch (*ptr++) { + case '-': + neg = true; + /* Fallthrough intended. */ + case '+': + if ((end - ptr) != 5) goto malformed; + ofs = jsondec_tsdigits(d, &ptr, 2, ":00"); + ofs *= 60 * 60; + seconds.int64_val += (neg ? ofs : -ofs); + break; + case 'Z': + if (ptr != end) goto malformed; + break; + default: + goto malformed; + } + } + + if (seconds.int64_val < -62135596800) { + jsondec_err(d, "Timestamp out of range"); + } + + upb_msg_set(msg, upb_msgdef_itof(m, 1), seconds, d->arena); + upb_msg_set(msg, upb_msgdef_itof(m, 2), nanos, d->arena); + return; + +malformed: + jsondec_err(d, "Malformed timestamp"); +} + +static void jsondec_duration(jsondec *d, upb_msg *msg, const upb_msgdef *m) { + upb_msgval seconds; + upb_msgval nanos; + upb_strview str = jsondec_string(d); + const char *ptr = str.data; + const char *end = ptr + str.size; + + // "3.000000001s", "3s", etc. + ptr = jsondec_buftoint64(d, ptr, end, &seconds.int64_val); + nanos.int32_val = jsondec_nanos(d, &ptr, end); + + if (end - ptr != 1 || *ptr != 's') { + jsondec_err(d, "Malformed duration"); + } + + if (seconds.int64_val < -315576000000LL || seconds.int64_val > 315576000000LL) { + jsondec_err(d, "Duration out of range"); + } + + if (seconds.int64_val < 0) { + nanos.int32_val = - nanos.int32_val; + } + + upb_msg_set(msg, upb_msgdef_itof(m, 1), seconds, d->arena); + upb_msg_set(msg, upb_msgdef_itof(m, 2), nanos, d->arena); +} + +static void jsondec_listvalue(jsondec *d, upb_msg *msg, const upb_msgdef *m) { + const upb_fielddef *values_f = upb_msgdef_itof(m, 1); + const upb_msgdef *value_m = upb_fielddef_msgsubdef(values_f); + upb_array *values = upb_msg_mutable(msg, values_f, d->arena).array; + + jsondec_arrstart(d); + while (jsondec_arrnext(d)) { + upb_msg *value_msg = upb_msg_new(value_m, d->arena); + upb_msgval value; + value.msg_val = value_msg; + upb_array_append(values, value, d->arena); + jsondec_wellknownvalue(d, value_msg, value_m); + } + jsondec_arrend(d); +} + +static void jsondec_struct(jsondec *d, upb_msg *msg, const upb_msgdef *m) { + const upb_fielddef *fields_f = upb_msgdef_itof(m, 1); + const upb_msgdef *entry_m = upb_fielddef_msgsubdef(fields_f); + const upb_fielddef *value_f = upb_msgdef_itof(entry_m, 2); + const upb_msgdef *value_m = upb_fielddef_msgsubdef(value_f); + upb_map *fields = upb_msg_mutable(msg, fields_f, d->arena).map; + + jsondec_objstart(d); + while (jsondec_objnext(d)) { + upb_msgval key, value; + upb_msg *value_msg = upb_msg_new(value_m, d->arena); + key.str_val = jsondec_string(d); + value.msg_val = value_msg; + upb_map_set(fields, key, value, d->arena); + jsondec_entrysep(d); + jsondec_wellknownvalue(d, value_msg, value_m); + } + jsondec_objend(d); +} + +static void jsondec_wellknownvalue(jsondec *d, upb_msg *msg, + const upb_msgdef *m) { + upb_msgval val; + const upb_fielddef *f; + upb_msg *submsg; + + switch (jsondec_peek(d)) { + case JD_NUMBER: + /* double number_value = 2; */ + f = upb_msgdef_itof(m, 2); + val.double_val = jsondec_number(d); + break; + case JD_STRING: + /* string string_value = 3; */ + f = upb_msgdef_itof(m, 3); + val.str_val = jsondec_string(d); + break; + case JD_FALSE: + /* bool bool_value = 4; */ + f = upb_msgdef_itof(m, 4); + val.bool_val = false; + jsondec_false(d); + break; + case JD_TRUE: + /* bool bool_value = 4; */ + f = upb_msgdef_itof(m, 4); + val.bool_val = true; + jsondec_true(d); + break; + case JD_NULL: + /* NullValue null_value = 1; */ + f = upb_msgdef_itof(m, 1); + val.int32_val = 0; + jsondec_null(d); + break; + /* Note: these cases return, because upb_msg_mutable() is enough. */ + case JD_OBJECT: + /* Struct struct_value = 5; */ + f = upb_msgdef_itof(m, 5); + submsg = upb_msg_mutable(msg, f, d->arena).msg; + jsondec_struct(d, submsg, upb_fielddef_msgsubdef(f)); + return; + case JD_ARRAY: + /* ListValue list_value = 6; */ + f = upb_msgdef_itof(m, 6); + submsg = upb_msg_mutable(msg, f, d->arena).msg; + jsondec_listvalue(d, submsg, upb_fielddef_msgsubdef(f)); + return; + default: + UPB_UNREACHABLE(); + } + + upb_msg_set(msg, f, val, d->arena); +} + +static upb_strview jsondec_mask(jsondec *d, const char *buf, const char *end) { + /* FieldMask fields grow due to inserted '_' characters, so we can't do the + * transform in place. */ + const char *ptr = buf; + upb_strview ret; + char *out; + + ret.size = end - ptr; + while (ptr < end) { + ret.size += (*ptr >= 'A' && *ptr <= 'Z'); + ptr++; + } + + out = upb_arena_malloc(d->arena, ret.size); + ptr = buf; + ret.data = out; + + while (ptr < end) { + char ch = *ptr++; + if (ch >= 'A' && ch <= 'Z') { + *out++ = '_'; + *out++ = ch + 32; + } else if (ch == '_') { + jsondec_err(d, "field mask may not contain '_'"); + } else { + *out++ = ch; + } + } + + return ret; +} + +static void jsondec_fieldmask(jsondec *d, upb_msg *msg, const upb_msgdef *m) { + /* repeated string paths = 1; */ + const upb_fielddef *paths_f = upb_msgdef_itof(m, 1); + upb_array *arr = upb_msg_mutable(msg, paths_f, d->arena).array; + upb_strview str = jsondec_string(d); + const char *ptr = str.data; + const char *end = ptr + str.size; + upb_msgval val; + + while (ptr < end) { + const char *elem_end = memchr(ptr, ',', end - ptr); + if (elem_end) { + val.str_val = jsondec_mask(d, ptr, elem_end); + ptr = elem_end + 1; + } else { + val.str_val = jsondec_mask(d, ptr, end); + ptr = end; + } + upb_array_append(arr, val, d->arena); + } +} + +static void jsondec_anyfield(jsondec *d, upb_msg *msg, const upb_msgdef *m) { + if (upb_msgdef_wellknowntype(m) == UPB_WELLKNOWN_UNSPECIFIED) { + /* For regular types: {"@type": "[user type]", "f1": , "f2": } + * where f1, f2, etc. are the normal fields of this type. */ + jsondec_field(d, msg, m); + } else { + /* For well-known types: {"@type": "[well-known type]", "value": } + * where is whatever encoding the WKT normally uses. */ + upb_strview str = jsondec_string(d); + jsondec_entrysep(d); + if (!jsondec_streql(str, "value")) { + jsondec_err(d, "Key for well-known type must be 'value'"); + } + jsondec_wellknown(d, msg, m); + } +} + +static const upb_msgdef *jsondec_typeurl(jsondec *d, upb_msg *msg, + const upb_msgdef *m) { + const upb_fielddef *type_url_f = upb_msgdef_itof(m, 1); + const upb_msgdef *type_m; + upb_strview type_url = jsondec_string(d); + const char *end = type_url.data + type_url.size; + const char *ptr = end; + upb_msgval val; + + val.str_val = type_url; + upb_msg_set(msg, type_url_f, val, d->arena); + + /* Find message name after the last '/' */ + while (ptr > type_url.data && *--ptr != '/') {} + + if (ptr == type_url.data || ptr == end) { + jsondec_err(d, "Type url must have at least one '/' and non-empty host"); + } + + ptr++; + type_m = upb_symtab_lookupmsg2(d->any_pool, ptr, end - ptr); + + if (!type_m) { + jsondec_err(d, "Type was not found"); + } + + return type_m; +} + +static void jsondec_any(jsondec *d, upb_msg *msg, const upb_msgdef *m) { + /* string type_url = 1; + * bytes value = 2; */ + const upb_fielddef *value_f = upb_msgdef_itof(m, 2); + upb_msg *any_msg; + const upb_msgdef *any_m = NULL; + const char *pre_type_data = NULL; + const char *pre_type_end = NULL; + upb_msgval encoded; + + jsondec_objstart(d); + + /* Scan looking for "@type", which is not necessarily first. */ + while (!any_m && jsondec_objnext(d)) { + const char *start = d->ptr; + upb_strview name = jsondec_string(d); + jsondec_entrysep(d); + if (jsondec_streql(name, "@type")) { + any_m = jsondec_typeurl(d, msg, m); + if (pre_type_data) { + pre_type_end = start; + while (*pre_type_end != ',') pre_type_end--; + } + } else { + if (!pre_type_data) pre_type_data = start; + jsondec_skipval(d); + } + } + + if (!any_m) { + jsondec_err(d, "Any object didn't contain a '@type' field"); + } + + any_msg = upb_msg_new(any_m, d->arena); + + if (pre_type_data) { + size_t len = pre_type_end - pre_type_data + 1; + char *tmp = upb_arena_malloc(d->arena, len); + memcpy(tmp, pre_type_data, len - 1); + tmp[len - 1] = '}'; + const char *saved_ptr = d->ptr; + const char *saved_end = d->end; + d->ptr = tmp; + d->end = tmp + len; + d->is_first = true; + while (jsondec_objnext(d)) { + jsondec_anyfield(d, any_msg, any_m); + } + d->ptr = saved_ptr; + d->end = saved_end; + } + + while (jsondec_objnext(d)) { + jsondec_anyfield(d, any_msg, any_m); + } + + jsondec_objend(d); + + encoded.str_val.data = upb_encode(any_msg, upb_msgdef_layout(any_m), d->arena, + &encoded.str_val.size); + upb_msg_set(msg, value_f, encoded, d->arena); +} + +static void jsondec_wrapper(jsondec *d, upb_msg *msg, const upb_msgdef *m) { + const upb_fielddef *value_f = upb_msgdef_itof(m, 1); + upb_msgval val = jsondec_value(d, value_f); + upb_msg_set(msg, value_f, val, d->arena); +} + +static void jsondec_wellknown(jsondec *d, upb_msg *msg, const upb_msgdef *m) { + switch (upb_msgdef_wellknowntype(m)) { + case UPB_WELLKNOWN_ANY: + jsondec_any(d, msg, m); + break; + case UPB_WELLKNOWN_FIELDMASK: + jsondec_fieldmask(d, msg, m); + break; + case UPB_WELLKNOWN_DURATION: + jsondec_duration(d, msg, m); + break; + case UPB_WELLKNOWN_TIMESTAMP: + jsondec_timestamp(d, msg, m); + break; + case UPB_WELLKNOWN_VALUE: + jsondec_wellknownvalue(d, msg, m); + break; + case UPB_WELLKNOWN_LISTVALUE: + jsondec_listvalue(d, msg, m); + break; + case UPB_WELLKNOWN_STRUCT: + jsondec_struct(d, msg, m); + break; + case UPB_WELLKNOWN_DOUBLEVALUE: + case UPB_WELLKNOWN_FLOATVALUE: + case UPB_WELLKNOWN_INT64VALUE: + case UPB_WELLKNOWN_UINT64VALUE: + case UPB_WELLKNOWN_INT32VALUE: + case UPB_WELLKNOWN_UINT32VALUE: + case UPB_WELLKNOWN_STRINGVALUE: + case UPB_WELLKNOWN_BYTESVALUE: + case UPB_WELLKNOWN_BOOLVALUE: + jsondec_wrapper(d, msg, m); + break; + default: + UPB_UNREACHABLE(); + } +} + +bool upb_json_decode(const char *buf, size_t size, upb_msg *msg, + const upb_msgdef *m, const upb_symtab *any_pool, + int options, upb_arena *arena, upb_status *status) { + jsondec d; + d.ptr = buf; + d.end = buf + size; + d.arena = arena; + d.any_pool = any_pool; + d.status = status; + d.options = options; + d.depth = 64; + d.line = 1; + d.debug_field = NULL; + d.is_first = false; + + if (setjmp(d.err)) return false; + + jsondec_object(&d, msg, m); + return true; +} diff --git a/upb/json_decode.h b/upb/json_decode.h new file mode 100644 index 00000000000..01ab9580f05 --- /dev/null +++ b/upb/json_decode.h @@ -0,0 +1,24 @@ + +#ifndef UPB_JSONDECODE_H_ +#define UPB_JSONDECODE_H_ + +#include "upb/def.h" +#include "upb/msg.h" + +#ifdef __cplusplus +extern "C" { +#endif + +enum { + UPB_JSONDEC_IGNOREUNKNOWN = 1 +}; + +bool upb_json_decode(const char *buf, size_t size, upb_msg *msg, + const upb_msgdef *m, const upb_symtab *any_pool, + int options, upb_arena *arena, upb_status *status); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* UPB_JSONDECODE_H_ */ diff --git a/upb/json_encode.c b/upb/json_encode.c index d8adf1b722b..285bcecea5f 100644 --- a/upb/json_encode.c +++ b/upb/json_encode.c @@ -29,13 +29,23 @@ static void jsonenc_msg(jsonenc *e, const upb_msg *msg, const upb_msgdef *m); static void jsonenc_scalar(jsonenc *e, upb_msgval val, const upb_fielddef *f); static void jsonenc_msgfield(jsonenc *e, const upb_msg *msg, const upb_msgdef *m); +static void jsonenc_msgfields(jsonenc *e, const upb_msg *msg, + const upb_msgdef *m); static void jsonenc_value(jsonenc *e, const upb_msg *msg, const upb_msgdef *m); -static void jsonenc_err(jsonenc *e, const char *msg) { +UPB_NORETURN static void jsonenc_err(jsonenc *e, const char *msg) { upb_status_seterrmsg(e->status, msg); longjmp(e->err, 1); } +static upb_arena *jsonenc_arena(jsonenc *e) { + /* Create lazily, since it's only needed for Any */ + if (!e->arena) { + e->arena = upb_arena_new(); + } + return e->arena; +} + static void jsonenc_putbytes(jsonenc *e, const void *data, size_t len) { size_t have = e->end - e->ptr; if (UPB_LIKELY(have >= len)) { @@ -70,19 +80,19 @@ static void jsonenc_printf(jsonenc *e, const char *fmt, ...) { } static void jsonenc_nanos(jsonenc *e, int32_t nanos) { - const char zeros[3] = "000"; + int digits = 9; if (nanos == 0) return; if (nanos < 0 || nanos >= 1000000000) { jsonenc_err(e, "error formatting timestamp as JSON: invalid nanos"); } - jsonenc_printf(e, "%09" PRId32, nanos); - - /* Remove trailing zeros, 3 at a time. */ - while ((e->ptr - e->buf) >= 3 && memcmp(e->ptr, zeros, 3) == 0) { - e->ptr -= 3; + while (nanos % 1000 == 0) { + nanos /= 1000; + digits -= 3; } + + jsonenc_printf(e, ".%0.*" PRId32, digits, nanos); } static void jsonenc_timestamp(jsonenc *e, const upb_msg *msg, @@ -107,7 +117,7 @@ static void jsonenc_timestamp(jsonenc *e, const upb_msg *msg, * Fliegel, H. F., and Van Flandern, T. C., "A Machine Algorithm for * Processing Calendar Dates," Communications of the Association of * Computing Machines, vol. 11 (1968), p. 657. */ - L = (seconds / 86400) + 2440588; + L = (seconds / 86400) + 68569 + 2440588; N = 4 * L / 146097; L = L - (146097 * N + 3) / 4; I = 4000 * (L + 1) / 1461001; @@ -138,6 +148,10 @@ static void jsonenc_duration(jsonenc *e, const upb_msg *msg, const upb_msgdef *m jsonenc_err(e, "bad duration"); } + if (nanos < 0) { + nanos = -nanos; + } + jsonenc_printf(e, "\"%" PRId64, seconds); jsonenc_nanos(e, nanos); jsonenc_putstr(e, "s\""); @@ -158,8 +172,8 @@ static void jsonenc_bytes(jsonenc *e, upb_strview str) { /* This is the regular base64, not the "web-safe" version. */ static const char base64[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; - const char *ptr = str.data; - const char *end = ptr + str.size; + const unsigned char *ptr = (unsigned char*)str.data; + const unsigned char *end = ptr + str.size; char buf[4]; jsonenc_putstr(e, "\""); @@ -212,10 +226,10 @@ static void jsonenc_stringbody(jsonenc *e, upb_strview str) { jsonenc_putstr(e, "\\\""); break; case '\f': - jsonenc_putstr(e, "\f'"); + jsonenc_putstr(e, "\\f"); break; case '\b': - jsonenc_putstr(e, "\b'"); + jsonenc_putstr(e, "\\b"); break; case '\\': jsonenc_putstr(e, "\\\\"); @@ -255,21 +269,22 @@ static void jsonenc_double(jsonenc *e, const char *fmt, double val) { static void jsonenc_wrapper(jsonenc *e, const upb_msg *msg, const upb_msgdef *m) { const upb_fielddef *val_f = upb_msgdef_itof(m, 1); - upb_msgval val = upb_msg_get(m, val_f); + upb_msgval val = upb_msg_get(msg, val_f); jsonenc_scalar(e, val, val_f); } -const upb_msgdef *jsonenc_getanymsg(jsonenc *e, upb_strview type_url) { +static const upb_msgdef *jsonenc_getanymsg(jsonenc *e, upb_strview type_url) { /* Find last '/', if any. */ const char *end = type_url.data + type_url.size; const char *ptr = end; + const upb_msgdef *ret; - if (!e->ext_pool || type_url.size == 0) return NULL; + if (!e->ext_pool || type_url.size == 0) goto badurl; while (true) { if (--ptr == type_url.data) { /* Type URL must contain at least one '/', with host before. */ - return NULL; + goto badurl; } if (*ptr == '/') { ptr++; @@ -277,19 +292,29 @@ const upb_msgdef *jsonenc_getanymsg(jsonenc *e, upb_strview type_url) { } } - return upb_symtab_lookupmsg2(e->ext_pool, ptr, end - ptr); + ret = upb_symtab_lookupmsg2(e->ext_pool, ptr, end - ptr); + + if (!ret) { + jsonenc_err(e, "Couldn't find Any type"); + } + + return ret; + +badurl: + jsonenc_err(e, "Bad type URL"); } static void jsonenc_any(jsonenc *e, const upb_msg *msg, const upb_msgdef *m) { const upb_fielddef *type_url_f = upb_msgdef_itof(m, 1); - const upb_fielddef *value_f = upb_msgdef_itof(m, 1); + const upb_fielddef *value_f = upb_msgdef_itof(m, 2); upb_strview type_url = upb_msg_get(msg, type_url_f).str_val; upb_strview value = upb_msg_get(msg, value_f).str_val; const upb_msgdef *any_m = jsonenc_getanymsg(e, type_url); const upb_msglayout *any_layout = upb_msgdef_layout(any_m); - upb_msg *any = upb_msg_new(any_m, e->arena); + upb_arena *arena = jsonenc_arena(e); + upb_msg *any = upb_msg_new(any_m, arena); - if (!upb_decode(value.data, value.size, any, any_layout, e->arena)) { + if (!upb_decode(value.data, value.size, any, any_layout, arena)) { jsonenc_err(e, "Error decoding message in Any"); } @@ -297,9 +322,9 @@ static void jsonenc_any(jsonenc *e, const upb_msg *msg, const upb_msgdef *m) { jsonenc_string(e, type_url); jsonenc_putstr(e, ", "); - if (upb_msgdef_wellknowntype(m) == UPB_WELLKNOWN_UNSPECIFIED) { + if (upb_msgdef_wellknowntype(any_m) == UPB_WELLKNOWN_UNSPECIFIED) { /* Regular messages: {"@type": "...", "foo": 1, "bar": 2} */ - jsonenc_msg(e, any, any_m); + jsonenc_msgfields(e, any, any_m); } else { /* Well-known type: {"@type": "...", "value": } */ jsonenc_putstr(e, "value: "); @@ -323,15 +348,17 @@ static void jsonenc_fieldpath(jsonenc *e, upb_strview path) { while (ptr < end) { char ch = *ptr; + if (ch >= 'A' && ch <= 'Z') { jsonenc_err(e, "Field mask element may not have upper-case letter."); } else if (ch == '_') { if (ptr == end - 1 || *(ptr + 1) < 'a' || *(ptr + 1) > 'z') { jsonenc_err(e, "Underscore must be followed by a lowercase letter."); } - } else { - jsonenc_putbytes(e, &ch, 1); + ch = *++ptr - 32; } + + jsonenc_putbytes(e, &ch, 1); ptr++; } } @@ -468,7 +495,7 @@ static void jsonenc_msgfield(jsonenc *e, const upb_msg *msg, jsonenc_listvalue(e, msg, m); break; case UPB_WELLKNOWN_STRUCT: - jsonenc_listvalue(e, msg, m); + jsonenc_struct(e, msg, m); break; } } @@ -532,6 +559,7 @@ static void jsonenc_mapkey(jsonenc *e, upb_msgval val, const upb_fielddef *f) { break; case UPB_TYPE_STRING: jsonenc_stringbody(e, val.str_val); + break; default: UPB_UNREACHABLE(); } @@ -575,15 +603,12 @@ static void jsonenc_map(jsonenc *e, const upb_map *map, const upb_fielddef *f) { static void jsonenc_fieldval(jsonenc *e, const upb_fielddef *f, upb_msgval val, bool *first) { - char buf[128]; const char *name; if (e->options & UPB_JSONENC_PROTONAMES) { name = upb_fielddef_name(f); } else { - /* TODO(haberman): we need a better JSON name API. */ - upb_fielddef_getjsonname(f, buf, sizeof(buf)); - name = buf; + name = upb_fielddef_jsonname(f); } jsonenc_putsep(e, ", ", first); @@ -598,13 +623,12 @@ static void jsonenc_fieldval(jsonenc *e, const upb_fielddef *f, } } -static void jsonenc_msg(jsonenc *e, const upb_msg *msg, const upb_msgdef *m) { +static void jsonenc_msgfields(jsonenc *e, const upb_msg *msg, + const upb_msgdef *m) { upb_msgval val; const upb_fielddef *f; bool first = true; - jsonenc_putstr(e, "{"); - if (e->options & UPB_JSONENC_EMITDEFAULTS) { /* Iterate over all fields. */ upb_msg_field_iter i; @@ -620,11 +644,15 @@ static void jsonenc_msg(jsonenc *e, const upb_msg *msg, const upb_msgdef *m) { jsonenc_fieldval(e, f, val, &first); } } +} +static void jsonenc_msg(jsonenc *e, const upb_msg *msg, const upb_msgdef *m) { + jsonenc_putstr(e, "{"); + jsonenc_msgfields(e, msg, m); jsonenc_putstr(e, "}"); } -size_t jsonenc_nullz(jsonenc *e, size_t size) { +static size_t jsonenc_nullz(jsonenc *e, size_t size) { size_t ret = e->ptr - e->buf + e->overflow; if (size > 0) { @@ -647,9 +675,11 @@ size_t upb_json_encode(const upb_msg *msg, const upb_msgdef *m, e.options = options; e.ext_pool = ext_pool; e.status = status; + e.arena = NULL; if (setjmp(e.err)) return -1; jsonenc_msg(&e, msg, m); + if (e.arena) upb_arena_free(e.arena); return jsonenc_nullz(&e, size); } diff --git a/upb/msg.c b/upb/msg.c index 6c6d3008aa4..8df9d7a1f76 100644 --- a/upb/msg.c +++ b/upb/msg.c @@ -9,7 +9,7 @@ /** upb_msg *******************************************************************/ -static char _upb_fieldtype_to_sizelg2[12] = { +static const char _upb_fieldtype_to_sizelg2[12] = { 0, 0, /* UPB_TYPE_BOOL */ 2, /* UPB_TYPE_FLOAT */ diff --git a/upb/reflection.c b/upb/reflection.c index 89a801266f8..d032eccee40 100644 --- a/upb/reflection.c +++ b/upb/reflection.c @@ -90,6 +90,18 @@ bool upb_msg_has(const upb_msg *msg, const upb_fielddef *f) { } } +bool upb_msg_hasoneof(const upb_msg *msg, const upb_oneofdef *o) { + upb_oneof_iter i; + const upb_fielddef *f; + const upb_msglayout_field *field; + + upb_oneof_begin(&i, o); + if (upb_oneof_done(&i)) return false; + f = upb_oneof_iter_field(&i); + field = upb_fielddef_layout(f); + return *oneofcase(msg, field) != 0; +} + upb_msgval upb_msg_get(const upb_msg *msg, const upb_fielddef *f) { if (!upb_fielddef_haspresence(f) || upb_msg_has(msg, f)) { return _upb_msg_getraw(msg, f); @@ -136,8 +148,11 @@ upb_mutmsgval upb_msg_mutable(upb_msg *msg, const upb_fielddef *f, const upb_msglayout_field *field = upb_fielddef_layout(f); upb_mutmsgval ret; char *mem = PTR_AT(msg, field->offset, char); + bool wrong_oneof = in_oneof(field) && *oneofcase(msg, field) != field->number; + memcpy(&ret, mem, sizeof(void*)); - if (a && !ret.msg) { + + if (a && (!ret.msg || wrong_oneof)) { if (upb_fielddef_ismap(f)) { const upb_msgdef *entry = upb_fielddef_msgsubdef(f); const upb_fielddef *key = upb_msgdef_itof(entry, UPB_MAPENTRY_KEY); @@ -149,7 +164,12 @@ upb_mutmsgval upb_msg_mutable(upb_msg *msg, const upb_fielddef *f, UPB_ASSERT(upb_fielddef_issubmsg(f)); ret.msg = upb_msg_new(upb_fielddef_msgsubdef(f), a); } + memcpy(mem, &ret, sizeof(void*)); + + if (wrong_oneof) { + *oneofcase(msg, field) = field->number; + } } return ret; } diff --git a/upb/reflection.h b/upb/reflection.h index 95156b7d76b..c55fe4a2e61 100644 --- a/upb/reflection.h +++ b/upb/reflection.h @@ -44,6 +44,9 @@ upb_mutmsgval upb_msg_mutable(upb_msg *msg, const upb_fielddef *f, upb_arena *a) /* May only be called for fields where upb_fielddef_haspresence(f) == true. */ bool upb_msg_has(const upb_msg *msg, const upb_fielddef *f); +/* Returns whether any field is set in the oneof. */ +bool upb_msg_hasoneof(const upb_msg *msg, const upb_oneofdef *o); + /* Sets the given field to the given value. For a msg/array/map/string, the * value must be in the same arena. */ void upb_msg_set(upb_msg *msg, const upb_fielddef *f, upb_msgval val, diff --git a/upb/text_encode.c b/upb/text_encode.c index cb76f76253b..d426e67ea27 100644 --- a/upb/text_encode.c +++ b/upb/text_encode.c @@ -260,7 +260,7 @@ static const char *txtenc_unknown(txtenc *e, const char *ptr, const char *end, uint32_t tag; CHK(ptr = txtenc_parsevarint(ptr, end, &tag_64)); CHK(tag_64 < UINT32_MAX); - tag = tag_64; + tag = (uint32_t)tag_64; if ((tag & 7) == UPB_WIRE_TYPE_END_GROUP) { CHK((tag >> 3) == groupnum);