diff --git a/BUILD b/BUILD index 61601a067e..cddda82d18 100644 --- a/BUILD +++ b/BUILD @@ -162,9 +162,11 @@ cc_library( cc_library( name = "json", srcs = [ + "upb/json_decode.c", "upb/json_encode.c", ], hdrs = [ + "upb/json_decode.h", "upb/json_encode.h", ], deps = [ diff --git a/CMakeLists.txt b/CMakeLists.txt index 0f0b2420bb..1f6004543f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -97,7 +97,9 @@ target_link_libraries(textformat port reflection) add_library(json + upb/json_decode.c upb/json_encode.c + upb/json_decode.h upb/json_encode.h) target_link_libraries(json port diff --git a/generated_for_cmake/upb/json/parser.c b/generated_for_cmake/upb/json/parser.c index 33bb442f51..03087845d5 100644 --- a/generated_for_cmake/upb/json/parser.c +++ b/generated_for_cmake/upb/json/parser.c @@ -3311,15 +3311,13 @@ static upb_json_parsermethod *parsermethod_new(upb_json_codecache *c, upb_msg_field_next(&i)) { const upb_fielddef *f = upb_msg_iter_field(&i); upb_value v = upb_value_constptr(f); - char *buf; + const char *name; /* Add an entry for the JSON name. */ - size_t len = upb_fielddef_getjsonname(f, NULL, 0); - buf = upb_malloc(alloc, len); - upb_fielddef_getjsonname(f, buf, len); - upb_strtable_insert3(&m->name_table, buf, strlen(buf), v, alloc); + name = upb_fielddef_jsonname(f); + upb_strtable_insert3(&m->name_table, name, strlen(name), v, alloc); - if (strcmp(buf, upb_fielddef_name(f)) != 0) { + if (strcmp(name, upb_fielddef_name(f)) != 0) { /* Since the JSON name is different from the regular field name, add an * entry for the raw name (compliant proto3 JSON parsers must accept * both). 
*/ diff --git a/tests/conformance_upb.c b/tests/conformance_upb.c index 8285242d3e..1d87060095 100644 --- a/tests/conformance_upb.c +++ b/tests/conformance_upb.c @@ -15,6 +15,7 @@ #include "upb/decode.h" #include "upb/encode.h" #include "upb/reflection.h" +#include "upb/json_decode.h" #include "upb/json_encode.h" #include "upb/text_encode.h" @@ -85,9 +86,11 @@ void serialize_text(const upb_msg *msg, const upb_msgdef *m, const ctx *c) { size_t len2; int opts = 0; char *data; + if (!conformance_ConformanceRequest_print_unknown_fields(c->request)) { opts |= UPB_TXTENC_SKIPUNKNOWN; } + len = upb_text_encode(msg, m, c->symtab, opts, NULL, 0); data = upb_arena_malloc(c->arena, len + 1); len2 = upb_text_encode(msg, m, c->symtab, opts, data, len + 1); @@ -96,6 +99,33 @@ void serialize_text(const upb_msg *msg, const upb_msgdef *m, const ctx *c) { c->response, upb_strview_make(data, len)); } +bool parse_json(upb_msg *msg, const upb_msgdef *m, const ctx* c) { + upb_strview json = + conformance_ConformanceRequest_json_payload(c->request); + upb_status status; + int opts = 0; + + if (conformance_ConformanceRequest_test_category(c->request) == + conformance_JSON_IGNORE_UNKNOWN_PARSING_TEST) { + opts |= UPB_JSONDEC_IGNOREUNKNOWN; + } + + upb_status_clear(&status); + if (upb_json_decode(json.data, json.size, msg, m, c->symtab, opts, c->arena, + &status)) { + return true; + } else { + const char *inerr = upb_status_errmsg(&status); + size_t len = strlen(inerr); + char *err = upb_arena_malloc(c->arena, len + 1); + memcpy(err, inerr, strlen(inerr)); + err[len] = '\0'; + conformance_ConformanceResponse_set_parse_error(c->response, + upb_strview_makez(err)); + return false; + } +} + void serialize_json(const upb_msg *msg, const upb_msgdef *m, const ctx *c) { size_t len; size_t len2; @@ -104,16 +134,16 @@ void serialize_json(const upb_msg *msg, const upb_msgdef *m, const ctx *c) { upb_status status; upb_status_clear(&status); - if 
(!conformance_ConformanceRequest_print_unknown_fields(c->request)) { - opts |= UPB_TXTENC_SKIPUNKNOWN; - } - len = upb_json_encode(msg, m, c->symtab, opts, NULL, 0, &status); if (len == -1) { - static const char msg[] = "Error serializing."; - conformance_ConformanceResponse_set_serialize_error( - c->response, upb_strview_make(msg, strlen(msg))); + const char *inerr = upb_status_errmsg(&status); + size_t len = strlen(inerr); + char *err = upb_arena_malloc(c->arena, len + 1); + memcpy(err, inerr, strlen(inerr)); + err[len] = '\0'; + conformance_ConformanceResponse_set_serialize_error(c->response, + upb_strview_makez(err)); return; } @@ -128,6 +158,8 @@ bool parse_input(upb_msg *msg, const upb_msgdef *m, const ctx* c) { switch (conformance_ConformanceRequest_payload_case(c->request)) { case conformance_ConformanceRequest_payload_protobuf_payload: return parse_proto(msg, m, c); + case conformance_ConformanceRequest_payload_json_payload: + return parse_json(msg, m, c); case conformance_ConformanceRequest_payload_NOT_SET: fprintf(stderr, "conformance_upb: Request didn't have payload.\n"); return false; diff --git a/upb/def.c b/upb/def.c index 8385e9638a..f1b477bdc9 100644 --- a/upb/def.c +++ b/upb/def.c @@ -27,6 +27,7 @@ struct upb_fielddef { const upb_filedef *file; const upb_msgdef *msgdef; const char *full_name; + const char *json_name; union { int64_t sint; uint64_t uint; @@ -117,10 +118,15 @@ struct upb_symtab { /* Inside a symtab we store tagged pointers to specific def types. */ typedef enum { - UPB_DEFTYPE_MSG = 0, - UPB_DEFTYPE_ENUM = 1, - UPB_DEFTYPE_FIELD = 2, - UPB_DEFTYPE_ONEOF = 3 + UPB_DEFTYPE_FIELD = 0, + + /* Only inside symtab table. */ + UPB_DEFTYPE_MSG = 1, + UPB_DEFTYPE_ENUM = 2, + + /* Only inside message table. 
*/ + UPB_DEFTYPE_ONEOF = 1, + UPB_DEFTYPE_FIELD_JSONNAME = 2 } upb_deftype_t; static const void *unpack_def(upb_value v, upb_deftype_t type) { @@ -462,47 +468,12 @@ const char *upb_fielddef_name(const upb_fielddef *f) { return shortdefname(f->full_name); } -uint32_t upb_fielddef_selectorbase(const upb_fielddef *f) { - return f->selector_base; +const char *upb_fielddef_jsonname(const upb_fielddef *f) { + return f->json_name; } -size_t upb_fielddef_getjsonname(const upb_fielddef *f, char *buf, size_t len) { - const char *name = upb_fielddef_name(f); - size_t src, dst = 0; - bool ucase_next = false; - -#define WRITE(byte) \ - ++dst; \ - if (dst < len) buf[dst - 1] = byte; \ - else if (dst == len) buf[dst - 1] = '\0' - - if (!name) { - WRITE('\0'); - return 0; - } - - /* Implement the transformation as described in the spec: - * 1. upper case all letters after an underscore. - * 2. remove all underscores. - */ - for (src = 0; name[src]; src++) { - if (name[src] == '_') { - ucase_next = true; - continue; - } - - if (ucase_next) { - WRITE(toupper(name[src])); - ucase_next = false; - } else { - WRITE(name[src]); - } - } - - WRITE('\0'); - return dst; - -#undef WRITE +uint32_t upb_fielddef_selectorbase(const upb_fielddef *f) { + return f->selector_base; } const upb_msgdef *upb_fielddef_containingtype(const upb_fielddef *f) { @@ -690,18 +661,30 @@ bool upb_msgdef_lookupname(const upb_msgdef *m, const char *name, size_t len, *o = unpack_def(val, UPB_DEFTYPE_ONEOF); *f = unpack_def(val, UPB_DEFTYPE_FIELD); - UPB_ASSERT((*o != NULL) ^ (*f != NULL)); /* Exactly one of the two should be set. */ - return true; + return *o || *f; /* False if this was a JSON name. 
*/ +} + +const upb_fielddef *upb_msgdef_lookupjsonname(const upb_msgdef *m, + const char *name, size_t len) { + upb_value val; + const upb_fielddef* f; + + if (!upb_strtable_lookup2(&m->ntof, name, len, &val)) { + return NULL; + } + + f = unpack_def(val, UPB_DEFTYPE_FIELD); + if (!f) f = unpack_def(val, UPB_DEFTYPE_FIELD_JSONNAME); + + return f; } int upb_msgdef_numfields(const upb_msgdef *m) { - /* The number table contains only fields. */ - return (int)upb_inttable_count(&m->itof); + return m->field_count; } int upb_msgdef_numoneofs(const upb_msgdef *m) { - /* The name table includes oneofs, and the number table does not. */ - return (int)(upb_strtable_count(&m->ntof) - upb_inttable_count(&m->itof)); + return m->oneof_count; } const upb_msglayout *upb_msgdef_layout(const upb_msgdef *m) { @@ -1098,6 +1081,51 @@ static const char *makefullname(const symtab_addctx *ctx, const char *prefix, } } +size_t getjsonname(const char *name, char *buf, size_t len) { + size_t src, dst = 0; + bool ucase_next = false; + +#define WRITE(byte) \ + ++dst; \ + if (dst < len) buf[dst - 1] = byte; \ + else if (dst == len) buf[dst - 1] = '\0' + + if (!name) { + WRITE('\0'); + return 0; + } + + /* Implement the transformation as described in the spec: + * 1. upper case all letters after an underscore. + * 2. remove all underscores. 
+ */ + for (src = 0; name[src]; src++) { + if (name[src] == '_') { + ucase_next = true; + continue; + } + + if (ucase_next) { + WRITE(toupper(name[src])); + ucase_next = false; + } else { + WRITE(name[src]); + } + } + + WRITE('\0'); + return dst; + +#undef WRITE +} + +static char* makejsonname(const char* name, upb_alloc *alloc) { + size_t size = getjsonname(name, NULL, 0); + char* json_name = upb_malloc(alloc, size); + getjsonname(name, json_name, size); + return json_name; +} + static bool symtab_add(const symtab_addctx *ctx, const char *name, upb_value v) { upb_value tmp; @@ -1311,6 +1339,7 @@ static bool create_fielddef( const google_protobuf_FieldOptions *options; upb_strview name; const char *full_name; + const char *json_name; const char *shortname; uint32_t field_number; @@ -1324,6 +1353,13 @@ static bool create_fielddef( full_name = makefullname(ctx, prefix, name); shortname = shortdefname(full_name); + if (google_protobuf_FieldDescriptorProto_has_json_name(field_proto)) { + json_name = strviewdup( + ctx, google_protobuf_FieldDescriptorProto_json_name(field_proto)); + } else { + json_name = makejsonname(shortname, ctx->alloc); + } + field_number = google_protobuf_FieldDescriptorProto_number(field_proto); if (field_number == 0 || field_number > UPB_MAX_FIELDNUMBER) { @@ -1333,26 +1369,42 @@ static bool create_fielddef( if (m) { /* direct message field. 
*/ - upb_value v, packed_v; + upb_value v, field_v, json_v; + size_t json_size; f = (upb_fielddef*)&m->fields[m->field_count++]; f->msgdef = m; f->is_extension_ = false; - packed_v = pack_def(f, UPB_DEFTYPE_FIELD); - v = upb_value_constptr(f); - - if (!upb_strtable_insert3(&m->ntof, name.data, name.size, packed_v, alloc)) { + if (upb_strtable_lookup(&m->ntof, shortname, NULL)) { upb_status_seterrf(ctx->status, "duplicate field name (%s)", shortname); return false; } - if (!upb_inttable_insert2(&m->itof, field_number, v, alloc)) { + if (upb_strtable_lookup(&m->ntof, json_name, NULL)) { + upb_status_seterrf(ctx->status, "duplicate json_name (%s)", json_name); + return false; + } + + if (upb_inttable_lookup(&m->itof, field_number, NULL)) { upb_status_seterrf(ctx->status, "duplicate field number (%u)", field_number); return false; } + field_v = pack_def(f, UPB_DEFTYPE_FIELD); + json_v = pack_def(f, UPB_DEFTYPE_FIELD_JSONNAME); + v = upb_value_constptr(f); + json_size = strlen(json_name); + + CHK_OOM( + upb_strtable_insert3(&m->ntof, name.data, name.size, field_v, alloc)); + CHK_OOM(upb_inttable_insert2(&m->itof, field_number, v, alloc)); + + if (strcmp(shortname, json_name) != 0) { + upb_strtable_insert3(&m->ntof, json_name, json_size, json_v, alloc); + } + if (ctx->layouts) { const upb_msglayout_field *fields = m->layout->fields; int count = m->layout->field_count; @@ -1369,12 +1421,13 @@ static bool create_fielddef( } } else { /* extension field. 
*/ - f = (upb_fielddef*)&ctx->file->exts[ctx->file->ext_count]; + f = (upb_fielddef*)&ctx->file->exts[ctx->file->ext_count++]; f->is_extension_ = true; CHK_OOM(symtab_add(ctx, full_name, pack_def(f, UPB_DEFTYPE_FIELD))); } f->full_name = full_name; + f->json_name = json_name; f->file = ctx->file; f->type_ = (int)google_protobuf_FieldDescriptorProto_type(field_proto); f->label_ = (int)google_protobuf_FieldDescriptorProto_label(field_proto); @@ -1741,7 +1794,8 @@ static bool build_filedef( } else if (streql_view(syntax, "proto3")) { file->syntax = UPB_SYNTAX_PROTO3; } else { - upb_status_seterrf(ctx->status, "Invalid syntax '%s'", syntax); + upb_status_seterrf(ctx->status, "Invalid syntax '" UPB_STRVIEW_FORMAT "'", + UPB_STRVIEW_ARGS(syntax)); return false; } } else { diff --git a/upb/def.h b/upb/def.h index 5bc361e28b..48e113dffe 100644 --- a/upb/def.h +++ b/upb/def.h @@ -99,10 +99,10 @@ upb_descriptortype_t upb_fielddef_descriptortype(const upb_fielddef *f); upb_label_t upb_fielddef_label(const upb_fielddef *f); uint32_t upb_fielddef_number(const upb_fielddef *f); const char *upb_fielddef_name(const upb_fielddef *f); +const char *upb_fielddef_jsonname(const upb_fielddef *f); bool upb_fielddef_isextension(const upb_fielddef *f); bool upb_fielddef_lazy(const upb_fielddef *f); bool upb_fielddef_packed(const upb_fielddef *f); -size_t upb_fielddef_getjsonname(const upb_fielddef *f, char *buf, size_t len); const upb_msgdef *upb_fielddef_containingtype(const upb_fielddef *f); const upb_oneofdef *upb_fielddef_containingoneof(const upb_fielddef *f); uint32_t upb_fielddef_index(const upb_fielddef *f); @@ -151,32 +151,10 @@ class upb::FieldDefPtr { Type type() const { return upb_fielddef_type(ptr_); } Label label() const { return upb_fielddef_label(ptr_); } const char* name() const { return upb_fielddef_name(ptr_); } + const char* json_name() const { return upb_fielddef_jsonname(ptr_); } uint32_t number() const { return upb_fielddef_number(ptr_); } bool is_extension() const { 
return upb_fielddef_isextension(ptr_); } - /* Copies the JSON name for this field into the given buffer. Returns the - * actual size of the JSON name, including the NULL terminator. If the - * return value is 0, the JSON name is unset. If the return value is - * greater than len, the JSON name was truncated. The buffer is always - * NULL-terminated if len > 0. - * - * The JSON name always defaults to a camelCased version of the regular - * name. However if the regular name is unset, the JSON name will be unset - * also. - */ - size_t GetJsonName(char *buf, size_t len) const { - return upb_fielddef_getjsonname(ptr_, buf, len); - } - - /* Convenience version of the above function which copies the JSON name - * into the given string, returning false if the name is not set. */ - template - bool GetJsonName(T* str) { - str->resize(GetJsonName(NULL, 0)); - GetJsonName(&(*str)[0], str->size()); - return str->size() > 0; - } - /* For UPB_TYPE_MESSAGE fields only where is_tag_delimited() == false, * indicates whether this field should have lazy parsing handlers that yield * the unparsed string for the submessage. @@ -455,6 +433,10 @@ UPB_INLINE bool upb_msgdef_lookupnamez(const upb_msgdef *m, const char *name, return upb_msgdef_lookupname(m, name, strlen(name), f, o); } +/* Returns a field by either JSON name or regular proto name. */ +const upb_fielddef *upb_msgdef_lookupjsonname(const upb_msgdef *m, + const char *name, size_t len); + /* Iteration over fields and oneofs. For example: * * upb_msg_field_iter i; diff --git a/upb/json/parser.rl b/upb/json/parser.rl index de381b2571..19022df298 100644 --- a/upb/json/parser.rl +++ b/upb/json/parser.rl @@ -2874,15 +2874,13 @@ static upb_json_parsermethod *parsermethod_new(upb_json_codecache *c, upb_msg_field_next(&i)) { const upb_fielddef *f = upb_msg_iter_field(&i); upb_value v = upb_value_constptr(f); - char *buf; + const char *name; /* Add an entry for the JSON name. 
*/ - size_t len = upb_fielddef_getjsonname(f, NULL, 0); - buf = upb_malloc(alloc, len); - upb_fielddef_getjsonname(f, buf, len); - upb_strtable_insert3(&m->name_table, buf, strlen(buf), v, alloc); + name = upb_fielddef_jsonname(f); + upb_strtable_insert3(&m->name_table, name, strlen(name), v, alloc); - if (strcmp(buf, upb_fielddef_name(f)) != 0) { + if (strcmp(name, upb_fielddef_name(f)) != 0) { /* Since the JSON name is different from the regular field name, add an * entry for the raw name (compliant proto3 JSON parsers must accept * both). */ diff --git a/upb/json/printer.c b/upb/json/printer.c index 930ee441f6..b0030e0e52 100644 --- a/upb/json/printer.c +++ b/upb/json/printer.c @@ -65,12 +65,8 @@ strpc *newstrpc(upb_handlers *h, const upb_fielddef *f, ret->ptr = upb_gstrdup(upb_fielddef_name(f)); ret->len = strlen(ret->ptr); } else { - size_t len; - ret->len = upb_fielddef_getjsonname(f, NULL, 0); - ret->ptr = upb_gmalloc(ret->len); - len = upb_fielddef_getjsonname(f, ret->ptr, ret->len); - UPB_ASSERT(len == ret->len); - ret->len--; /* NULL */ + ret->ptr = upb_gstrdup(upb_fielddef_jsonname(f)); + ret->len = strlen(ret->ptr); } upb_handlers_addcleanup(h, ret, freestrpc); diff --git a/upb/json_decode.c b/upb/json_decode.c new file mode 100644 index 0000000000..54a55ad135 --- /dev/null +++ b/upb/json_decode.c @@ -0,0 +1,1405 @@ + +#include "upb/json_decode.h" + +#include +#include +#include +#include +#include +#include + +#include "upb/encode.h" +#include "upb/reflection.h" + +/* Special header, must be included last. */ +#include "upb/port_def.inc" + +typedef struct { + const char *ptr, *end; + upb_arena *arena; /* TODO: should we have a tmp arena for tmp data? 
*/ + const upb_symtab *any_pool; + int depth; + upb_status *status; + jmp_buf err; + int line; + const char *line_begin; + bool is_first; + int options; + const upb_fielddef *debug_field; +} jsondec; + +enum { JD_OBJECT, JD_ARRAY, JD_STRING, JD_NUMBER, JD_TRUE, JD_FALSE, JD_NULL }; + +/* Forward declarations of mutually-recursive functions. */ +static void jsondec_wellknown(jsondec *d, upb_msg *msg, const upb_msgdef *m); +static upb_msgval jsondec_value(jsondec *d, const upb_fielddef *f); +static void jsondec_wellknownvalue(jsondec *d, upb_msg *msg, + const upb_msgdef *m); +static void jsondec_object(jsondec *d, upb_msg *msg, const upb_msgdef *m); + +static bool jsondec_streql(upb_strview str, const char *lit) { + return str.size == strlen(lit) && memcmp(str.data, lit, str.size) == 0; +} + +UPB_NORETURN static void jsondec_err(jsondec *d, const char *msg) { + upb_status_seterrmsg(d->status, msg); + longjmp(d->err, 1); +} + +UPB_NORETURN static void jsondec_errf(jsondec *d, const char *fmt, ...) { + va_list argp; + va_start(argp, fmt); + upb_status_vseterrf(d->status, fmt, argp); + va_end(argp); + longjmp(d->err, 1); +} + +static void jsondec_skipws(jsondec *d) { + while (d->ptr != d->end) { + switch (*d->ptr) { + case '\n': + d->line++; + d->line_begin = d->ptr; + /* Fallthrough. 
*/ + case '\r': + case '\t': + case ' ': + d->ptr++; + break; + default: + return; + } + } + jsondec_err(d, "Unexpected EOF"); +} + +static bool jsondec_tryparsech(jsondec *d, char ch) { + if (d->ptr == d->end || *d->ptr != ch) return false; + d->ptr++; + return true; +} + +static void jsondec_parselit(jsondec *d, const char *lit) { + size_t len = strlen(lit); + if (d->end - d->ptr < len || memcmp(d->ptr, lit, len) != 0) { + jsondec_errf(d, "Expected: '%s'", lit); + } + d->ptr += len; +} + +static void jsondec_wsch(jsondec *d, char ch) { + jsondec_skipws(d); + if (!jsondec_tryparsech(d, ch)) { + jsondec_errf(d, "Expected: '%c'", ch); + } +} + +static void jsondec_true(jsondec *d) { jsondec_parselit(d, "true"); } +static void jsondec_false(jsondec *d) { jsondec_parselit(d, "false"); } +static void jsondec_null(jsondec *d) { jsondec_parselit(d, "null"); } + +static void jsondec_entrysep(jsondec *d) { + jsondec_skipws(d); + jsondec_parselit(d, ":"); +} + +static int jsondec_rawpeek(jsondec *d) { + switch (*d->ptr) { + case '{': + return JD_OBJECT; + case '[': + return JD_ARRAY; + case '"': + return JD_STRING; + case '-': + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + return JD_NUMBER; + case 't': + return JD_TRUE; + case 'f': + return JD_FALSE; + case 'n': + return JD_NULL; + default: + jsondec_errf(d, "Unexpected character: '%c'", *d->ptr); + } +} + +/* JSON object/array **********************************************************/ + +/* These are used like so: + * + * jsondec_objstart(d); + * while (jsondec_objnext(d)) { + * ... 
+ * } + * jsondec_objend(d) */ + +static int jsondec_peek(jsondec *d) { + jsondec_skipws(d); + return jsondec_rawpeek(d); +} + +static void jsondec_push(jsondec *d) { + if (--d->depth < 0) { + jsondec_err(d, "Recursion limit exceeded"); + } + d->is_first = true; +} + +static bool jsondec_seqnext(jsondec *d, char end_ch) { + jsondec_skipws(d); + if (*d->ptr == end_ch) return false; + + if (d->is_first) { + d->is_first = false; + } else { + jsondec_parselit(d, ","); + } + + return true; +} + +static void jsondec_arrstart(jsondec *d) { + jsondec_push(d); + jsondec_wsch(d, '['); +} + +static void jsondec_arrend(jsondec *d) { + d->depth++; + jsondec_wsch(d, ']'); +} + +static bool jsondec_arrnext(jsondec *d) { + return jsondec_seqnext(d, ']'); +} + +static void jsondec_objstart(jsondec *d) { + jsondec_push(d); + jsondec_wsch(d, '{'); +} + +static void jsondec_objend(jsondec *d) { + d->depth++; + jsondec_wsch(d, '}'); +} + +static bool jsondec_objnext(jsondec *d) { + if (!jsondec_seqnext(d, '}')) return false; + if (jsondec_peek(d) != JD_STRING) { + jsondec_err(d, "Object must start with string"); + } + return true; +} + +/* JSON number ****************************************************************/ + +static bool jsondec_tryskipdigits(jsondec *d) { + const char *start = d->ptr; + + while (d->ptr < d->end) { + if (*d->ptr < '0' || *d->ptr > '9') { + break; + } + d->ptr++; + } + + return d->ptr != start; +} + +static void jsondec_skipdigits(jsondec *d) { + if (!jsondec_tryskipdigits(d)) { + jsondec_err(d, "Expected one or more digits"); + } +} + +static double jsondec_number(jsondec *d) { + const char *start = d->ptr; + + assert(jsondec_rawpeek(d) == JD_NUMBER); + + /* Skip over the syntax of a number, as specified by JSON. 
*/ + if (*d->ptr == '-') d->ptr++; + + if (jsondec_tryparsech(d, '0')) { + if (jsondec_tryskipdigits(d)) { + jsondec_err(d, "number cannot have leading zero"); + } + } else { + jsondec_skipdigits(d); + } + + if (d->ptr == d->end) goto parse; + if (jsondec_tryparsech(d, '.')) { + jsondec_skipdigits(d); + } + if (d->ptr == d->end) goto parse; + + if (*d->ptr == 'e' || *d->ptr == 'E') { + d->ptr++; + if (d->ptr == d->end) { + jsondec_err(d, "Unexpected EOF in number"); + } + if (*d->ptr == '+' || *d->ptr == '-') { + d->ptr++; + } + jsondec_skipdigits(d); + } + +parse: + /* Having verified the syntax of a JSON number, use strtod() to parse + * (strtod() accepts a superset of JSON syntax). */ + errno = 0; + { + char* end; + double val = strtod(start, &end); + assert(end == d->ptr); + + /* Currently the min/max-val conformance tests fail if we check this. Does + * this mean the conformance tests are wrong or strtod() is wrong, or + * something else? Investigate further. */ + /* + if (errno == ERANGE) { + jsondec_err(d, "Number out of range"); + } + */ + + if (val > DBL_MAX || val < -DBL_MAX) { + jsondec_err(d, "Number out of range"); + } + + return val; + } +} + +/* JSON string ****************************************************************/ + +static char jsondec_escape(jsondec *d) { + switch (*d->ptr++) { + case '"': + return '\"'; + case '\\': + return '\\'; + case '/': + return '/'; + case 'b': + return '\b'; + case 'f': + return '\f'; + case 'n': + return '\n'; + case 'r': + return '\r'; + case 't': + return '\t'; + default: + jsondec_err(d, "Invalid escape char"); + } +} + +static uint32_t jsondec_codepoint(jsondec *d) { + uint32_t cp = 0; + const char *end; + + if (d->end - d->ptr < 4) { + jsondec_err(d, "EOF inside string"); + } + + end = d->ptr + 4; + while (d->ptr < end) { + char ch = *d->ptr++; + if (ch >= '0' && ch <= '9') { + ch -= '0'; + } else if (ch >= 'a' && ch <= 'f') { + ch = ch - 'a' + 10; + } else if (ch >= 'A' && ch <= 'F') { + ch = ch - 'A' + 10; 
+ } else { + jsondec_err(d, "Invalid hex digit"); + } + cp = (cp << 4) | ch; + } + + return cp; +} + +/* Parses a \uXXXX unicode escape (possibly a surrogate pair). */ +static size_t jsondec_unicode(jsondec *d, char* out) { + uint32_t cp = jsondec_codepoint(d); + if (cp >= 0xd800 && cp <= 0xdbff) { + /* Surrogate pair: two 16-bit codepoints become a 32-bit codepoint. */ + uint32_t high = cp; + uint32_t low; + jsondec_parselit(d, "\\u"); + low = jsondec_codepoint(d); + if (low < 0xdc00 || low > 0xdfff) { + jsondec_err(d, "Invalid low surrogate"); + } + cp = (high & 0x3ff) << 10; + cp |= (low & 0x3ff); + cp += 0x10000; + } else if (cp >= 0xdc00 && cp <= 0xdfff) { + jsondec_err(d, "Unpaired low surrogate"); + } + + /* Write to UTF-8 */ + if (cp <= 0x7f) { + out[0] = cp; + return 1; + } else if (cp <= 0x07FF) { + out[0] = ((cp >> 6) & 0x1F) | 0xC0; + out[1] = ((cp >> 0) & 0x3F) | 0x80; + return 2; + } else if (cp <= 0xFFFF) { + out[0] = ((cp >> 12) & 0x0F) | 0xE0; + out[1] = ((cp >> 6) & 0x3F) | 0x80; + out[2] = ((cp >> 0) & 0x3F) | 0x80; + return 3; + } else if (cp < 0x10FFFF) { + out[0] = ((cp >> 18) & 0x07) | 0xF0; + out[1] = ((cp >> 12) & 0x3f) | 0x80; + out[2] = ((cp >> 6) & 0x3f) | 0x80; + out[3] = ((cp >> 0) & 0x3f) | 0x80; + return 4; + } else { + jsondec_err(d, "Invalid codepoint"); + } +} + +static void jsondec_resize(jsondec *d, char **buf, char **end, char **buf_end) { + size_t oldsize = *buf_end - *buf; + size_t len = *end - *buf; + size_t size = UPB_MAX(8, 2 * oldsize); + + *buf = upb_arena_realloc(d->arena, *buf, len, size); + *end = *buf + len; + *buf_end = *buf + size; +} + +static upb_strview jsondec_string(jsondec *d) { + char *buf = NULL; + char *end = NULL; + char *buf_end = NULL; + + jsondec_skipws(d); + + if (*d->ptr++ != '"') { + jsondec_err(d, "Expected string"); + } + + while (d->ptr < d->end) { + char ch = *d->ptr++; + + if (end == buf_end) { + jsondec_resize(d, &buf, &end, &buf_end); + } + + switch (ch) { + case '"': { + upb_strview ret = 
{buf, end - buf}; + return ret; + } + case '\\': + if (d->ptr == d->end) goto eof; + if (*d->ptr == 'u') { + d->ptr++; + if (buf_end - end < 4) { + // Allow space for maximum-sized code point (4 bytes). + jsondec_resize(d, &buf, &end, &buf_end); + } + end += jsondec_unicode(d, end); + } else { + *end++ = jsondec_escape(d); + } + break; + default: + if ((unsigned char)*d->ptr < 0x20) { + jsondec_err(d, "Invalid char in JSON string"); + } + *end++ = ch; + break; + } + } + +eof: + jsondec_err(d, "EOF inside string"); +} + +static void jsondec_skipval(jsondec *d) { + switch (jsondec_peek(d)) { + case JD_OBJECT: + jsondec_objstart(d); + while (jsondec_objnext(d)) { + jsondec_string(d); + jsondec_entrysep(d); + jsondec_skipval(d); + } + jsondec_objend(d); + break; + case JD_ARRAY: + jsondec_arrstart(d); + while (jsondec_arrnext(d)) { + jsondec_skipval(d); + } + jsondec_arrend(d); + break; + case JD_TRUE: + jsondec_true(d); + break; + case JD_FALSE: + jsondec_false(d); + break; + case JD_NULL: + jsondec_null(d); + break; + case JD_STRING: + jsondec_string(d); + break; + case JD_NUMBER: + jsondec_number(d); + break; + } +} + +/* Base64 decoding for bytes fields. ******************************************/ + +static int jsondec_base64_tablelookup(const char ch) { + /* Table includes the normal base64 chars plus the URL-safe variant. 
*/ + const signed char table[256] = { + -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, + -1, 62 /*+*/, -1, 62 /*-*/, -1, 63 /*/ */, 52 /*0*/, + 53 /*1*/, 54 /*2*/, 55 /*3*/, 56 /*4*/, 57 /*5*/, 58 /*6*/, 59 /*7*/, + 60 /*8*/, 61 /*9*/, -1, -1, -1, -1, -1, + -1, -1, 0 /*A*/, 1 /*B*/, 2 /*C*/, 3 /*D*/, 4 /*E*/, + 5 /*F*/, 6 /*G*/, 07 /*H*/, 8 /*I*/, 9 /*J*/, 10 /*K*/, 11 /*L*/, + 12 /*M*/, 13 /*N*/, 14 /*O*/, 15 /*P*/, 16 /*Q*/, 17 /*R*/, 18 /*S*/, + 19 /*T*/, 20 /*U*/, 21 /*V*/, 22 /*W*/, 23 /*X*/, 24 /*Y*/, 25 /*Z*/, + -1, -1, -1, -1, 63 /*_*/, -1, 26 /*a*/, + 27 /*b*/, 28 /*c*/, 29 /*d*/, 30 /*e*/, 31 /*f*/, 32 /*g*/, 33 /*h*/, + 34 /*i*/, 35 /*j*/, 36 /*k*/, 37 /*l*/, 38 /*m*/, 39 /*n*/, 40 /*o*/, + 41 /*p*/, 42 /*q*/, 43 /*r*/, 44 /*s*/, 45 /*t*/, 46 /*u*/, 47 /*v*/, + 48 /*w*/, 49 /*x*/, 50 /*y*/, 51 /*z*/, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1}; + + /* Sign-extend return value so high bit will be set on any unexpected char. 
*/ + return table[(unsigned)ch]; +} + +static char *jsondec_partialbase64(jsondec *d, const char *ptr, const char *end, + char *out) { + int32_t val = -1; + + switch (end - ptr) { + case 2: + val = jsondec_base64_tablelookup(ptr[0]) << 18 | + jsondec_base64_tablelookup(ptr[1]) << 12; + out[0] = val >> 16; + out += 1; + break; + case 3: + val = jsondec_base64_tablelookup(ptr[0]) << 18 | + jsondec_base64_tablelookup(ptr[1]) << 12 | + jsondec_base64_tablelookup(ptr[2]) << 6; + out[0] = val >> 16; + out[1] = (val >> 8) & 0xff; + out += 2; + break; + } + + if (val < 0) { + jsondec_err(d, "Corrupt base64"); + } + + return out; +} + +static size_t jsondec_base64(jsondec *d, upb_strview str) { + /* We decode in place. This is safe because this is a new buffer (not + * aliasing the input) and because base64 decoding shrinks 4 bytes into 3. */ + char *out = (char*)str.data; + const char *ptr = str.data; + const char *end = ptr + str.size; + const char *end4 = ptr + (str.size & -4); /* Round down to multiple of 4. */ + + for (; ptr < end4; ptr += 4, out += 3) { + int val = jsondec_base64_tablelookup(ptr[0]) << 18 | + jsondec_base64_tablelookup(ptr[1]) << 12 | + jsondec_base64_tablelookup(ptr[2]) << 6 | + jsondec_base64_tablelookup(ptr[3]) << 0; + + if (val < 0) { + /* Junk chars or padding. Remove trailing padding, if any. */ + if (end - ptr == 4 && ptr[3] == '=') { + if (ptr[2] == '=') { + end -= 2; + } else { + end -= 1; + } + } + break; + } + + out[0] = val >> 16; + out[1] = (val >> 8) & 0xff; + out[2] = val & 0xff; + } + + if (ptr < end) { + /* Process remaining chars. We do not require padding. */ + out = jsondec_partialbase64(d, ptr, end, out); + } + + return out - str.data; +} + +/* Low-level integer parsing **************************************************/ + +/* We use these hand-written routines instead of strto[u]l() because the "long + * long" variants aren't in c89. Also our version allows setting a ptr limit. 
*/ + +static const char *jsondec_buftouint64(jsondec *d, const char *ptr, + const char *end, uint64_t *val) { + uint64_t u64 = 0; + while (ptr < end) { + unsigned ch = *ptr - '0'; + if (ch >= 10) break; + if (u64 > UINT64_MAX / 10 || u64 * 10 > UINT64_MAX - ch) { + jsondec_err(d, "Integer overflow"); + } + u64 *= 10; + u64 += ch; + ptr++; + } + + *val = u64; + return ptr; +} + +static const char *jsondec_buftoint64(jsondec *d, const char *ptr, + const char *end, int64_t *val) { + bool neg = false; + uint64_t u64; + + if (ptr != end && *ptr == '-') { + ptr++; + neg = true; + } + + ptr = jsondec_buftouint64(d, ptr, end, &u64); + if (u64 > (uint64_t)INT64_MAX + neg) { + jsondec_err(d, "Integer overflow"); + } + + *val = neg ? -u64 : u64; + return ptr; +} + +static uint64_t jsondec_strtouint64(jsondec *d, upb_strview str) { + const char *end = str.data + str.size; + uint64_t ret; + if (jsondec_buftouint64(d, str.data, end, &ret) != end) { + jsondec_err(d, "Non-number characters in quoted integer"); + } + return ret; +} + +static int64_t jsondec_strtoint64(jsondec *d, upb_strview str) { + const char *end = str.data + str.size; + int64_t ret; + if (jsondec_buftoint64(d, str.data, end, &ret) != end) { + jsondec_err(d, "Non-number characters in quoted integer"); + } + return ret; +} + +/* Primitive value types ******************************************************/ + +/* Parse INT32 or INT64 value. 
*/ +static upb_msgval jsondec_int(jsondec *d, const upb_fielddef *f) { + upb_msgval val; + + switch (jsondec_peek(d)) { + case JD_NUMBER: { + double dbl = jsondec_number(d); + if (dbl > 9223372036854774784.0 || dbl < -9223372036854775808.0) { + jsondec_err(d, "JSON number is out of range."); + } + val.int64_val = dbl; /* must be guarded, overflow here is UB */ + if (val.int64_val != dbl) { + jsondec_errf(d, "JSON number was not integral (%d != %" PRId64 ")", dbl, + val.int64_val); + } + break; + } + case JD_STRING: { + upb_strview str = jsondec_string(d); + val.int64_val = jsondec_strtoint64(d, str); + break; + } + default: + jsondec_err(d, "Expected number or string"); + } + + if (upb_fielddef_type(f) == UPB_TYPE_INT32) { + if (val.int64_val > INT32_MAX || val.int64_val < INT32_MIN) { + jsondec_err(d, "Integer out of range."); + } + val.int32_val = val.int64_val; + } + + return val; +} + +/* Parse UINT32 or UINT64 value. */ +static upb_msgval jsondec_uint(jsondec *d, const upb_fielddef *f) { + upb_msgval val; + + switch (jsondec_peek(d)) { + case JD_NUMBER: { + double dbl = jsondec_number(d); + if (dbl > 18446744073709549568.0 || dbl < 0) { + jsondec_err(d, "JSON number is out of range."); + } + val.uint64_val = dbl; /* must be guarded, overflow here is UB */ + if (val.uint64_val != dbl) { + jsondec_errf(d, "JSON number was not integral (%d != %" PRIu64 ")", dbl, + val.uint64_val); + } + break; + } + case JD_STRING: { + upb_strview str = jsondec_string(d); + val.uint64_val = jsondec_strtouint64(d, str); + break; + } + default: + jsondec_err(d, "Expected number or string"); + } + + if (upb_fielddef_type(f) == UPB_TYPE_UINT32) { + if (val.uint64_val > UINT32_MAX) { + jsondec_err(d, "Integer out of range."); + } + val.uint32_val = val.uint64_val; + } + + return val; +} + +/* Parse DOUBLE or FLOAT value. 
*/ +static upb_msgval jsondec_double(jsondec *d, const upb_fielddef *f) { + upb_strview str; + upb_msgval val; + + switch (jsondec_peek(d)) { + case JD_NUMBER: + val.double_val = jsondec_number(d); + break; + case JD_STRING: + str = jsondec_string(d); + if (jsondec_streql(str, "NaN")) { + val.double_val = 0.0 / 0.0; + } else if (jsondec_streql(str, "Infinity")) { + val.double_val = UPB_INFINITY; + } else if (jsondec_streql(str, "-Infinity")) { + val.double_val = -UPB_INFINITY; + } else { + val.double_val = strtod(str.data, NULL); + } + break; + default: + jsondec_err(d, "Expected number or string"); + } + + if (upb_fielddef_type(f) == UPB_TYPE_FLOAT) { + if (val.double_val != UPB_INFINITY && val.double_val != -UPB_INFINITY && + (val.double_val > FLT_MAX || val.double_val < -FLT_MAX)) { + jsondec_err(d, "Float out of range"); + } + val.float_val = val.double_val; + } + + return val; +} + +/* Parse STRING or BYTES value. */ +static upb_msgval jsondec_strfield(jsondec *d, const upb_fielddef *f) { + upb_msgval val; + val.str_val = jsondec_string(d); + if (upb_fielddef_type(f) == UPB_TYPE_BYTES) { + val.str_val.size = jsondec_base64(d, val.str_val); + } + return val; +} + +static upb_msgval jsondec_enum(jsondec *d, const upb_fielddef *f) { + if (jsondec_peek(d) == JD_STRING) { + const upb_enumdef *e = upb_fielddef_enumsubdef(f); + upb_strview str = jsondec_string(d); + upb_msgval val; + if (!upb_enumdef_ntoi(e, str.data, str.size, &val.int32_val)) { + jsondec_err(d, "Unknown enumerator"); + } + return val; + } else { + return jsondec_int(d, f); + } +} + +static upb_msgval jsondec_bool(jsondec *d, const upb_fielddef *f) { + bool is_map_key = upb_fielddef_number(f) == 1 && + upb_msgdef_mapentry(upb_fielddef_containingtype(f)); + upb_msgval val; + + if (is_map_key) { + upb_strview str = jsondec_string(d); + if (jsondec_streql(str, "true")) { + val.bool_val = true; + } else if (jsondec_streql(str, "false")) { + val.bool_val = false; + } else { + jsondec_err(d, "Invalid 
boolean map key"); + } + } else { + switch (jsondec_peek(d)) { + case JD_TRUE: + val.bool_val = true; + jsondec_true(d); + break; + case JD_FALSE: + val.bool_val = false; + jsondec_false(d); + break; + default: + jsondec_err(d, "Expected true or false"); + } + } + + return val; +} + +/* Composite types (array/message/map) ****************************************/ + +static void jsondec_array(jsondec *d, upb_msg *msg, const upb_fielddef *f) { + upb_array *arr = upb_msg_mutable(msg, f, d->arena).array; + + jsondec_arrstart(d); + while (jsondec_arrnext(d)) { + upb_msgval elem = jsondec_value(d, f); + upb_array_append(arr, elem, d->arena); + } + jsondec_arrend(d); +} + +static void jsondec_map(jsondec *d, upb_msg *msg, const upb_fielddef *f) { + upb_map *map = upb_msg_mutable(msg, f, d->arena).map; + const upb_msgdef *entry = upb_fielddef_msgsubdef(f); + const upb_fielddef *key_f = upb_msgdef_itof(entry, 1); + const upb_fielddef *val_f = upb_msgdef_itof(entry, 2); + + jsondec_objstart(d); + while (jsondec_objnext(d)) { + upb_msgval key, val; + key = jsondec_value(d, key_f); + jsondec_entrysep(d); + val = jsondec_value(d, val_f); + upb_map_set(map, key, val, d->arena); + } + jsondec_objend(d); +} + +static void jsondec_tomsg(jsondec *d, upb_msg *msg, const upb_msgdef *m) { + if (upb_msgdef_wellknowntype(m) == UPB_WELLKNOWN_UNSPECIFIED) { + jsondec_object(d, msg, m); + } else { + jsondec_wellknown(d, msg, m); + } +} + +static upb_msgval jsondec_msg(jsondec *d, const upb_fielddef *f) { + const upb_msgdef *m = upb_fielddef_msgsubdef(f); + upb_msg *msg = upb_msg_new(m, d->arena); + upb_msgval val; + + jsondec_tomsg(d, msg, m); + val.msg_val = msg; + return val; +} + +static bool jsondec_isvalue(const upb_fielddef *f) { + return upb_fielddef_type(f) == UPB_TYPE_MESSAGE && + upb_msgdef_wellknowntype(upb_fielddef_msgsubdef(f)) == + UPB_WELLKNOWN_VALUE; +} + +static void jsondec_field(jsondec *d, upb_msg *msg, const upb_msgdef *m) { + upb_strview name; + const upb_fielddef *f; 
+ const upb_fielddef *preserved; + + name = jsondec_string(d); + jsondec_entrysep(d); + f = upb_msgdef_lookupjsonname(m, name.data, name.size); + + if (!f) { + if ((d->options & UPB_JSONDEC_IGNOREUNKNOWN) == 0) { + jsondec_err(d, "Unknown field"); + } + jsondec_skipval(d); + return; + } + + if (upb_fielddef_containingoneof(f) && + upb_msg_hasoneof(msg, upb_fielddef_containingoneof(f))) { + jsondec_err(d, "More than one field for this oneof."); + } + + if (jsondec_peek(d) == JD_NULL && !jsondec_isvalue(f)) { + /* JSON "null" indicates a default value, so no need to set anything. */ + return jsondec_null(d); + } + + preserved = d->debug_field; + d->debug_field = f; + + if (upb_fielddef_ismap(f)) { + jsondec_map(d, msg, f); + } else if (upb_fielddef_isseq(f)) { + jsondec_array(d, msg, f); + } else if (upb_fielddef_issubmsg(f)) { + upb_msg *submsg = upb_msg_mutable(msg, f, d->arena).msg; + const upb_msgdef *subm = upb_fielddef_msgsubdef(f); + jsondec_tomsg(d, submsg, subm); + } else { + upb_msgval val = jsondec_value(d, f); + upb_msg_set(msg, f, val, d->arena); + } + + d->debug_field = preserved; +} + +static void jsondec_object(jsondec *d, upb_msg *msg, const upb_msgdef *m) { + jsondec_objstart(d); + while (jsondec_objnext(d)) jsondec_field(d, msg, m); + jsondec_objend(d); +} + +static upb_msgval jsondec_value(jsondec *d, const upb_fielddef *f) { + switch (upb_fielddef_type(f)) { + case UPB_TYPE_BOOL: + return jsondec_bool(d, f); + case UPB_TYPE_FLOAT: + case UPB_TYPE_DOUBLE: + return jsondec_double(d, f); + case UPB_TYPE_UINT32: + case UPB_TYPE_UINT64: + return jsondec_uint(d, f); + case UPB_TYPE_INT32: + case UPB_TYPE_INT64: + return jsondec_int(d, f); + case UPB_TYPE_STRING: + case UPB_TYPE_BYTES: + return jsondec_strfield(d, f); + case UPB_TYPE_ENUM: + return jsondec_enum(d, f); + case UPB_TYPE_MESSAGE: + return jsondec_msg(d, f); + default: + UPB_UNREACHABLE(); + } +} + +/* Well-known types ***********************************************************/ + +static 
int jsondec_tsdigits(jsondec *d, const char **ptr, size_t digits, + const char *after) { + uint64_t val; + const char *p = *ptr; + const char *end = p + digits; + size_t after_len = after ? strlen(after) : 0; + + assert(digits <= 9); /* int can't overflow. */ + + if (jsondec_buftouint64(d, p, end, &val) != end || + (after_len && memcmp(end, after, after_len) != 0)) { + jsondec_err(d, "Malformed timestamp"); + } + + *ptr = end + after_len; + return val; +} + +static int jsondec_nanos(jsondec *d, const char **ptr, const char *end) { + uint64_t nanos = 0; + const char *p = *ptr; + + if (p != end && *p == '.') { + const char *nano_end = jsondec_buftouint64(d, p + 1, end, &nanos); + int digits = nano_end - p - 1; + int exp_lg10 = 9 - digits; + if (digits > 9) { + jsondec_err(d, "Too many digits for partial seconds"); + } + while (exp_lg10--) nanos *= 10; + *ptr = nano_end; + } + + return nanos; +} + +// jsondec_epochdays(1970, 1, 1) == 1970-01-01 == 0 +static int jsondec_epochdays(int y, int m, int d) { + unsigned year_base = 4800; /* Before minimum year, divisible by 100 & 400 */ + unsigned epoch = 2472632; /* Days between year_base and 1970 (Unix epoch) */ + unsigned carry = (unsigned)m - 3 > m; + unsigned m_adj = m - 3 + (carry ? 
12 : 0); /* Month, counting from March */ + unsigned y_adj = y + year_base - carry; /* Year, positive and March-based */ + unsigned base_days = (365 * 4 + 1) * y_adj / 4; /* Approx days for year */ + unsigned centuries = y_adj / 100; + unsigned extra_leap_days = (3 * centuries + 3) / 4; /* base_days correction */ + unsigned year_days = (367 * (m_adj + 1)) / 12 - 30; /* Counting from March */ + return base_days - extra_leap_days + year_days + (d - 1) - epoch; +} + +static int64_t jsondec_unixtime(int y, int m, int d, int h, int min, int s) { + return (int64_t)jsondec_epochdays(y, m, d) * 86400 + h * 3600 + min * 60 + s; +} + +static void jsondec_timestamp(jsondec *d, upb_msg *msg, const upb_msgdef *m) { + upb_msgval seconds; + upb_msgval nanos; + upb_strview str = jsondec_string(d); + const char *ptr = str.data; + const char *end = ptr + str.size; + + if (str.size < 20) goto malformed; + + { + // 1972-01-01T01:00:00 + int year = jsondec_tsdigits(d, &ptr, 4, "-"); + int mon = jsondec_tsdigits(d, &ptr, 2, "-"); + int day = jsondec_tsdigits(d, &ptr, 2, "T"); + int hour = jsondec_tsdigits(d, &ptr, 2, ":"); + int min = jsondec_tsdigits(d, &ptr, 2, ":"); + int sec = jsondec_tsdigits(d, &ptr, 2, NULL); + + seconds.int64_val = jsondec_unixtime(year, mon, day, hour, min, sec); + } + + nanos.int32_val = jsondec_nanos(d, &ptr, end); + + { + // [+-]08:00 or Z + int ofs = 0; + bool neg = false; + + if (ptr == end) goto malformed; + + switch (*ptr++) { + case '-': + neg = true; + /* Fallthrough intended. */ + case '+': + if ((end - ptr) != 5) goto malformed; + ofs = jsondec_tsdigits(d, &ptr, 2, ":00"); + ofs *= 60 * 60; + seconds.int64_val += (neg ? 
ofs : -ofs); + break; + case 'Z': + if (ptr != end) goto malformed; + break; + default: + goto malformed; + } + } + + if (seconds.int64_val < -62135596800) { + jsondec_err(d, "Timestamp out of range"); + } + + upb_msg_set(msg, upb_msgdef_itof(m, 1), seconds, d->arena); + upb_msg_set(msg, upb_msgdef_itof(m, 2), nanos, d->arena); + return; + +malformed: + jsondec_err(d, "Malformed timestamp"); +} + +static void jsondec_duration(jsondec *d, upb_msg *msg, const upb_msgdef *m) { + upb_msgval seconds; + upb_msgval nanos; + upb_strview str = jsondec_string(d); + const char *ptr = str.data; + const char *end = ptr + str.size; + + // "3.000000001s", "3s", etc. + ptr = jsondec_buftoint64(d, ptr, end, &seconds.int64_val); + nanos.int32_val = jsondec_nanos(d, &ptr, end); + + if (end - ptr != 1 || *ptr != 's') { + jsondec_err(d, "Malformed duration"); + } + + if (seconds.int64_val < -315576000000LL || seconds.int64_val > 315576000000LL) { + jsondec_err(d, "Duration out of range"); + } + + if (seconds.int64_val < 0) { + nanos.int32_val = - nanos.int32_val; + } + + upb_msg_set(msg, upb_msgdef_itof(m, 1), seconds, d->arena); + upb_msg_set(msg, upb_msgdef_itof(m, 2), nanos, d->arena); +} + +static void jsondec_listvalue(jsondec *d, upb_msg *msg, const upb_msgdef *m) { + const upb_fielddef *values_f = upb_msgdef_itof(m, 1); + const upb_msgdef *value_m = upb_fielddef_msgsubdef(values_f); + upb_array *values = upb_msg_mutable(msg, values_f, d->arena).array; + + jsondec_arrstart(d); + while (jsondec_arrnext(d)) { + upb_msg *value_msg = upb_msg_new(value_m, d->arena); + upb_msgval value; + value.msg_val = value_msg; + upb_array_append(values, value, d->arena); + jsondec_wellknownvalue(d, value_msg, value_m); + } + jsondec_arrend(d); +} + +static void jsondec_struct(jsondec *d, upb_msg *msg, const upb_msgdef *m) { + const upb_fielddef *fields_f = upb_msgdef_itof(m, 1); + const upb_msgdef *entry_m = upb_fielddef_msgsubdef(fields_f); + const upb_fielddef *value_f = upb_msgdef_itof(entry_m, 
2); + const upb_msgdef *value_m = upb_fielddef_msgsubdef(value_f); + upb_map *fields = upb_msg_mutable(msg, fields_f, d->arena).map; + + jsondec_objstart(d); + while (jsondec_objnext(d)) { + upb_msgval key, value; + upb_msg *value_msg = upb_msg_new(value_m, d->arena); + key.str_val = jsondec_string(d); + value.msg_val = value_msg; + upb_map_set(fields, key, value, d->arena); + jsondec_entrysep(d); + jsondec_wellknownvalue(d, value_msg, value_m); + } + jsondec_objend(d); +} + +static void jsondec_wellknownvalue(jsondec *d, upb_msg *msg, + const upb_msgdef *m) { + upb_msgval val; + const upb_fielddef *f; + upb_msg *submsg; + + switch (jsondec_peek(d)) { + case JD_NUMBER: + /* double number_value = 2; */ + f = upb_msgdef_itof(m, 2); + val.double_val = jsondec_number(d); + break; + case JD_STRING: + /* string string_value = 3; */ + f = upb_msgdef_itof(m, 3); + val.str_val = jsondec_string(d); + break; + case JD_FALSE: + /* bool bool_value = 4; */ + f = upb_msgdef_itof(m, 4); + val.bool_val = false; + jsondec_false(d); + break; + case JD_TRUE: + /* bool bool_value = 4; */ + f = upb_msgdef_itof(m, 4); + val.bool_val = true; + jsondec_true(d); + break; + case JD_NULL: + /* NullValue null_value = 1; */ + f = upb_msgdef_itof(m, 1); + val.int32_val = 0; + jsondec_null(d); + break; + /* Note: these cases return, because upb_msg_mutable() is enough. 
*/ + case JD_OBJECT: + /* Struct struct_value = 5; */ + f = upb_msgdef_itof(m, 5); + submsg = upb_msg_mutable(msg, f, d->arena).msg; + jsondec_struct(d, submsg, upb_fielddef_msgsubdef(f)); + return; + case JD_ARRAY: + /* ListValue list_value = 6; */ + f = upb_msgdef_itof(m, 6); + submsg = upb_msg_mutable(msg, f, d->arena).msg; + jsondec_listvalue(d, submsg, upb_fielddef_msgsubdef(f)); + return; + default: + UPB_UNREACHABLE(); + } + + upb_msg_set(msg, f, val, d->arena); +} + +static upb_strview jsondec_mask(jsondec *d, const char *buf, const char *end) { + /* FieldMask fields grow due to inserted '_' characters, so we can't do the + * transform in place. */ + const char *ptr = buf; + upb_strview ret; + char *out; + + ret.size = end - ptr; + while (ptr < end) { + ret.size += (*ptr >= 'A' && *ptr <= 'Z'); + ptr++; + } + + out = upb_arena_malloc(d->arena, ret.size); + ptr = buf; + ret.data = out; + + while (ptr < end) { + char ch = *ptr++; + if (ch >= 'A' && ch <= 'Z') { + *out++ = '_'; + *out++ = ch + 32; + } else if (ch == '_') { + jsondec_err(d, "field mask may not contain '_'"); + } else { + *out++ = ch; + } + } + + return ret; +} + +static void jsondec_fieldmask(jsondec *d, upb_msg *msg, const upb_msgdef *m) { + /* repeated string paths = 1; */ + const upb_fielddef *paths_f = upb_msgdef_itof(m, 1); + upb_array *arr = upb_msg_mutable(msg, paths_f, d->arena).array; + upb_strview str = jsondec_string(d); + const char *ptr = str.data; + const char *end = ptr + str.size; + upb_msgval val; + + while (ptr < end) { + const char *elem_end = memchr(ptr, ',', end - ptr); + if (elem_end) { + val.str_val = jsondec_mask(d, ptr, elem_end); + ptr = elem_end + 1; + } else { + val.str_val = jsondec_mask(d, ptr, end); + ptr = end; + } + upb_array_append(arr, val, d->arena); + } +} + +static void jsondec_anyfield(jsondec *d, upb_msg *msg, const upb_msgdef *m) { + if (upb_msgdef_wellknowntype(m) == UPB_WELLKNOWN_UNSPECIFIED) { + /* For regular types: {"@type": "[user type]", "f1": <V1>,
"f2": <V2>} + * where f1, f2, etc. are the normal fields of this type. */ + jsondec_field(d, msg, m); + } else { + /* For well-known types: {"@type": "[well-known type]", "value": <X>} + * where <X> is whatever encoding the WKT normally uses. */ + upb_strview str = jsondec_string(d); + jsondec_entrysep(d); + if (!jsondec_streql(str, "value")) { + jsondec_err(d, "Key for well-known type must be 'value'"); + } + jsondec_wellknown(d, msg, m); + } +} + +static const upb_msgdef *jsondec_typeurl(jsondec *d, upb_msg *msg, + const upb_msgdef *m) { + const upb_fielddef *type_url_f = upb_msgdef_itof(m, 1); + const upb_msgdef *type_m; + upb_strview type_url = jsondec_string(d); + const char *end = type_url.data + type_url.size; + const char *ptr = end; + upb_msgval val; + + val.str_val = type_url; + upb_msg_set(msg, type_url_f, val, d->arena); + + /* Find message name after the last '/' */ + while (ptr > type_url.data && *--ptr != '/') {} + + if (ptr == type_url.data || ptr == end) { + jsondec_err(d, "Type url must have at least one '/' and non-empty host"); + } + + ptr++; + type_m = upb_symtab_lookupmsg2(d->any_pool, ptr, end - ptr); + + if (!type_m) { + jsondec_err(d, "Type was not found"); + } + + return type_m; +} + +static void jsondec_any(jsondec *d, upb_msg *msg, const upb_msgdef *m) { + /* string type_url = 1; + * bytes value = 2; */ + const upb_fielddef *value_f = upb_msgdef_itof(m, 2); + upb_msg *any_msg; + const upb_msgdef *any_m = NULL; + const char *pre_type_data = NULL; + const char *pre_type_end = NULL; + upb_msgval encoded; + + jsondec_objstart(d); + + /* Scan looking for "@type", which is not necessarily first.
*/ + while (!any_m && jsondec_objnext(d)) { + const char *start = d->ptr; + upb_strview name = jsondec_string(d); + jsondec_entrysep(d); + if (jsondec_streql(name, "@type")) { + any_m = jsondec_typeurl(d, msg, m); + if (pre_type_data) { + pre_type_end = start; + while (*pre_type_end != ',') pre_type_end--; + } + } else { + if (!pre_type_data) pre_type_data = start; + jsondec_skipval(d); + } + } + + if (!any_m) { + jsondec_err(d, "Any object didn't contain a '@type' field"); + } + + any_msg = upb_msg_new(any_m, d->arena); + + if (pre_type_data) { + size_t len = pre_type_end - pre_type_data + 1; + char *tmp = upb_arena_malloc(d->arena, len); + memcpy(tmp, pre_type_data, len - 1); + tmp[len - 1] = '}'; + const char *saved_ptr = d->ptr; + const char *saved_end = d->end; + d->ptr = tmp; + d->end = tmp + len; + d->is_first = true; + while (jsondec_objnext(d)) { + jsondec_anyfield(d, any_msg, any_m); + } + d->ptr = saved_ptr; + d->end = saved_end; + } + + while (jsondec_objnext(d)) { + jsondec_anyfield(d, any_msg, any_m); + } + + jsondec_objend(d); + + encoded.str_val.data = upb_encode(any_msg, upb_msgdef_layout(any_m), d->arena, + &encoded.str_val.size); + upb_msg_set(msg, value_f, encoded, d->arena); +} + +static void jsondec_wrapper(jsondec *d, upb_msg *msg, const upb_msgdef *m) { + const upb_fielddef *value_f = upb_msgdef_itof(m, 1); + upb_msgval val = jsondec_value(d, value_f); + upb_msg_set(msg, value_f, val, d->arena); +} + +static void jsondec_wellknown(jsondec *d, upb_msg *msg, const upb_msgdef *m) { + switch (upb_msgdef_wellknowntype(m)) { + case UPB_WELLKNOWN_ANY: + jsondec_any(d, msg, m); + break; + case UPB_WELLKNOWN_FIELDMASK: + jsondec_fieldmask(d, msg, m); + break; + case UPB_WELLKNOWN_DURATION: + jsondec_duration(d, msg, m); + break; + case UPB_WELLKNOWN_TIMESTAMP: + jsondec_timestamp(d, msg, m); + break; + case UPB_WELLKNOWN_VALUE: + jsondec_wellknownvalue(d, msg, m); + break; + case UPB_WELLKNOWN_LISTVALUE: + jsondec_listvalue(d, msg, m); + break; + 
case UPB_WELLKNOWN_STRUCT: + jsondec_struct(d, msg, m); + break; + case UPB_WELLKNOWN_DOUBLEVALUE: + case UPB_WELLKNOWN_FLOATVALUE: + case UPB_WELLKNOWN_INT64VALUE: + case UPB_WELLKNOWN_UINT64VALUE: + case UPB_WELLKNOWN_INT32VALUE: + case UPB_WELLKNOWN_UINT32VALUE: + case UPB_WELLKNOWN_STRINGVALUE: + case UPB_WELLKNOWN_BYTESVALUE: + case UPB_WELLKNOWN_BOOLVALUE: + jsondec_wrapper(d, msg, m); + break; + default: + UPB_UNREACHABLE(); + } +} + +bool upb_json_decode(const char *buf, size_t size, upb_msg *msg, + const upb_msgdef *m, const upb_symtab *any_pool, + int options, upb_arena *arena, upb_status *status) { + jsondec d; + d.ptr = buf; + d.end = buf + size; + d.arena = arena; + d.any_pool = any_pool; + d.status = status; + d.options = options; + d.depth = 64; + d.line = 1; + d.debug_field = NULL; + d.is_first = false; + + if (setjmp(d.err)) return false; + + jsondec_object(&d, msg, m); + return true; +} diff --git a/upb/json_decode.h b/upb/json_decode.h new file mode 100644 index 0000000000..01ab9580f0 --- /dev/null +++ b/upb/json_decode.h @@ -0,0 +1,24 @@ + +#ifndef UPB_JSONDECODE_H_ +#define UPB_JSONDECODE_H_ + +#include "upb/def.h" +#include "upb/msg.h" + +#ifdef __cplusplus +extern "C" { +#endif + +enum { + UPB_JSONDEC_IGNOREUNKNOWN = 1 +}; + +bool upb_json_decode(const char *buf, size_t size, upb_msg *msg, + const upb_msgdef *m, const upb_symtab *any_pool, + int options, upb_arena *arena, upb_status *status); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* UPB_JSONDECODE_H_ */ diff --git a/upb/json_encode.c b/upb/json_encode.c index d8adf1b722..285bcecea5 100644 --- a/upb/json_encode.c +++ b/upb/json_encode.c @@ -29,13 +29,23 @@ static void jsonenc_msg(jsonenc *e, const upb_msg *msg, const upb_msgdef *m); static void jsonenc_scalar(jsonenc *e, upb_msgval val, const upb_fielddef *f); static void jsonenc_msgfield(jsonenc *e, const upb_msg *msg, const upb_msgdef *m); +static void jsonenc_msgfields(jsonenc *e, const upb_msg *msg, + const upb_msgdef 
*m); static void jsonenc_value(jsonenc *e, const upb_msg *msg, const upb_msgdef *m); -static void jsonenc_err(jsonenc *e, const char *msg) { +UPB_NORETURN static void jsonenc_err(jsonenc *e, const char *msg) { upb_status_seterrmsg(e->status, msg); longjmp(e->err, 1); } +static upb_arena *jsonenc_arena(jsonenc *e) { + /* Create lazily, since it's only needed for Any */ + if (!e->arena) { + e->arena = upb_arena_new(); + } + return e->arena; +} + static void jsonenc_putbytes(jsonenc *e, const void *data, size_t len) { size_t have = e->end - e->ptr; if (UPB_LIKELY(have >= len)) { @@ -70,19 +80,19 @@ static void jsonenc_printf(jsonenc *e, const char *fmt, ...) { } static void jsonenc_nanos(jsonenc *e, int32_t nanos) { - const char zeros[3] = "000"; + int digits = 9; if (nanos == 0) return; if (nanos < 0 || nanos >= 1000000000) { jsonenc_err(e, "error formatting timestamp as JSON: invalid nanos"); } - jsonenc_printf(e, "%09" PRId32, nanos); - - /* Remove trailing zeros, 3 at a time. */ - while ((e->ptr - e->buf) >= 3 && memcmp(e->ptr, zeros, 3) == 0) { - e->ptr -= 3; + while (nanos % 1000 == 0) { + nanos /= 1000; + digits -= 3; } + + jsonenc_printf(e, ".%0.*" PRId32, digits, nanos); } static void jsonenc_timestamp(jsonenc *e, const upb_msg *msg, @@ -107,7 +117,7 @@ static void jsonenc_timestamp(jsonenc *e, const upb_msg *msg, * Fliegel, H. F., and Van Flandern, T. C., "A Machine Algorithm for * Processing Calendar Dates," Communications of the Association of * Computing Machines, vol. 11 (1968), p. 657. 
*/ - L = (seconds / 86400) + 2440588; + L = (seconds / 86400) + 68569 + 2440588; N = 4 * L / 146097; L = L - (146097 * N + 3) / 4; I = 4000 * (L + 1) / 1461001; @@ -138,6 +148,10 @@ static void jsonenc_duration(jsonenc *e, const upb_msg *msg, const upb_msgdef *m jsonenc_err(e, "bad duration"); } + if (nanos < 0) { + nanos = -nanos; + } + jsonenc_printf(e, "\"%" PRId64, seconds); jsonenc_nanos(e, nanos); jsonenc_putstr(e, "s\""); @@ -158,8 +172,8 @@ static void jsonenc_bytes(jsonenc *e, upb_strview str) { /* This is the regular base64, not the "web-safe" version. */ static const char base64[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; - const char *ptr = str.data; - const char *end = ptr + str.size; + const unsigned char *ptr = (unsigned char*)str.data; + const unsigned char *end = ptr + str.size; char buf[4]; jsonenc_putstr(e, "\""); @@ -212,10 +226,10 @@ static void jsonenc_stringbody(jsonenc *e, upb_strview str) { jsonenc_putstr(e, "\\\""); break; case '\f': - jsonenc_putstr(e, "\f'"); + jsonenc_putstr(e, "\\f"); break; case '\b': - jsonenc_putstr(e, "\b'"); + jsonenc_putstr(e, "\\b"); break; case '\\': jsonenc_putstr(e, "\\\\"); @@ -255,21 +269,22 @@ static void jsonenc_double(jsonenc *e, const char *fmt, double val) { static void jsonenc_wrapper(jsonenc *e, const upb_msg *msg, const upb_msgdef *m) { const upb_fielddef *val_f = upb_msgdef_itof(m, 1); - upb_msgval val = upb_msg_get(m, val_f); + upb_msgval val = upb_msg_get(msg, val_f); jsonenc_scalar(e, val, val_f); } -const upb_msgdef *jsonenc_getanymsg(jsonenc *e, upb_strview type_url) { +static const upb_msgdef *jsonenc_getanymsg(jsonenc *e, upb_strview type_url) { /* Find last '/', if any. 
*/ const char *end = type_url.data + type_url.size; const char *ptr = end; + const upb_msgdef *ret; - if (!e->ext_pool || type_url.size == 0) return NULL; + if (!e->ext_pool || type_url.size == 0) goto badurl; while (true) { if (--ptr == type_url.data) { /* Type URL must contain at least one '/', with host before. */ - return NULL; + goto badurl; } if (*ptr == '/') { ptr++; @@ -277,19 +292,29 @@ const upb_msgdef *jsonenc_getanymsg(jsonenc *e, upb_strview type_url) { } } - return upb_symtab_lookupmsg2(e->ext_pool, ptr, end - ptr); + ret = upb_symtab_lookupmsg2(e->ext_pool, ptr, end - ptr); + + if (!ret) { + jsonenc_err(e, "Couldn't find Any type"); + } + + return ret; + +badurl: + jsonenc_err(e, "Bad type URL"); } static void jsonenc_any(jsonenc *e, const upb_msg *msg, const upb_msgdef *m) { const upb_fielddef *type_url_f = upb_msgdef_itof(m, 1); - const upb_fielddef *value_f = upb_msgdef_itof(m, 1); + const upb_fielddef *value_f = upb_msgdef_itof(m, 2); upb_strview type_url = upb_msg_get(msg, type_url_f).str_val; upb_strview value = upb_msg_get(msg, value_f).str_val; const upb_msgdef *any_m = jsonenc_getanymsg(e, type_url); const upb_msglayout *any_layout = upb_msgdef_layout(any_m); - upb_msg *any = upb_msg_new(any_m, e->arena); + upb_arena *arena = jsonenc_arena(e); + upb_msg *any = upb_msg_new(any_m, arena); - if (!upb_decode(value.data, value.size, any, any_layout, e->arena)) { + if (!upb_decode(value.data, value.size, any, any_layout, arena)) { jsonenc_err(e, "Error decoding message in Any"); } @@ -297,9 +322,9 @@ static void jsonenc_any(jsonenc *e, const upb_msg *msg, const upb_msgdef *m) { jsonenc_string(e, type_url); jsonenc_putstr(e, ", "); - if (upb_msgdef_wellknowntype(m) == UPB_WELLKNOWN_UNSPECIFIED) { + if (upb_msgdef_wellknowntype(any_m) == UPB_WELLKNOWN_UNSPECIFIED) { /* Regular messages: {"@type": "...", "foo": 1, "bar": 2} */ - jsonenc_msg(e, any, any_m); + jsonenc_msgfields(e, any, any_m); } else { /* Well-known type: {"@type": "...", "value": <X>} */
jsonenc_putstr(e, "value: "); @@ -323,15 +348,17 @@ static void jsonenc_fieldpath(jsonenc *e, upb_strview path) { while (ptr < end) { char ch = *ptr; + if (ch >= 'A' && ch <= 'Z') { jsonenc_err(e, "Field mask element may not have upper-case letter."); } else if (ch == '_') { if (ptr == end - 1 || *(ptr + 1) < 'a' || *(ptr + 1) > 'z') { jsonenc_err(e, "Underscore must be followed by a lowercase letter."); } - } else { - jsonenc_putbytes(e, &ch, 1); + ch = *++ptr - 32; } + + jsonenc_putbytes(e, &ch, 1); ptr++; } } @@ -468,7 +495,7 @@ static void jsonenc_msgfield(jsonenc *e, const upb_msg *msg, jsonenc_listvalue(e, msg, m); break; case UPB_WELLKNOWN_STRUCT: - jsonenc_listvalue(e, msg, m); + jsonenc_struct(e, msg, m); break; } } @@ -532,6 +559,7 @@ static void jsonenc_mapkey(jsonenc *e, upb_msgval val, const upb_fielddef *f) { break; case UPB_TYPE_STRING: jsonenc_stringbody(e, val.str_val); + break; default: UPB_UNREACHABLE(); } @@ -575,15 +603,12 @@ static void jsonenc_map(jsonenc *e, const upb_map *map, const upb_fielddef *f) { static void jsonenc_fieldval(jsonenc *e, const upb_fielddef *f, upb_msgval val, bool *first) { - char buf[128]; const char *name; if (e->options & UPB_JSONENC_PROTONAMES) { name = upb_fielddef_name(f); } else { - /* TODO(haberman): we need a better JSON name API. */ - upb_fielddef_getjsonname(f, buf, sizeof(buf)); - name = buf; + name = upb_fielddef_jsonname(f); } jsonenc_putsep(e, ", ", first); @@ -598,13 +623,12 @@ static void jsonenc_fieldval(jsonenc *e, const upb_fielddef *f, } } -static void jsonenc_msg(jsonenc *e, const upb_msg *msg, const upb_msgdef *m) { +static void jsonenc_msgfields(jsonenc *e, const upb_msg *msg, + const upb_msgdef *m) { upb_msgval val; const upb_fielddef *f; bool first = true; - jsonenc_putstr(e, "{"); - if (e->options & UPB_JSONENC_EMITDEFAULTS) { /* Iterate over all fields. 
*/ upb_msg_field_iter i; @@ -620,11 +644,15 @@ static void jsonenc_msg(jsonenc *e, const upb_msg *msg, const upb_msgdef *m) { jsonenc_fieldval(e, f, val, &first); } } +} +static void jsonenc_msg(jsonenc *e, const upb_msg *msg, const upb_msgdef *m) { + jsonenc_putstr(e, "{"); + jsonenc_msgfields(e, msg, m); jsonenc_putstr(e, "}"); } -size_t jsonenc_nullz(jsonenc *e, size_t size) { +static size_t jsonenc_nullz(jsonenc *e, size_t size) { size_t ret = e->ptr - e->buf + e->overflow; if (size > 0) { @@ -647,9 +675,11 @@ size_t upb_json_encode(const upb_msg *msg, const upb_msgdef *m, e.options = options; e.ext_pool = ext_pool; e.status = status; + e.arena = NULL; if (setjmp(e.err)) return -1; jsonenc_msg(&e, msg, m); + if (e.arena) upb_arena_free(e.arena); return jsonenc_nullz(&e, size); } diff --git a/upb/reflection.c b/upb/reflection.c index 89a801266f..d032eccee4 100644 --- a/upb/reflection.c +++ b/upb/reflection.c @@ -90,6 +90,18 @@ bool upb_msg_has(const upb_msg *msg, const upb_fielddef *f) { } } +bool upb_msg_hasoneof(const upb_msg *msg, const upb_oneofdef *o) { + upb_oneof_iter i; + const upb_fielddef *f; + const upb_msglayout_field *field; + + upb_oneof_begin(&i, o); + if (upb_oneof_done(&i)) return false; + f = upb_oneof_iter_field(&i); + field = upb_fielddef_layout(f); + return *oneofcase(msg, field) != 0; +} + upb_msgval upb_msg_get(const upb_msg *msg, const upb_fielddef *f) { if (!upb_fielddef_haspresence(f) || upb_msg_has(msg, f)) { return _upb_msg_getraw(msg, f); @@ -136,8 +148,11 @@ upb_mutmsgval upb_msg_mutable(upb_msg *msg, const upb_fielddef *f, const upb_msglayout_field *field = upb_fielddef_layout(f); upb_mutmsgval ret; char *mem = PTR_AT(msg, field->offset, char); + bool wrong_oneof = in_oneof(field) && *oneofcase(msg, field) != field->number; + memcpy(&ret, mem, sizeof(void*)); - if (a && !ret.msg) { + + if (a && (!ret.msg || wrong_oneof)) { if (upb_fielddef_ismap(f)) { const upb_msgdef *entry = upb_fielddef_msgsubdef(f); const upb_fielddef *key = 
upb_msgdef_itof(entry, UPB_MAPENTRY_KEY); @@ -149,7 +164,12 @@ upb_mutmsgval upb_msg_mutable(upb_msg *msg, const upb_fielddef *f, UPB_ASSERT(upb_fielddef_issubmsg(f)); ret.msg = upb_msg_new(upb_fielddef_msgsubdef(f), a); } + memcpy(mem, &ret, sizeof(void*)); + + if (wrong_oneof) { + *oneofcase(msg, field) = field->number; + } } return ret; } diff --git a/upb/reflection.h b/upb/reflection.h index 95156b7d76..c55fe4a2e6 100644 --- a/upb/reflection.h +++ b/upb/reflection.h @@ -44,6 +44,9 @@ upb_mutmsgval upb_msg_mutable(upb_msg *msg, const upb_fielddef *f, upb_arena *a) /* May only be called for fields where upb_fielddef_haspresence(f) == true. */ bool upb_msg_has(const upb_msg *msg, const upb_fielddef *f); +/* Returns whether any field is set in the oneof. */ +bool upb_msg_hasoneof(const upb_msg *msg, const upb_oneofdef *o); + /* Sets the given field to the given value. For a msg/array/map/string, the * value must be in the same arena. */ void upb_msg_set(upb_msg *msg, const upb_fielddef *f, upb_msgval val,