From 60d0966a0bb18c263b78bd49f7cc0fa9d8f5ef0b Mon Sep 17 00:00:00 2001
From: Joshua Haberman
Date: Mon, 10 Feb 2020 09:44:07 -0800
Subject: [PATCH] [jsonencoder]: wrote most of the code, haven't even compiled yet, let alone tested.

---
 upb/json_encode.c | 790 ++++++++++++++++++++++++++++++++++++++++++++++
 upb/json_encode.h |  41 +++
 2 files changed, 831 insertions(+)
 create mode 100644 upb/json_encode.c
 create mode 100644 upb/json_encode.h

diff --git a/upb/json_encode.c b/upb/json_encode.c
new file mode 100644
index 0000000000..5838babb20
--- /dev/null
+++ b/upb/json_encode.c
@@ -0,0 +1,790 @@
+
+/* JSON encoder for upb messages (proto3 JSON mapping), driven by reflection. */
+
+#include "upb/json_encode.h"
+
+#include <inttypes.h>
+#include <setjmp.h>
+#include <stdarg.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "upb/decode.h"
+#include "upb/reflection.h"
+
+/* Must be last: other upb headers undefine these macros when they end. */
+#include "upb/port_def.inc"
+
+/* Encoder state.  Output is written to [buf, end); bytes that do not fit are
+ * only counted in |overflow| so the full output size can still be reported. */
+typedef struct {
+  char *buf, *ptr, *end;
+  size_t overflow;
+  int indent_depth;
+  int options;
+  const upb_symtab *ext_pool;
+  upb_arena *arena;   /* Created lazily; only used to decode Any payloads. */
+  const char *errmsg; /* Reason passed to jsonenc_err(), kept for debugging. */
+  jmp_buf err;        /* Where jsonenc_err() unwinds to. */
+} jsonenc;
+
+static void jsonenc_msg(jsonenc *e, const upb_msg *msg, const upb_msgdef *m);
+static void jsonenc_msgfields(jsonenc *e, const upb_msg *msg,
+                              const upb_msgdef *m, bool *first);
+static void jsonenc_msgfield(jsonenc *e, const upb_msg *msg,
+                             const upb_msgdef *m);
+static void jsonenc_msgval(jsonenc *e, upb_msgval val, const upb_fielddef *f);
+static void jsonenc_value(jsonenc *e, const upb_msg *msg, const upb_msgdef *m);
+
+/* Aborts encoding: records |msg| and unwinds to the setjmp() in
+ * jsonenc_run().  Never returns. */
+static void jsonenc_err(jsonenc *e, const char *msg) {
+  e->errmsg = msg;
+  longjmp(e->err, 1);
+}
+
+/* Returns the scratch arena, creating it on first use. */
+static upb_arena *jsonenc_arena(jsonenc *e) {
+  if (!e->arena) {
+    e->arena = upb_arena_new();
+    if (!e->arena) jsonenc_err(e, "out of memory");
+  }
+  return e->arena;
+}
+
+/* Appends |len| raw bytes, copying what fits and counting the rest. */
+static void jsonenc_putbytes(jsonenc *e, const void *data, size_t len) {
+  size_t have = e->end - e->ptr;
+  if (UPB_LIKELY(have >= len)) {
+    if (len) memcpy(e->ptr, data, len);
+    e->ptr += len;
+  } else {
+    /* Skip memcpy() when nothing fits: |ptr| is NULL if no buffer was given. */
+    if (have) memcpy(e->ptr, data, have);
+    e->ptr += have;
+    e->overflow += (len - have);
+  }
+}
+
+/* Appends a NUL-terminated string, without its terminator. */
+static void jsonenc_putstr(jsonenc *e, const char *str) {
+  jsonenc_putbytes(e, str, strlen(str));
+}
+
+/* Appends printf-style formatted text with the same overflow accounting as
+ * jsonenc_putbytes(). */
+static void jsonenc_printf(jsonenc *e, const char *fmt, ...) {
+  size_t have = e->end - e->ptr;
+  size_t len;
+  va_list args;
+  int n;
+
+  va_start(args, fmt);
+  n = _upb_vsnprintf(e->ptr, have, fmt, args);
+  va_end(args);
+
+  /* A negative result is a formatting failure, not a length. */
+  if (n < 0) jsonenc_err(e, "error formatting JSON output");
+  len = (size_t)n;
+
+  if (UPB_LIKELY(have > len)) {
+    e->ptr += len;
+  } else {
+    /* Truncated: the last byte written is vsnprintf's NUL.  |ptr| now equals
+     * |end|, so jsonenc_nullz() reuses that byte for the final terminator. */
+    e->ptr += have;
+    e->overflow += (len - have);
+  }
+}
+
+/* Appends the fractional-seconds suffix (".123", ".123456" or ".123456789")
+ * for |nanos| in [0, 999999999]; appends nothing when |nanos| is zero. */
+static void jsonenc_nanos(jsonenc *e, int32_t nanos) {
+  int digits = 9;
+
+  if (nanos == 0) return;
+  if (nanos < 0 || nanos >= 1000000000) {
+    jsonenc_err(e, "error formatting timestamp as JSON: invalid nanos");
+  }
+
+  /* Drop trailing zeros three digits at a time. */
+  while (nanos % 1000 == 0) {
+    nanos /= 1000;
+    digits -= 3;
+  }
+
+  jsonenc_printf(e, ".%.*" PRId32, digits, nanos);
+}
+
+/* Encodes google.protobuf.Timestamp as a quoted RFC 3339 UTC string. */
+static void jsonenc_timestamp(jsonenc *e, const upb_msg *msg,
+                              const upb_msgdef *m) {
+  const upb_fielddef *seconds_f = upb_msgdef_itof(m, 1);
+  const upb_fielddef *nanos_f = upb_msgdef_itof(m, 2);
+  int64_t seconds = upb_msg_get(msg, seconds_f).int64_val;
+  int32_t nanos = upb_msg_get(msg, nanos_f).int32_val;
+  int L, N, I, J, K, hour, min, sec;
+
+  if (seconds < -62135596800) {
+    jsonenc_err(e,
+                "error formatting timestamp as JSON: minimum acceptable value "
+                "is 0001-01-01T00:00:00Z");
+  } else if (seconds > 253402300799) {
+    jsonenc_err(e,
+                "error formatting timestamp as JSON: maximum acceptable value "
+                "is 9999-12-31T23:59:59Z");
+  }
+
+  /* Rebase to 0001-01-01 so the divisions below never see a negative value
+   * (C division truncates toward zero, which is wrong before 1970). */
+  seconds += 62135596800;
+
+  /* Julian Day -> Y/M/D, Algorithm from:
+   * Fliegel, H. F., and Van Flandern, T. C., "A Machine Algorithm for
+   * Processing Calendar Dates," Communications of the Association of
+   * Computing Machines, vol. 11 (1968), p. 657.
+   *
+   * 719162 is the day count from 0001-01-01 to 1970-01-01, 2440588 is the
+   * Julian Day of 1970-01-01, and 68569 is the algorithm's own offset. */
+  L = (int)(seconds / 86400) - 719162 + 2440588 + 68569;
+  N = 4 * L / 146097;
+  L = L - (146097 * N + 3) / 4;
+  I = 4000 * (L + 1) / 1461001;
+  L = L - 1461 * I / 4 + 31;
+  J = 80 * L / 2447;
+  K = L - 2447 * J / 80;
+  L = J / 11;
+  J = J + 2 - 12 * L;
+  I = 100 * (N - 49) + I + L;
+
+  sec = (int)(seconds % 60);
+  min = (int)((seconds / 60) % 60);
+  hour = (int)((seconds / 3600) % 24);
+
+  jsonenc_printf(e, "\"%04d-%02d-%02dT%02d:%02d:%02d", I, J, K, hour, min,
+                 sec);
+  jsonenc_nanos(e, nanos);
+  jsonenc_putstr(e, "Z\"");
+}
+
+/* Encodes google.protobuf.Duration as a quoted decimal number of seconds with
+ * an "s" suffix, e.g. "1.500s" or "-0.500s". */
+static void jsonenc_duration(jsonenc *e, const upb_msg *msg,
+                             const upb_msgdef *m) {
+  const upb_fielddef *seconds_f = upb_msgdef_itof(m, 1);
+  const upb_fielddef *nanos_f = upb_msgdef_itof(m, 2);
+  int64_t seconds = upb_msg_get(msg, seconds_f).int64_val;
+  int32_t nanos = upb_msg_get(msg, nanos_f).int32_val;
+
+  /* The signs only have to agree when both parts are non-zero. */
+  if (seconds > 315576000000 || seconds < -315576000000 ||
+      nanos <= -1000000000 || nanos >= 1000000000 ||
+      (seconds != 0 && nanos != 0 && (seconds < 0) != (nanos < 0))) {
+    jsonenc_err(e, "bad duration");
+  }
+
+  jsonenc_putstr(e, "\"");
+
+  /* |seconds| carries the sign unless it is zero. */
+  if (seconds == 0 && nanos < 0) jsonenc_putstr(e, "-");
+  if (nanos < 0) nanos = -nanos;
+
+  jsonenc_printf(e, "%" PRId64, seconds);
+  jsonenc_nanos(e, nanos);
+  jsonenc_putstr(e, "s\"");
+}
+
+/* Encodes an enum value as its quoted name, or as a bare number when the
+ * value has no name in the enum definition. */
+static void jsonenc_enum(int32_t val, const upb_fielddef *f, jsonenc *e) {
+  const upb_enumdef *e_def = upb_fielddef_enumsubdef(f);
+  const char *name = upb_enumdef_iton(e_def, val);
+
+  if (name) {
+    jsonenc_printf(e, "\"%s\"", name);
+  } else {
+    jsonenc_printf(e, "%" PRId32, val);
+  }
+}
+
+/* Encodes |str| as a quoted, '='-padded base64 string. */
+static void jsonenc_bytes(jsonenc *e, upb_strview str) {
+  /* This is the regular base64, not the "web-safe" version. */
+  static const char base64[] =
+      "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
+  /* Unsigned so that bytes >= 0x80 cannot produce negative table indexes. */
+  const unsigned char *ptr = (const unsigned char *)str.data;
+  const unsigned char *end = ptr + str.size;
+  char buf[4];
+
+  jsonenc_putstr(e, "\"");
+
+  while (end - ptr >= 3) {
+    buf[0] = base64[ptr[0] >> 2];
+    buf[1] = base64[((ptr[0] & 0x3) << 4) | (ptr[1] >> 4)];
+    buf[2] = base64[((ptr[1] & 0xf) << 2) | (ptr[2] >> 6)];
+    buf[3] = base64[ptr[2] & 0x3f];
+    jsonenc_putbytes(e, buf, 4);
+    ptr += 3;
+  }
+
+  /* Encode the one or two leftover bytes, padding the group to 4 chars. */
+  switch (end - ptr) {
+    case 2:
+      buf[0] = base64[ptr[0] >> 2];
+      buf[1] = base64[((ptr[0] & 0x3) << 4) | (ptr[1] >> 4)];
+      buf[2] = base64[(ptr[1] & 0xf) << 2];
+      buf[3] = '=';
+      jsonenc_putbytes(e, buf, 4);
+      break;
+    case 1:
+      buf[0] = base64[ptr[0] >> 2];
+      buf[1] = base64[((ptr[0] & 0x3) << 4)];
+      buf[2] = '=';
+      buf[3] = '=';
+      jsonenc_putbytes(e, buf, 4);
+      break;
+    default:
+      break;
+  }
+
+  jsonenc_putstr(e, "\"");
+}
+
+/* Appends |str| with JSON escaping applied but without surrounding quotes. */
+static void jsonenc_stringbody(jsonenc *e, upb_strview str) {
+  const char *ptr = str.data;
+  const char *end = ptr + str.size;
+
+  while (ptr < end) {
+    switch (*ptr) {
+      case '\n':
+        jsonenc_putstr(e, "\\n");
+        break;
+      case '\r':
+        jsonenc_putstr(e, "\\r");
+        break;
+      case '\t':
+        jsonenc_putstr(e, "\\t");
+        break;
+      case '\"':
+        jsonenc_putstr(e, "\\\"");
+        break;
+      case '\f':
+        jsonenc_putstr(e, "\\f");
+        break;
+      case '\b':
+        jsonenc_putstr(e, "\\b");
+        break;
+      case '\\':
+        jsonenc_putstr(e, "\\\\");
+        break;
+      default:
+        if ((uint8_t)*ptr < 0x20) {
+          jsonenc_printf(e, "\\u%04x", (int)(uint8_t)*ptr);
+        } else {
+          /* This could be a non-ASCII byte.  We rely on the string being valid
+           * UTF-8. */
+          jsonenc_putbytes(e, ptr, 1);
+        }
+        break;
+    }
+    ptr++;
+  }
+}
+
+/* Appends |str| as a quoted, escaped JSON string. */
+static void jsonenc_string(jsonenc *e, upb_strview str) {
+  jsonenc_putstr(e, "\"");
+  jsonenc_stringbody(e, str);
+  jsonenc_putstr(e, "\"");
+}
+
+/* Encodes a float or double using |fmt|.  JSON has no literals for the
+ * non-finite values, so those are emitted as quoted strings. */
+static void jsonenc_double(jsonenc *e, const char *fmt, double val) {
+  if (val == UPB_INFINITY) {
+    jsonenc_putstr(e, "\"Infinity\"");
+  } else if (val == -UPB_INFINITY) {
+    jsonenc_putstr(e, "\"-Infinity\"");
+  } else if (val != val) {
+    /* Only NaN compares unequal to itself. */
+    jsonenc_putstr(e, "\"NaN\"");
+  } else {
+    jsonenc_printf(e, fmt, val);
+  }
+}
+
+/* Encodes a wrapper type (Int32Value, StringValue, ...) as its bare value. */
+static void jsonenc_wrapper(jsonenc *e, const upb_msg *msg,
+                            const upb_msgdef *m) {
+  const upb_fielddef *val_f = upb_msgdef_itof(m, 1);
+  upb_msgval val = upb_msg_get(msg, val_f);
+  jsonenc_msgval(e, val, val_f);
+}
+
+/* Looks up the message type named by an Any's |type_url| in the symtab.
+ * Fails the encode if there is no symtab, the URL is malformed, or the type
+ * is unknown. */
+static const upb_msgdef *jsonenc_getanymsg(jsonenc *e, upb_strview type_url) {
+  const char *end = type_url.data + type_url.size;
+  const char *name = end;
+  const upb_msgdef *ret;
+
+  if (!e->ext_pool) {
+    jsonenc_err(e, "Tried to encode Any, but no symtab was provided");
+  }
+
+  /* The type name is everything after the last '/'. */
+  while (name > type_url.data && name[-1] != '/') name--;
+
+  /* Require something before the '/' and a non-empty name after it. */
+  if (name - type_url.data < 2 || name == end) {
+    jsonenc_err(e, "Bad type URL in Any");
+  }
+
+  ret = upb_symtab_lookupmsg2(e->ext_pool, name, end - name);
+  if (!ret) jsonenc_err(e, "Couldn't find Any type in symtab");
+  return ret;
+}
+
+/* Encodes google.protobuf.Any by decoding its payload and emitting it with an
+ * added "@type" member. */
+static void jsonenc_any(jsonenc *e, const upb_msg *msg, const upb_msgdef *m) {
+  const upb_fielddef *type_url_f = upb_msgdef_itof(m, 1);
+  const upb_fielddef *value_f = upb_msgdef_itof(m, 2);
+  upb_strview type_url = upb_msg_get(msg, type_url_f).str_val;
+  upb_strview value = upb_msg_get(msg, value_f).str_val;
+  const upb_msgdef *any_m = jsonenc_getanymsg(e, type_url);
+  const upb_msglayout *any_layout = upb_msgdef_layout(any_m);
+  upb_arena *arena = jsonenc_arena(e);
+  upb_msg *any = upb_msg_new(any_m, arena);
+  bool first = false; /* "@type" is always the first member. */
+
+  if (!any) jsonenc_err(e, "out of memory");
+  if (!upb_decode(value.data, value.size, any, any_layout, arena)) {
+    jsonenc_err(e, "Error decoding message in Any");
+  }
+
+  jsonenc_putstr(e, "{\"@type\": ");
+  jsonenc_string(e, type_url);
+
+  if (upb_msgdef_wellknowntype(any_m) == UPB_WELLKNOWN_UNSPECIFIED) {
+    /* Regular messages: {"@type": "...", "foo": 1, "bar": 2} */
+    jsonenc_msgfields(e, any, any_m, &first);
+  } else {
+    /* Well-known type: {"@type": "...", "value": <well-known encoding>} */
+    jsonenc_putstr(e, ", \"value\": ");
+    jsonenc_msgfield(e, any, any_m);
+  }
+
+  jsonenc_putstr(e, "}");
+}
+
+/* Emits ", " before every element except the first one. */
+static void jsonenc_putsep(jsonenc *e, bool *first) {
+  if (*first) {
+    *first = false;
+  } else {
+    jsonenc_putstr(e, ", ");
+  }
+}
+
+/* Encodes google.protobuf.Struct as a JSON object. */
+static void jsonenc_struct(jsonenc *e, const upb_msg *msg,
+                           const upb_msgdef *m) {
+  const upb_fielddef *fields_f = upb_msgdef_itof(m, 1);
+  const upb_map *fields = upb_msg_get(msg, fields_f).map_val;
+  const upb_msgdef *entry_m = upb_fielddef_msgsubdef(fields_f);
+  const upb_fielddef *value_f = upb_msgdef_itof(entry_m, 2);
+  const upb_msgdef *value_m = upb_fielddef_msgsubdef(value_f);
+  size_t iter = UPB_MAP_BEGIN;
+  bool first = true;
+
+  jsonenc_putstr(e, "{");
+
+  /* Tolerate a NULL map, in case an unset field is reported that way. */
+  while (fields && upb_mapiter_next(fields, &iter)) {
+    upb_msgval key = upb_mapiter_key(fields, iter);
+    upb_msgval val = upb_mapiter_value(fields, iter);
+
+    jsonenc_putsep(e, &first);
+    jsonenc_string(e, key.str_val);
+    jsonenc_putstr(e, ": ");
+    jsonenc_value(e, val.msg_val, value_m);
+  }
+
+  jsonenc_putstr(e, "}");
+}
+
+/* Encodes google.protobuf.ListValue as a JSON array. */
+static void jsonenc_listvalue(jsonenc *e, const upb_msg *msg,
+                              const upb_msgdef *m) {
+  const upb_fielddef *values_f = upb_msgdef_itof(m, 1);
+  const upb_msgdef *values_m = upb_fielddef_msgsubdef(values_f);
+  const upb_array *values = upb_msg_get(msg, values_f).array_val;
+  /* Tolerate a NULL array, in case an unset field is reported that way. */
+  const size_t size = values ? upb_array_size(values) : 0;
+  size_t i;
+  bool first = true;
+
+  jsonenc_putstr(e, "[");
+
+  for (i = 0; i < size; i++) {
+    upb_msgval elem = upb_array_get(values, i);
+
+    jsonenc_putsep(e, &first);
+    jsonenc_value(e, elem.msg_val, values_m);
+  }
+
+  jsonenc_putstr(e, "]");
+}
+
+/* Encodes google.protobuf.Value as whichever JSON value its oneof holds. */
+static void jsonenc_value(jsonenc *e, const upb_msg *msg, const upb_msgdef *m) {
+  /* TODO(haberman): do we want a reflection method to get oneof case? */
+  size_t iter = UPB_MSG_BEGIN;
+  const upb_fielddef *f;
+  upb_msgval val;
+
+  if (!upb_msg_next(msg, m, NULL, &f, &val, &iter)) {
+    jsonenc_err(e, "No value set in Value proto");
+  }
+
+  switch (upb_fielddef_number(f)) {
+    case 1:
+      jsonenc_putstr(e, "null");
+      break;
+    case 2:
+      jsonenc_double(e, "%.17g", val.double_val);
+      break;
+    case 3:
+      jsonenc_string(e, val.str_val);
+      break;
+    case 4:
+      jsonenc_putstr(e, val.bool_val ? "true" : "false");
+      break;
+    case 5:
+      jsonenc_struct(e, val.msg_val, upb_fielddef_msgsubdef(f));
+      break;
+    case 6:
+      jsonenc_listvalue(e, val.msg_val, upb_fielddef_msgsubdef(f));
+      break;
+    default:
+      jsonenc_err(e, "Unknown field set in Value proto");
+      break;
+  }
+}
+
+/* Appends one FieldMask path converted from snake_case to lowerCamelCase. */
+static void jsonenc_fieldpath(jsonenc *e, upb_strview path) {
+  const char *ptr = path.data;
+  const char *end = ptr + path.size;
+
+  while (ptr < end) {
+    char ch = *ptr;
+
+    if (ch >= 'A' && ch <= 'Z') {
+      jsonenc_err(e, "Field mask element may not have upper-case letter.");
+    } else if (ch == '_') {
+      if (ptr == end - 1 || ptr[1] < 'a' || ptr[1] > 'z') {
+        jsonenc_err(e, "Underscore must be followed by a lowercase letter.");
+      }
+      /* Drop the underscore and upper-case the letter after it. */
+      ch = (char)(*++ptr - ('a' - 'A'));
+    }
+
+    jsonenc_putbytes(e, &ch, 1);
+    ptr++;
+  }
+}
+
+/* Encodes google.protobuf.FieldMask as one quoted, comma-separated string. */
+static void jsonenc_fieldmask(jsonenc *e, const upb_msg *msg,
+                              const upb_msgdef *m) {
+  const upb_fielddef *paths_f = upb_msgdef_itof(m, 1);
+  const upb_array *paths = upb_msg_get(msg, paths_f).array_val;
+  const size_t size = paths ? upb_array_size(paths) : 0;
+  size_t i;
+
+  jsonenc_putstr(e, "\"");
+
+  for (i = 0; i < size; i++) {
+    if (i > 0) jsonenc_putstr(e, ",");
+    jsonenc_fieldpath(e, upb_array_get(paths, i).str_val);
+  }
+
+  jsonenc_putstr(e, "\"");
+}
+
+/* Encodes a message value, using the special JSON form for well-known types. */
+static void jsonenc_msgfield(jsonenc *e, const upb_msg *msg,
+                             const upb_msgdef *m) {
+  switch (upb_msgdef_wellknowntype(m)) {
+    case UPB_WELLKNOWN_UNSPECIFIED:
+      jsonenc_msg(e, msg, m);
+      break;
+    case UPB_WELLKNOWN_ANY:
+      jsonenc_any(e, msg, m);
+      break;
+    case UPB_WELLKNOWN_FIELDMASK:
+      jsonenc_fieldmask(e, msg, m);
+      break;
+    case UPB_WELLKNOWN_DURATION:
+      jsonenc_duration(e, msg, m);
+      break;
+    case UPB_WELLKNOWN_TIMESTAMP:
+      jsonenc_timestamp(e, msg, m);
+      break;
+    case UPB_WELLKNOWN_DOUBLEVALUE:
+    case UPB_WELLKNOWN_FLOATVALUE:
+    case UPB_WELLKNOWN_INT64VALUE:
+    case UPB_WELLKNOWN_UINT64VALUE:
+    case UPB_WELLKNOWN_INT32VALUE:
+    case UPB_WELLKNOWN_UINT32VALUE:
+    case UPB_WELLKNOWN_STRINGVALUE:
+    case UPB_WELLKNOWN_BYTESVALUE:
+    case UPB_WELLKNOWN_BOOLVALUE:
+      jsonenc_wrapper(e, msg, m);
+      break;
+    case UPB_WELLKNOWN_VALUE:
+      jsonenc_value(e, msg, m);
+      break;
+    case UPB_WELLKNOWN_LISTVALUE:
+      jsonenc_listvalue(e, msg, m);
+      break;
+    case UPB_WELLKNOWN_STRUCT:
+      jsonenc_struct(e, msg, m);
+      break;
+  }
+}
+
+/* Encodes a single non-repeated value of field |f|. */
+static void jsonenc_msgval(jsonenc *e, upb_msgval val, const upb_fielddef *f) {
+  switch (upb_fielddef_type(f)) {
+    case UPB_TYPE_BOOL:
+      jsonenc_putstr(e, val.bool_val ? "true" : "false");
+      break;
+    case UPB_TYPE_FLOAT:
+      /* Nine significant digits are needed to round-trip a float. */
+      jsonenc_double(e, "%.9g", val.float_val);
+      break;
+    case UPB_TYPE_DOUBLE:
+      jsonenc_double(e, "%.17g", val.double_val);
+      break;
+    case UPB_TYPE_INT32:
+      jsonenc_printf(e, "%" PRId32, val.int32_val);
+      break;
+    case UPB_TYPE_UINT32:
+      jsonenc_printf(e, "%" PRIu32, val.uint32_val);
+      break;
+    case UPB_TYPE_INT64:
+      /* 64-bit integers are quoted: JSON numbers cannot hold them exactly. */
+      jsonenc_printf(e, "\"%" PRId64 "\"", val.int64_val);
+      break;
+    case UPB_TYPE_UINT64:
+      jsonenc_printf(e, "\"%" PRIu64 "\"", val.uint64_val);
+      break;
+    case UPB_TYPE_STRING:
+      jsonenc_string(e, val.str_val);
+      break;
+    case UPB_TYPE_BYTES:
+      jsonenc_bytes(e, val.str_val);
+      break;
+    case UPB_TYPE_ENUM:
+      jsonenc_enum(val.int32_val, f, e);
+      break;
+    case UPB_TYPE_MESSAGE:
+      jsonenc_msgfield(e, val.msg_val, upb_fielddef_msgsubdef(f));
+      break;
+  }
+}
+
+/* Encodes a map key as a quoted string followed by ": ". */
+static void jsonenc_mapkey(jsonenc *e, upb_msgval val, const upb_fielddef *f) {
+  jsonenc_putstr(e, "\"");
+
+  switch (upb_fielddef_type(f)) {
+    case UPB_TYPE_BOOL:
+      jsonenc_putstr(e, val.bool_val ? "true" : "false");
+      break;
+    case UPB_TYPE_INT32:
+      jsonenc_printf(e, "%" PRId32, val.int32_val);
+      break;
+    case UPB_TYPE_UINT32:
+      jsonenc_printf(e, "%" PRIu32, val.uint32_val);
+      break;
+    case UPB_TYPE_INT64:
+      jsonenc_printf(e, "%" PRId64, val.int64_val);
+      break;
+    case UPB_TYPE_UINT64:
+      jsonenc_printf(e, "%" PRIu64, val.uint64_val);
+      break;
+    case UPB_TYPE_STRING:
+      /* The quotes are already emitted here, so only escape the body. */
+      jsonenc_stringbody(e, val.str_val);
+      break;
+    default:
+      jsonenc_err(e, "Invalid map key type");
+      break;
+  }
+
+  jsonenc_putstr(e, "\": ");
+}
+
+/* Encodes a repeated field as a JSON array. */
+static void jsonenc_array(jsonenc *e, const upb_array *arr,
+                          const upb_fielddef *f) {
+  size_t i;
+  /* Tolerate a NULL array, in case an unset field is reported that way. */
+  size_t size = arr ? upb_array_size(arr) : 0;
+  bool first = true;
+
+  jsonenc_putstr(e, "[");
+
+  for (i = 0; i < size; i++) {
+    jsonenc_putsep(e, &first);
+    jsonenc_msgval(e, upb_array_get(arr, i), f);
+  }
+
+  jsonenc_putstr(e, "]");
+}
+
+/* Encodes a map field as a JSON object. */
+static void jsonenc_map(jsonenc *e, const upb_map *map, const upb_fielddef *f) {
+  const upb_msgdef *entry = upb_fielddef_msgsubdef(f);
+  const upb_fielddef *key_f = upb_msgdef_itof(entry, 1);
+  const upb_fielddef *val_f = upb_msgdef_itof(entry, 2);
+  size_t iter = UPB_MAP_BEGIN;
+  bool first = true;
+
+  jsonenc_putstr(e, "{");
+
+  /* Tolerate a NULL map, in case an unset field is reported that way. */
+  while (map && upb_mapiter_next(map, &iter)) {
+    jsonenc_putsep(e, &first);
+    jsonenc_mapkey(e, upb_mapiter_key(map, iter), key_f);
+    jsonenc_msgval(e, upb_mapiter_value(map, iter), val_f);
+  }
+
+  jsonenc_putstr(e, "}");
+}
+
+/* Emits one `"name": value` member for field |f|, preceded by a separator
+ * unless it is the first member of its object. */
+static void jsonenc_fieldval(jsonenc *e, const upb_fielddef *f,
+                             upb_msgval val, bool *first) {
+  char buf[128];
+  const char *name;
+
+  if (e->options & UPB_JSONENC_PROTONAMES) {
+    name = upb_fielddef_name(f);
+  } else {
+    /* TODO(haberman): we need a better JSON name API. */
+    upb_fielddef_getjsonname(f, buf, sizeof(buf));
+    name = buf;
+  }
+
+  jsonenc_putsep(e, first);
+  jsonenc_printf(e, "\"%s\": ", name);
+
+  if (upb_fielddef_ismap(f)) {
+    jsonenc_map(e, val.map_val, f);
+  } else if (upb_fielddef_isseq(f)) {
+    jsonenc_array(e, val.array_val, f);
+  } else {
+    jsonenc_msgval(e, val, f);
+  }
+}
+
+/* Emits the members of |msg| without the enclosing braces, so that
+ * jsonenc_any() can place them after its "@type" member. */
+static void jsonenc_msgfields(jsonenc *e, const upb_msg *msg,
+                              const upb_msgdef *m, bool *first) {
+  upb_msgval val;
+  const upb_fielddef *f;
+
+  if (e->options & UPB_JSONENC_EMITDEFAULTS) {
+    /* Iterate over all fields. */
+    upb_msg_field_iter i;
+    for (upb_msg_field_begin(&i, m); !upb_msg_field_done(&i);
+         upb_msg_field_next(&i)) {
+      f = upb_msg_iter_field(&i);
+      /* A field with presence that is unset has no default to print; for a
+       * submessage there would be no message to recurse into. */
+      if (!upb_fielddef_haspresence(f) || upb_msg_has(msg, f)) {
+        jsonenc_fieldval(e, f, upb_msg_get(msg, f), first);
+      }
+    }
+  } else {
+    /* Iterate over non-empty fields. */
+    size_t iter = UPB_MSG_BEGIN;
+    while (upb_msg_next(msg, m, e->ext_pool, &f, &val, &iter)) {
+      jsonenc_fieldval(e, f, val, first);
+    }
+  }
+}
+
+/* Encodes |msg| as a JSON object. */
+static void jsonenc_msg(jsonenc *e, const upb_msg *msg, const upb_msgdef *m) {
+  bool first = true;
+
+  jsonenc_putstr(e, "{");
+  jsonenc_msgfields(e, msg, m, &first);
+  jsonenc_putstr(e, "}");
+}
+
+/* NUL-terminates the output (dropping the last byte if the buffer is full)
+ * and returns the length the complete output would have had. */
+static size_t jsonenc_nullz(jsonenc *e, size_t size) {
+  size_t ret = e->ptr - e->buf + e->overflow;
+
+  if (size > 0) {
+    if (e->ptr == e->end) e->ptr--;
+    *e->ptr = '\0';
+  }
+
+  return ret;
+}
+
+/* Runs the encoder under setjmp() so that jsonenc_err() can unwind to here.
+ * This is a separate function so that the encoder state is not a local of
+ * the function calling setjmp(): such locals are indeterminate after
+ * longjmp() if they were modified in between.  Returns false on error. */
+static bool jsonenc_run(jsonenc *e, const upb_msg *msg, const upb_msgdef *m) {
+  if (setjmp(e->err)) return false;
+  jsonenc_msgfield(e, msg, m);
+  return true;
+}
+
+size_t upb_jsonencode(const upb_msg *msg, const upb_msgdef *m,
+                      const upb_symtab *ext_pool, int options, char *buf,
+                      size_t size) {
+  jsonenc e;
+  bool ok;
+
+  e.buf = buf;
+  e.ptr = buf;
+  e.end = buf + size;
+  e.overflow = 0;
+  e.indent_depth = 0;
+  e.options = options;
+  e.ext_pool = ext_pool;
+  e.arena = NULL;
+  e.errmsg = NULL;
+
+  ok = jsonenc_run(&e, msg, m);
+
+  /* The arena only exists if an Any was decoded along the way. */
+  if (e.arena) upb_arena_free(e.arena);
+
+  if (!ok) {
+    /* Keep the buffer NUL-terminated even though its contents are partial. */
+    jsonenc_nullz(&e, size);
+    return (size_t)-1;
+  }
+
+  return jsonenc_nullz(&e, size);
+}
diff --git a/upb/json_encode.h b/upb/json_encode.h
new file mode 100644
index 0000000000..41fdd82e23
--- /dev/null
+++ b/upb/json_encode.h
@@ -0,0 +1,41 @@
+
+#ifndef UPB_JSONENCODE_H_
+#define UPB_JSONENCODE_H_
+
+#include "upb/def.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+enum {
+  /* When set, emits 0/default values.  TODO(haberman): proto3 only? */
+  UPB_JSONENC_EMITDEFAULTS = 1,
+
+  /* When set, use normal (snake_case) field names instead of JSON (camelCase)
+     names. */
+  UPB_JSONENC_PROTONAMES = 2
+};
+
+/* Encodes the given |msg| to JSON format.  The message's reflection is given in
+ * |m|.  The symtab in |ext_pool| is used to find extensions (if NULL,
+ * extensions will not be printed) and to resolve the message types named by
+ * Any fields (if NULL, encoding an Any fails).
+ *
+ * Output is placed in the given buffer, and is always NUL-terminated when
+ * |size| > 0.  The output size (excluding the NUL) is returned.  This means
+ * that a return value >= |size| implies that the output was truncated.  (These
+ * are the same semantics as snprintf()).
+ *
+ * If the message cannot be encoded (for example an out-of-range Timestamp, or
+ * an Any whose type cannot be resolved), (size_t)-1 is returned and the buffer
+ * holds a NUL-terminated partial result. */
+size_t upb_jsonencode(const upb_msg *msg, const upb_msgdef *m,
+                      const upb_symtab *ext_pool, int options, char *buf,
+                      size_t size);
+
+#ifdef __cplusplus
+}  /* extern "C" */
+#endif
+
+#endif  /* UPB_JSONENCODE_H_ */