[jsonencoder]: wrote most of the code, haven't even compiled yet, let alone tested.

pull/13171/head
Joshua Haberman 5 years ago
parent 1da9cfde77
commit 60d0966a0b
  1. 577
      upb/json_encode.c
  2. 36
      upb/json_encode.h

@ -0,0 +1,577 @@
#include "upb/jsonencode.h"
#include <ctype.h>
#include <float.h>
#include <inttypes.h>
#include <stdarg.h>
#include <stdio.h>
#include <string.h>
#include "upb/decoder.h"
#include "upb/port_def.inc"
#include "upb/reflection.h"
/* State carried through a single JSON encoding pass. */
typedef struct {
  char *buf;       /* Start of the caller-supplied output buffer. */
  char *ptr;       /* Next write position; always within [buf, end]. */
  char *end;       /* One past the last byte of the output buffer. */
  size_t overflow; /* Number of output bytes that did not fit in the buffer. */
  int indent_depth; /* NOTE(review): not referenced by any code in this file;
                     * presumably reserved for pretty-printing -- confirm. */
  int options;      /* Bitmask of UPB_JSONENC_* flags. */
  const upb_symtab *ext_pool; /* Handed to upb_msg_next() when walking the
                               * fields that are set on a message. */
  upb_arena *arena; /* Used for messages decoded out of Any payloads. */
} jsonenc;
static void jsonenc_msg(jsonenc *e, const upb_msg *msg, const upb_msgdef *m);
/* Appends |len| raw bytes from |data| to the output.  Bytes that do not fit
 * in the space remaining before e->end are dropped and added to e->overflow
 * instead, so the total required size can still be reported. */
static void jsonenc_putbytes(jsonenc *e, const void *data, size_t len) {
  size_t room = e->end - e->ptr;
  bool fits = UPB_LIKELY(room >= len);
  size_t ncopy = fits ? len : room;

  memcpy(e->ptr, data, ncopy);
  e->ptr += ncopy;
  e->overflow += (len - ncopy); /* Zero when everything fit. */
}
/* Appends the NUL-terminated string |str| (without its terminator) to the
 * output via jsonenc_putbytes(). */
static void jsonenc_putstr(jsonenc *e, const char *str) {
  const size_t len = strlen(str);

  jsonenc_putbytes(e, str, len);
}
/* Appends printf-style formatted text to the output.
 *
 * The text is formatted directly into the remaining buffer space.  If it does
 * not fit, the buffer is filled to e->end and the shortfall is added to
 * e->overflow.
 *
 * vsnprintf() returns a negative int on an encoding error; that value must
 * not be converted to size_t (it would become an enormous length and corrupt
 * both e->ptr and e->overflow), so such a call appends nothing. */
static void jsonenc_printf(jsonenc *e, const char *fmt, ...) {
  int ret;
  size_t n;
  size_t have = e->end - e->ptr;
  va_list args;

  va_start(args, fmt);
  ret = _upb_vsnprintf(e->ptr, have, fmt, args);
  va_end(args);

  if (ret < 0) return; /* Formatting failed: leave the output untouched. */
  n = (size_t)ret;

  if (UPB_LIKELY(have > n)) {
    e->ptr += n;
  } else {
    /* Truncated (n == have also counts: the terminating NUL took the last
     * byte).  Mark the buffer as full and record what was lost. */
    e->ptr += have;
    e->overflow += (n - have);
  }
}
/* Emits the fractional-second part for |nanos| as ".ddd", ".dddddd" or
 * ".ddddddddd" (trailing zeros are dropped three digits at a time).  Emits
 * nothing when |nanos| is zero.
 *
 * Values outside [0, 999999999] are reported through jsonenc_err() and
 * nothing is written.
 *
 * The digit count is computed arithmetically before printing, rather than by
 * printing nine digits and rewinding e->ptr: rewinding inspected bytes past
 * the write cursor and could not work once the output had overflowed. */
static void jsonenc_nanos(jsonenc *e, int32_t nanos) {
  int digits = 9;

  if (nanos == 0) return;
  if (nanos < 0 || nanos >= 1000000000) {
    jsonenc_err(e, "error formatting timestamp as JSON: invalid nanos");
    return; /* Only reached if jsonenc_err() returns to its caller. */
  }

  /* Strip trailing zeros, 3 digits at a time. */
  while (nanos % 1000 == 0) {
    nanos /= 1000;
    digits -= 3;
  }

  /* The precision pads with leading zeros up to |digits| digits. */
  jsonenc_printf(e, ".%.*" PRId32, digits, nanos);
}
/* Encodes a timestamp message (field 1 = seconds since the Unix epoch, field
 * 2 = nanos) as a quoted "YYYY-MM-DDTHH:MM:SS" string, followed by whatever
 * jsonenc_nanos() emits for the fraction, followed by "Z".
 *
 * Seconds outside 0001-01-01T00:00:00Z .. 9999-12-31T23:59:59Z are reported
 * through jsonenc_err().
 *
 * Returns true when the timestamp was written, false when it was rejected
 * (the latter only matters if jsonenc_err() returns to its caller). */
static bool jsonenc_timestamp(jsonenc *e, const upb_msg *msg,
                              const upb_msgdef *m) {
  const upb_fielddef *seconds_f = upb_msgdef_itof(m, 1);
  const upb_fielddef *nanos_f = upb_msgdef_itof(m, 2);
  int64_t seconds = upb_msg_get(msg, seconds_f).int64_val;
  int32_t nanos = upb_msg_get(msg, nanos_f).int32_val;
  int L, N, I, J, K, hour, min, sec;

  if (seconds < -62135596800) {
    jsonenc_err(e,
                "error formatting timestamp as JSON: minimum acceptable value "
                "is 0001-01-01T00:00:00Z");
    return false;
  } else if (seconds > 253402300799) {
    jsonenc_err(e,
                "error formatting timestamp as JSON: maximum acceptable value "
                "is 9999-12-31T23:59:59Z");
    return false;
  }

  /* Rebase onto 0001-01-01T00:00:00Z so that |seconds| is non-negative and
   * the truncating '/' and '%' below behave like floor division.  The offset
   * is a whole number of days (719162), so the time of day is unaffected. */
  seconds += 62135596800;

  /* Julian Day -> Y/M/D, Algorithm from:
   * Fliegel, H. F., and Van Flandern, T. C., "A Machine Algorithm for
   *   Processing Calendar Dates," Communications of the Association of
   *   Computing Machines, vol. 11 (1968), p. 657.
   *
   * 2440588 is the Julian Day Number of 1970-01-01; 68569 is the constant
   * the algorithm adds to the Julian Day; 719162 undoes the rebase above. */
  L = (int)(seconds / 86400) - 719162 + 68569 + 2440588;
  N = 4 * L / 146097;
  L = L - (146097 * N + 3) / 4;
  I = 4000 * (L + 1) / 1461001;
  L = L - 1461 * I / 4 + 31;
  J = 80 * L / 2447;
  K = L - 2447 * J / 80;
  L = J / 11;
  J = J + 2 - 12 * L;
  I = 100 * (N - 49) + I + L;

  sec = (int)(seconds % 60);
  min = (int)((seconds / 60) % 60);
  hour = (int)((seconds / 3600) % 24);

  /* I = year, J = month, K = day of month. */
  jsonenc_printf(e, "\"%04d-%02d-%02dT%02d:%02d:%02d", I, J, K, hour, min, sec);
  jsonenc_nanos(e, nanos);
  jsonenc_putstr(e, "Z\"");
  return true;
}
/* Encodes a duration message (field 1 = seconds, field 2 = nanos) as a quoted
 * string: the signed seconds, then whatever jsonenc_nanos() emits for the
 * magnitude of the fraction, then "s".
 *
 * A duration is rejected through jsonenc_err() when seconds is outside
 * +/-315576000000, when nanos is outside +/-999999999, or when seconds and
 * nanos are both non-zero with different signs.  A zero component is
 * compatible with either sign of the other.
 *
 * Returns true when the duration was written, false when it was rejected
 * (the latter only matters if jsonenc_err() returns to its caller). */
static bool jsonenc_duration(jsonenc *e, const upb_msg *msg,
                             const upb_msgdef *m) {
  const upb_fielddef *seconds_f = upb_msgdef_itof(m, 1);
  const upb_fielddef *nanos_f = upb_msgdef_itof(m, 2);
  int64_t seconds = upb_msg_get(msg, seconds_f).int64_val;
  int32_t nanos = upb_msg_get(msg, nanos_f).int32_val;

  if (seconds > 315576000000 || seconds < -315576000000 ||
      nanos <= -1000000000 || nanos >= 1000000000 ||
      (seconds != 0 && nanos != 0 && (seconds < 0) != (nanos < 0))) {
    jsonenc_err(e, "bad duration");
    return false;
  }

  if (seconds == 0 && nanos < 0) {
    /* The sign lives only in nanos; "%d" of 0 would silently drop it. */
    jsonenc_putstr(e, "\"-0");
  } else {
    jsonenc_printf(e, "\"%" PRId64, seconds);
  }

  /* The sign has already been written; the fraction is a magnitude.  The
   * range check above guarantees the negation cannot overflow. */
  if (nanos < 0) nanos = -nanos;

  jsonenc_nanos(e, nanos);
  jsonenc_putstr(e, "s\"");
  return true;
}
/* Encodes the enum value |val| of field |f|: as the quoted symbolic name when
 * the field's enum definition has a name for that number, otherwise as the
 * bare integer. */
static void jsonenc_enum(int32_t val, const upb_fielddef *f, jsonenc *e) {
  const char *symbol = upb_enumdef_iton(upb_fielddef_enumsubdef(f), val);

  if (!symbol) {
    /* No name is registered for this number. */
    jsonenc_printf(e, "%" PRId32, val);
    return;
  }

  jsonenc_printf(e, "\"%s\"", symbol);
}
/* Encodes |str| as a quoted, padded base64 string. */
static void jsonenc_bytes(jsonenc *e, upb_strview str) {
  /* This is the regular base64, not the "web-safe" version. */
  static const char base64[] =
      "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
  /* Read the input as unsigned bytes: plain char may be signed, and a
   * negative value shifted right would index before the start of the
   * table. */
  const unsigned char *ptr = (const unsigned char *)str.data;
  const unsigned char *end = ptr + str.size;
  char buf[4];

  jsonenc_putstr(e, "\"");

  /* Full groups: 3 input bytes -> 4 output characters. */
  while (end - ptr >= 3) {
    buf[0] = base64[ptr[0] >> 2];
    buf[1] = base64[((ptr[0] & 0x3) << 4) | (ptr[1] >> 4)];
    buf[2] = base64[((ptr[1] & 0xf) << 2) | (ptr[2] >> 6)];
    buf[3] = base64[ptr[2] & 0x3f];
    jsonenc_putbytes(e, buf, 4);
    ptr += 3;
  }

  /* Trailing partial group, padded with '=' to 4 characters. */
  switch (end - ptr) {
    case 2:
      buf[0] = base64[ptr[0] >> 2];
      buf[1] = base64[((ptr[0] & 0x3) << 4) | (ptr[1] >> 4)];
      buf[2] = base64[(ptr[1] & 0xf) << 2];
      buf[3] = '=';
      jsonenc_putbytes(e, buf, 4);
      break;
    case 1:
      buf[0] = base64[ptr[0] >> 2];
      buf[1] = base64[((ptr[0] & 0x3) << 4)];
      buf[2] = '=';
      buf[3] = '=';
      jsonenc_putbytes(e, buf, 4);
      break;
  }

  jsonenc_putstr(e, "\"");
}
/* Encodes |str| as a quoted JSON string.
 *
 * The double quote, the backslash and the control characters with a short
 * escape (\n \r \t \f \b) use the two-character form; any other byte below
 * 0x20 is written as \u00XX.  Every other byte is copied through
 * unchanged. */
static void jsonenc_string(jsonenc *e, upb_strview str) {
  const char *ptr = str.data;
  const char *end = ptr + str.size;

  jsonenc_putstr(e, "\"");

  while (ptr < end) {
    switch (*ptr) {
      case '\n':
        jsonenc_putstr(e, "\\n");
        break;
      case '\r':
        jsonenc_putstr(e, "\\r");
        break;
      case '\t':
        jsonenc_putstr(e, "\\t");
        break;
      case '\"':
        jsonenc_putstr(e, "\\\"");
        break;
      case '\f':
        /* Backslash + 'f', not a raw form feed. */
        jsonenc_putstr(e, "\\f");
        break;
      case '\b':
        /* Backslash + 'b', not a raw backspace. */
        jsonenc_putstr(e, "\\b");
        break;
      case '\\':
        jsonenc_putstr(e, "\\\\");
        break;
      default:
        if ((uint8_t)*ptr < 0x20) {
          jsonenc_printf(e, "\\u%04x", (int)(uint8_t)*ptr);
        } else {
          /* This could be a non-ASCII byte.  We rely on the string being
           * valid UTF-8. */
          jsonenc_putbytes(e, ptr, 1);
        }
        break;
    }
    ptr++;
  }

  jsonenc_putstr(e, "\"");
}
/* Encodes a floating-point value using the printf format |fmt|.
 *
 * JSON has no literal for non-finite numbers, so positive infinity, negative
 * infinity and NaN are written as the quoted strings "Infinity", "-Infinity"
 * and "NaN".
 *
 * An if-chain is required here: a C switch only accepts integer controlling
 * expressions and integer constant case labels. */
static void jsonenc_double(jsonenc *e, const char *fmt, double val) {
  if (val == UPB_INFINITY) {
    jsonenc_putstr(e, "\"Infinity\"");
  } else if (val == -UPB_INFINITY) {
    jsonenc_putstr(e, "\"-Infinity\"");
  } else if (val != val) {
    /* NaN is the only value that compares unequal to itself. */
    jsonenc_putstr(e, "\"NaN\"");
  } else {
    jsonenc_printf(e, fmt, val);
  }
}
/* Encodes a wrapper message by emitting just the value of its field number
 * 1, using the ordinary encoding for that field's type. */
static void jsonenc_wrapper(jsonenc *e, const upb_msg *msg,
                            const upb_msgdef *m) {
  const upb_fielddef *val_f = upb_msgdef_itof(m, 1);
  /* Read from the message instance |msg|, not from its definition |m|. */
  upb_msgval val = upb_msg_get(msg, val_f);

  jsonenc_msgval(e, val, val_f);
}
/* Encodes an Any message: field 1 is the type URL, field 2 the serialized
 * payload.  The payload is decoded into a fresh message allocated from
 * e->arena and then written after an "@type" member:
 *
 *   regular message:  {"@type": "...", "foo": 1, "bar": 2}
 *   well-known type:  {"@type": "...", "value": <well-known encoding>}
 *
 * A payload that fails to decode is reported through jsonenc_err(). */
static void jsonenc_any(jsonenc *e, const upb_msg *msg, const upb_msgdef *m) {
  const upb_fielddef *type_url_f = upb_msgdef_itof(m, 1);
  const upb_fielddef *value_f = upb_msgdef_itof(m, 2);
  upb_strview type_url = upb_msg_get(msg, type_url_f).str_val;
  upb_strview value = upb_msg_get(msg, value_f).str_val;
  const upb_msgdef *any_m = jsonenc_getanymsg(e, type_url);
  const upb_msglayout *any_layout = upb_msgdef_layout(any_m);
  /* Not const: upb_decode() fills this message in. */
  upb_msg *any = upb_msg_new(any_m, e->arena);

  if (!upb_decode(value.data, value.size, any, any_layout, e->arena)) {
    jsonenc_err(e, "Error decoding message in Any");
    return; /* Only reached if jsonenc_err() returns to its caller. */
  }

  jsonenc_putstr(e, "{\"@type\": ");
  jsonenc_string(e, type_url);
  jsonenc_putstr(e, ", ");

  /* The choice depends on the type of the payload (|any_m|), not on the Any
   * wrapper itself (|m|). */
  if (upb_msgdef_wellknowntype(any_m) == UPB_WELLKNOWN_UNSPECIFIED) {
    /* Regular messages: {"@type": "...", "foo": 1, "bar": 2} */
    /* NOTE(review): jsonenc_msg() writes its own surrounding braces, so this
     * currently yields {"@type": "...", {...}} rather than the flattened form
     * shown above.  Fixing it needs a variant of jsonenc_msg() that emits
     * only the members (and handles the separator for an empty payload). */
    jsonenc_msg(e, any, any_m);
  } else {
    /* Well-known type: {"@type": "...", "value": <well-known encoding>} */
    jsonenc_putstr(e, "\"value\": ");
    jsonenc_msgfield(e, any, any_m);
  }

  jsonenc_putstr(e, "}");
}
/* Writes the ", " separator between elements of an object or array.  The
 * first call for a given |first| flag writes nothing and clears the flag;
 * every later call writes the separator. */
static void jsonenc_putsep(jsonenc *e, bool *first) {
  if (*first) {
    *first = false;
  } else {
    jsonenc_putstr(e, ", ");
  }
}
/* Encodes a Struct message as a JSON object.  Field 1 of |m| is a map; each
 * entry's key is written as a JSON string and each entry's value (field 2 of
 * the map entry type, a message) is written with jsonenc_value(). */
static void jsonenc_struct(jsonenc *e, const upb_msg *msg,
                           const upb_msgdef *m) {
  const upb_fielddef *fields_f = upb_msgdef_itof(m, 1);
  const upb_map *fields = upb_msg_get(msg, fields_f).map_val;
  const upb_msgdef *entry_m = upb_fielddef_msgsubdef(fields_f);
  const upb_fielddef *value_f = upb_msgdef_itof(entry_m, 2);
  const upb_msgdef *value_m = upb_fielddef_msgsubdef(value_f);
  size_t iter = UPB_MAP_BEGIN;
  bool first = true;

  jsonenc_putstr(e, "{");

  /* NOTE(review): guards against a NULL map for an unset field -- confirm
   * whether upb_msg_get() can actually return one here. */
  if (fields) {
    while (upb_mapiter_next(fields, &iter)) {
      upb_msgval key = upb_mapiter_key(fields, iter);
      upb_msgval val = upb_mapiter_value(fields, iter);

      jsonenc_putsep(e, &first);
      jsonenc_string(e, key.str_val);
      jsonenc_putstr(e, ": ");
      jsonenc_value(e, val.msg_val, value_m);
    }
  }

  jsonenc_putstr(e, "}");
}
/* Encodes a ListValue message as a JSON array.  Field 1 of |m| is a repeated
 * message field; each element is written with jsonenc_value(). */
static void jsonenc_listvalue(jsonenc *e, const upb_msg *msg,
                              const upb_msgdef *m) {
  const upb_fielddef *values_f = upb_msgdef_itof(m, 1);
  const upb_msgdef *values_m = upb_fielddef_msgsubdef(values_f);
  const upb_array *values = upb_msg_get(msg, values_f).array_val;
  bool first = true;

  jsonenc_putstr(e, "[");

  /* NOTE(review): guards against a NULL array for an unset field -- confirm
   * whether upb_msg_get() can actually return one here. */
  if (values) {
    const size_t size = upb_array_size(values);
    size_t i;

    for (i = 0; i < size; i++) {
      upb_msgval elem = upb_array_get(values, i);

      jsonenc_putsep(e, &first);
      jsonenc_value(e, elem.msg_val, values_m);
    }
  }

  jsonenc_putstr(e, "]");
}
/* Encodes a Value message by looking at the first field that is set on it
 * and dispatching on that field's number:
 *
 *   1 -> null            4 -> true / false
 *   2 -> number          5 -> nested Struct (JSON object)
 *   3 -> string          6 -> nested ListValue (JSON array)
 *
 * A message with no field set is reported through jsonenc_err(). */
static void jsonenc_value(jsonenc *e, const upb_msg *msg, const upb_msgdef *m) {
  /* TODO(haberman): do we want a reflection method to get oneof case? */
  size_t iter = UPB_MSG_BEGIN;
  const upb_fielddef *f;
  upb_msgval val;

  if (!upb_msg_next(msg, m, NULL, &f, &val, &iter)) {
    jsonenc_err(e, "No value set in Value proto");
    /* Only reached if jsonenc_err() returns; |f| and |val| are not set. */
    return;
  }

  switch (upb_fielddef_number(f)) {
    case 1:
      jsonenc_putstr(e, "null");
      break;
    case 2:
      jsonenc_double(e, "%.17g", val.double_val);
      break;
    case 3:
      jsonenc_string(e, val.str_val);
      break;
    case 4:
      jsonenc_putstr(e, val.bool_val ? "true" : "false");
      break;
    case 5:
      jsonenc_struct(e, val.msg_val, upb_fielddef_msgsubdef(f));
      break;
    case 6:
      jsonenc_listvalue(e, val.msg_val, upb_fielddef_msgsubdef(f));
      break;
  }
}
/* Encodes the message |msg| of type |m| as a JSON value, selecting the
 * special encoding for well-known types and the generic object encoding for
 * everything else. */
static void jsonenc_msgfield(jsonenc *e, const upb_msg *msg,
                             const upb_msgdef *m) {
  switch (upb_msgdef_wellknowntype(m)) {
    case UPB_WELLKNOWN_UNSPECIFIED:
      /* jsonenc_msg() writes the surrounding braces itself. */
      jsonenc_msg(e, msg, m);
      break;
    case UPB_WELLKNOWN_ANY:
      jsonenc_any(e, msg, m);
      break;
    case UPB_WELLKNOWN_FIELDMASK:
      /* There is no FieldMask encoder in this file yet.  Report that rather
       * than falling through and misreading the message as a Duration. */
      jsonenc_err(e, "FieldMask is not yet supported by the JSON encoder");
      break;
    case UPB_WELLKNOWN_DURATION:
      jsonenc_duration(e, msg, m);
      break;
    case UPB_WELLKNOWN_TIMESTAMP:
      jsonenc_timestamp(e, msg, m);
      break;
    case UPB_WELLKNOWN_DOUBLEVALUE:
    case UPB_WELLKNOWN_FLOATVALUE:
    case UPB_WELLKNOWN_INT64VALUE:
    case UPB_WELLKNOWN_UINT64VALUE:
    case UPB_WELLKNOWN_INT32VALUE:
    case UPB_WELLKNOWN_UINT32VALUE:
    case UPB_WELLKNOWN_STRINGVALUE:
    case UPB_WELLKNOWN_BYTESVALUE:
    case UPB_WELLKNOWN_BOOLVALUE:
      jsonenc_wrapper(e, msg, m);
      break;
    case UPB_WELLKNOWN_VALUE:
      jsonenc_value(e, msg, m);
      break;
    case UPB_WELLKNOWN_LISTVALUE:
      jsonenc_listvalue(e, msg, m);
      break;
    case UPB_WELLKNOWN_STRUCT:
      jsonenc_struct(e, msg, m);
      break;
  }
}
/* Encodes a single value |val| whose type is described by field |f|.
 *
 * 32-bit integers, floats and doubles are written as JSON numbers; 64-bit
 * integers are written as quoted strings; strings, bytes, enums and
 * sub-messages are delegated to their dedicated encoders. */
static void jsonenc_msgval(jsonenc *e, upb_msgval val, const upb_fielddef *f) {
  switch (upb_fielddef_type(f)) {
    case UPB_TYPE_BOOL:
      jsonenc_putstr(e, val.bool_val ? "true" : "false");
      break;
    case UPB_TYPE_FLOAT:
      /* 9 significant digits are needed for every IEEE-754 single-precision
       * value to survive a print/parse round trip; 8 is not enough. */
      jsonenc_double(e, "%.9g", val.float_val);
      break;
    case UPB_TYPE_DOUBLE:
      /* 17 significant digits round-trip any double. */
      jsonenc_double(e, "%.17g", val.double_val);
      break;
    case UPB_TYPE_INT32:
      jsonenc_printf(e, "%" PRId32, val.int32_val);
      break;
    case UPB_TYPE_UINT32:
      jsonenc_printf(e, "%" PRIu32, val.uint32_val);
      break;
    case UPB_TYPE_INT64:
      jsonenc_printf(e, "\"%" PRId64 "\"", val.int64_val);
      break;
    case UPB_TYPE_UINT64:
      jsonenc_printf(e, "\"%" PRIu64 "\"", val.uint64_val);
      break;
    case UPB_TYPE_STRING:
      jsonenc_string(e, val.str_val);
      break;
    case UPB_TYPE_BYTES:
      jsonenc_bytes(e, val.str_val);
      break;
    case UPB_TYPE_ENUM:
      jsonenc_enum(val.int32_val, f, e);
      break;
    case UPB_TYPE_MESSAGE:
      jsonenc_msgfield(e, val.msg_val, upb_fielddef_msgsubdef(f));
      break;
  }
}
/* Encodes a map key followed by ": ".  JSON object keys are always strings,
 * so bool and integer keys are wrapped in double quotes.  String keys go
 * through jsonenc_string(), which adds the quotes (and escaping) itself. */
static void jsonenc_mapkey(jsonenc *e, upb_msgval val, const upb_fielddef *f) {
  if (upb_fielddef_type(f) == UPB_TYPE_STRING) {
    jsonenc_string(e, val.str_val);
  } else {
    jsonenc_putstr(e, "\"");
    switch (upb_fielddef_type(f)) {
      case UPB_TYPE_BOOL:
        jsonenc_putstr(e, val.bool_val ? "true" : "false");
        break;
      case UPB_TYPE_INT32:
        jsonenc_printf(e, "%" PRId32, val.int32_val);
        break;
      case UPB_TYPE_UINT32:
        jsonenc_printf(e, "%" PRIu32, val.uint32_val);
        break;
      case UPB_TYPE_INT64:
        jsonenc_printf(e, "%" PRId64, val.int64_val);
        break;
      case UPB_TYPE_UINT64:
        jsonenc_printf(e, "%" PRIu64, val.uint64_val);
        break;
      default:
        /* Any other key type produces an empty key, as before. */
        break;
    }
    jsonenc_putstr(e, "\"");
  }

  jsonenc_putstr(e, ": ");
}
/* Encodes the repeated field |f|, whose elements are in |arr|, as a JSON
 * array.  Each element is written with jsonenc_msgval(). */
static void jsonenc_array(jsonenc *e, const upb_array *arr,
                          const upb_fielddef *f) {
  size_t i;
  size_t size = upb_array_size(arr);
  bool first = true;

  jsonenc_putstr(e, "[");

  for (i = 0; i < size; i++) {
    jsonenc_putsep(e, &first);
    jsonenc_msgval(e, upb_array_get(arr, i), f);
  }

  jsonenc_putstr(e, "]");
}
/* Encodes the map field |f|, whose entries are in |map|, as a JSON object.
 * Fields 1 and 2 of the map entry type describe the key and the value. */
static void jsonenc_map(jsonenc *e, const upb_map *map, const upb_fielddef *f) {
  const upb_msgdef *entry = upb_fielddef_msgsubdef(f);
  const upb_fielddef *key_f = upb_msgdef_itof(entry, 1);
  const upb_fielddef *val_f = upb_msgdef_itof(entry, 2);
  size_t iter = UPB_MAP_BEGIN;
  bool first = true;

  jsonenc_putstr(e, "{");

  while (upb_mapiter_next(map, &iter)) {
    jsonenc_putsep(e, &first);
    /* jsonenc_mapkey() also writes the ": " that follows the key. */
    jsonenc_mapkey(e, upb_mapiter_key(map, iter), key_f);
    jsonenc_msgval(e, upb_mapiter_value(map, iter), val_f);
  }

  jsonenc_putstr(e, "}");
}
/* Encodes one object member: a separator if this is not the first member,
 * then the quoted field name, then the field's value.
 *
 * The name is the proto field name when UPB_JSONENC_PROTONAMES is set in
 * e->options, and the field's JSON name otherwise.  Maps and repeated fields
 * are written as a JSON object / array; everything else is a single value. */
static void jsonenc_fieldval(jsonenc *e, const upb_fielddef *f,
                             upb_msgval val, bool *first) {
  char buf[128];
  const char *name;

  if (e->options & UPB_JSONENC_PROTONAMES) {
    name = upb_fielddef_name(f);
  } else {
    /* TODO(haberman): we need a better JSON name API. */
    upb_fielddef_getjsonname(f, buf, sizeof(buf));
    name = buf;
  }

  jsonenc_putsep(e, first);
  jsonenc_printf(e, "\"%s\": ", name);

  if (upb_fielddef_ismap(f)) {
    jsonenc_map(e, val.map_val, f);
  } else if (upb_fielddef_isseq(f)) {
    jsonenc_array(e, val.array_val, f);
  } else {
    jsonenc_msgval(e, val, f);
  }
}
/* Encodes |msg| (of type |m|) as a JSON object, including the surrounding
 * braces.
 *
 * With UPB_JSONENC_EMITDEFAULTS set in e->options every field declared in
 * |m| is written; otherwise only the fields reported by upb_msg_next() are
 * written. */
static void jsonenc_msg(jsonenc *e, const upb_msg *msg, const upb_msgdef *m) {
  upb_msgval val;
  const upb_fielddef *f;
  bool first = true;

  jsonenc_putstr(e, "{");

  if (e->options & UPB_JSONENC_EMITDEFAULTS) {
    /* Iterate over all fields. */
    upb_msg_field_iter i;
    for (upb_msg_field_begin(&i, m); !upb_msg_field_done(&i);
         upb_msg_field_next(&i)) {
      f = upb_msg_iter_field(&i);
      jsonenc_fieldval(e, f, upb_msg_get(msg, f), &first);
    }
  } else {
    /* Iterate over non-empty fields. */
    size_t iter = UPB_MSG_BEGIN;
    while (upb_msg_next(msg, m, e->ext_pool, &f, &val, &iter)) {
      jsonenc_fieldval(e, f, val, &first);
    }
  }

  jsonenc_putstr(e, "}");
}
/* Terminates the output with a NUL byte and returns the length the complete
 * output would have had (bytes written plus bytes that overflowed), not
 * counting the terminator.
 *
 * |size| is the capacity of the output buffer.  With a capacity of zero
 * nothing is written.  If the buffer is completely full, the last byte
 * written is sacrificed to make room for the terminator. */
size_t jsonenc_nullz(jsonenc *e, size_t size) {
  const size_t total = e->ptr - e->buf + e->overflow;

  if (size == 0) {
    return total;
  }

  if (e->ptr == e->end) {
    e->ptr--;
  }
  *e->ptr = '\0';

  return total;
}
size_t upb_jsonencode(const upb_msg *msg, const upb_msgdef *m,
const upb_symtab *ext_pool, int options, char *buf,
size_t size) {
jsonenc e;
e.buf = buf;
e.ptr = buf;
e.end = buf + size;
e.overflow = 0;
e.options = options;
e.ext_pool = ext_pool;
jsonenc_msg(&e, msg, m);
return jsonenc_nullz(&e, size);
}

@ -0,0 +1,36 @@
#ifndef UPB_JSONENCODE_H_
#define UPB_JSONENCODE_H_
#include "upb/def.h"
#ifdef __cplusplus
extern "C" {
#endif
/* Option flags for the encoder; combine with bitwise OR. */
enum {
  /* When set, emits 0/default values.  TODO(haberman): proto3 only? */
  UPB_JSONENC_EMITDEFAULTS = 1 << 0,

  /* When set, use normal (snake_case) field names instead of JSON (camelCase)
   * names. */
  UPB_JSONENC_PROTONAMES = 1 << 1
};
/* Encodes the given |msg| to JSON format.  The message's reflection is given
 * in |m|.  The symtab in |ext_pool| is used to find extensions (if NULL,
 * extensions will not be printed).  |options| is a bitwise OR of the
 * UPB_JSONENC_* flags.
 *
 * Output is placed in the given buffer |buf|, which holds |size| bytes, and
 * is NUL-terminated whenever |size| > 0.  The output size (excluding the NUL)
 * is returned.  This means that a return value >= |size| implies that the
 * output was truncated.  (These are the same semantics as snprintf()). */
size_t upb_jsonencode(const upb_msg *msg, const upb_msgdef *m,
const upb_symtab *ext_pool, int options, char *buf,
size_t size);
#ifdef __cplusplus
} /* extern "C" */
#endif
#endif /* UPB_JSONENCODE_H_ */
Loading…
Cancel
Save