Support maps in JSON parsing and serialization.

This is a sync of our internal development of JSON parsing and
serialization. It implements native understanding of MapEntry
submessages, so that map fields with (key, value) pairs are serialized
as JSON maps (objects) natively rather than as arrays of objects with
'key' and 'value' fields. The parser also now understands how to emit
handler calls corresponding to MapEntry objects when processing a map
field.

This sync also picks up a bugfix in `table.c` to handle an alloc-failed
case.
pull/13171/head
Chris Fallin 10 years ago
parent 51513c6e7f
commit fb58504569
  1. 98
      tests/json/test_json.cc
  2. 34
      tests/test_def.c
  3. 20
      upb/def.c
  4. 7
      upb/def.h
  5. 20
      upb/json/parser.h
  6. 245
      upb/json/parser.rl
  7. 275
      upb/json/printer.c
  8. 8
      upb/table.c

@ -85,6 +85,33 @@ static TestCase kTestRoundtripMessages[] = {
TEST("{\"optional_string\":\"\\uFFFF\"}"),
EXPECT("{\"optional_string\":\"\xEF\xBF\xBF\"}")
},
// map-field tests
{
TEST("{\"map_string_string\":{\"a\":\"value1\",\"b\":\"value2\","
"\"c\":\"value3\"}}"),
EXPECT_SAME
},
{
TEST("{\"map_int32_string\":{\"1\":\"value1\",\"-1\":\"value2\","
"\"1234\":\"value3\"}}"),
EXPECT_SAME
},
{
TEST("{\"map_bool_string\":{\"false\":\"value1\",\"true\":\"value2\"}}"),
EXPECT_SAME
},
{
TEST("{\"map_string_int32\":{\"asdf\":1234,\"jkl;\":-1}}"),
EXPECT_SAME
},
{
TEST("{\"map_string_bool\":{\"asdf\":true,\"jkl;\":false}}"),
EXPECT_SAME
},
{
TEST("{\"map_string_msg\":{\"asdf\":{\"foo\":42},\"jkl;\":{\"foo\":84}}}"),
EXPECT_SAME
},
TEST_SENTINEL
};
@ -115,6 +142,53 @@ static const upb::MessageDef* BuildTestMessage(
submsg->set_full_name("SubMessage", &st);
AddField(submsg.get(), 1, "foo", UPB_TYPE_INT32, false);
// Create MapEntryStringString.
upb::reffed_ptr<upb::MessageDef> mapentry_string_string(
upb::MessageDef::New());
mapentry_string_string->set_full_name("MapEntry_String_String", &st);
mapentry_string_string->setmapentry(true);
AddField(mapentry_string_string.get(), 1, "key", UPB_TYPE_STRING, false);
AddField(mapentry_string_string.get(), 2, "value", UPB_TYPE_STRING, false);
// Create MapEntryInt32String.
upb::reffed_ptr<upb::MessageDef> mapentry_int32_string(
upb::MessageDef::New());
mapentry_int32_string->set_full_name("MapEntry_Int32_String", &st);
mapentry_int32_string->setmapentry(true);
AddField(mapentry_int32_string.get(), 1, "key", UPB_TYPE_INT32, false);
AddField(mapentry_int32_string.get(), 2, "value", UPB_TYPE_STRING, false);
// Create MapEntryBoolString.
upb::reffed_ptr<upb::MessageDef> mapentry_bool_string(
upb::MessageDef::New());
mapentry_bool_string->set_full_name("MapEntry_Bool_String", &st);
mapentry_bool_string->setmapentry(true);
AddField(mapentry_bool_string.get(), 1, "key", UPB_TYPE_BOOL, false);
AddField(mapentry_bool_string.get(), 2, "value", UPB_TYPE_STRING, false);
// Create MapEntryStringInt32.
upb::reffed_ptr<upb::MessageDef> mapentry_string_int32(
upb::MessageDef::New());
mapentry_string_int32->set_full_name("MapEntry_String_Int32", &st);
mapentry_string_int32->setmapentry(true);
AddField(mapentry_string_int32.get(), 1, "key", UPB_TYPE_STRING, false);
AddField(mapentry_string_int32.get(), 2, "value", UPB_TYPE_INT32, false);
// Create MapEntryStringBool.
upb::reffed_ptr<upb::MessageDef> mapentry_string_bool(upb::MessageDef::New());
mapentry_string_bool->set_full_name("MapEntry_String_Bool", &st);
mapentry_string_bool->setmapentry(true);
AddField(mapentry_string_bool.get(), 1, "key", UPB_TYPE_STRING, false);
AddField(mapentry_string_bool.get(), 2, "value", UPB_TYPE_BOOL, false);
// Create MapEntryStringMessage.
upb::reffed_ptr<upb::MessageDef> mapentry_string_msg(upb::MessageDef::New());
mapentry_string_msg->set_full_name("MapEntry_String_Message", &st);
mapentry_string_msg->setmapentry(true);
AddField(mapentry_string_msg.get(), 1, "key", UPB_TYPE_STRING, false);
AddField(mapentry_string_msg.get(), 2, "value", UPB_TYPE_MESSAGE, false,
upb::upcast(submsg.get()));
// Create MyEnum.
upb::reffed_ptr<upb::EnumDef> myenum(upb::EnumDef::New());
myenum->set_full_name("MyEnum", &st);
@ -150,13 +224,33 @@ static const upb::MessageDef* BuildTestMessage(
AddField(md.get(), 19, "optional_enum", UPB_TYPE_ENUM, true,
upb::upcast(myenum.get()));
AddField(md.get(), 20, "map_string_string", UPB_TYPE_MESSAGE, true,
upb::upcast(mapentry_string_string.get()));
AddField(md.get(), 21, "map_int32_string", UPB_TYPE_MESSAGE, true,
upb::upcast(mapentry_int32_string.get()));
AddField(md.get(), 22, "map_bool_string", UPB_TYPE_MESSAGE, true,
upb::upcast(mapentry_bool_string.get()));
AddField(md.get(), 23, "map_string_int32", UPB_TYPE_MESSAGE, true,
upb::upcast(mapentry_string_int32.get()));
AddField(md.get(), 24, "map_string_bool", UPB_TYPE_MESSAGE, true,
upb::upcast(mapentry_string_bool.get()));
AddField(md.get(), 25, "map_string_msg", UPB_TYPE_MESSAGE, true,
upb::upcast(mapentry_string_msg.get()));
// Add all of the defs to our symtab.
upb::Def* defs[3] = {
upb::Def* defs[9] = {
upb::upcast(submsg.ReleaseTo(&defs)),
upb::upcast(myenum.ReleaseTo(&defs)),
upb::upcast(md.ReleaseTo(&defs)),
upb::upcast(mapentry_string_string.ReleaseTo(&defs)),
upb::upcast(mapentry_int32_string.ReleaseTo(&defs)),
upb::upcast(mapentry_bool_string.ReleaseTo(&defs)),
upb::upcast(mapentry_string_int32.ReleaseTo(&defs)),
upb::upcast(mapentry_string_bool.ReleaseTo(&defs)),
upb::upcast(mapentry_string_msg.ReleaseTo(&defs)),
};
symtab->Add(defs, 3, &defs, &st);
symtab->Add(defs, 9, &defs, &st);
ASSERT(st.ok());
// Return TestMessage.
return symtab->LookupMessage("TestMessage");

@ -344,6 +344,39 @@ static void test_descriptor_flags() {
upb_msgdef_unref(m2, &m2);
}
// Checks the map-entry validation performed when defs are added to a symtab:
// a message field whose subdef is flagged as a map entry is expected to be
// rejected while the field is optional, and accepted once it is repeated.
static void test_mapentry_check() {
upb_status s = UPB_STATUS_INIT;
// Parent message with a single optional message-typed field that refers to
// its submessage by name (".MapEntry").
upb_msgdef *m = upb_msgdef_new(&m);
upb_msgdef_setfullname(m, "TestMessage", &s);
upb_fielddef *f = upb_fielddef_new(&f);
upb_fielddef_setname(f, "field1", &s);
upb_fielddef_setnumber(f, 1, &s);
upb_fielddef_setlabel(f, UPB_LABEL_OPTIONAL);
upb_fielddef_settype(f, UPB_TYPE_MESSAGE);
upb_fielddef_setsubdefname(f, ".MapEntry", &s);
upb_msgdef_addfield(m, f, &f, &s);
ASSERT(upb_ok(&s));
// The submessage the field refers to, flagged as a map entry.
upb_msgdef *subm = upb_msgdef_new(&subm);
upb_msgdef_setfullname(subm, "MapEntry", &s);
upb_msgdef_setmapentry(subm, true);
upb_symtab *symtab = upb_symtab_new(&symtab);
upb_def *defs[] = {UPB_UPCAST(m), UPB_UPCAST(subm)};
upb_symtab_add(symtab, defs, 2, NULL, &s);
// Should not have succeeded: non-repeated field pointing to a MapEntry.
ASSERT(!upb_ok(&s));
// Relabel the field as repeated and retry; the add is now expected to
// succeed.
// NOTE(review): |s| still carries the error from the failed add above, so
// this presumably relies on upb_symtab_add() resetting the status -- confirm.
upb_fielddef_setlabel(f, UPB_LABEL_REPEATED);
upb_symtab_add(symtab, defs, 2, NULL, &s);
ASSERT(upb_ok(&s));
// Release the refs taken above (same owner tokens as the *_new() calls).
upb_symtab_unref(symtab, &symtab);
upb_msgdef_unref(subm, &subm);
upb_msgdef_unref(m, &m);
}
static void test_oneofs() {
upb_status s = UPB_STATUS_INIT;
bool ok = true;
@ -412,6 +445,7 @@ int run_tests(int argc, char *argv[]) {
test_partial_freeze();
test_noreftracking();
test_descriptor_flags();
test_mapentry_check();
test_oneofs();
return 0;
}

@ -211,6 +211,21 @@ static bool upb_validate_field(upb_fielddef *f, upb_status *s) {
upb_fielddef_setdefaultint32(f, upb_fielddef_defaultint32(f));
}
// Ensure that MapEntry submessages only appear as repeated fields, not
// optional/required (singular) fields.
if (upb_fielddef_type(f) == UPB_TYPE_MESSAGE &&
upb_fielddef_msgsubdef(f) != NULL) {
const upb_msgdef *subdef = upb_fielddef_msgsubdef(f);
if (upb_msgdef_mapentry(subdef) && !upb_fielddef_isseq(f)) {
upb_status_seterrf(s,
"Field %s refers to mapentry message but is not "
"a repeated field",
upb_fielddef_name(f) ? upb_fielddef_name(f) :
"(unnamed)");
return false;
}
}
return true;
}
@ -1243,6 +1258,11 @@ bool upb_fielddef_isprimitive(const upb_fielddef *f) {
return !upb_fielddef_isstring(f) && !upb_fielddef_issubmsg(f);
}
bool upb_fielddef_ismap(const upb_fielddef *f) {
return upb_fielddef_isseq(f) && upb_fielddef_issubmsg(f) &&
upb_msgdef_mapentry(upb_fielddef_msgsubdef(f));
}
bool upb_fielddef_hassubdef(const upb_fielddef *f) {
return upb_fielddef_issubmsg(f) || upb_fielddef_type(f) == UPB_TYPE_ENUM;
}

@ -368,6 +368,7 @@ UPB_DEFINE_DEF(upb::FieldDef, fielddef, FIELD,
bool IsString() const;
bool IsSequence() const;
bool IsPrimitive() const;
bool IsMap() const;
// How integers are encoded. Only meaningful for integer types.
// Defaults to UPB_INTFMT_VARIABLE, and is reset when "type" changes.
@ -592,6 +593,7 @@ bool upb_fielddef_issubmsg(const upb_fielddef *f);
bool upb_fielddef_isstring(const upb_fielddef *f);
bool upb_fielddef_isseq(const upb_fielddef *f);
bool upb_fielddef_isprimitive(const upb_fielddef *f);
bool upb_fielddef_ismap(const upb_fielddef *f);
int64_t upb_fielddef_defaultint64(const upb_fielddef *f);
int32_t upb_fielddef_defaultint32(const upb_fielddef *f);
uint64_t upb_fielddef_defaultuint64(const upb_fielddef *f);
@ -980,6 +982,10 @@ UPB_INLINE upb_oneofdef *upb_msgdef_ntoo_mutable(upb_msgdef *m,
void upb_msgdef_setmapentry(upb_msgdef *m, bool map_entry);
bool upb_msgdef_mapentry(const upb_msgdef *m);
// Well-known field tag numbers for map-entry messages.
#define UPB_MAPENTRY_KEY 1
#define UPB_MAPENTRY_VALUE 2
const upb_oneofdef *upb_msgdef_findoneof(const upb_msgdef *m,
const char *name);
int upb_msgdef_numoneofs(const upb_msgdef *m);
@ -1479,6 +1485,7 @@ inline bool FieldDef::IsSubMessage() const {
}
inline bool FieldDef::IsString() const { return upb_fielddef_isstring(this); }
inline bool FieldDef::IsSequence() const { return upb_fielddef_isseq(this); }
inline bool FieldDef::IsMap() const { return upb_fielddef_ismap(this); }
inline int64_t FieldDef::default_int64() const {
return upb_fielddef_defaultint64(this);
}

@ -23,12 +23,30 @@ class Parser;
UPB_DECLARE_TYPE(upb::json::Parser, upb_json_parser);
// Internal-only struct used by the parser.
// Internal-only struct used by the parser. A parser frame corresponds
// one-to-one with a handler (sink) frame.
typedef struct {
UPB_PRIVATE_FOR_CPP
// The sink to which the parser sends the handler calls (start/end message,
// values, ...) emitted while this frame is on top of the stack.
upb_sink sink;
// The current message in which we're parsing, and the field whose value we're
// expecting next.
const upb_msgdef *m;
const upb_fielddef *f;
// We are in a repeated-field context, ready to emit mapentries as
// submessages. This flag alters the start-of-object (open-brace) behavior to
// begin a sequence of mapentry messages rather than a single submessage.
bool is_map;
// We are in a map-entry message context. This flag is set when parsing the
// value field of a single map entry and indicates to all value-field parsers
// (subobjects, strings, numbers, and bools) that the map-entry submessage
// should end as soon as the value is parsed.
bool is_mapentry;
// If |is_map| or |is_mapentry| is true, |mapfield| refers to the parent
// message's map field that we're currently parsing. This differs from |f|
// because |f| is the field in the *current* message (i.e., the map-entry
// message itself), not the parent's field that leads to this map.
const upb_fielddef *mapfield;
} upb_jsonparser_frame;

@ -221,7 +221,6 @@ badpadding:
// the true value in a contiguous buffer.
static void assert_accumulate_empty(upb_json_parser *p) {
UPB_UNUSED(p);
assert(p->accumulated == NULL);
assert(p->accumulated_len == 0);
}
@ -506,6 +505,8 @@ static void start_number(upb_json_parser *p, const char *ptr) {
capture_begin(p, ptr);
}
static bool parse_number(upb_json_parser *p, const char *buf, const char *end);
static bool end_number(upb_json_parser *p, const char *ptr) {
if (!capture_end(p, ptr)) {
return false;
@ -520,8 +521,12 @@ static bool end_number(upb_json_parser *p, const char *ptr) {
size_t len;
const char *buf = accumulate_getptr(p, &len);
const char *myend = buf + len - 1; // One for NULL.
char *end;
return parse_number(p, buf, myend);
}
static bool parse_number(upb_json_parser *p, const char *buf,
const char *myend) {
char *end;
switch (upb_fielddef_type(p->top->f)) {
case UPB_TYPE_ENUM:
case UPB_TYPE_INT32: {
@ -577,6 +582,7 @@ static bool end_number(upb_json_parser *p, const char *ptr) {
}
multipart_end(p);
return true;
err:
@ -595,6 +601,7 @@ static bool parser_putbool(upb_json_parser *p, bool val) {
bool ok = upb_sink_putbool(&p->top->sink, parser_getsel(p), val);
UPB_ASSERT_VAR(ok, ok);
return true;
}
@ -611,6 +618,8 @@ static bool start_stringval(upb_json_parser *p) {
upb_sink_startstr(&p->top->sink, sel, 0, &inner->sink);
inner->m = p->top->m;
inner->f = p->top->f;
inner->is_map = false;
inner->is_mapentry = false;
p->top = inner;
if (upb_fielddef_type(p->top->f) == UPB_TYPE_STRING) {
@ -688,6 +697,7 @@ static bool end_stringval(upb_json_parser *p) {
}
multipart_end(p);
return ok;
}
@ -696,54 +706,217 @@ static void start_member(upb_json_parser *p) {
multipart_startaccum(p);
}
static bool end_member(upb_json_parser *p) {
assert(!p->top->f);
// Helper: invoked during handle_mapentry() to emit the mapentry message's key
// field based on the current contents of the accumulate buffer.
static bool parse_mapentry_key(upb_json_parser *p) {
size_t len;
const char *buf = accumulate_getptr(p, &len);
const upb_fielddef *f = upb_msgdef_ntof(p->top->m, buf, len);
// Emit the key field. We do a bit of ad-hoc parsing here because the
// parser state machine has already decided that this is a string field
// name, and we are reinterpreting it as some arbitrary key type. In
// particular, integer and bool keys are quoted, so we need to parse the
// quoted string contents here.
if (!f) {
// TODO(haberman): Ignore unknown fields if requested/configured to do so.
upb_status_seterrf(p->status, "No such field: %.*s\n", (int)len, buf);
p->top->f = upb_msgdef_itof(p->top->m, UPB_MAPENTRY_KEY);
if (p->top->f == NULL) {
upb_status_seterrmsg(p->status, "mapentry message has no key");
return false;
}
switch (upb_fielddef_type(p->top->f)) {
case UPB_TYPE_INT32:
case UPB_TYPE_INT64:
case UPB_TYPE_UINT32:
case UPB_TYPE_UINT64:
// Invoke parse_number. The accum buffer has the number's text already.
if (!parse_number(p, buf, buf + len)) {
return false;
}
break;
case UPB_TYPE_BOOL:
if (len == 4 && !strncmp(buf, "true", 4)) {
if (!parser_putbool(p, true)) {
return false;
}
} else if (len == 5 && !strncmp(buf, "false", 5)) {
if (!parser_putbool(p, false)) {
return false;
}
} else {
upb_status_seterrmsg(p->status,
"Map bool key not 'true' or 'false'");
return false;
}
multipart_end(p);
break;
case UPB_TYPE_STRING:
case UPB_TYPE_BYTES: {
upb_sink subsink;
upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSTR);
upb_sink_startstr(&p->top->sink, sel, len, &subsink);
sel = getsel_for_handlertype(p, UPB_HANDLER_STRING);
upb_sink_putstring(&subsink, sel, buf, len, NULL);
sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSTR);
upb_sink_endstr(&subsink, sel);
multipart_end(p);
break;
}
default:
upb_status_seterrmsg(p->status, "Invalid field type for map key");
return false;
}
p->top->f = f;
multipart_end(p);
return true;
}
// Helper: emit one map entry (as a submessage in the map field sequence). This
// is invoked from end_membername(), at the end of the map entry's key string,
// with the map key in the accumulate buffer. It parses the key from that
// buffer, emits the handler calls to start the mapentry submessage (setting up
// its subframe in the process), and sets up state in the subframe so that the
// value parser (invoked next) will emit the mapentry's value field and then
// end the mapentry message.
//
// Returns false if check_stack() fails, if parse_mapentry_key() fails, or if
// the mapentry message has no value field (that last case records an error in
// p->status here).
static bool handle_mapentry(upb_json_parser *p) {
  // Map entry: p->top->sink is the seq frame, so we need to start a frame
  // for the mapentry itself, and then set |f| in that frame so that the map
  // value field is parsed, and also set a flag to end the frame after the
  // map-entry value is parsed.
  if (!check_stack(p)) return false;

  const upb_fielddef *mapfield = p->top->mapfield;
  const upb_msgdef *mapentrymsg = upb_fielddef_msgsubdef(mapfield);

  upb_jsonparser_frame *inner = p->top + 1;
  p->top->f = mapfield;
  upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSUBMSG);
  upb_sink_startsubmsg(&p->top->sink, sel, &inner->sink);
  inner->m = mapentrymsg;
  inner->mapfield = mapfield;
  inner->is_map = false;

  // Don't set this to true *yet* -- we reuse parsing handlers below to push
  // the key field value to the sink, and these handlers will pop the frame
  // if they see is_mapentry (when invoked by the parser state machine, they
  // would have just seen the map-entry value, not key).
  inner->is_mapentry = false;
  p->top = inner;

  // send STARTMSG in submsg frame.
  upb_sink_startmsg(&p->top->sink);

  // The key can be rejected (e.g. a bool key that is neither "true" nor
  // "false", or an unsupported key type); propagate that failure instead of
  // carrying on as if the key had been emitted.
  if (!parse_mapentry_key(p)) return false;

  // Set up the value field to receive the map-entry value.
  p->top->f = upb_msgdef_itof(p->top->m, UPB_MAPENTRY_VALUE);
  p->top->is_mapentry = true;  // set up to pop frame after value is parsed.
  p->top->mapfield = mapfield;
  if (p->top->f == NULL) {
    upb_status_seterrmsg(p->status, "mapentry message has no value");
    return false;
  }

  return true;
}
static void clear_member(upb_json_parser *p) { p->top->f = NULL; }
// Called when the name string of an object member has been fully read (the
// name is in the accumulate buffer). In a map context the name is a map key
// and is handed to handle_mapentry(); otherwise it is looked up as a field
// name of the current message and stored in the top frame's |f|.
// Returns false if the name does not match any field of the message.
static bool end_membername(upb_json_parser *p) {
  assert(!p->top->f);

  if (p->top->is_map) return handle_mapentry(p);

  size_t name_len;
  const char *name = accumulate_getptr(p, &name_len);
  const upb_fielddef *field = upb_msgdef_ntof(p->top->m, name, name_len);
  if (field == NULL) {
    // TODO(haberman): Ignore unknown fields if requested/configured to do so.
    upb_status_seterrf(p->status, "No such field: %.*s\n", (int)name_len,
                       name);
    return false;
  }

  p->top->f = field;
  multipart_end(p);
  return true;
}
// Called after the value of an object member has been parsed. If the top
// frame is a map-entry frame, the entry is closed first: ENDMSG is sent on the
// entry's sink, the frame is popped, and ENDSUBMSG for the map field is sent
// on the frame below. In every case the (resulting) top frame's |f| is
// cleared.
static void end_member(upb_json_parser *p) {
  if (p->top->is_mapentry) {
    assert(p->top > p->stack);

    // ENDMSG goes to the map-entry submessage itself.
    upb_status endmsg_status = UPB_STATUS_INIT;
    upb_sink_endmsg(&p->top->sink, &endmsg_status);

    // Grab the map field before popping; the selector below is looked up on
    // this field rather than on whatever |f| the lower frame holds.
    const upb_fielddef *map_f = p->top->mapfield;
    p->top--;

    // ENDSUBMSG goes to the repeated-field-of-mapentries frame.
    upb_selector_t endsubmsg_sel;
    bool found =
        upb_handlers_getselector(map_f, UPB_HANDLER_ENDSUBMSG, &endsubmsg_sel);
    UPB_ASSERT_VAR(found, found);
    upb_sink_endsubmsg(&p->top->sink, endsubmsg_sel);
  }

  p->top->f = NULL;
}
static bool start_subobject(upb_json_parser *p) {
assert(p->top->f);
if (!upb_fielddef_issubmsg(p->top->f)) {
if (upb_fielddef_ismap(p->top->f)) {
// Beginning of a map. Start a new parser frame in a repeated-field
// context.
if (!check_stack(p)) return false;
upb_jsonparser_frame *inner = p->top + 1;
upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSEQ);
upb_sink_startseq(&p->top->sink, sel, &inner->sink);
inner->m = upb_fielddef_msgsubdef(p->top->f);
inner->mapfield = p->top->f;
inner->f = NULL;
inner->is_map = true;
inner->is_mapentry = false;
p->top = inner;
return true;
} else if (upb_fielddef_issubmsg(p->top->f)) {
// Beginning of a subobject. Start a new parser frame in the submsg
// context.
if (!check_stack(p)) return false;
upb_jsonparser_frame *inner = p->top + 1;
upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSUBMSG);
upb_sink_startsubmsg(&p->top->sink, sel, &inner->sink);
inner->m = upb_fielddef_msgsubdef(p->top->f);
inner->f = NULL;
inner->is_map = false;
inner->is_mapentry = false;
p->top = inner;
return true;
} else {
upb_status_seterrf(p->status,
"Object specified for non-message/group field: %s",
upb_fielddef_name(p->top->f));
return false;
}
if (!check_stack(p)) return false;
upb_jsonparser_frame *inner = p->top + 1;
upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSUBMSG);
upb_sink_startsubmsg(&p->top->sink, sel, &inner->sink);
inner->m = upb_fielddef_msgsubdef(p->top->f);
inner->f = NULL;
p->top = inner;
return true;
}
static void end_subobject(upb_json_parser *p) {
p->top--;
upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSUBMSG);
upb_sink_endsubmsg(&p->top->sink, sel);
if (p->top->is_map) {
p->top--;
upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSEQ);
upb_sink_endseq(&p->top->sink, sel);
} else {
p->top--;
upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSUBMSG);
upb_sink_endsubmsg(&p->top->sink, sel);
}
}
static bool start_array(upb_json_parser *p) {
@ -763,6 +936,8 @@ static bool start_array(upb_json_parser *p) {
upb_sink_startseq(&p->top->sink, sel, &inner->sink);
inner->m = p->top->m;
inner->f = p->top->f;
inner->is_map = false;
inner->is_mapentry = false;
p->top = inner;
return true;
@ -777,12 +952,16 @@ static void end_array(upb_json_parser *p) {
}
static void start_object(upb_json_parser *p) {
upb_sink_startmsg(&p->top->sink);
if (!p->top->is_map) {
upb_sink_startmsg(&p->top->sink);
}
}
static void end_object(upb_json_parser *p) {
upb_status status;
upb_sink_endmsg(&p->top->sink, &status);
if (!p->top->is_map) {
upb_status status;
upb_sink_endmsg(&p->top->sink, &status);
}
}
@ -854,10 +1033,10 @@ static void end_object(upb_json_parser *p) {
ws
string
>{ start_member(parser); }
@{ CHECK_RETURN_TOP(end_member(parser)); }
@{ CHECK_RETURN_TOP(end_membername(parser)); }
ws ":" ws
value2
%{ clear_member(parser); }
%{ end_member(parser); }
ws;
object =
@ -967,6 +1146,8 @@ void upb_json_parser_uninit(upb_json_parser *p) {
void upb_json_parser_reset(upb_json_parser *p) {
p->top = p->stack;
p->top->f = NULL;
p->top->is_map = false;
p->top->is_mapentry = false;
int cs;
int top;

@ -182,11 +182,19 @@ static bool putkey(void *closure, const void *handler_data) {
return true; \
} \
static bool repeated_##type(void *closure, const void *handler_data, \
type val) { \
type val) { \
upb_json_printer *p = closure; \
print_comma(p); \
CHK(put##type(closure, handler_data, val)); \
return true; \
} \
static bool putmapkey_##type(void *closure, const void *handler_data, \
type val) { \
upb_json_printer *p = closure; \
print_data(p, "\"", 1); \
CHK(put##type(closure, handler_data, val)); \
print_data(p, "\":", 2); \
return true; \
}
TYPE_HANDLERS(double, fmt_double);
@ -222,20 +230,36 @@ static bool scalar_enum(void *closure, const void *handler_data,
return true;
}
static bool repeated_enum(void *closure, const void *handler_data,
int32_t val) {
const EnumHandlerData *hd = handler_data;
upb_json_printer *p = closure;
print_comma(p);
const char *symbolic_name = upb_enumdef_iton(hd->enumdef, val);
static void print_enum_symbolic_name(upb_json_printer *p,
const upb_enumdef *def,
int32_t val) {
const char *symbolic_name = upb_enumdef_iton(def, val);
if (symbolic_name) {
print_data(p, "\"", 1);
putstring(p, symbolic_name, strlen(symbolic_name));
print_data(p, "\"", 1);
} else {
putint32_t(closure, NULL, val);
putint32_t(p, NULL, val);
}
}
// int32 handler for an element of a repeated enum field: emits the element
// separator via print_comma() and then the value via
// print_enum_symbolic_name(). Always returns true.
static bool repeated_enum(void *closure, const void *handler_data,
                          int32_t val) {
  upb_json_printer *printer = closure;
  const EnumHandlerData *enum_hd = handler_data;

  print_comma(printer);
  print_enum_symbolic_name(printer, enum_hd->enumdef, val);
  return true;
}
// int32 handler for an enum-typed map value: prints the value via
// print_enum_symbolic_name() with no comma or key in front of it. Always
// returns true.
static bool mapvalue_enum(void *closure, const void *handler_data,
                          int32_t val) {
  upb_json_printer *printer = closure;
  const EnumHandlerData *enum_hd = handler_data;

  print_enum_symbolic_name(printer, enum_hd->enumdef, val);
  return true;
}
@ -251,25 +275,35 @@ static void *repeated_startsubmsg(void *closure, const void *handler_data) {
return closure;
}
static bool startmap(void *closure, const void *handler_data) {
// Enters one nesting level: bumps the depth, marks the new level as not yet
// having printed an element, and emits the opening brace.
static void start_frame(upb_json_printer *p) {
  p->first_elem_[++p->depth_] = true;
  print_data(p, "{", 1);
}
// Leaves one nesting level: emits the closing brace, then drops the depth.
static void end_frame(upb_json_printer *p) {
  print_data(p, "}", 1);
  --p->depth_;
}
static bool printer_startmsg(void *closure, const void *handler_data) {
UPB_UNUSED(handler_data);
upb_json_printer *p = closure;
if (p->depth_++ == 0) {
if (p->depth_ == 0) {
upb_bytessink_start(p->output_, 0, &p->subc_);
}
p->first_elem_[p->depth_] = true;
print_data(p, "{", 1);
start_frame(p);
return true;
}
static bool endmap(void *closure, const void *handler_data, upb_status *s) {
static bool printer_endmsg(void *closure, const void *handler_data, upb_status *s) {
UPB_UNUSED(handler_data);
UPB_UNUSED(s);
upb_json_printer *p = closure;
if (--p->depth_ == 0) {
end_frame(p);
if (p->depth_ == 0) {
upb_bytessink_end(p->output_);
}
print_data(p, "}", 1);
return true;
}
@ -290,6 +324,23 @@ static bool endseq(void *closure, const void *handler_data) {
return true;
}
// Start handler for a map: prints the key via putkey(), then opens a new
// nesting level with '{'. Returns |closure| so that nested handlers keep
// using the same closure.
static void *startmap(void *closure, const void *handler_data) {
  upb_json_printer *printer = closure;

  CHK(putkey(closure, handler_data));

  // New level starts out with no elements printed yet.
  printer->first_elem_[++printer->depth_] = true;
  print_data(printer, "{", 1);
  return closure;
}
// End handler for a map: emits the closing '}' and leaves the nesting level
// opened by startmap(). Always returns true.
static bool endmap(void *closure, const void *handler_data) {
  upb_json_printer *printer = closure;
  UPB_UNUSED(handler_data);

  print_data(printer, "}", 1);
  --printer->depth_;
  return true;
}
static size_t putstr(void *closure, const void *handler_data, const char *str,
size_t len, const upb_bufhandle *handle) {
UPB_UNUSED(handler_data);
@ -404,6 +455,36 @@ static bool repeated_endstr(void *closure, const void *handler_data) {
return true;
}
// Start-of-string handler shared by string map keys and string map values:
// emits the opening double quote and returns the printer as the closure for
// the string's remaining handlers.
static void *mapkeyval_startstr(void *closure, const void *handler_data,
                                size_t size_hint) {
  upb_json_printer *printer = closure;
  UPB_UNUSED(size_hint);
  UPB_UNUSED(handler_data);

  print_data(printer, "\"", 1);
  return printer;
}
// String-data handler for a string-typed map key: forwards the chunk to
// putstr() and reports the whole chunk (|len| bytes) as consumed.
static size_t mapkey_str(void *closure, const void *handler_data,
const char *str, size_t len,
const upb_bufhandle *handle) {
// CHK is a macro defined elsewhere in this file; presumably it returns early
// when putstr() reports failure -- confirm against its definition.
CHK(putstr(closure, handler_data, str, len, handle));
return len;
}
// End-of-string handler for a string-typed map key: emits the closing double
// quote followed by the ':' that separates the key from its value. Always
// returns true.
static bool mapkey_endstr(void *closure, const void *handler_data) {
  upb_json_printer *printer = closure;
  UPB_UNUSED(handler_data);

  print_data(printer, "\":", 2);
  return true;
}
// End-of-string handler for a string-typed map value: emits only the closing
// double quote. Always returns true.
static bool mapvalue_endstr(void *closure, const void *handler_data) {
  upb_json_printer *printer = closure;
  UPB_UNUSED(handler_data);

  print_data(printer, "\"", 1);
  return true;
}
static size_t scalar_bytes(void *closure, const void *handler_data,
const char *str, size_t len,
const upb_bufhandle *handle) {
@ -421,31 +502,161 @@ static size_t repeated_bytes(void *closure, const void *handler_data,
return len;
}
void printer_sethandlers(const void *closure, upb_handlers *h) {
// String-data handler for a bytes-typed map key: forwards the chunk to
// putbytes(), then emits the ':' that separates the key from its value, and
// reports the whole chunk (|len| bytes) as consumed.
static size_t mapkey_bytes(void *closure, const void *handler_data,
const char *str, size_t len,
const upb_bufhandle *handle) {
upb_json_printer *p = closure;
// CHK is a macro defined elsewhere in this file; presumably it returns early
// when putbytes() reports failure -- confirm against its definition.
CHK(putbytes(closure, handler_data, str, len, handle));
// NOTE(review): the ':' is printed once per call, so this presumably assumes
// the key arrives as a single chunk -- confirm.
print_data(p, ":", 1);
return len;
}
// Allocates the EnumHandlerData for enum field |f| (its enumdef plus the key
// name produced by newstrpc()), registers it with |h| so that free() is called
// on it as cleanup, and installs it as the handler data of |attr|.
//
// NOTE(review): the malloc() result is used without a NULL check.
static void set_enum_hd(upb_handlers *h,
                        const upb_fielddef *f,
                        upb_handlerattr *attr) {
  EnumHandlerData *enum_hd = malloc(sizeof(*enum_hd));

  enum_hd->enumdef = (const upb_enumdef *)upb_fielddef_subdef(f);
  enum_hd->keyname = newstrpc(h, f);

  upb_handlers_addcleanup(h, enum_hd, free);
  upb_handlerattr_sethandlerdata(attr, enum_hd);
}
// Set up handlers for a mapentry submessage (i.e., an individual key/value pair
// in a map).
//
// TODO: Handle missing key, missing value, out-of-order key/value, or repeated
// key or value cases properly. The right way to do this is to allocate a
// temporary structure at the start of a mapentry submessage, store key and
// value data in it as key and value handlers are called, and then print the
// key/value pair once at the end of the submessage. If we don't do this, we
// should at least detect the case and throw an error. However, so far all of
// our sources that emit mapentry messages do so canonically (with one key
// field, and then one value field), so this is not a pressing concern at the
// moment.
void printer_sethandlers_mapentry(const void *closure, upb_handlers *h) {
UPB_UNUSED(closure);
const upb_msgdef *md = upb_handlers_msgdef(h);
// A mapentry message is printed simply as '"key": value'. Rather than
// special-case key and value for every type below, we just handle both
// fields explicitly here.
const upb_fielddef* key_field = upb_msgdef_itof(md, UPB_MAPENTRY_KEY);
const upb_fielddef* value_field = upb_msgdef_itof(md, UPB_MAPENTRY_VALUE);
upb_handlerattr empty_attr = UPB_HANDLERATTR_INITIALIZER;
upb_handlers_setstartmsg(h, startmap, &empty_attr);
upb_handlers_setendmsg(h, endmap, &empty_attr);
#define TYPE(type, name, ctype) \
case type: \
if (upb_fielddef_isseq(f)) { \
upb_handlers_set##name(h, f, repeated_##ctype, &empty_attr); \
} else { \
upb_handlers_set##name(h, f, scalar_##ctype, &name_attr); \
} \
switch (upb_fielddef_type(key_field)) {
case UPB_TYPE_INT32:
upb_handlers_setint32(h, key_field, putmapkey_int32_t, &empty_attr);
break;
case UPB_TYPE_INT64:
upb_handlers_setint64(h, key_field, putmapkey_int64_t, &empty_attr);
break;
case UPB_TYPE_UINT32:
upb_handlers_setuint32(h, key_field, putmapkey_uint32_t, &empty_attr);
break;
case UPB_TYPE_UINT64:
upb_handlers_setuint64(h, key_field, putmapkey_uint64_t, &empty_attr);
break;
case UPB_TYPE_BOOL:
upb_handlers_setbool(h, key_field, putmapkey_bool, &empty_attr);
break;
case UPB_TYPE_STRING:
upb_handlers_setstartstr(h, key_field, mapkeyval_startstr, &empty_attr);
upb_handlers_setstring(h, key_field, mapkey_str, &empty_attr);
upb_handlers_setendstr(h, key_field, mapkey_endstr, &empty_attr);
break;
case UPB_TYPE_BYTES:
upb_handlers_setstring(h, key_field, mapkey_bytes, &empty_attr);
break;
default:
assert(false);
break;
}
switch (upb_fielddef_type(value_field)) {
case UPB_TYPE_INT32:
upb_handlers_setint32(h, value_field, putint32_t, &empty_attr);
break;
case UPB_TYPE_INT64:
upb_handlers_setint64(h, value_field, putint64_t, &empty_attr);
break;
case UPB_TYPE_UINT32:
upb_handlers_setuint32(h, value_field, putuint32_t, &empty_attr);
break;
case UPB_TYPE_UINT64:
upb_handlers_setuint64(h, value_field, putuint64_t, &empty_attr);
break;
case UPB_TYPE_BOOL:
upb_handlers_setbool(h, value_field, putbool, &empty_attr);
break;
case UPB_TYPE_FLOAT:
upb_handlers_setfloat(h, value_field, putfloat, &empty_attr);
break;
case UPB_TYPE_DOUBLE:
upb_handlers_setdouble(h, value_field, putdouble, &empty_attr);
break;
case UPB_TYPE_STRING:
upb_handlers_setstartstr(h, value_field, mapkeyval_startstr, &empty_attr);
upb_handlers_setstring(h, value_field, putstr, &empty_attr);
upb_handlers_setendstr(h, value_field, mapvalue_endstr, &empty_attr);
break;
case UPB_TYPE_BYTES:
upb_handlers_setstring(h, value_field, putbytes, &empty_attr);
break;
case UPB_TYPE_ENUM: {
upb_handlerattr enum_attr = UPB_HANDLERATTR_INITIALIZER;
set_enum_hd(h, value_field, &enum_attr);
upb_handlers_setint32(h, value_field, mapvalue_enum, &enum_attr);
upb_handlerattr_uninit(&enum_attr);
break;
}
case UPB_TYPE_MESSAGE:
// No handler necessary -- the submsg handlers will print the message
// as appropriate.
break;
}
upb_handlerattr_uninit(&empty_attr);
}
void printer_sethandlers(const void *closure, upb_handlers *h) {
UPB_UNUSED(closure);
const upb_msgdef *md = upb_handlers_msgdef(h);
bool is_mapentry = upb_msgdef_mapentry(md);
upb_handlerattr empty_attr = UPB_HANDLERATTR_INITIALIZER;
if (is_mapentry) {
// mapentry messages are sufficiently different that we handle them
// separately.
printer_sethandlers_mapentry(closure, h);
return;
}
upb_handlers_setstartmsg(h, printer_startmsg, &empty_attr);
upb_handlers_setendmsg(h, printer_endmsg, &empty_attr);
#define TYPE(type, name, ctype) \
case type: \
if (upb_fielddef_isseq(f)) { \
upb_handlers_set##name(h, f, repeated_##ctype, &empty_attr); \
} else { \
upb_handlers_set##name(h, f, scalar_##ctype, &name_attr); \
} \
break;
upb_msg_field_iter i;
upb_msg_field_begin(&i, upb_handlers_msgdef(h));
upb_msg_field_begin(&i, md);
for(; !upb_msg_field_done(&i); upb_msg_field_next(&i)) {
const upb_fielddef *f = upb_msg_iter_field(&i);
upb_handlerattr name_attr = UPB_HANDLERATTR_INITIALIZER;
upb_handlerattr_sethandlerdata(&name_attr, newstrpc(h, f));
if (upb_fielddef_isseq(f)) {
if (upb_fielddef_ismap(f)) {
upb_handlers_setstartseq(h, f, startmap, &name_attr);
upb_handlers_setendseq(h, f, endmap, &name_attr);
} else if (upb_fielddef_isseq(f)) {
upb_handlers_setstartseq(h, f, startseq, &name_attr);
upb_handlers_setendseq(h, f, endseq, &empty_attr);
}
@ -462,12 +673,8 @@ void printer_sethandlers(const void *closure, upb_handlers *h) {
// For now, we always emit symbolic names for enums. We may want an
// option later to control this behavior, but we will wait for a real
// need first.
EnumHandlerData *hd = malloc(sizeof(EnumHandlerData));
hd->enumdef = (const upb_enumdef *)upb_fielddef_subdef(f);
hd->keyname = newstrpc(h, f);
upb_handlers_addcleanup(h, hd, free);
upb_handlerattr enum_attr = UPB_HANDLERATTR_INITIALIZER;
upb_handlerattr_sethandlerdata(&enum_attr, hd);
set_enum_hd(h, f, &enum_attr);
if (upb_fielddef_isseq(f)) {
upb_handlers_setint32(h, f, repeated_enum, &enum_attr);

@ -40,12 +40,16 @@ char *upb_strdup(const char *s) {
}
// Returns a newly malloc()ed copy of the first |len| bytes of |s| with a
// terminating zero byte appended, or NULL if |len| is SIZE_MAX or the
// allocation fails. The result is to be released with free().
char *upb_strdup2(const char *s, size_t len) {
  // Prevent overflow errors.
  if (len == SIZE_MAX) return NULL;

  // Always null-terminate, even if binary data; but don't rely on the input to
  // have a null-terminating byte since it may be a raw binary buffer.
  size_t n = len + 1;
  char *p = (char *)malloc(n);
  if (p) {
    // Only touch the buffer once the allocation is known to have succeeded.
    if (len > 0) memcpy(p, s, len);
    p[len] = 0;
  }
  return p;
}

Loading…
Cancel
Save