From 8f8113b4fff748b57b0ff2f1a301e86b4703be84 Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Tue, 9 Dec 2014 12:27:22 -0800 Subject: [PATCH] JSON test, symbolic enum names in JSON, and a few improvements. - Added a JSON test that round-trips (parses then re-serializes) several test messages, ensuring that the re-serialized form matches the original exactly. - Added support for printing and parsing symbolic enum names (rather than integer values) in JSON. - Updated JSON printer to properly handle string fields that come in multiple pieces. ('bytes' fields still do not support this, and this work is more challenging because it requires making the base64 encoder resumable. Base64 encoding is not separable at an input-byte granularity, unlike string escaping.) - Fixed a < vs. <= bug in UTF-8 encoding generation (oops). --- Makefile | 2 + tests/json/test_json.cc | 244 ++++++++++++++++++++++++++++++++++++++++ upb/bindings/lua/upb.c | 2 +- upb/def.c | 9 +- upb/def.h | 14 ++- upb/json/parser.c | 139 ++++++++++++++--------- upb/json/parser.rl | 69 ++++++++---- upb/json/printer.c | 122 +++++++++++++++++--- 8 files changed, 504 insertions(+), 97 deletions(-) create mode 100644 tests/json/test_json.cc diff --git a/Makefile b/Makefile index b2c40367c9..732f498936 100644 --- a/Makefile +++ b/Makefile @@ -235,6 +235,7 @@ C_TESTS = \ CC_TESTS = \ tests/pb/test_decoder \ + tests/json/test_json \ tests/test_cpp \ tests/test_table \ @@ -264,6 +265,7 @@ tests/test_handlers: LIBS = lib/libupb.descriptor.a lib/libupb.a tests/pb/test_decoder: LIBS = lib/libupb.pb.a lib/libupb.a tests/test_cpp: LIBS = $(LOAD_DESCRIPTOR_LIBS) lib/libupb.a tests/test_table: LIBS = lib/libupb.a +tests/json/test_json: LIBS = lib/libupb.a lib/libupb.json.a tests/test_def: tests/test.proto.pb diff --git a/tests/json/test_json.cc b/tests/json/test_json.cc new file mode 100644 index 0000000000..1444081706 --- /dev/null +++ b/tests/json/test_json.cc @@ -0,0 +1,244 @@ +/* + * upb - a minimalist implementation of 
protocol buffers. + * + * Copyright (c) 2014 Google Inc. See LICENSE for details. + * + * A set of tests for JSON parsing and serialization. + */ + +#include "tests/upb_test.h" +#include "upb/handlers.h" +#include "upb/symtab.h" +#include "upb/json/printer.h" +#include "upb/json/parser.h" +#include "upb/upb.h" + +#include <string> + +// Macros for readability in test case list: allows us to give TEST("...") / +// EXPECT("...") pairs. +#define TEST(x) x +#define EXPECT_SAME NULL +#define EXPECT(x) x +#define TEST_SENTINEL { NULL, NULL } + +struct TestCase { + const char* input; + const char* expected; +}; + +static TestCase kTestRoundtripMessages[] = { + // Test most fields here. + { + TEST("{\"optional_int32\":-42,\"optional_string\":\"Test\\u0001Message\"," + "\"optional_msg\":{\"foo\":42}," + "\"optional_bool\":true,\"repeated_msg\":[{\"foo\":1}," + "{\"foo\":2}]}"), + EXPECT_SAME + }, + // Test special escapes in strings. + { + TEST("{\"repeated_string\":[\"\\b\",\"\\r\",\"\\n\",\"\\f\",\"\\t\"," + "\"\uFFFF\"]}"), + EXPECT_SAME + }, + // Test enum symbolic names. + { + // The common case: parse and print the symbolic name. + TEST("{\"optional_enum\":\"A\"}"), + EXPECT_SAME + }, + { + // Unknown enum value: will be printed as an integer. + TEST("{\"optional_enum\":42}"), + EXPECT_SAME + }, + { + // Known enum value: we're happy to parse an integer but we will re-emit the + // symbolic name. + TEST("{\"optional_enum\":1}"), + EXPECT("{\"optional_enum\":\"B\"}") + }, + // UTF-8 tests: escapes -> literal UTF8 in output. + { + // Note double escape on \uXXXX: we want the escape to be processed by the + // JSON parser, not by the C++ compiler! 
+ TEST("{\"optional_string\":\"\\u007F\"}"), + EXPECT("{\"optional_string\":\"\x7F\"}") + }, + { + TEST("{\"optional_string\":\"\\u0080\"}"), + EXPECT("{\"optional_string\":\"\xC2\x80\"}") + }, + { + TEST("{\"optional_string\":\"\\u07FF\"}"), + EXPECT("{\"optional_string\":\"\xDF\xBF\"}") + }, + { + TEST("{\"optional_string\":\"\\u0800\"}"), + EXPECT("{\"optional_string\":\"\xE0\xA0\x80\"}") + }, + { + TEST("{\"optional_string\":\"\\uFFFF\"}"), + EXPECT("{\"optional_string\":\"\xEF\xBF\xBF\"}") + }, + TEST_SENTINEL +}; + +static void AddField(upb::MessageDef* message, + int number, + const char* name, + upb_fieldtype_t type, + bool is_repeated, + const upb::Def* subdef = NULL) { + upb::reffed_ptr<upb::FieldDef> field(upb::FieldDef::New()); + upb::Status st; + field->set_name(name, &st); + field->set_type(type); + field->set_label(is_repeated ? UPB_LABEL_REPEATED : UPB_LABEL_OPTIONAL); + field->set_number(number, &st); + if (subdef) { + field->set_subdef(subdef, &st); + } + message->AddField(field, &st); +} + +static const upb::MessageDef* BuildTestMessage( + upb::reffed_ptr<upb::SymbolTable> symtab) { + upb::Status st; + + // Create SubMessage. + upb::reffed_ptr<upb::MessageDef> submsg(upb::MessageDef::New()); + submsg->set_full_name("SubMessage", &st); + AddField(submsg.get(), 1, "foo", UPB_TYPE_INT32, false); + + // Create MyEnum. + upb::reffed_ptr<upb::EnumDef> myenum(upb::EnumDef::New()); + myenum->set_full_name("MyEnum", &st); + myenum->AddValue("A", 0, &st); + myenum->AddValue("B", 1, &st); + myenum->AddValue("C", 2, &st); + + // Create TestMessage. 
+ upb::reffed_ptr<upb::MessageDef> md(upb::MessageDef::New()); + md->set_full_name("TestMessage", &st); + + AddField(md.get(), 1, "optional_int32", UPB_TYPE_INT32, false); + AddField(md.get(), 2, "optional_int64", UPB_TYPE_INT64, false); + AddField(md.get(), 3, "optional_uint32", UPB_TYPE_UINT32, false); + AddField(md.get(), 4, "optional_uint64", UPB_TYPE_UINT64, false); + AddField(md.get(), 5, "optional_string", UPB_TYPE_STRING, false); + AddField(md.get(), 6, "optional_bytes", UPB_TYPE_BYTES, false); + AddField(md.get(), 7, "optional_bool" , UPB_TYPE_BOOL, false); + AddField(md.get(), 8, "optional_msg" , UPB_TYPE_MESSAGE, false, + upb::upcast(submsg.get())); + AddField(md.get(), 9, "optional_enum", UPB_TYPE_ENUM, false, + upb::upcast(myenum.get())); + + AddField(md.get(), 11, "repeated_int32", UPB_TYPE_INT32, true); + AddField(md.get(), 12, "repeated_int64", UPB_TYPE_INT64, true); + AddField(md.get(), 13, "repeated_uint32", UPB_TYPE_UINT32, true); + AddField(md.get(), 14, "repeated_uint64", UPB_TYPE_UINT64, true); + AddField(md.get(), 15, "repeated_string", UPB_TYPE_STRING, true); + AddField(md.get(), 16, "repeated_bytes", UPB_TYPE_BYTES, true); + AddField(md.get(), 17, "repeated_bool" , UPB_TYPE_BOOL, true); + AddField(md.get(), 18, "repeated_msg" , UPB_TYPE_MESSAGE, true, + upb::upcast(submsg.get())); + AddField(md.get(), 19, "optional_enum", UPB_TYPE_ENUM, true, + upb::upcast(myenum.get())); + + // Add both to our symtab. + upb::Def* defs[3] = { + upb::upcast(submsg.ReleaseTo(&defs)), + upb::upcast(myenum.ReleaseTo(&defs)), + upb::upcast(md.ReleaseTo(&defs)), + }; + symtab->Add(defs, 3, &defs, &st); + + // Return TestMessage. 
+ return symtab->LookupMessage("TestMessage"); +} + +class StringSink { + public: + StringSink() { + upb_byteshandler_init(&byteshandler_); + upb_byteshandler_setstring(&byteshandler_, &str_handler, NULL); + upb_bytessink_reset(&bytessink_, &byteshandler_, &s_); + } + ~StringSink() { } + + upb_bytessink* Sink() { return &bytessink_; } + + const std::string& Data() { return s_; } + + private: + + static size_t str_handler(void* _closure, const void* hd, + const char* data, size_t len, + const upb_bufhandle* handle) { + UPB_UNUSED(hd); + UPB_UNUSED(handle); + std::string* s = static_cast<std::string*>(_closure); + std::string appended(data, len); + s->append(data, len); + return len; + } + + upb_byteshandler byteshandler_; + upb_bytessink bytessink_; + std::string s_; +}; + +// Starts with a message in JSON format, parses and directly serializes again, +// and compares the result. +void test_json_roundtrip() { + upb::reffed_ptr<upb::SymbolTable> symtab(upb::SymbolTable::New()); + const upb::MessageDef* md = BuildTestMessage(symtab.get()); + upb::reffed_ptr<const upb::Handlers> serialize_handlers( + upb::json::Printer::NewHandlers(md)); + + for (const TestCase* test_case = kTestRoundtripMessages; + test_case->input != NULL; test_case++) { + + const char *json_src = test_case->input; + const char *json_expected = test_case->expected; + if (json_expected == EXPECT_SAME) { + json_expected = json_src; + } + + upb::Status st; + upb::json::Parser parser(&st); + upb::json::Printer printer(serialize_handlers.get()); + StringSink data_sink; + + parser.ResetOutput(printer.input()); + printer.ResetOutput(data_sink.Sink()); + + bool ok = upb::BufferSource::PutBuffer(json_src, strlen(json_src), + parser.input()); + if (!ok) { + fprintf(stderr, "upb parse error: %s\n", st.error_message()); + } + ASSERT(ok); + + if (memcmp(json_expected, + data_sink.Data().data(), + data_sink.Data().size())) { + fprintf(stderr, + "JSON parse/serialize roundtrip result differs:\n" + "Original:\n%s\nParsed/Serialized:\n%s\n", + json_src, 
data_sink.Data().c_str()); + abort(); + } + } +} + +extern "C" { +int run_tests(int argc, char *argv[]) { + UPB_UNUSED(argc); + UPB_UNUSED(argv); + test_json_roundtrip(); + return 0; +} +} diff --git a/upb/bindings/lua/upb.c b/upb/bindings/lua/upb.c index 2bd78afa51..17fc0a865c 100644 --- a/upb/bindings/lua/upb.c +++ b/upb/bindings/lua/upb.c @@ -1032,7 +1032,7 @@ static int lupb_enumdef_value(lua_State *L) { } else if (type == LUA_TSTRING) { const char *key = lua_tostring(L, 2); int32_t num; - if (upb_enumdef_ntoi(e, key, &num)) { + if (upb_enumdef_ntoiz(e, key, &num)) { lua_pushinteger(L, num); } else { lua_pushnil(L); diff --git a/upb/def.c b/upb/def.c index fde2ee8522..aa05618dba 100644 --- a/upb/def.c +++ b/upb/def.c @@ -457,7 +457,7 @@ bool upb_enumdef_addval(upb_enumdef *e, const char *name, int32_t num, if (!upb_isident(name, strlen(name), false, status)) { return false; } - if (upb_enumdef_ntoi(e, name, NULL)) { + if (upb_enumdef_ntoiz(e, name, NULL)) { upb_status_seterrf(status, "name '%s' is already defined", name); return false; } @@ -505,9 +505,10 @@ void upb_enum_begin(upb_enum_iter *i, const upb_enumdef *e) { void upb_enum_next(upb_enum_iter *iter) { upb_strtable_next(iter); } bool upb_enum_done(upb_enum_iter *iter) { return upb_strtable_done(iter); } -bool upb_enumdef_ntoi(const upb_enumdef *def, const char *name, int32_t *num) { +bool upb_enumdef_ntoi(const upb_enumdef *def, const char *name, + size_t len, int32_t *num) { upb_value v; - if (!upb_strtable_lookup(&def->ntoi, name, &v)) { + if (!upb_strtable_lookup2(&def->ntoi, name, len, &v)) { return false; } if (num) *num = upb_value_getint32(v); @@ -595,7 +596,7 @@ static bool enumdefaultint32(const upb_fielddef *f, int32_t *val) { if (f->defaultval.bytes) { // Default was explicitly set as a str; try to lookup corresponding int. 
str_t *s = f->defaultval.bytes; - if (upb_enumdef_ntoi(e, s->str, val)) { + if (upb_enumdef_ntoiz(e, s->str, val)) { return true; } } else { diff --git a/upb/def.h b/upb/def.h index 2699fbf888..cfa140a16b 100644 --- a/upb/def.h +++ b/upb/def.h @@ -943,7 +943,17 @@ bool upb_enumdef_setdefault(upb_enumdef *e, int32_t val, upb_status *s); int upb_enumdef_numvals(const upb_enumdef *e); bool upb_enumdef_addval(upb_enumdef *e, const char *name, int32_t num, upb_status *status); -bool upb_enumdef_ntoi(const upb_enumdef *e, const char *name, int32_t *num); + +// Enum lookups: +// - ntoi: look up a name with specified length. +// - ntoiz: look up a name provided as a null-terminated string. +// - iton: look up an integer, returning the name as a null-terminated string. +bool upb_enumdef_ntoi(const upb_enumdef *e, const char *name, size_t len, + int32_t *num); +UPB_INLINE bool upb_enumdef_ntoiz(const upb_enumdef *e, + const char *name, int32_t *num) { + return upb_enumdef_ntoi(e, name, strlen(name), num); +} const char *upb_enumdef_iton(const upb_enumdef *e, int32_t num); // upb_enum_iter i; @@ -1352,7 +1362,7 @@ inline bool EnumDef::AddValue(const std::string& name, int32_t num, return upb_enumdef_addval(this, upb_safecstr(name), num, status); } inline bool EnumDef::FindValueByName(const char* name, int32_t *num) const { - return upb_enumdef_ntoi(this, name, num); + return upb_enumdef_ntoiz(this, name, num); } inline const char* EnumDef::FindValueByNumber(int32_t num) const { return upb_enumdef_iton(this, num); diff --git a/upb/json/parser.c b/upb/json/parser.c index 2687713058..78fc6c0e21 100644 --- a/upb/json/parser.c +++ b/upb/json/parser.c @@ -288,7 +288,7 @@ badpadding: return false; } -static bool end_text(upb_json_parser *p, const char *ptr) { +static bool end_text(upb_json_parser *p, const char *ptr, bool is_num) { assert(!p->accumulated); // TODO: handle this case. 
p->accumulated = p->text_begin; p->accumulated_len = ptr - p->text_begin; @@ -302,6 +302,24 @@ static bool end_text(upb_json_parser *p, const char *ptr) { upb_sink_putstring(&p->top->sink, sel, p->accumulated, p->accumulated_len, NULL); } p->accumulated = NULL; + } else if (p->top->f && + upb_fielddef_type(p->top->f) == UPB_TYPE_ENUM && + !is_num) { + + // Enum case: resolve enum symbolic name to integer value. + const upb_enumdef *enumdef = + (const upb_enumdef*)upb_fielddef_subdef(p->top->f); + + int32_t int_val = 0; + if (upb_enumdef_ntoi(enumdef, p->accumulated, p->accumulated_len, + &int_val)) { + upb_selector_t sel = getsel(p); + upb_sink_putint32(&p->top->sink, sel, int_val); + } else { + upb_status_seterrmsg(p->status, "Enum value name unknown"); + return false; + } + p->accumulated = NULL; } return true; @@ -310,29 +328,38 @@ static bool end_text(upb_json_parser *p, const char *ptr) { static bool start_stringval(upb_json_parser *p) { assert(p->top->f); - if (!upb_fielddef_isstring(p->top->f)) { + if (upb_fielddef_isstring(p->top->f)) { + if (!check_stack(p)) return false; + + // Start a new parser frame: parser frames correspond one-to-one with + // handler frames, and string events occur in a sub-frame. + upb_jsonparser_frame *inner = p->top + 1; + upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSTR); + upb_sink_startstr(&p->top->sink, sel, 0, &inner->sink); + inner->m = p->top->m; + inner->f = p->top->f; + p->top = inner; + + return true; + } else if (upb_fielddef_type(p->top->f) == UPB_TYPE_ENUM) { + // Do nothing -- symbolic enum names in quotes remain in the + // current parser frame. + return true; + } else { upb_status_seterrf(p->status, - "String specified for non-string field: %s", + "String specified for non-string/non-enum field: %s", upb_fielddef_name(p->top->f)); return false; } - if (!check_stack(p)) return false; - - upb_jsonparser_frame *inner = p->top + 1; // TODO: check for overflow. 
- upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSTR); - upb_sink_startstr(&p->top->sink, sel, 0, &inner->sink); - inner->m = p->top->m; - inner->f = p->top->f; - p->top = inner; - - return true; } static void end_stringval(upb_json_parser *p) { - p->top--; - upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSTR); - upb_sink_endstr(&p->top->sink, sel); + if (upb_fielddef_isstring(p->top->f)) { + upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSTR); + upb_sink_endstr(&p->top->sink, sel); + p->top--; + } } static void start_number(upb_json_parser *p, const char *ptr) { @@ -341,7 +368,7 @@ static void start_number(upb_json_parser *p, const char *ptr) { } static void end_number(upb_json_parser *p, const char *ptr) { - end_text(p, ptr); + end_text(p, ptr, true); const char *myend = p->accumulated + p->accumulated_len; char *end; @@ -450,15 +477,15 @@ static void hex(upb_json_parser *p, const char *end) { // emit the codepoint as UTF-8. char utf8[3]; // support \u0000 -- \uFFFF -- need only three bytes. int length = 0; - if (codepoint < 0x7F) { + if (codepoint <= 0x7F) { utf8[0] = codepoint; length = 1; - } else if (codepoint < 0x07FF) { + } else if (codepoint <= 0x07FF) { utf8[1] = (codepoint & 0x3F) | 0x80; codepoint >>= 6; utf8[0] = (codepoint & 0x1F) | 0xC0; length = 2; - } else /* codepoint < 0xFFFF */ { + } else /* codepoint <= 0xFFFF */ { utf8[2] = (codepoint & 0x3F) | 0x80; codepoint >>= 6; utf8[1] = (codepoint & 0x3F) | 0x80; @@ -478,11 +505,11 @@ static void hex(upb_json_parser *p, const char *end) { // What follows is the Ragel parser itself. The language is specified in Ragel // and the actions call our C functions above. 
-#line 568 "upb/json/parser.rl" +#line 595 "upb/json/parser.rl" -#line 486 "upb/json/parser.c" +#line 513 "upb/json/parser.c" static const char _json_actions[] = { 0, 1, 0, 1, 2, 1, 3, 1, 4, 1, 5, 1, 6, 1, 7, 1, @@ -635,7 +662,7 @@ static const int json_en_value_machine = 27; static const int json_en_main = 1; -#line 571 "upb/json/parser.rl" +#line 598 "upb/json/parser.rl" size_t parse(void *closure, const void *hd, const char *buf, size_t size, const upb_bufhandle *handle) { @@ -652,7 +679,7 @@ size_t parse(void *closure, const void *hd, const char *buf, size_t size, const char *pe = buf + size; -#line 656 "upb/json/parser.c" +#line 683 "upb/json/parser.c" { int _klen; unsigned int _trans; @@ -727,114 +754,114 @@ _match: switch ( *_acts++ ) { case 0: -#line 489 "upb/json/parser.rl" +#line 516 "upb/json/parser.rl" { p--; {cs = stack[--top]; goto _again;} } break; case 1: -#line 490 "upb/json/parser.rl" +#line 517 "upb/json/parser.rl" { p--; {stack[top++] = cs; cs = 10; goto _again;} } break; case 2: -#line 494 "upb/json/parser.rl" +#line 521 "upb/json/parser.rl" { start_text(parser, p); } break; case 3: -#line 495 "upb/json/parser.rl" - { CHECK_RETURN_TOP(end_text(parser, p)); } +#line 522 "upb/json/parser.rl" + { CHECK_RETURN_TOP(end_text(parser, p, false)); } break; case 4: -#line 501 "upb/json/parser.rl" +#line 528 "upb/json/parser.rl" { start_hex(parser, p); } break; case 5: -#line 502 "upb/json/parser.rl" +#line 529 "upb/json/parser.rl" { hex(parser, p); } break; case 6: -#line 508 "upb/json/parser.rl" +#line 535 "upb/json/parser.rl" { escape(parser, p); } break; case 7: -#line 511 "upb/json/parser.rl" +#line 538 "upb/json/parser.rl" { {cs = stack[--top]; goto _again;} } break; case 8: -#line 512 "upb/json/parser.rl" +#line 539 "upb/json/parser.rl" { {stack[top++] = cs; cs = 19; goto _again;} } break; case 9: -#line 514 "upb/json/parser.rl" +#line 541 "upb/json/parser.rl" { p--; {stack[top++] = cs; cs = 27; goto _again;} } break; case 10: -#line 519 
"upb/json/parser.rl" +#line 546 "upb/json/parser.rl" { start_member(parser); } break; case 11: -#line 520 "upb/json/parser.rl" +#line 547 "upb/json/parser.rl" { CHECK_RETURN_TOP(end_member(parser)); } break; case 12: -#line 523 "upb/json/parser.rl" +#line 550 "upb/json/parser.rl" { clear_member(parser); } break; case 13: -#line 529 "upb/json/parser.rl" +#line 556 "upb/json/parser.rl" { start_object(parser); } break; case 14: -#line 532 "upb/json/parser.rl" +#line 559 "upb/json/parser.rl" { end_object(parser); } break; case 15: -#line 538 "upb/json/parser.rl" +#line 565 "upb/json/parser.rl" { CHECK_RETURN_TOP(start_array(parser)); } break; case 16: -#line 542 "upb/json/parser.rl" +#line 569 "upb/json/parser.rl" { end_array(parser); } break; case 17: -#line 547 "upb/json/parser.rl" +#line 574 "upb/json/parser.rl" { start_number(parser, p); } break; case 18: -#line 548 "upb/json/parser.rl" +#line 575 "upb/json/parser.rl" { end_number(parser, p); } break; case 19: -#line 550 "upb/json/parser.rl" +#line 577 "upb/json/parser.rl" { CHECK_RETURN_TOP(start_stringval(parser)); } break; case 20: -#line 551 "upb/json/parser.rl" +#line 578 "upb/json/parser.rl" { end_stringval(parser); } break; case 21: -#line 553 "upb/json/parser.rl" +#line 580 "upb/json/parser.rl" { CHECK_RETURN_TOP(putbool(parser, true)); } break; case 22: -#line 555 "upb/json/parser.rl" +#line 582 "upb/json/parser.rl" { CHECK_RETURN_TOP(putbool(parser, false)); } break; case 23: -#line 557 "upb/json/parser.rl" +#line 584 "upb/json/parser.rl" { /* null value */ } break; case 24: -#line 559 "upb/json/parser.rl" +#line 586 "upb/json/parser.rl" { CHECK_RETURN_TOP(start_subobject(parser)); } break; case 25: -#line 560 "upb/json/parser.rl" +#line 587 "upb/json/parser.rl" { end_subobject(parser); } break; case 26: -#line 565 "upb/json/parser.rl" +#line 592 "upb/json/parser.rl" { p--; {cs = stack[--top]; goto _again;} } break; -#line 838 "upb/json/parser.c" +#line 865 "upb/json/parser.c" } } @@ -847,7 +874,7 @@ 
_again: _out: {} } -#line 587 "upb/json/parser.rl" +#line 614 "upb/json/parser.rl" if (p != pe) { upb_status_seterrf(parser->status, "Parse error at %s\n", p); @@ -888,13 +915,13 @@ void upb_json_parser_reset(upb_json_parser *p) { int top; // Emit Ragel initialization of the parser. -#line 892 "upb/json/parser.c" +#line 919 "upb/json/parser.c" { cs = json_start; top = 0; } -#line 627 "upb/json/parser.rl" +#line 654 "upb/json/parser.rl" p->current_state = cs; p->parser_top = top; p->text_begin = NULL; diff --git a/upb/json/parser.rl b/upb/json/parser.rl index 92a1566d9d..8ceca77d5f 100644 --- a/upb/json/parser.rl +++ b/upb/json/parser.rl @@ -286,7 +286,7 @@ badpadding: return false; } -static bool end_text(upb_json_parser *p, const char *ptr) { +static bool end_text(upb_json_parser *p, const char *ptr, bool is_num) { assert(!p->accumulated); // TODO: handle this case. p->accumulated = p->text_begin; p->accumulated_len = ptr - p->text_begin; @@ -300,6 +300,24 @@ static bool end_text(upb_json_parser *p, const char *ptr) { upb_sink_putstring(&p->top->sink, sel, p->accumulated, p->accumulated_len, NULL); } p->accumulated = NULL; + } else if (p->top->f && + upb_fielddef_type(p->top->f) == UPB_TYPE_ENUM && + !is_num) { + + // Enum case: resolve enum symbolic name to integer value. 
+ const upb_enumdef *enumdef = + (const upb_enumdef*)upb_fielddef_subdef(p->top->f); + + int32_t int_val = 0; + if (upb_enumdef_ntoi(enumdef, p->accumulated, p->accumulated_len, + &int_val)) { + upb_selector_t sel = getsel(p); + upb_sink_putint32(&p->top->sink, sel, int_val); + } else { + upb_status_seterrmsg(p->status, "Enum value name unknown"); + return false; + } + p->accumulated = NULL; } return true; @@ -308,29 +326,38 @@ static bool end_text(upb_json_parser *p, const char *ptr) { static bool start_stringval(upb_json_parser *p) { assert(p->top->f); - if (!upb_fielddef_isstring(p->top->f)) { + if (upb_fielddef_isstring(p->top->f)) { + if (!check_stack(p)) return false; + + // Start a new parser frame: parser frames correspond one-to-one with + // handler frames, and string events occur in a sub-frame. + upb_jsonparser_frame *inner = p->top + 1; + upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSTR); + upb_sink_startstr(&p->top->sink, sel, 0, &inner->sink); + inner->m = p->top->m; + inner->f = p->top->f; + p->top = inner; + + return true; + } else if (upb_fielddef_type(p->top->f) == UPB_TYPE_ENUM) { + // Do nothing -- symbolic enum names in quotes remain in the + // current parser frame. + return true; + } else { upb_status_seterrf(p->status, - "String specified for non-string field: %s", + "String specified for non-string/non-enum field: %s", upb_fielddef_name(p->top->f)); return false; } - if (!check_stack(p)) return false; - - upb_jsonparser_frame *inner = p->top + 1; // TODO: check for overflow. 
- upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSTR); - upb_sink_startstr(&p->top->sink, sel, 0, &inner->sink); - inner->m = p->top->m; - inner->f = p->top->f; - p->top = inner; - - return true; } static void end_stringval(upb_json_parser *p) { - p->top--; - upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSTR); - upb_sink_endstr(&p->top->sink, sel); + if (upb_fielddef_isstring(p->top->f)) { + upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSTR); + upb_sink_endstr(&p->top->sink, sel); + p->top--; + } } static void start_number(upb_json_parser *p, const char *ptr) { @@ -339,7 +366,7 @@ static void start_number(upb_json_parser *p, const char *ptr) { } static void end_number(upb_json_parser *p, const char *ptr) { - end_text(p, ptr); + end_text(p, ptr, true); const char *myend = p->accumulated + p->accumulated_len; char *end; @@ -448,15 +475,15 @@ static void hex(upb_json_parser *p, const char *end) { // emit the codepoint as UTF-8. char utf8[3]; // support \u0000 -- \uFFFF -- need only three bytes. int length = 0; - if (codepoint < 0x7F) { + if (codepoint <= 0x7F) { utf8[0] = codepoint; length = 1; - } else if (codepoint < 0x07FF) { + } else if (codepoint <= 0x07FF) { utf8[1] = (codepoint & 0x3F) | 0x80; codepoint >>= 6; utf8[0] = (codepoint & 0x1F) | 0xC0; length = 2; - } else /* codepoint < 0xFFFF */ { + } else /* codepoint <= 0xFFFF */ { utf8[2] = (codepoint & 0x3F) | 0x80; codepoint >>= 6; utf8[1] = (codepoint & 0x3F) | 0x80; @@ -492,7 +519,7 @@ static void hex(upb_json_parser *p, const char *end) { text = /[^\\"]/+ >{ start_text(parser, p); } - %{ CHECK_RETURN_TOP(end_text(parser, p)); } + %{ CHECK_RETURN_TOP(end_text(parser, p, false)); } ; unicode_char = diff --git a/upb/json/printer.c b/upb/json/printer.c index 44e6f8316d..28f3e4a25a 100644 --- a/upb/json/printer.c +++ b/upb/json/printer.c @@ -69,10 +69,10 @@ static inline char* json_nice_escape(char c) { } } -// Write a properly quoted and escaped string. 
+// Write a properly escaped string chunk. The surrounding quotes are *not* +// printed; this is so that the caller has the option of emitting the string +// content in chunks. static void putstring(upb_json_printer *p, const char *buf, unsigned int len) { - print_data(p, "\"", 1); - const char* unescaped_run = NULL; for (unsigned int i = 0; i < len; i++) { char c = buf[i]; @@ -112,8 +112,6 @@ static void putstring(upb_json_printer *p, const char *buf, unsigned int len) { if (unescaped_run) { print_data(p, unescaped_run, &buf[len] - unescaped_run); } - - print_data(p, "\"", 1); } #define CHKLENGTH(x) if (!(x)) return -1; @@ -158,8 +156,9 @@ static bool putkey(void *closure, const void *handler_data) { upb_json_printer *p = closure; const strpc *key = handler_data; print_comma(p); + print_data(p, "\"", 1); putstring(p, key->ptr, key->len); - print_data(p, ":", 1); + print_data(p, "\":", 2); return true; } @@ -200,6 +199,47 @@ TYPE_HANDLERS(uint64_t, fmt_uint64); #undef TYPE_HANDLERS +typedef struct { + void *keyname; + const upb_enumdef *enumdef; +} EnumHandlerData; + +static bool scalar_enum(void *closure, const void *handler_data, + int32_t val) { + const EnumHandlerData *hd = handler_data; + upb_json_printer *p = closure; + CHK(putkey(closure, hd->keyname)); + + const char *symbolic_name = upb_enumdef_iton(hd->enumdef, val); + if (symbolic_name) { + print_data(p, "\"", 1); + putstring(p, symbolic_name, strlen(symbolic_name)); + print_data(p, "\"", 1); + } else { + putint32_t(closure, NULL, val); + } + + return true; +} + +static bool repeated_enum(void *closure, const void *handler_data, + int32_t val) { + const EnumHandlerData *hd = handler_data; + upb_json_printer *p = closure; + print_comma(p); + + const char *symbolic_name = upb_enumdef_iton(hd->enumdef, val); + if (symbolic_name) { + print_data(p, "\"", 1); + putstring(p, symbolic_name, strlen(symbolic_name)); + print_data(p, "\"", 1); + } else { + putint32_t(closure, NULL, val); + } + + return true; +} + 
static void *scalar_startsubmsg(void *closure, const void *handler_data) { return putkey(closure, handler_data) ? closure : UPB_BREAK; } @@ -310,27 +350,60 @@ static size_t putbytes(void *closure, const void *handler_data, const char *str, } size_t bytes = to - data; + print_data(p, "\"", 1); putstring(p, data, bytes); + print_data(p, "\"", 1); return len; } +static void *scalar_startstr(void *closure, const void *handler_data, + size_t size_hint) { + UPB_UNUSED(handler_data); + UPB_UNUSED(size_hint); + upb_json_printer *p = closure; + CHK(putkey(closure, handler_data)); + print_data(p, "\"", 1); + return p; +} + static size_t scalar_str(void *closure, const void *handler_data, const char *str, size_t len, const upb_bufhandle *handle) { - CHK(putkey(closure, handler_data)); CHK(putstr(closure, handler_data, str, len, handle)); return len; } +static bool scalar_endstr(void *closure, const void *handler_data) { + UPB_UNUSED(handler_data); + upb_json_printer *p = closure; + print_data(p, "\"", 1); + return true; +} + +static void *repeated_startstr(void *closure, const void *handler_data, + size_t size_hint) { + UPB_UNUSED(handler_data); + UPB_UNUSED(size_hint); + upb_json_printer *p = closure; + print_comma(p); + print_data(p, "\"", 1); + return p; +} + static size_t repeated_str(void *closure, const void *handler_data, const char *str, size_t len, const upb_bufhandle *handle) { - upb_json_printer *p = closure; - print_comma(p); CHK(putstr(closure, handler_data, str, len, handle)); return len; } +static bool repeated_endstr(void *closure, const void *handler_data) { + UPB_UNUSED(handler_data); + upb_json_printer *p = closure; + print_data(p, "\"", 1); + return true; +} + static size_t scalar_bytes(void *closure, const void *handler_data, const char *str, size_t len, const upb_bufhandle *handle) { @@ -381,21 +454,44 @@ void sethandlers(const void *closure, upb_handlers *h) { TYPE(UPB_TYPE_FLOAT, float, float); TYPE(UPB_TYPE_DOUBLE, double, double); TYPE(UPB_TYPE_BOOL, 
bool, bool); - TYPE(UPB_TYPE_ENUM, int32, int32_t); TYPE(UPB_TYPE_INT32, int32, int32_t); TYPE(UPB_TYPE_UINT32, uint32, uint32_t); TYPE(UPB_TYPE_INT64, int64, int64_t); TYPE(UPB_TYPE_UINT64, uint64, uint64_t); + case UPB_TYPE_ENUM: { + // For now, we always emit symbolic names for enums. We may want an + // option later to control this behavior, but we will wait for a real + // need first. + EnumHandlerData *hd = malloc(sizeof(EnumHandlerData)); + hd->enumdef = (const upb_enumdef *)upb_fielddef_subdef(f); + hd->keyname = newstrpc(h, f); + upb_handlers_addcleanup(h, hd, free); + upb_handlerattr enum_attr = UPB_HANDLERATTR_INITIALIZER; + upb_handlerattr_sethandlerdata(&enum_attr, hd); + + if (upb_fielddef_isseq(f)) { + upb_handlers_setint32(h, f, repeated_enum, &enum_attr); + } else { + upb_handlers_setint32(h, f, scalar_enum, &enum_attr); + } + + upb_handlerattr_uninit(&enum_attr); + break; + } case UPB_TYPE_STRING: - // XXX: this doesn't support strings that span buffers yet. if (upb_fielddef_isseq(f)) { + upb_handlers_setstartstr(h, f, repeated_startstr, &empty_attr); upb_handlers_setstring(h, f, repeated_str, &empty_attr); + upb_handlers_setendstr(h, f, repeated_endstr, &empty_attr); } else { - upb_handlers_setstring(h, f, scalar_str, &name_attr); + upb_handlers_setstartstr(h, f, scalar_startstr, &name_attr); + upb_handlers_setstring(h, f, scalar_str, &empty_attr); + upb_handlers_setendstr(h, f, scalar_endstr, &empty_attr); } break; case UPB_TYPE_BYTES: - // XXX: this doesn't support strings that span buffers yet. + // XXX: this doesn't support strings that span buffers yet. The base64 + // encoder will need to be made resumable for this to work properly. if (upb_fielddef_isseq(f)) { upb_handlers_setstring(h, f, repeated_bytes, &empty_attr); } else {