Merge pull request #7 from cfallin/master

JSON test, symbolic enum names in JSON, and a few improvements.
pull/13171/head
Joshua Haberman 10 years ago
commit bf51ef86b4
  1. 2
      Makefile
  2. 244
      tests/json/test_json.cc
  3. 2
      upb/bindings/lua/upb.c
  4. 9
      upb/def.c
  5. 14
      upb/def.h
  6. 139
      upb/json/parser.c
  7. 69
      upb/json/parser.rl
  8. 122
      upb/json/printer.c

@ -235,6 +235,7 @@ C_TESTS = \
CC_TESTS = \ CC_TESTS = \
tests/pb/test_decoder \ tests/pb/test_decoder \
tests/json/test_json \
tests/test_cpp \ tests/test_cpp \
tests/test_table \ tests/test_table \
@ -264,6 +265,7 @@ tests/test_handlers: LIBS = lib/libupb.descriptor.a lib/libupb.a
tests/pb/test_decoder: LIBS = lib/libupb.pb.a lib/libupb.a tests/pb/test_decoder: LIBS = lib/libupb.pb.a lib/libupb.a
tests/test_cpp: LIBS = $(LOAD_DESCRIPTOR_LIBS) lib/libupb.a tests/test_cpp: LIBS = $(LOAD_DESCRIPTOR_LIBS) lib/libupb.a
tests/test_table: LIBS = lib/libupb.a tests/test_table: LIBS = lib/libupb.a
# libupb.json.a depends on libupb.a, so it is listed first (same ordering as
# the tests/pb/test_decoder line) to keep single-pass static linking working.
tests/json/test_json: LIBS = lib/libupb.json.a lib/libupb.a
tests/test_def: tests/test.proto.pb tests/test_def: tests/test.proto.pb

@ -0,0 +1,244 @@
/*
* upb - a minimalist implementation of protocol buffers.
*
* Copyright (c) 2014 Google Inc. See LICENSE for details.
*
* A set of tests for JSON parsing and serialization.
*/
#include "tests/upb_test.h"
#include "upb/handlers.h"
#include "upb/symtab.h"
#include "upb/json/printer.h"
#include "upb/json/parser.h"
#include "upb/upb.h"
#include <string>
// Macros for readability in test case list: allows us to give TEST("...") /
// EXPECT("...") pairs.
// TEST(x) and EXPECT(x) expand to their argument unchanged; they only label
// the two members of a TestCase initializer.
#define TEST(x) x
// Marks a case whose expected output is the input itself; expands to NULL.
#define EXPECT_SAME NULL
#define EXPECT(x) x
// Terminating entry of a TestCase array: both pointers are NULL.
#define TEST_SENTINEL { NULL, NULL }
// One round-trip case: the JSON text to parse (input) and the JSON text
// expected after re-serialization (expected, or EXPECT_SAME).
struct TestCase {
const char* input;
const char* expected;
};
// Table of round-trip cases consumed by test_json_roundtrip(). Each entry is
// { input JSON, expected JSON }; the list ends with TEST_SENTINEL, whose NULL
// input stops the iteration.
static TestCase kTestRoundtripMessages[] = {
// Test most fields here.
{
TEST("{\"optional_int32\":-42,\"optional_string\":\"Test\\u0001Message\","
"\"optional_msg\":{\"foo\":42},"
"\"optional_bool\":true,\"repeated_msg\":[{\"foo\":1},"
"{\"foo\":2}]}"),
EXPECT_SAME
},
// Test special escapes in strings.
// NOTE(review): the last element below is written with a single backslash,
// unlike the double-escaped entries further down, so it looks like the C++
// compiler (not the JSON parser) expands it and the resulting bytes depend on
// the execution character set -- confirm this is intended.
{
TEST("{\"repeated_string\":[\"\\b\",\"\\r\",\"\\n\",\"\\f\",\"\\t\","
"\"\uFFFF\"]}"),
EXPECT_SAME
},
// Test enum symbolic names.
{
// The common case: parse and print the symbolic name.
TEST("{\"optional_enum\":\"A\"}"),
EXPECT_SAME
},
{
// Unknown enum value: will be printed as an integer.
TEST("{\"optional_enum\":42}"),
EXPECT_SAME
},
{
// Known enum value: we're happy to parse an integer but we will re-emit the
// symbolic name.
TEST("{\"optional_enum\":1}"),
EXPECT("{\"optional_enum\":\"B\"}")
},
// UTF-8 tests: escapes -> literal UTF8 in output.
// The cases below sit on the boundaries between 1-, 2- and 3-byte UTF-8
// encodings (0x7F/0x80, 0x7FF/0x800) plus the largest 4-hex-digit value.
{
// Note double escape on \uXXXX: we want the escape to be processed by the
// JSON parser, not by the C++ compiler!
TEST("{\"optional_string\":\"\\u007F\"}"),
EXPECT("{\"optional_string\":\"\x7F\"}")
},
{
TEST("{\"optional_string\":\"\\u0080\"}"),
EXPECT("{\"optional_string\":\"\xC2\x80\"}")
},
{
TEST("{\"optional_string\":\"\\u07FF\"}"),
EXPECT("{\"optional_string\":\"\xDF\xBF\"}")
},
{
TEST("{\"optional_string\":\"\\u0800\"}"),
EXPECT("{\"optional_string\":\"\xE0\xA0\x80\"}")
},
{
TEST("{\"optional_string\":\"\\uFFFF\"}"),
EXPECT("{\"optional_string\":\"\xEF\xBF\xBF\"}")
},
TEST_SENTINEL
};
// Creates a new field definition and attaches it to `message`.
//
//   message     - message definition that receives the field.
//   number      - field number.
//   name        - field name.
//   type        - field type.
//   is_repeated - true selects UPB_LABEL_REPEATED, false UPB_LABEL_OPTIONAL.
//   subdef      - optional sub-definition; only set when non-NULL.
//
// The upb::Status filled in by the individual calls is local to this helper
// and is not inspected, so failures are not reported to the caller.
static void AddField(upb::MessageDef* message,
                     int number,
                     const char* name,
                     upb_fieldtype_t type,
                     bool is_repeated,
                     const upb::Def* subdef = NULL) {
  upb::reffed_ptr<upb::FieldDef> new_field(upb::FieldDef::New());
  upb::Status status;

  new_field->set_name(name, &status);
  new_field->set_type(type);
  if (is_repeated) {
    new_field->set_label(UPB_LABEL_REPEATED);
  } else {
    new_field->set_label(UPB_LABEL_OPTIONAL);
  }
  new_field->set_number(number, &status);

  if (subdef != NULL) {
    new_field->set_subdef(subdef, &status);
  }

  message->AddField(new_field, &status);
}
// Builds the schema used by the round-trip tests and adds it to `symtab`:
//   - SubMessage:  a single int32 field "foo".
//   - MyEnum:      values A = 0, B = 1, C = 2.
//   - TestMessage: optional_* fields numbered 1-9 and the matching
//                  repeated_* fields numbered 11-19.
//
// Returns the result of looking up "TestMessage" in `symtab`.
//
// The upb::Status written by the individual calls is not inspected here.
static const upb::MessageDef* BuildTestMessage(
    upb::reffed_ptr<upb::SymbolTable> symtab) {
  upb::Status st;

  // Create SubMessage.
  upb::reffed_ptr<upb::MessageDef> submsg(upb::MessageDef::New());
  submsg->set_full_name("SubMessage", &st);
  AddField(submsg.get(), 1, "foo", UPB_TYPE_INT32, false);

  // Create MyEnum.
  upb::reffed_ptr<upb::EnumDef> myenum(upb::EnumDef::New());
  myenum->set_full_name("MyEnum", &st);
  myenum->AddValue("A", 0, &st);
  myenum->AddValue("B", 1, &st);
  myenum->AddValue("C", 2, &st);

  // Create TestMessage.
  upb::reffed_ptr<upb::MessageDef> md(upb::MessageDef::New());
  md->set_full_name("TestMessage", &st);

  AddField(md.get(), 1, "optional_int32", UPB_TYPE_INT32, false);
  AddField(md.get(), 2, "optional_int64", UPB_TYPE_INT64, false);
  AddField(md.get(), 3, "optional_uint32", UPB_TYPE_UINT32, false);
  AddField(md.get(), 4, "optional_uint64", UPB_TYPE_UINT64, false);
  AddField(md.get(), 5, "optional_string", UPB_TYPE_STRING, false);
  AddField(md.get(), 6, "optional_bytes", UPB_TYPE_BYTES, false);
  AddField(md.get(), 7, "optional_bool" , UPB_TYPE_BOOL, false);
  AddField(md.get(), 8, "optional_msg" , UPB_TYPE_MESSAGE, false,
           upb::upcast(submsg.get()));
  AddField(md.get(), 9, "optional_enum", UPB_TYPE_ENUM, false,
           upb::upcast(myenum.get()));

  AddField(md.get(), 11, "repeated_int32", UPB_TYPE_INT32, true);
  AddField(md.get(), 12, "repeated_int64", UPB_TYPE_INT64, true);
  AddField(md.get(), 13, "repeated_uint32", UPB_TYPE_UINT32, true);
  AddField(md.get(), 14, "repeated_uint64", UPB_TYPE_UINT64, true);
  AddField(md.get(), 15, "repeated_string", UPB_TYPE_STRING, true);
  AddField(md.get(), 16, "repeated_bytes", UPB_TYPE_BYTES, true);
  AddField(md.get(), 17, "repeated_bool" , UPB_TYPE_BOOL, true);
  AddField(md.get(), 18, "repeated_msg" , UPB_TYPE_MESSAGE, true,
           upb::upcast(submsg.get()));
  // Field 19 is the repeated counterpart of field 9. It needs its own name:
  // reusing "optional_enum" would collide with field 9.
  AddField(md.get(), 19, "repeated_enum", UPB_TYPE_ENUM, true,
           upb::upcast(myenum.get()));

  // Add all three defs (SubMessage, MyEnum, TestMessage) to our symtab.
  upb::Def* defs[3] = {
    upb::upcast(submsg.ReleaseTo(&defs)),
    upb::upcast(myenum.ReleaseTo(&defs)),
    upb::upcast(md.ReleaseTo(&defs)),
  };
  symtab->Add(defs, 3, &defs, &st);

  // Return TestMessage.
  return symtab->LookupMessage("TestMessage");
}
// A bytes sink that accumulates everything written to it in a std::string.
//
// The constructor resets the upb_bytessink with pointers to this object's own
// handler table and string, so the value returned by Sink() is only usable
// while this object is alive.
// NOTE(review): copying a StringSink would presumably leave the copy's sink
// pointing at the source object's members -- avoid copying.
class StringSink {
 public:
  StringSink() {
    upb_byteshandler_init(&byteshandler_);
    upb_byteshandler_setstring(&byteshandler_, &str_handler, NULL);
    upb_bytessink_reset(&bytessink_, &byteshandler_, &s_);
  }
  ~StringSink() { }

  // Sink to hand to a producer; bytes written to it are appended to Data().
  upb_bytessink* Sink() { return &bytessink_; }

  // Everything appended so far.
  const std::string& Data() const { return s_; }

 private:
  // String handler: appends `len` bytes of `data` to the std::string passed
  // as the closure and reports all of them as consumed.
  static size_t str_handler(void* _closure, const void* hd,
                            const char* data, size_t len,
                            const upb_bufhandle* handle) {
    UPB_UNUSED(hd);
    UPB_UNUSED(handle);
    std::string* s = static_cast<std::string*>(_closure);
    s->append(data, len);
    return len;
  }

  upb_byteshandler byteshandler_;
  upb_bytessink bytessink_;
  std::string s_;
};
// Starts with a message in JSON format, parses and directly serializes again,
// and compares the result.
//
// For every entry of kTestRoundtripMessages the input is fed through a JSON
// parser whose output is wired into a JSON printer, and the printer's output
// is collected in a StringSink. The collected text must equal the entry's
// expected text (or the input itself for EXPECT_SAME); otherwise a diagnostic
// is written to stderr and the process aborts.
void test_json_roundtrip() {
  upb::reffed_ptr<upb::SymbolTable> symtab(upb::SymbolTable::New());
  const upb::MessageDef* md = BuildTestMessage(symtab.get());
  upb::reffed_ptr<const upb::Handlers> serialize_handlers(
      upb::json::Printer::NewHandlers(md));

  for (const TestCase* test_case = kTestRoundtripMessages;
       test_case->input != NULL; test_case++) {
    const char *json_src = test_case->input;
    const char *json_expected = test_case->expected;
    if (json_expected == EXPECT_SAME) {
      json_expected = json_src;
    }

    upb::Status st;
    upb::json::Parser parser(&st);
    upb::json::Printer printer(serialize_handlers.get());
    StringSink data_sink;

    parser.ResetOutput(printer.input());
    printer.ResetOutput(data_sink.Sink());

    bool ok = upb::BufferSource::PutBuffer(json_src, strlen(json_src),
                                           parser.input());
    if (!ok) {
      fprintf(stderr, "upb parse error: %s\n", st.error_message());
    }
    ASSERT(ok);

    // Compare the complete strings. A memcmp() bounded by the produced size
    // would accept empty or truncated output and could read past the end of
    // the expected literal when the output is longer than expected.
    const std::string& json_actual = data_sink.Data();
    if (json_actual != json_expected) {
      fprintf(stderr,
              "JSON parse/serialize roundtrip result differs:\n"
              "Original:\n%s\nExpected:\n%s\nParsed/Serialized:\n%s\n",
              json_src, json_expected, json_actual.c_str());
      abort();
    }
  }
}
extern "C" {

// C-linkage entry point for this test file (presumably called by the shared
// test driver -- confirm). Runs the JSON round-trip test and returns 0; the
// command-line arguments are ignored.
int run_tests(int argc, char *argv[]) {
  UPB_UNUSED(argv);
  UPB_UNUSED(argc);

  test_json_roundtrip();

  return 0;
}

}  // extern "C"

@ -1032,7 +1032,7 @@ static int lupb_enumdef_value(lua_State *L) {
} else if (type == LUA_TSTRING) { } else if (type == LUA_TSTRING) {
const char *key = lua_tostring(L, 2); const char *key = lua_tostring(L, 2);
int32_t num; int32_t num;
if (upb_enumdef_ntoi(e, key, &num)) { if (upb_enumdef_ntoiz(e, key, &num)) {
lua_pushinteger(L, num); lua_pushinteger(L, num);
} else { } else {
lua_pushnil(L); lua_pushnil(L);

@ -457,7 +457,7 @@ bool upb_enumdef_addval(upb_enumdef *e, const char *name, int32_t num,
if (!upb_isident(name, strlen(name), false, status)) { if (!upb_isident(name, strlen(name), false, status)) {
return false; return false;
} }
if (upb_enumdef_ntoi(e, name, NULL)) { if (upb_enumdef_ntoiz(e, name, NULL)) {
upb_status_seterrf(status, "name '%s' is already defined", name); upb_status_seterrf(status, "name '%s' is already defined", name);
return false; return false;
} }
@ -505,9 +505,10 @@ void upb_enum_begin(upb_enum_iter *i, const upb_enumdef *e) {
void upb_enum_next(upb_enum_iter *iter) { upb_strtable_next(iter); } void upb_enum_next(upb_enum_iter *iter) { upb_strtable_next(iter); }
bool upb_enum_done(upb_enum_iter *iter) { return upb_strtable_done(iter); } bool upb_enum_done(upb_enum_iter *iter) { return upb_strtable_done(iter); }
bool upb_enumdef_ntoi(const upb_enumdef *def, const char *name, int32_t *num) { bool upb_enumdef_ntoi(const upb_enumdef *def, const char *name,
size_t len, int32_t *num) {
upb_value v; upb_value v;
if (!upb_strtable_lookup(&def->ntoi, name, &v)) { if (!upb_strtable_lookup2(&def->ntoi, name, len, &v)) {
return false; return false;
} }
if (num) *num = upb_value_getint32(v); if (num) *num = upb_value_getint32(v);
@ -595,7 +596,7 @@ static bool enumdefaultint32(const upb_fielddef *f, int32_t *val) {
if (f->defaultval.bytes) { if (f->defaultval.bytes) {
// Default was explicitly set as a str; try to lookup corresponding int. // Default was explicitly set as a str; try to lookup corresponding int.
str_t *s = f->defaultval.bytes; str_t *s = f->defaultval.bytes;
if (upb_enumdef_ntoi(e, s->str, val)) { if (upb_enumdef_ntoiz(e, s->str, val)) {
return true; return true;
} }
} else { } else {

@ -943,7 +943,17 @@ bool upb_enumdef_setdefault(upb_enumdef *e, int32_t val, upb_status *s);
int upb_enumdef_numvals(const upb_enumdef *e); int upb_enumdef_numvals(const upb_enumdef *e);
bool upb_enumdef_addval(upb_enumdef *e, const char *name, int32_t num, bool upb_enumdef_addval(upb_enumdef *e, const char *name, int32_t num,
upb_status *status); upb_status *status);
bool upb_enumdef_ntoi(const upb_enumdef *e, const char *name, int32_t *num);
// Enum lookups:
// - ntoi: look up a name with specified length.
// - ntoiz: look up a name provided as a null-terminated string.
// - iton: look up an integer, returning the name as a null-terminated string.
bool upb_enumdef_ntoi(const upb_enumdef *e, const char *name, size_t len,
int32_t *num);
// Convenience wrapper around upb_enumdef_ntoi() for null-terminated names:
// measures `name` with strlen() and forwards the lookup. `name` must
// therefore be a valid C string.
UPB_INLINE bool upb_enumdef_ntoiz(const upb_enumdef *e,
                                  const char *name, int32_t *num) {
  const size_t name_len = strlen(name);
  return upb_enumdef_ntoi(e, name, name_len, num);
}
const char *upb_enumdef_iton(const upb_enumdef *e, int32_t num); const char *upb_enumdef_iton(const upb_enumdef *e, int32_t num);
// upb_enum_iter i; // upb_enum_iter i;
@ -1352,7 +1362,7 @@ inline bool EnumDef::AddValue(const std::string& name, int32_t num,
return upb_enumdef_addval(this, upb_safecstr(name), num, status); return upb_enumdef_addval(this, upb_safecstr(name), num, status);
} }
inline bool EnumDef::FindValueByName(const char* name, int32_t *num) const { inline bool EnumDef::FindValueByName(const char* name, int32_t *num) const {
return upb_enumdef_ntoi(this, name, num); return upb_enumdef_ntoiz(this, name, num);
} }
inline const char* EnumDef::FindValueByNumber(int32_t num) const { inline const char* EnumDef::FindValueByNumber(int32_t num) const {
return upb_enumdef_iton(this, num); return upb_enumdef_iton(this, num);

@ -288,7 +288,7 @@ badpadding:
return false; return false;
} }
static bool end_text(upb_json_parser *p, const char *ptr) { static bool end_text(upb_json_parser *p, const char *ptr, bool is_num) {
assert(!p->accumulated); // TODO: handle this case. assert(!p->accumulated); // TODO: handle this case.
p->accumulated = p->text_begin; p->accumulated = p->text_begin;
p->accumulated_len = ptr - p->text_begin; p->accumulated_len = ptr - p->text_begin;
@ -302,6 +302,24 @@ static bool end_text(upb_json_parser *p, const char *ptr) {
upb_sink_putstring(&p->top->sink, sel, p->accumulated, p->accumulated_len, NULL); upb_sink_putstring(&p->top->sink, sel, p->accumulated, p->accumulated_len, NULL);
} }
p->accumulated = NULL; p->accumulated = NULL;
} else if (p->top->f &&
upb_fielddef_type(p->top->f) == UPB_TYPE_ENUM &&
!is_num) {
// Enum case: resolve enum symbolic name to integer value.
const upb_enumdef *enumdef =
(const upb_enumdef*)upb_fielddef_subdef(p->top->f);
int32_t int_val = 0;
if (upb_enumdef_ntoi(enumdef, p->accumulated, p->accumulated_len,
&int_val)) {
upb_selector_t sel = getsel(p);
upb_sink_putint32(&p->top->sink, sel, int_val);
} else {
upb_status_seterrmsg(p->status, "Enum value name unknown");
return false;
}
p->accumulated = NULL;
} }
return true; return true;
@ -310,29 +328,38 @@ static bool end_text(upb_json_parser *p, const char *ptr) {
static bool start_stringval(upb_json_parser *p) { static bool start_stringval(upb_json_parser *p) {
assert(p->top->f); assert(p->top->f);
if (!upb_fielddef_isstring(p->top->f)) { if (upb_fielddef_isstring(p->top->f)) {
if (!check_stack(p)) return false;
// Start a new parser frame: parser frames correspond one-to-one with
// handler frames, and string events occur in a sub-frame.
upb_jsonparser_frame *inner = p->top + 1;
upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSTR);
upb_sink_startstr(&p->top->sink, sel, 0, &inner->sink);
inner->m = p->top->m;
inner->f = p->top->f;
p->top = inner;
return true;
} else if (upb_fielddef_type(p->top->f) == UPB_TYPE_ENUM) {
// Do nothing -- symbolic enum names in quotes remain in the
// current parser frame.
return true;
} else {
upb_status_seterrf(p->status, upb_status_seterrf(p->status,
"String specified for non-string field: %s", "String specified for non-string/non-enum field: %s",
upb_fielddef_name(p->top->f)); upb_fielddef_name(p->top->f));
return false; return false;
} }
if (!check_stack(p)) return false;
upb_jsonparser_frame *inner = p->top + 1; // TODO: check for overflow.
upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSTR);
upb_sink_startstr(&p->top->sink, sel, 0, &inner->sink);
inner->m = p->top->m;
inner->f = p->top->f;
p->top = inner;
return true;
} }
static void end_stringval(upb_json_parser *p) { static void end_stringval(upb_json_parser *p) {
p->top--; if (upb_fielddef_isstring(p->top->f)) {
upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSTR); upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSTR);
upb_sink_endstr(&p->top->sink, sel); upb_sink_endstr(&p->top->sink, sel);
p->top--;
}
} }
static void start_number(upb_json_parser *p, const char *ptr) { static void start_number(upb_json_parser *p, const char *ptr) {
@ -341,7 +368,7 @@ static void start_number(upb_json_parser *p, const char *ptr) {
} }
static void end_number(upb_json_parser *p, const char *ptr) { static void end_number(upb_json_parser *p, const char *ptr) {
end_text(p, ptr); end_text(p, ptr, true);
const char *myend = p->accumulated + p->accumulated_len; const char *myend = p->accumulated + p->accumulated_len;
char *end; char *end;
@ -450,15 +477,15 @@ static void hex(upb_json_parser *p, const char *end) {
// emit the codepoint as UTF-8. // emit the codepoint as UTF-8.
char utf8[3]; // support \u0000 -- \uFFFF -- need only three bytes. char utf8[3]; // support \u0000 -- \uFFFF -- need only three bytes.
int length = 0; int length = 0;
if (codepoint < 0x7F) { if (codepoint <= 0x7F) {
utf8[0] = codepoint; utf8[0] = codepoint;
length = 1; length = 1;
} else if (codepoint < 0x07FF) { } else if (codepoint <= 0x07FF) {
utf8[1] = (codepoint & 0x3F) | 0x80; utf8[1] = (codepoint & 0x3F) | 0x80;
codepoint >>= 6; codepoint >>= 6;
utf8[0] = (codepoint & 0x1F) | 0xC0; utf8[0] = (codepoint & 0x1F) | 0xC0;
length = 2; length = 2;
} else /* codepoint < 0xFFFF */ { } else /* codepoint <= 0xFFFF */ {
utf8[2] = (codepoint & 0x3F) | 0x80; utf8[2] = (codepoint & 0x3F) | 0x80;
codepoint >>= 6; codepoint >>= 6;
utf8[1] = (codepoint & 0x3F) | 0x80; utf8[1] = (codepoint & 0x3F) | 0x80;
@ -478,11 +505,11 @@ static void hex(upb_json_parser *p, const char *end) {
// What follows is the Ragel parser itself. The language is specified in Ragel // What follows is the Ragel parser itself. The language is specified in Ragel
// and the actions call our C functions above. // and the actions call our C functions above.
#line 568 "upb/json/parser.rl" #line 595 "upb/json/parser.rl"
#line 486 "upb/json/parser.c" #line 513 "upb/json/parser.c"
static const char _json_actions[] = { static const char _json_actions[] = {
0, 1, 0, 1, 2, 1, 3, 1, 0, 1, 0, 1, 2, 1, 3, 1,
4, 1, 5, 1, 6, 1, 7, 1, 4, 1, 5, 1, 6, 1, 7, 1,
@ -635,7 +662,7 @@ static const int json_en_value_machine = 27;
static const int json_en_main = 1; static const int json_en_main = 1;
#line 571 "upb/json/parser.rl" #line 598 "upb/json/parser.rl"
size_t parse(void *closure, const void *hd, const char *buf, size_t size, size_t parse(void *closure, const void *hd, const char *buf, size_t size,
const upb_bufhandle *handle) { const upb_bufhandle *handle) {
@ -652,7 +679,7 @@ size_t parse(void *closure, const void *hd, const char *buf, size_t size,
const char *pe = buf + size; const char *pe = buf + size;
#line 656 "upb/json/parser.c" #line 683 "upb/json/parser.c"
{ {
int _klen; int _klen;
unsigned int _trans; unsigned int _trans;
@ -727,114 +754,114 @@ _match:
switch ( *_acts++ ) switch ( *_acts++ )
{ {
case 0: case 0:
#line 489 "upb/json/parser.rl" #line 516 "upb/json/parser.rl"
{ p--; {cs = stack[--top]; goto _again;} } { p--; {cs = stack[--top]; goto _again;} }
break; break;
case 1: case 1:
#line 490 "upb/json/parser.rl" #line 517 "upb/json/parser.rl"
{ p--; {stack[top++] = cs; cs = 10; goto _again;} } { p--; {stack[top++] = cs; cs = 10; goto _again;} }
break; break;
case 2: case 2:
#line 494 "upb/json/parser.rl" #line 521 "upb/json/parser.rl"
{ start_text(parser, p); } { start_text(parser, p); }
break; break;
case 3: case 3:
#line 495 "upb/json/parser.rl" #line 522 "upb/json/parser.rl"
{ CHECK_RETURN_TOP(end_text(parser, p)); } { CHECK_RETURN_TOP(end_text(parser, p, false)); }
break; break;
case 4: case 4:
#line 501 "upb/json/parser.rl" #line 528 "upb/json/parser.rl"
{ start_hex(parser, p); } { start_hex(parser, p); }
break; break;
case 5: case 5:
#line 502 "upb/json/parser.rl" #line 529 "upb/json/parser.rl"
{ hex(parser, p); } { hex(parser, p); }
break; break;
case 6: case 6:
#line 508 "upb/json/parser.rl" #line 535 "upb/json/parser.rl"
{ escape(parser, p); } { escape(parser, p); }
break; break;
case 7: case 7:
#line 511 "upb/json/parser.rl" #line 538 "upb/json/parser.rl"
{ {cs = stack[--top]; goto _again;} } { {cs = stack[--top]; goto _again;} }
break; break;
case 8: case 8:
#line 512 "upb/json/parser.rl" #line 539 "upb/json/parser.rl"
{ {stack[top++] = cs; cs = 19; goto _again;} } { {stack[top++] = cs; cs = 19; goto _again;} }
break; break;
case 9: case 9:
#line 514 "upb/json/parser.rl" #line 541 "upb/json/parser.rl"
{ p--; {stack[top++] = cs; cs = 27; goto _again;} } { p--; {stack[top++] = cs; cs = 27; goto _again;} }
break; break;
case 10: case 10:
#line 519 "upb/json/parser.rl" #line 546 "upb/json/parser.rl"
{ start_member(parser); } { start_member(parser); }
break; break;
case 11: case 11:
#line 520 "upb/json/parser.rl" #line 547 "upb/json/parser.rl"
{ CHECK_RETURN_TOP(end_member(parser)); } { CHECK_RETURN_TOP(end_member(parser)); }
break; break;
case 12: case 12:
#line 523 "upb/json/parser.rl" #line 550 "upb/json/parser.rl"
{ clear_member(parser); } { clear_member(parser); }
break; break;
case 13: case 13:
#line 529 "upb/json/parser.rl" #line 556 "upb/json/parser.rl"
{ start_object(parser); } { start_object(parser); }
break; break;
case 14: case 14:
#line 532 "upb/json/parser.rl" #line 559 "upb/json/parser.rl"
{ end_object(parser); } { end_object(parser); }
break; break;
case 15: case 15:
#line 538 "upb/json/parser.rl" #line 565 "upb/json/parser.rl"
{ CHECK_RETURN_TOP(start_array(parser)); } { CHECK_RETURN_TOP(start_array(parser)); }
break; break;
case 16: case 16:
#line 542 "upb/json/parser.rl" #line 569 "upb/json/parser.rl"
{ end_array(parser); } { end_array(parser); }
break; break;
case 17: case 17:
#line 547 "upb/json/parser.rl" #line 574 "upb/json/parser.rl"
{ start_number(parser, p); } { start_number(parser, p); }
break; break;
case 18: case 18:
#line 548 "upb/json/parser.rl" #line 575 "upb/json/parser.rl"
{ end_number(parser, p); } { end_number(parser, p); }
break; break;
case 19: case 19:
#line 550 "upb/json/parser.rl" #line 577 "upb/json/parser.rl"
{ CHECK_RETURN_TOP(start_stringval(parser)); } { CHECK_RETURN_TOP(start_stringval(parser)); }
break; break;
case 20: case 20:
#line 551 "upb/json/parser.rl" #line 578 "upb/json/parser.rl"
{ end_stringval(parser); } { end_stringval(parser); }
break; break;
case 21: case 21:
#line 553 "upb/json/parser.rl" #line 580 "upb/json/parser.rl"
{ CHECK_RETURN_TOP(putbool(parser, true)); } { CHECK_RETURN_TOP(putbool(parser, true)); }
break; break;
case 22: case 22:
#line 555 "upb/json/parser.rl" #line 582 "upb/json/parser.rl"
{ CHECK_RETURN_TOP(putbool(parser, false)); } { CHECK_RETURN_TOP(putbool(parser, false)); }
break; break;
case 23: case 23:
#line 557 "upb/json/parser.rl" #line 584 "upb/json/parser.rl"
{ /* null value */ } { /* null value */ }
break; break;
case 24: case 24:
#line 559 "upb/json/parser.rl" #line 586 "upb/json/parser.rl"
{ CHECK_RETURN_TOP(start_subobject(parser)); } { CHECK_RETURN_TOP(start_subobject(parser)); }
break; break;
case 25: case 25:
#line 560 "upb/json/parser.rl" #line 587 "upb/json/parser.rl"
{ end_subobject(parser); } { end_subobject(parser); }
break; break;
case 26: case 26:
#line 565 "upb/json/parser.rl" #line 592 "upb/json/parser.rl"
{ p--; {cs = stack[--top]; goto _again;} } { p--; {cs = stack[--top]; goto _again;} }
break; break;
#line 838 "upb/json/parser.c" #line 865 "upb/json/parser.c"
} }
} }
@ -847,7 +874,7 @@ _again:
_out: {} _out: {}
} }
#line 587 "upb/json/parser.rl" #line 614 "upb/json/parser.rl"
if (p != pe) { if (p != pe) {
upb_status_seterrf(parser->status, "Parse error at %s\n", p); upb_status_seterrf(parser->status, "Parse error at %s\n", p);
@ -888,13 +915,13 @@ void upb_json_parser_reset(upb_json_parser *p) {
int top; int top;
// Emit Ragel initialization of the parser. // Emit Ragel initialization of the parser.
#line 892 "upb/json/parser.c" #line 919 "upb/json/parser.c"
{ {
cs = json_start; cs = json_start;
top = 0; top = 0;
} }
#line 627 "upb/json/parser.rl" #line 654 "upb/json/parser.rl"
p->current_state = cs; p->current_state = cs;
p->parser_top = top; p->parser_top = top;
p->text_begin = NULL; p->text_begin = NULL;

@ -286,7 +286,7 @@ badpadding:
return false; return false;
} }
static bool end_text(upb_json_parser *p, const char *ptr) { static bool end_text(upb_json_parser *p, const char *ptr, bool is_num) {
assert(!p->accumulated); // TODO: handle this case. assert(!p->accumulated); // TODO: handle this case.
p->accumulated = p->text_begin; p->accumulated = p->text_begin;
p->accumulated_len = ptr - p->text_begin; p->accumulated_len = ptr - p->text_begin;
@ -300,6 +300,24 @@ static bool end_text(upb_json_parser *p, const char *ptr) {
upb_sink_putstring(&p->top->sink, sel, p->accumulated, p->accumulated_len, NULL); upb_sink_putstring(&p->top->sink, sel, p->accumulated, p->accumulated_len, NULL);
} }
p->accumulated = NULL; p->accumulated = NULL;
} else if (p->top->f &&
upb_fielddef_type(p->top->f) == UPB_TYPE_ENUM &&
!is_num) {
// Enum case: resolve enum symbolic name to integer value.
const upb_enumdef *enumdef =
(const upb_enumdef*)upb_fielddef_subdef(p->top->f);
int32_t int_val = 0;
if (upb_enumdef_ntoi(enumdef, p->accumulated, p->accumulated_len,
&int_val)) {
upb_selector_t sel = getsel(p);
upb_sink_putint32(&p->top->sink, sel, int_val);
} else {
upb_status_seterrmsg(p->status, "Enum value name unknown");
return false;
}
p->accumulated = NULL;
} }
return true; return true;
@ -308,29 +326,38 @@ static bool end_text(upb_json_parser *p, const char *ptr) {
static bool start_stringval(upb_json_parser *p) { static bool start_stringval(upb_json_parser *p) {
assert(p->top->f); assert(p->top->f);
if (!upb_fielddef_isstring(p->top->f)) { if (upb_fielddef_isstring(p->top->f)) {
if (!check_stack(p)) return false;
// Start a new parser frame: parser frames correspond one-to-one with
// handler frames, and string events occur in a sub-frame.
upb_jsonparser_frame *inner = p->top + 1;
upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSTR);
upb_sink_startstr(&p->top->sink, sel, 0, &inner->sink);
inner->m = p->top->m;
inner->f = p->top->f;
p->top = inner;
return true;
} else if (upb_fielddef_type(p->top->f) == UPB_TYPE_ENUM) {
// Do nothing -- symbolic enum names in quotes remain in the
// current parser frame.
return true;
} else {
upb_status_seterrf(p->status, upb_status_seterrf(p->status,
"String specified for non-string field: %s", "String specified for non-string/non-enum field: %s",
upb_fielddef_name(p->top->f)); upb_fielddef_name(p->top->f));
return false; return false;
} }
if (!check_stack(p)) return false;
upb_jsonparser_frame *inner = p->top + 1; // TODO: check for overflow.
upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSTR);
upb_sink_startstr(&p->top->sink, sel, 0, &inner->sink);
inner->m = p->top->m;
inner->f = p->top->f;
p->top = inner;
return true;
} }
static void end_stringval(upb_json_parser *p) { static void end_stringval(upb_json_parser *p) {
p->top--; if (upb_fielddef_isstring(p->top->f)) {
upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSTR); upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSTR);
upb_sink_endstr(&p->top->sink, sel); upb_sink_endstr(&p->top->sink, sel);
p->top--;
}
} }
static void start_number(upb_json_parser *p, const char *ptr) { static void start_number(upb_json_parser *p, const char *ptr) {
@ -339,7 +366,7 @@ static void start_number(upb_json_parser *p, const char *ptr) {
} }
static void end_number(upb_json_parser *p, const char *ptr) { static void end_number(upb_json_parser *p, const char *ptr) {
end_text(p, ptr); end_text(p, ptr, true);
const char *myend = p->accumulated + p->accumulated_len; const char *myend = p->accumulated + p->accumulated_len;
char *end; char *end;
@ -448,15 +475,15 @@ static void hex(upb_json_parser *p, const char *end) {
// emit the codepoint as UTF-8. // emit the codepoint as UTF-8.
char utf8[3]; // support \u0000 -- \uFFFF -- need only three bytes. char utf8[3]; // support \u0000 -- \uFFFF -- need only three bytes.
int length = 0; int length = 0;
if (codepoint < 0x7F) { if (codepoint <= 0x7F) {
utf8[0] = codepoint; utf8[0] = codepoint;
length = 1; length = 1;
} else if (codepoint < 0x07FF) { } else if (codepoint <= 0x07FF) {
utf8[1] = (codepoint & 0x3F) | 0x80; utf8[1] = (codepoint & 0x3F) | 0x80;
codepoint >>= 6; codepoint >>= 6;
utf8[0] = (codepoint & 0x1F) | 0xC0; utf8[0] = (codepoint & 0x1F) | 0xC0;
length = 2; length = 2;
} else /* codepoint < 0xFFFF */ { } else /* codepoint <= 0xFFFF */ {
utf8[2] = (codepoint & 0x3F) | 0x80; utf8[2] = (codepoint & 0x3F) | 0x80;
codepoint >>= 6; codepoint >>= 6;
utf8[1] = (codepoint & 0x3F) | 0x80; utf8[1] = (codepoint & 0x3F) | 0x80;
@ -492,7 +519,7 @@ static void hex(upb_json_parser *p, const char *end) {
text = text =
/[^\\"]/+ /[^\\"]/+
>{ start_text(parser, p); } >{ start_text(parser, p); }
%{ CHECK_RETURN_TOP(end_text(parser, p)); } %{ CHECK_RETURN_TOP(end_text(parser, p, false)); }
; ;
unicode_char = unicode_char =

@ -69,10 +69,10 @@ static inline char* json_nice_escape(char c) {
} }
} }
// Write a properly quoted and escaped string. // Write a properly escaped string chunk. The surrounding quotes are *not*
// printed; this is so that the caller has the option of emitting the string
// content in chunks.
static void putstring(upb_json_printer *p, const char *buf, unsigned int len) { static void putstring(upb_json_printer *p, const char *buf, unsigned int len) {
print_data(p, "\"", 1);
const char* unescaped_run = NULL; const char* unescaped_run = NULL;
for (unsigned int i = 0; i < len; i++) { for (unsigned int i = 0; i < len; i++) {
char c = buf[i]; char c = buf[i];
@ -112,8 +112,6 @@ static void putstring(upb_json_printer *p, const char *buf, unsigned int len) {
if (unescaped_run) { if (unescaped_run) {
print_data(p, unescaped_run, &buf[len] - unescaped_run); print_data(p, unescaped_run, &buf[len] - unescaped_run);
} }
print_data(p, "\"", 1);
} }
#define CHKLENGTH(x) if (!(x)) return -1; #define CHKLENGTH(x) if (!(x)) return -1;
@ -158,8 +156,9 @@ static bool putkey(void *closure, const void *handler_data) {
upb_json_printer *p = closure; upb_json_printer *p = closure;
const strpc *key = handler_data; const strpc *key = handler_data;
print_comma(p); print_comma(p);
print_data(p, "\"", 1);
putstring(p, key->ptr, key->len); putstring(p, key->ptr, key->len);
print_data(p, ":", 1); print_data(p, "\":", 2);
return true; return true;
} }
@ -200,6 +199,47 @@ TYPE_HANDLERS(uint64_t, fmt_uint64);
#undef TYPE_HANDLERS #undef TYPE_HANDLERS
// Handler data bound to the enum value handlers: `keyname` is forwarded to
// putkey() by scalar_enum(), and `enumdef` is used to look up the symbolic
// name for an integer value.
typedef struct {
void *keyname;
const upb_enumdef *enumdef;
} EnumHandlerData;
// Value handler for a non-repeated enum field. Emits the field key, then the
// value: as a quoted symbolic name when upb_enumdef_iton() knows the number,
// otherwise as a plain integer via putint32_t().
static bool scalar_enum(void *closure, const void *handler_data,
                        int32_t val) {
  upb_json_printer *printer = closure;
  const EnumHandlerData *enum_data = handler_data;

  CHK(putkey(closure, enum_data->keyname));

  const char *name = upb_enumdef_iton(enum_data->enumdef, val);
  if (!name) {
    // No symbolic name for this number: fall back to the integer form.
    putint32_t(closure, NULL, val);
    return true;
  }

  print_data(printer, "\"", 1);
  putstring(printer, name, strlen(name));
  print_data(printer, "\"", 1);
  return true;
}
// Value handler for one element of a repeated enum field. Emits the element
// separator via print_comma(), then the value: as a quoted symbolic name when
// upb_enumdef_iton() knows the number, otherwise as a plain integer.
static bool repeated_enum(void *closure, const void *handler_data,
                          int32_t val) {
  upb_json_printer *printer = closure;
  const EnumHandlerData *enum_data = handler_data;

  print_comma(printer);

  const char *name = upb_enumdef_iton(enum_data->enumdef, val);
  if (!name) {
    // No symbolic name for this number: fall back to the integer form.
    putint32_t(closure, NULL, val);
    return true;
  }

  print_data(printer, "\"", 1);
  putstring(printer, name, strlen(name));
  print_data(printer, "\"", 1);
  return true;
}
static void *scalar_startsubmsg(void *closure, const void *handler_data) { static void *scalar_startsubmsg(void *closure, const void *handler_data) {
return putkey(closure, handler_data) ? closure : UPB_BREAK; return putkey(closure, handler_data) ? closure : UPB_BREAK;
} }
@ -310,27 +350,60 @@ static size_t putbytes(void *closure, const void *handler_data, const char *str,
} }
size_t bytes = to - data; size_t bytes = to - data;
print_data(p, "\"", 1);
putstring(p, data, bytes); putstring(p, data, bytes);
print_data(p, "\"", 1);
return len; return len;
} }
// Start-of-string handler for a non-repeated string field: emits the field
// key (handler_data is forwarded to putkey()) followed by the opening quote,
// and returns the printer as the closure for the string's chunks.
//
// handler_data is used, so it is not marked UPB_UNUSED; only size_hint is
// ignored. CHK is a macro defined outside this view; it is expected to return
// early when putkey() fails.
static void *scalar_startstr(void *closure, const void *handler_data,
                             size_t size_hint) {
  UPB_UNUSED(size_hint);
  upb_json_printer *p = closure;
  CHK(putkey(closure, handler_data));
  print_data(p, "\"", 1);
  return p;
}
static size_t scalar_str(void *closure, const void *handler_data, static size_t scalar_str(void *closure, const void *handler_data,
const char *str, size_t len, const char *str, size_t len,
const upb_bufhandle *handle) { const upb_bufhandle *handle) {
CHK(putkey(closure, handler_data));
CHK(putstr(closure, handler_data, str, len, handle)); CHK(putstr(closure, handler_data, str, len, handle));
return len; return len;
} }
// End-of-string handler for a non-repeated string field: emits the closing
// quote. handler_data is ignored.
static bool scalar_endstr(void *closure, const void *handler_data) {
  upb_json_printer *printer = closure;
  UPB_UNUSED(handler_data);

  print_data(printer, "\"", 1);
  return true;
}
// Start-of-string handler for one element of a repeated string field: emits
// the element separator via print_comma() and the opening quote, and returns
// the printer as the closure for the string's chunks. handler_data and
// size_hint are ignored.
static void *repeated_startstr(void *closure, const void *handler_data,
                               size_t size_hint) {
  upb_json_printer *printer = closure;
  UPB_UNUSED(size_hint);
  UPB_UNUSED(handler_data);

  print_comma(printer);
  print_data(printer, "\"", 1);
  return printer;
}
static size_t repeated_str(void *closure, const void *handler_data, static size_t repeated_str(void *closure, const void *handler_data,
const char *str, size_t len, const char *str, size_t len,
const upb_bufhandle *handle) { const upb_bufhandle *handle) {
upb_json_printer *p = closure;
print_comma(p);
CHK(putstr(closure, handler_data, str, len, handle)); CHK(putstr(closure, handler_data, str, len, handle));
return len; return len;
} }
// End-of-string handler for one element of a repeated string field: emits the
// closing quote. handler_data is ignored.
static bool repeated_endstr(void *closure, const void *handler_data) {
  upb_json_printer *printer = closure;
  UPB_UNUSED(handler_data);

  print_data(printer, "\"", 1);
  return true;
}
static size_t scalar_bytes(void *closure, const void *handler_data, static size_t scalar_bytes(void *closure, const void *handler_data,
const char *str, size_t len, const char *str, size_t len,
const upb_bufhandle *handle) { const upb_bufhandle *handle) {
@ -381,21 +454,44 @@ void sethandlers(const void *closure, upb_handlers *h) {
TYPE(UPB_TYPE_FLOAT, float, float); TYPE(UPB_TYPE_FLOAT, float, float);
TYPE(UPB_TYPE_DOUBLE, double, double); TYPE(UPB_TYPE_DOUBLE, double, double);
TYPE(UPB_TYPE_BOOL, bool, bool); TYPE(UPB_TYPE_BOOL, bool, bool);
TYPE(UPB_TYPE_ENUM, int32, int32_t);
TYPE(UPB_TYPE_INT32, int32, int32_t); TYPE(UPB_TYPE_INT32, int32, int32_t);
TYPE(UPB_TYPE_UINT32, uint32, uint32_t); TYPE(UPB_TYPE_UINT32, uint32, uint32_t);
TYPE(UPB_TYPE_INT64, int64, int64_t); TYPE(UPB_TYPE_INT64, int64, int64_t);
TYPE(UPB_TYPE_UINT64, uint64, uint64_t); TYPE(UPB_TYPE_UINT64, uint64, uint64_t);
case UPB_TYPE_ENUM: {
// For now, we always emit symbolic names for enums. We may want an
// option later to control this behavior, but we will wait for a real
// need first.
EnumHandlerData *hd = malloc(sizeof(EnumHandlerData));
hd->enumdef = (const upb_enumdef *)upb_fielddef_subdef(f);
hd->keyname = newstrpc(h, f);
upb_handlers_addcleanup(h, hd, free);
upb_handlerattr enum_attr = UPB_HANDLERATTR_INITIALIZER;
upb_handlerattr_sethandlerdata(&enum_attr, hd);
if (upb_fielddef_isseq(f)) {
upb_handlers_setint32(h, f, repeated_enum, &enum_attr);
} else {
upb_handlers_setint32(h, f, scalar_enum, &enum_attr);
}
upb_handlerattr_uninit(&enum_attr);
break;
}
case UPB_TYPE_STRING: case UPB_TYPE_STRING:
// XXX: this doesn't support strings that span buffers yet.
if (upb_fielddef_isseq(f)) { if (upb_fielddef_isseq(f)) {
upb_handlers_setstartstr(h, f, repeated_startstr, &empty_attr);
upb_handlers_setstring(h, f, repeated_str, &empty_attr); upb_handlers_setstring(h, f, repeated_str, &empty_attr);
upb_handlers_setendstr(h, f, repeated_endstr, &empty_attr);
} else { } else {
upb_handlers_setstring(h, f, scalar_str, &name_attr); upb_handlers_setstartstr(h, f, scalar_startstr, &name_attr);
upb_handlers_setstring(h, f, scalar_str, &empty_attr);
upb_handlers_setendstr(h, f, scalar_endstr, &empty_attr);
} }
break; break;
case UPB_TYPE_BYTES: case UPB_TYPE_BYTES:
// XXX: this doesn't support strings that span buffers yet. // XXX: this doesn't support strings that span buffers yet. The base64
// encoder will need to be made resumable for this to work properly.
if (upb_fielddef_isseq(f)) { if (upb_fielddef_isseq(f)) {
upb_handlers_setstring(h, f, repeated_bytes, &empty_attr); upb_handlers_setstring(h, f, repeated_bytes, &empty_attr);
} else { } else {

Loading…
Cancel
Save