Merge pull request #112 from TeBoring/json-unknown

Enable ignoring unknown fields in JSON parsing
pull/13171/head
Joshua Haberman 7 years ago committed by GitHub
commit 1db11440bd
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 3
      tests/json/test_json.cc
  2. 128
      upb/json/parser.c
  3. 9
      upb/json/parser.h
  4. 56
      upb/json/parser.rl

@ -176,7 +176,8 @@ void test_json_roundtrip_message(const char* json_src,
upb::json::Printer* printer = upb::json::Printer::Create( upb::json::Printer* printer = upb::json::Printer::Create(
env.env(), serialize_handlers, data_sink.Sink()); env.env(), serialize_handlers, data_sink.Sink());
upb::json::Parser* parser = upb::json::Parser* parser =
upb::json::Parser::Create(env.env(), parser_method, printer->input()); upb::json::Parser::Create(
env.env(), parser_method, printer->input(), false);
env.ResetBytesSink(parser->input()); env.ResetBytesSink(parser->input());
env.Reset(json_src, strlen(json_src), false, false); env.Reset(json_src, strlen(json_src), false, false);

@ -96,6 +96,9 @@ struct upb_json_parser {
/* Intermediate result of parsing a unicode escape sequence. */ /* Intermediate result of parsing a unicode escape sequence. */
uint32_t digit; uint32_t digit;
/* If true, a JSON member whose name matches no known field is skipped
 * instead of being reported as a "No such field" error. */
bool ignore_json_unknown;
}; };
struct upb_json_parsermethod { struct upb_json_parsermethod {
@ -616,6 +619,11 @@ static bool end_number(upb_json_parser *p, const char *ptr) {
return false; return false;
} }
if (p->top->f == NULL) {
multipart_end(p);
return true;
}
return parse_number(p, false); return parse_number(p, false);
} }
@ -768,6 +776,10 @@ static bool parse_number(upb_json_parser *p, bool is_quoted) {
static bool parser_putbool(upb_json_parser *p, bool val) { static bool parser_putbool(upb_json_parser *p, bool val) {
bool ok; bool ok;
if (p->top->f == NULL) {
return true;
}
if (upb_fielddef_type(p->top->f) != UPB_TYPE_BOOL) { if (upb_fielddef_type(p->top->f) != UPB_TYPE_BOOL) {
upb_status_seterrf(&p->status, upb_status_seterrf(&p->status,
"Boolean value specified for non-bool field: %s", "Boolean value specified for non-bool field: %s",
@ -783,7 +795,10 @@ static bool parser_putbool(upb_json_parser *p, bool val) {
} }
static bool start_stringval(upb_json_parser *p) { static bool start_stringval(upb_json_parser *p) {
UPB_ASSERT(p->top->f); if (p->top->f == NULL) {
multipart_startaccum(p);
return true;
}
if (upb_fielddef_isstring(p->top->f)) { if (upb_fielddef_isstring(p->top->f)) {
upb_jsonparser_frame *inner; upb_jsonparser_frame *inner;
@ -834,6 +849,11 @@ static bool start_stringval(upb_json_parser *p) {
static bool end_stringval(upb_json_parser *p) { static bool end_stringval(upb_json_parser *p) {
bool ok = true; bool ok = true;
if (p->top->f == NULL) {
multipart_end(p);
return true;
}
switch (upb_fielddef_type(p->top->f)) { switch (upb_fielddef_type(p->top->f)) {
case UPB_TYPE_BYTES: case UPB_TYPE_BYTES:
if (!base64_push(p, getsel_for_handlertype(p, UPB_HANDLER_STRING), if (!base64_push(p, getsel_for_handlertype(p, UPB_HANDLER_STRING),
@ -1025,6 +1045,10 @@ static bool handle_mapentry(upb_json_parser *p) {
static bool end_membername(upb_json_parser *p) { static bool end_membername(upb_json_parser *p) {
UPB_ASSERT(!p->top->f); UPB_ASSERT(!p->top->f);
if (!p->top->m) {
return true;
}
if (p->top->is_map) { if (p->top->is_map) {
return handle_mapentry(p); return handle_mapentry(p);
} else { } else {
@ -1036,10 +1060,11 @@ static bool end_membername(upb_json_parser *p) {
p->top->f = upb_value_getconstptr(v); p->top->f = upb_value_getconstptr(v);
multipart_end(p); multipart_end(p);
return true;
} else if (p->ignore_json_unknown) {
multipart_end(p);
return true; return true;
} else { } else {
/* TODO(haberman): Ignore unknown fields if requested/configured to do
* so. */
upb_status_seterrf(&p->status, "No such field: %.*s\n", (int)len, buf); upb_status_seterrf(&p->status, "No such field: %.*s\n", (int)len, buf);
upb_env_reporterror(p->env, &p->status); upb_env_reporterror(p->env, &p->status);
return false; return false;
@ -1071,7 +1096,18 @@ static void end_member(upb_json_parser *p) {
} }
static bool start_subobject(upb_json_parser *p) { static bool start_subobject(upb_json_parser *p) {
UPB_ASSERT(p->top->f); if (p->top->f == NULL) {
upb_jsonparser_frame *inner;
if (!check_stack(p)) return false;
inner = p->top + 1;
inner->m = NULL;
inner->f = NULL;
inner->is_map = false;
inner->is_mapentry = false;
p->top = inner;
return true;
}
if (upb_fielddef_ismap(p->top->f)) { if (upb_fielddef_ismap(p->top->f)) {
upb_jsonparser_frame *inner; upb_jsonparser_frame *inner;
@ -1130,9 +1166,12 @@ static void end_subobject(upb_json_parser *p) {
upb_sink_endseq(&p->top->sink, sel); upb_sink_endseq(&p->top->sink, sel);
} else { } else {
upb_selector_t sel; upb_selector_t sel;
bool is_unknown = p->top->m == NULL;
p->top--; p->top--;
sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSUBMSG); if (!is_unknown) {
upb_sink_endsubmsg(&p->top->sink, sel); sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSUBMSG);
upb_sink_endsubmsg(&p->top->sink, sel);
}
} }
} }
@ -1214,11 +1253,11 @@ static void end_object(upb_json_parser *p) {
* final state once, when the closing '"' is seen. */ * final state once, when the closing '"' is seen. */
#line 1310 "upb/json/parser.rl" #line 1349 "upb/json/parser.rl"
#line 1222 "upb/json/parser.c" #line 1261 "upb/json/parser.c"
static const char _json_actions[] = { static const char _json_actions[] = {
0, 1, 0, 1, 2, 1, 3, 1, 0, 1, 0, 1, 2, 1, 3, 1,
5, 1, 6, 1, 7, 1, 8, 1, 5, 1, 6, 1, 7, 1, 8, 1,
@ -1367,7 +1406,7 @@ static const int json_en_value_machine = 27;
static const int json_en_main = 1; static const int json_en_main = 1;
#line 1313 "upb/json/parser.rl" #line 1352 "upb/json/parser.rl"
size_t parse(void *closure, const void *hd, const char *buf, size_t size, size_t parse(void *closure, const void *hd, const char *buf, size_t size,
const upb_bufhandle *handle) { const upb_bufhandle *handle) {
@ -1389,7 +1428,7 @@ size_t parse(void *closure, const void *hd, const char *buf, size_t size,
capture_resume(parser, buf); capture_resume(parser, buf);
#line 1393 "upb/json/parser.c" #line 1432 "upb/json/parser.c"
{ {
int _klen; int _klen;
unsigned int _trans; unsigned int _trans;
@ -1464,118 +1503,118 @@ _match:
switch ( *_acts++ ) switch ( *_acts++ )
{ {
case 0: case 0:
#line 1225 "upb/json/parser.rl" #line 1264 "upb/json/parser.rl"
{ p--; {cs = stack[--top]; goto _again;} } { p--; {cs = stack[--top]; goto _again;} }
break; break;
case 1: case 1:
#line 1226 "upb/json/parser.rl" #line 1265 "upb/json/parser.rl"
{ p--; {stack[top++] = cs; cs = 10; goto _again;} } { p--; {stack[top++] = cs; cs = 10; goto _again;} }
break; break;
case 2: case 2:
#line 1230 "upb/json/parser.rl" #line 1269 "upb/json/parser.rl"
{ start_text(parser, p); } { start_text(parser, p); }
break; break;
case 3: case 3:
#line 1231 "upb/json/parser.rl" #line 1270 "upb/json/parser.rl"
{ CHECK_RETURN_TOP(end_text(parser, p)); } { CHECK_RETURN_TOP(end_text(parser, p)); }
break; break;
case 4: case 4:
#line 1237 "upb/json/parser.rl" #line 1276 "upb/json/parser.rl"
{ start_hex(parser); } { start_hex(parser); }
break; break;
case 5: case 5:
#line 1238 "upb/json/parser.rl" #line 1277 "upb/json/parser.rl"
{ hexdigit(parser, p); } { hexdigit(parser, p); }
break; break;
case 6: case 6:
#line 1239 "upb/json/parser.rl" #line 1278 "upb/json/parser.rl"
{ CHECK_RETURN_TOP(end_hex(parser)); } { CHECK_RETURN_TOP(end_hex(parser)); }
break; break;
case 7: case 7:
#line 1245 "upb/json/parser.rl" #line 1284 "upb/json/parser.rl"
{ CHECK_RETURN_TOP(escape(parser, p)); } { CHECK_RETURN_TOP(escape(parser, p)); }
break; break;
case 8: case 8:
#line 1251 "upb/json/parser.rl" #line 1290 "upb/json/parser.rl"
{ p--; {cs = stack[--top]; goto _again;} } { p--; {cs = stack[--top]; goto _again;} }
break; break;
case 9: case 9:
#line 1254 "upb/json/parser.rl" #line 1293 "upb/json/parser.rl"
{ {stack[top++] = cs; cs = 19; goto _again;} } { {stack[top++] = cs; cs = 19; goto _again;} }
break; break;
case 10: case 10:
#line 1256 "upb/json/parser.rl" #line 1295 "upb/json/parser.rl"
{ p--; {stack[top++] = cs; cs = 27; goto _again;} } { p--; {stack[top++] = cs; cs = 27; goto _again;} }
break; break;
case 11: case 11:
#line 1261 "upb/json/parser.rl" #line 1300 "upb/json/parser.rl"
{ start_member(parser); } { start_member(parser); }
break; break;
case 12: case 12:
#line 1262 "upb/json/parser.rl" #line 1301 "upb/json/parser.rl"
{ CHECK_RETURN_TOP(end_membername(parser)); } { CHECK_RETURN_TOP(end_membername(parser)); }
break; break;
case 13: case 13:
#line 1265 "upb/json/parser.rl" #line 1304 "upb/json/parser.rl"
{ end_member(parser); } { end_member(parser); }
break; break;
case 14: case 14:
#line 1271 "upb/json/parser.rl" #line 1310 "upb/json/parser.rl"
{ start_object(parser); } { start_object(parser); }
break; break;
case 15: case 15:
#line 1274 "upb/json/parser.rl" #line 1313 "upb/json/parser.rl"
{ end_object(parser); } { end_object(parser); }
break; break;
case 16: case 16:
#line 1280 "upb/json/parser.rl" #line 1319 "upb/json/parser.rl"
{ CHECK_RETURN_TOP(start_array(parser)); } { CHECK_RETURN_TOP(start_array(parser)); }
break; break;
case 17: case 17:
#line 1284 "upb/json/parser.rl" #line 1323 "upb/json/parser.rl"
{ end_array(parser); } { end_array(parser); }
break; break;
case 18: case 18:
#line 1289 "upb/json/parser.rl" #line 1328 "upb/json/parser.rl"
{ start_number(parser, p); } { start_number(parser, p); }
break; break;
case 19: case 19:
#line 1290 "upb/json/parser.rl" #line 1329 "upb/json/parser.rl"
{ CHECK_RETURN_TOP(end_number(parser, p)); } { CHECK_RETURN_TOP(end_number(parser, p)); }
break; break;
case 20: case 20:
#line 1292 "upb/json/parser.rl" #line 1331 "upb/json/parser.rl"
{ CHECK_RETURN_TOP(start_stringval(parser)); } { CHECK_RETURN_TOP(start_stringval(parser)); }
break; break;
case 21: case 21:
#line 1293 "upb/json/parser.rl" #line 1332 "upb/json/parser.rl"
{ CHECK_RETURN_TOP(end_stringval(parser)); } { CHECK_RETURN_TOP(end_stringval(parser)); }
break; break;
case 22: case 22:
#line 1295 "upb/json/parser.rl" #line 1334 "upb/json/parser.rl"
{ CHECK_RETURN_TOP(parser_putbool(parser, true)); } { CHECK_RETURN_TOP(parser_putbool(parser, true)); }
break; break;
case 23: case 23:
#line 1297 "upb/json/parser.rl" #line 1336 "upb/json/parser.rl"
{ CHECK_RETURN_TOP(parser_putbool(parser, false)); } { CHECK_RETURN_TOP(parser_putbool(parser, false)); }
break; break;
case 24: case 24:
#line 1299 "upb/json/parser.rl" #line 1338 "upb/json/parser.rl"
{ /* null value */ } { /* null value */ }
break; break;
case 25: case 25:
#line 1301 "upb/json/parser.rl" #line 1340 "upb/json/parser.rl"
{ CHECK_RETURN_TOP(start_subobject(parser)); } { CHECK_RETURN_TOP(start_subobject(parser)); }
break; break;
case 26: case 26:
#line 1302 "upb/json/parser.rl" #line 1341 "upb/json/parser.rl"
{ end_subobject(parser); } { end_subobject(parser); }
break; break;
case 27: case 27:
#line 1307 "upb/json/parser.rl" #line 1346 "upb/json/parser.rl"
{ p--; {cs = stack[--top]; goto _again;} } { p--; {cs = stack[--top]; goto _again;} }
break; break;
#line 1579 "upb/json/parser.c" #line 1618 "upb/json/parser.c"
} }
} }
@ -1588,7 +1627,7 @@ _again:
_out: {} _out: {}
} }
#line 1334 "upb/json/parser.rl" #line 1373 "upb/json/parser.rl"
if (p != pe) { if (p != pe) {
upb_status_seterrf(&parser->status, "Parse error at '%.*s'\n", pe - p, p); upb_status_seterrf(&parser->status, "Parse error at '%.*s'\n", pe - p, p);
@ -1629,13 +1668,13 @@ static void json_parser_reset(upb_json_parser *p) {
/* Emit Ragel initialization of the parser. */ /* Emit Ragel initialization of the parser. */
#line 1633 "upb/json/parser.c" #line 1672 "upb/json/parser.c"
{ {
cs = json_start; cs = json_start;
top = 0; top = 0;
} }
#line 1374 "upb/json/parser.rl" #line 1413 "upb/json/parser.rl"
p->current_state = cs; p->current_state = cs;
p->parser_top = top; p->parser_top = top;
accumulate_clear(p); accumulate_clear(p);
@ -1722,7 +1761,8 @@ static void add_jsonname_table(upb_json_parsermethod *m, const upb_msgdef* md) {
upb_json_parser *upb_json_parser_create(upb_env *env, upb_json_parser *upb_json_parser_create(upb_env *env,
const upb_json_parsermethod *method, const upb_json_parsermethod *method,
upb_sink *output) { upb_sink *output,
bool ignore_json_unknown) {
#ifndef NDEBUG #ifndef NDEBUG
const size_t size_before = upb_env_bytesallocated(env); const size_t size_before = upb_env_bytesallocated(env);
#endif #endif
@ -1741,6 +1781,8 @@ upb_json_parser *upb_json_parser_create(upb_env *env,
p->top->m = upb_handlers_msgdef(output->handlers); p->top->m = upb_handlers_msgdef(output->handlers);
set_name_table(p, p->top); set_name_table(p, p->top);
p->ignore_json_unknown = ignore_json_unknown;
/* If this fails, uncomment and increase the value in parser.h. */ /* If this fails, uncomment and increase the value in parser.h. */
/* fprintf(stderr, "%zd\n", upb_env_bytesallocated(env) - size_before); */ /* fprintf(stderr, "%zd\n", upb_env_bytesallocated(env) - size_before); */
UPB_ASSERT_DEBUGVAR(upb_env_bytesallocated(env) - size_before <= UPB_ASSERT_DEBUGVAR(upb_env_bytesallocated(env) - size_before <=

@ -38,7 +38,7 @@ UPB_DECLARE_DERIVED_TYPE(upb::json::ParserMethod, upb::RefCounted,
class upb::json::Parser { class upb::json::Parser {
public: public:
static Parser* Create(Environment* env, const ParserMethod* method, static Parser* Create(Environment* env, const ParserMethod* method,
Sink* output); Sink* output, bool ignore_json_unknown);
BytesSink* input(); BytesSink* input();
@ -72,7 +72,8 @@ UPB_BEGIN_EXTERN_C
upb_json_parser* upb_json_parser_create(upb_env* e, upb_json_parser* upb_json_parser_create(upb_env* e,
const upb_json_parsermethod* m, const upb_json_parsermethod* m,
upb_sink* output); upb_sink* output,
bool ignore_json_unknown);
upb_bytessink *upb_json_parser_input(upb_json_parser *p); upb_bytessink *upb_json_parser_input(upb_json_parser *p);
upb_json_parsermethod* upb_json_parsermethod_new(const upb_msgdef* md, upb_json_parsermethod* upb_json_parsermethod_new(const upb_msgdef* md,
@ -92,8 +93,8 @@ UPB_END_EXTERN_C
namespace upb { namespace upb {
namespace json { namespace json {
inline Parser* Parser::Create(Environment* env, const ParserMethod* method, inline Parser* Parser::Create(Environment* env, const ParserMethod* method,
Sink* output) { Sink* output, bool ignore_json_unknown) {
return upb_json_parser_create(env, method, output); return upb_json_parser_create(env, method, output, ignore_json_unknown);
} }
inline BytesSink* Parser::input() { inline BytesSink* Parser::input() {
return upb_json_parser_input(this); return upb_json_parser_input(this);

@ -94,6 +94,9 @@ struct upb_json_parser {
/* Intermediate result of parsing a unicode escape sequence. */ /* Intermediate result of parsing a unicode escape sequence. */
uint32_t digit; uint32_t digit;
/* If true, a JSON member whose name matches no known field is skipped
 * instead of being reported as a "No such field" error. */
bool ignore_json_unknown;
}; };
struct upb_json_parsermethod { struct upb_json_parsermethod {
@ -614,6 +617,11 @@ static bool end_number(upb_json_parser *p, const char *ptr) {
return false; return false;
} }
if (p->top->f == NULL) {
multipart_end(p);
return true;
}
return parse_number(p, false); return parse_number(p, false);
} }
@ -766,6 +774,10 @@ static bool parse_number(upb_json_parser *p, bool is_quoted) {
static bool parser_putbool(upb_json_parser *p, bool val) { static bool parser_putbool(upb_json_parser *p, bool val) {
bool ok; bool ok;
if (p->top->f == NULL) {
return true;
}
if (upb_fielddef_type(p->top->f) != UPB_TYPE_BOOL) { if (upb_fielddef_type(p->top->f) != UPB_TYPE_BOOL) {
upb_status_seterrf(&p->status, upb_status_seterrf(&p->status,
"Boolean value specified for non-bool field: %s", "Boolean value specified for non-bool field: %s",
@ -781,7 +793,10 @@ static bool parser_putbool(upb_json_parser *p, bool val) {
} }
static bool start_stringval(upb_json_parser *p) { static bool start_stringval(upb_json_parser *p) {
UPB_ASSERT(p->top->f); if (p->top->f == NULL) {
multipart_startaccum(p);
return true;
}
if (upb_fielddef_isstring(p->top->f)) { if (upb_fielddef_isstring(p->top->f)) {
upb_jsonparser_frame *inner; upb_jsonparser_frame *inner;
@ -832,6 +847,11 @@ static bool start_stringval(upb_json_parser *p) {
static bool end_stringval(upb_json_parser *p) { static bool end_stringval(upb_json_parser *p) {
bool ok = true; bool ok = true;
if (p->top->f == NULL) {
multipart_end(p);
return true;
}
switch (upb_fielddef_type(p->top->f)) { switch (upb_fielddef_type(p->top->f)) {
case UPB_TYPE_BYTES: case UPB_TYPE_BYTES:
if (!base64_push(p, getsel_for_handlertype(p, UPB_HANDLER_STRING), if (!base64_push(p, getsel_for_handlertype(p, UPB_HANDLER_STRING),
@ -1023,6 +1043,10 @@ static bool handle_mapentry(upb_json_parser *p) {
static bool end_membername(upb_json_parser *p) { static bool end_membername(upb_json_parser *p) {
UPB_ASSERT(!p->top->f); UPB_ASSERT(!p->top->f);
if (!p->top->m) {
return true;
}
if (p->top->is_map) { if (p->top->is_map) {
return handle_mapentry(p); return handle_mapentry(p);
} else { } else {
@ -1034,10 +1058,11 @@ static bool end_membername(upb_json_parser *p) {
p->top->f = upb_value_getconstptr(v); p->top->f = upb_value_getconstptr(v);
multipart_end(p); multipart_end(p);
return true;
} else if (p->ignore_json_unknown) {
multipart_end(p);
return true; return true;
} else { } else {
/* TODO(haberman): Ignore unknown fields if requested/configured to do
* so. */
upb_status_seterrf(&p->status, "No such field: %.*s\n", (int)len, buf); upb_status_seterrf(&p->status, "No such field: %.*s\n", (int)len, buf);
upb_env_reporterror(p->env, &p->status); upb_env_reporterror(p->env, &p->status);
return false; return false;
@ -1069,7 +1094,18 @@ static void end_member(upb_json_parser *p) {
} }
static bool start_subobject(upb_json_parser *p) { static bool start_subobject(upb_json_parser *p) {
UPB_ASSERT(p->top->f); if (p->top->f == NULL) {
upb_jsonparser_frame *inner;
if (!check_stack(p)) return false;
inner = p->top + 1;
inner->m = NULL;
inner->f = NULL;
inner->is_map = false;
inner->is_mapentry = false;
p->top = inner;
return true;
}
if (upb_fielddef_ismap(p->top->f)) { if (upb_fielddef_ismap(p->top->f)) {
upb_jsonparser_frame *inner; upb_jsonparser_frame *inner;
@ -1128,9 +1164,12 @@ static void end_subobject(upb_json_parser *p) {
upb_sink_endseq(&p->top->sink, sel); upb_sink_endseq(&p->top->sink, sel);
} else { } else {
upb_selector_t sel; upb_selector_t sel;
bool is_unknown = p->top->m == NULL;
p->top--; p->top--;
sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSUBMSG); if (!is_unknown) {
upb_sink_endsubmsg(&p->top->sink, sel); sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSUBMSG);
upb_sink_endsubmsg(&p->top->sink, sel);
}
} }
} }
@ -1457,7 +1496,8 @@ static void add_jsonname_table(upb_json_parsermethod *m, const upb_msgdef* md) {
upb_json_parser *upb_json_parser_create(upb_env *env, upb_json_parser *upb_json_parser_create(upb_env *env,
const upb_json_parsermethod *method, const upb_json_parsermethod *method,
upb_sink *output) { upb_sink *output,
bool ignore_json_unknown) {
#ifndef NDEBUG #ifndef NDEBUG
const size_t size_before = upb_env_bytesallocated(env); const size_t size_before = upb_env_bytesallocated(env);
#endif #endif
@ -1476,6 +1516,8 @@ upb_json_parser *upb_json_parser_create(upb_env *env,
p->top->m = upb_handlers_msgdef(output->handlers); p->top->m = upb_handlers_msgdef(output->handlers);
set_name_table(p, p->top); set_name_table(p, p->top);
p->ignore_json_unknown = ignore_json_unknown;
/* If this fails, uncomment and increase the value in parser.h. */ /* If this fails, uncomment and increase the value in parser.h. */
/* fprintf(stderr, "%zd\n", upb_env_bytesallocated(env) - size_before); */ /* fprintf(stderr, "%zd\n", upb_env_bytesallocated(env) - size_before); */
UPB_ASSERT_DEBUGVAR(upb_env_bytesallocated(env) - size_before <= UPB_ASSERT_DEBUGVAR(upb_env_bytesallocated(env) - size_before <=

Loading…
Cancel
Save