Fix json ignore unknown (#128)

* Fix json ignore unknown

Previously, there were several problems with ignoring unknown in json.
1) After finding a field is unknown, the parser's state is not changed. Thus, there is no way to distinguish whether the parser is dealing with an unknown field or it's just a top level message.
2) Several method didn't respect unknown field, e.g., start_object, end_bool, start_array.

* Update json parser size

* Update json parser size
pull/13171/head
Paul Yang 6 years ago committed by GitHub
parent b258ae3235
commit 3c742bfdc8
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 156
      upb/json/parser.c
  2. 2
      upb/json/parser.h
  3. 44
      upb/json/parser.rl

@ -113,6 +113,9 @@ typedef struct {
* because |f| is the field in the *current* message (i.e., the map-entry
* message itself), not the parent's field that leads to this map. */
const upb_fielddef *mapfield;
/* True if the field to be parsed is unknown. */
bool is_unknown_field;
} upb_jsonparser_frame;
struct upb_json_parser {
@ -918,6 +921,10 @@ static bool end_bool(upb_json_parser *p, bool val) {
start_value_object(p, VALUE_BOOLVALUE);
}
if (p->top->is_unknown_field) {
return true;
}
if (!parser_putbool(p, val)) {
return false;
}
@ -1024,6 +1031,7 @@ static bool start_stringval(upb_json_parser *p) {
inner->name_table = NULL;
inner->is_map = false;
inner->is_mapentry = false;
inner->is_unknown_field = false;
p->top = inner;
if (upb_fielddef_type(p->top->f) == UPB_TYPE_STRING) {
@ -1515,6 +1523,7 @@ static bool handle_mapentry(upb_json_parser *p) {
inner->name_table = NULL;
inner->mapfield = mapfield;
inner->is_map = false;
inner->is_unknown_field = false;
/* Don't set this to true *yet* -- we reuse parsing handlers below to push
* the key field value to the sink, and these handlers will pop the frame
@ -1545,6 +1554,8 @@ static bool end_membername(upb_json_parser *p) {
UPB_ASSERT(!p->top->f);
if (!p->top->m) {
p->top->is_unknown_field = true;
multipart_end(p);
return true;
}
@ -1561,6 +1572,7 @@ static bool end_membername(upb_json_parser *p) {
return true;
} else if (p->ignore_json_unknown) {
p->top->is_unknown_field = true;
multipart_end(p);
return true;
} else {
@ -1592,10 +1604,11 @@ static void end_member(upb_json_parser *p) {
}
p->top->f = NULL;
p->top->is_unknown_field = false;
}
static bool start_subobject(upb_json_parser *p) {
if (p->top->f == NULL) {
if (p->top->is_unknown_field) {
upb_jsonparser_frame *inner;
if (!check_stack(p)) return false;
@ -1604,6 +1617,7 @@ static bool start_subobject(upb_json_parser *p) {
inner->f = NULL;
inner->is_map = false;
inner->is_mapentry = false;
inner->is_unknown_field = false;
p->top = inner;
return true;
}
@ -1625,6 +1639,7 @@ static bool start_subobject(upb_json_parser *p) {
inner->f = NULL;
inner->is_map = true;
inner->is_mapentry = false;
inner->is_unknown_field = false;
p->top = inner;
return true;
@ -1645,6 +1660,7 @@ static bool start_subobject(upb_json_parser *p) {
inner->f = NULL;
inner->is_map = false;
inner->is_mapentry = false;
inner->is_unknown_field = false;
p->top = inner;
return true;
@ -1744,7 +1760,18 @@ static bool start_array(upb_json_parser *p) {
start_listvalue_object(p);
}
UPB_ASSERT(p->top->f);
if (p->top->is_unknown_field) {
inner = p->top + 1;
inner->m = NULL;
inner->name_table = NULL;
inner->f = NULL;
inner->is_map = false;
inner->is_mapentry = false;
inner->is_unknown_field = true;
p->top = inner;
return true;
}
if (!upb_fielddef_isseq(p->top->f)) {
upb_status_seterrf(&p->status,
@ -1764,6 +1791,7 @@ static bool start_array(upb_json_parser *p) {
inner->f = p->top->f;
inner->is_map = false;
inner->is_mapentry = false;
inner->is_unknown_field = false;
p->top = inner;
return true;
@ -1775,6 +1803,11 @@ static void end_array(upb_json_parser *p) {
UPB_ASSERT(p->top > p->stack);
p->top--;
if (p->top->is_unknown_field) {
return;
}
sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSEQ);
upb_sink_endseq(&p->top->sink, sel);
@ -1794,13 +1827,13 @@ static void end_array(upb_json_parser *p) {
}
static void start_object(upb_json_parser *p) {
if (!p->top->is_map) {
if (!p->top->is_map && p->top->m != NULL) {
upb_sink_startmsg(&p->top->sink);
}
}
static void end_object(upb_json_parser *p) {
if (!p->top->is_map) {
if (!p->top->is_map && p->top->m != NULL) {
upb_status status;
upb_status_clear(&status);
upb_sink_endmsg(&p->top->sink, &status);
@ -1912,7 +1945,7 @@ static void end_structvalue_object(upb_json_parser *p) {
}
static bool is_top_level(upb_json_parser *p) {
return p->top == p->stack && p->top->f == NULL;
return p->top == p->stack && p->top->f == NULL && !p->top->is_unknown_field;
}
static bool is_wellknown_msg(upb_json_parser *p, upb_wellknowntype_t type) {
@ -1975,11 +2008,11 @@ static bool is_string_wrapper_object(upb_json_parser *p) {
* final state once, when the closing '"' is seen. */
#line 2114 "upb/json/parser.rl"
#line 2147 "upb/json/parser.rl"
#line 1983 "upb/json/parser.c"
#line 2016 "upb/json/parser.c"
static const char _json_actions[] = {
0, 1, 0, 1, 1, 1, 3, 1,
4, 1, 6, 1, 7, 1, 8, 1,
@ -2230,7 +2263,7 @@ static const int json_en_value_machine = 76;
static const int json_en_main = 1;
#line 2117 "upb/json/parser.rl"
#line 2150 "upb/json/parser.rl"
size_t parse(void *closure, const void *hd, const char *buf, size_t size,
const upb_bufhandle *handle) {
@ -2253,7 +2286,7 @@ size_t parse(void *closure, const void *hd, const char *buf, size_t size,
capture_resume(parser, buf);
#line 2257 "upb/json/parser.c"
#line 2290 "upb/json/parser.c"
{
int _klen;
unsigned int _trans;
@ -2328,83 +2361,83 @@ _match:
switch ( *_acts++ )
{
case 1:
#line 1988 "upb/json/parser.rl"
#line 2021 "upb/json/parser.rl"
{ p--; {cs = stack[--top]; goto _again;} }
break;
case 2:
#line 1990 "upb/json/parser.rl"
#line 2023 "upb/json/parser.rl"
{ p--; {stack[top++] = cs; cs = 24; goto _again;} }
break;
case 3:
#line 1994 "upb/json/parser.rl"
#line 2027 "upb/json/parser.rl"
{ start_text(parser, p); }
break;
case 4:
#line 1995 "upb/json/parser.rl"
#line 2028 "upb/json/parser.rl"
{ CHECK_RETURN_TOP(end_text(parser, p)); }
break;
case 5:
#line 2001 "upb/json/parser.rl"
#line 2034 "upb/json/parser.rl"
{ start_hex(parser); }
break;
case 6:
#line 2002 "upb/json/parser.rl"
#line 2035 "upb/json/parser.rl"
{ hexdigit(parser, p); }
break;
case 7:
#line 2003 "upb/json/parser.rl"
#line 2036 "upb/json/parser.rl"
{ CHECK_RETURN_TOP(end_hex(parser)); }
break;
case 8:
#line 2009 "upb/json/parser.rl"
#line 2042 "upb/json/parser.rl"
{ CHECK_RETURN_TOP(escape(parser, p)); }
break;
case 9:
#line 2015 "upb/json/parser.rl"
#line 2048 "upb/json/parser.rl"
{ p--; {cs = stack[--top]; goto _again;} }
break;
case 10:
#line 2027 "upb/json/parser.rl"
#line 2060 "upb/json/parser.rl"
{ start_duration_base(parser, p); }
break;
case 11:
#line 2028 "upb/json/parser.rl"
#line 2061 "upb/json/parser.rl"
{ CHECK_RETURN_TOP(end_duration_base(parser, p)); }
break;
case 12:
#line 2030 "upb/json/parser.rl"
#line 2063 "upb/json/parser.rl"
{ p--; {cs = stack[--top]; goto _again;} }
break;
case 13:
#line 2035 "upb/json/parser.rl"
#line 2068 "upb/json/parser.rl"
{ start_timestamp_base(parser, p); }
break;
case 14:
#line 2036 "upb/json/parser.rl"
#line 2069 "upb/json/parser.rl"
{ CHECK_RETURN_TOP(end_timestamp_base(parser, p)); }
break;
case 15:
#line 2038 "upb/json/parser.rl"
#line 2071 "upb/json/parser.rl"
{ start_timestamp_fraction(parser, p); }
break;
case 16:
#line 2039 "upb/json/parser.rl"
#line 2072 "upb/json/parser.rl"
{ CHECK_RETURN_TOP(end_timestamp_fraction(parser, p)); }
break;
case 17:
#line 2041 "upb/json/parser.rl"
#line 2074 "upb/json/parser.rl"
{ start_timestamp_zone(parser, p); }
break;
case 18:
#line 2042 "upb/json/parser.rl"
#line 2075 "upb/json/parser.rl"
{ CHECK_RETURN_TOP(end_timestamp_zone(parser, p)); }
break;
case 19:
#line 2044 "upb/json/parser.rl"
#line 2077 "upb/json/parser.rl"
{ p--; {cs = stack[--top]; goto _again;} }
break;
case 20:
#line 2049 "upb/json/parser.rl"
#line 2082 "upb/json/parser.rl"
{
if (is_wellknown_msg(parser, UPB_WELLKNOWN_TIMESTAMP)) {
{stack[top++] = cs; cs = 48; goto _again;}
@ -2416,78 +2449,78 @@ _match:
}
break;
case 21:
#line 2060 "upb/json/parser.rl"
#line 2093 "upb/json/parser.rl"
{ p--; {stack[top++] = cs; cs = 76; goto _again;} }
break;
case 22:
#line 2065 "upb/json/parser.rl"
#line 2098 "upb/json/parser.rl"
{ start_member(parser); }
break;
case 23:
#line 2066 "upb/json/parser.rl"
#line 2099 "upb/json/parser.rl"
{ CHECK_RETURN_TOP(end_membername(parser)); }
break;
case 24:
#line 2069 "upb/json/parser.rl"
#line 2102 "upb/json/parser.rl"
{ end_member(parser); }
break;
case 25:
#line 2075 "upb/json/parser.rl"
#line 2108 "upb/json/parser.rl"
{ start_object(parser); }
break;
case 26:
#line 2078 "upb/json/parser.rl"
#line 2111 "upb/json/parser.rl"
{ end_object(parser); }
break;
case 27:
#line 2084 "upb/json/parser.rl"
#line 2117 "upb/json/parser.rl"
{ CHECK_RETURN_TOP(start_array(parser)); }
break;
case 28:
#line 2088 "upb/json/parser.rl"
#line 2121 "upb/json/parser.rl"
{ end_array(parser); }
break;
case 29:
#line 2093 "upb/json/parser.rl"
#line 2126 "upb/json/parser.rl"
{ CHECK_RETURN_TOP(start_number(parser, p)); }
break;
case 30:
#line 2094 "upb/json/parser.rl"
#line 2127 "upb/json/parser.rl"
{ CHECK_RETURN_TOP(end_number(parser, p)); }
break;
case 31:
#line 2096 "upb/json/parser.rl"
#line 2129 "upb/json/parser.rl"
{ CHECK_RETURN_TOP(start_stringval(parser)); }
break;
case 32:
#line 2097 "upb/json/parser.rl"
#line 2130 "upb/json/parser.rl"
{ CHECK_RETURN_TOP(end_stringval(parser)); }
break;
case 33:
#line 2099 "upb/json/parser.rl"
#line 2132 "upb/json/parser.rl"
{ CHECK_RETURN_TOP(end_bool(parser, true)); }
break;
case 34:
#line 2101 "upb/json/parser.rl"
#line 2134 "upb/json/parser.rl"
{ CHECK_RETURN_TOP(end_bool(parser, false)); }
break;
case 35:
#line 2103 "upb/json/parser.rl"
#line 2136 "upb/json/parser.rl"
{ CHECK_RETURN_TOP(end_null(parser)); }
break;
case 36:
#line 2105 "upb/json/parser.rl"
#line 2138 "upb/json/parser.rl"
{ CHECK_RETURN_TOP(start_subobject_full(parser)); }
break;
case 37:
#line 2106 "upb/json/parser.rl"
#line 2139 "upb/json/parser.rl"
{ end_subobject_full(parser); }
break;
case 38:
#line 2111 "upb/json/parser.rl"
#line 2144 "upb/json/parser.rl"
{ p--; {cs = stack[--top]; goto _again;} }
break;
#line 2491 "upb/json/parser.c"
#line 2524 "upb/json/parser.c"
}
}
@ -2504,34 +2537,34 @@ _again:
while ( __nacts-- > 0 ) {
switch ( *__acts++ ) {
case 0:
#line 1986 "upb/json/parser.rl"
#line 2019 "upb/json/parser.rl"
{ p--; {cs = stack[--top]; goto _again;} }
break;
case 26:
#line 2078 "upb/json/parser.rl"
#line 2111 "upb/json/parser.rl"
{ end_object(parser); }
break;
case 30:
#line 2094 "upb/json/parser.rl"
#line 2127 "upb/json/parser.rl"
{ CHECK_RETURN_TOP(end_number(parser, p)); }
break;
case 33:
#line 2099 "upb/json/parser.rl"
#line 2132 "upb/json/parser.rl"
{ CHECK_RETURN_TOP(end_bool(parser, true)); }
break;
case 34:
#line 2101 "upb/json/parser.rl"
#line 2134 "upb/json/parser.rl"
{ CHECK_RETURN_TOP(end_bool(parser, false)); }
break;
case 35:
#line 2103 "upb/json/parser.rl"
#line 2136 "upb/json/parser.rl"
{ CHECK_RETURN_TOP(end_null(parser)); }
break;
case 37:
#line 2106 "upb/json/parser.rl"
#line 2139 "upb/json/parser.rl"
{ end_subobject_full(parser); }
break;
#line 2535 "upb/json/parser.c"
#line 2568 "upb/json/parser.c"
}
}
}
@ -2539,7 +2572,7 @@ _again:
_out: {}
}
#line 2139 "upb/json/parser.rl"
#line 2172 "upb/json/parser.rl"
if (p != pe) {
upb_status_seterrf(&parser->status, "Parse error at '%.*s'\n", pe - p, p);
@ -2571,9 +2604,9 @@ bool end(void *closure, const void *hd) {
parse(parser, hd, &eof_ch, 0, NULL);
return parser->current_state >=
#line 2575 "upb/json/parser.c"
#line 2608 "upb/json/parser.c"
105
#line 2169 "upb/json/parser.rl"
#line 2202 "upb/json/parser.rl"
;
}
@ -2585,16 +2618,17 @@ static void json_parser_reset(upb_json_parser *p) {
p->top->f = NULL;
p->top->is_map = false;
p->top->is_mapentry = false;
p->top->is_unknown_field = false;
/* Emit Ragel initialization of the parser. */
#line 2592 "upb/json/parser.c"
#line 2626 "upb/json/parser.c"
{
cs = json_start;
top = 0;
}
#line 2183 "upb/json/parser.rl"
#line 2217 "upb/json/parser.rl"
p->current_state = cs;
p->parser_top = top;
accumulate_clear(p);

@ -29,7 +29,7 @@ UPB_DECLARE_DERIVED_TYPE(upb::json::ParserMethod, upb::RefCounted,
* constructed. This hint may be an overestimate for some build configurations.
* But if the parser library is upgraded without recompiling the application,
* it may be an underestimate. */
#define UPB_JSON_PARSER_SIZE 4160
#define UPB_JSON_PARSER_SIZE 4672
#ifdef __cplusplus

@ -111,6 +111,9 @@ typedef struct {
* because |f| is the field in the *current* message (i.e., the map-entry
* message itself), not the parent's field that leads to this map. */
const upb_fielddef *mapfield;
/* True if the field to be parsed is unknown. */
bool is_unknown_field;
} upb_jsonparser_frame;
struct upb_json_parser {
@ -916,6 +919,10 @@ static bool end_bool(upb_json_parser *p, bool val) {
start_value_object(p, VALUE_BOOLVALUE);
}
if (p->top->is_unknown_field) {
return true;
}
if (!parser_putbool(p, val)) {
return false;
}
@ -1022,6 +1029,7 @@ static bool start_stringval(upb_json_parser *p) {
inner->name_table = NULL;
inner->is_map = false;
inner->is_mapentry = false;
inner->is_unknown_field = false;
p->top = inner;
if (upb_fielddef_type(p->top->f) == UPB_TYPE_STRING) {
@ -1513,6 +1521,7 @@ static bool handle_mapentry(upb_json_parser *p) {
inner->name_table = NULL;
inner->mapfield = mapfield;
inner->is_map = false;
inner->is_unknown_field = false;
/* Don't set this to true *yet* -- we reuse parsing handlers below to push
* the key field value to the sink, and these handlers will pop the frame
@ -1543,6 +1552,8 @@ static bool end_membername(upb_json_parser *p) {
UPB_ASSERT(!p->top->f);
if (!p->top->m) {
p->top->is_unknown_field = true;
multipart_end(p);
return true;
}
@ -1559,6 +1570,7 @@ static bool end_membername(upb_json_parser *p) {
return true;
} else if (p->ignore_json_unknown) {
p->top->is_unknown_field = true;
multipart_end(p);
return true;
} else {
@ -1590,10 +1602,11 @@ static void end_member(upb_json_parser *p) {
}
p->top->f = NULL;
p->top->is_unknown_field = false;
}
static bool start_subobject(upb_json_parser *p) {
if (p->top->f == NULL) {
if (p->top->is_unknown_field) {
upb_jsonparser_frame *inner;
if (!check_stack(p)) return false;
@ -1602,6 +1615,7 @@ static bool start_subobject(upb_json_parser *p) {
inner->f = NULL;
inner->is_map = false;
inner->is_mapentry = false;
inner->is_unknown_field = false;
p->top = inner;
return true;
}
@ -1623,6 +1637,7 @@ static bool start_subobject(upb_json_parser *p) {
inner->f = NULL;
inner->is_map = true;
inner->is_mapentry = false;
inner->is_unknown_field = false;
p->top = inner;
return true;
@ -1643,6 +1658,7 @@ static bool start_subobject(upb_json_parser *p) {
inner->f = NULL;
inner->is_map = false;
inner->is_mapentry = false;
inner->is_unknown_field = false;
p->top = inner;
return true;
@ -1742,7 +1758,18 @@ static bool start_array(upb_json_parser *p) {
start_listvalue_object(p);
}
UPB_ASSERT(p->top->f);
if (p->top->is_unknown_field) {
inner = p->top + 1;
inner->m = NULL;
inner->name_table = NULL;
inner->f = NULL;
inner->is_map = false;
inner->is_mapentry = false;
inner->is_unknown_field = true;
p->top = inner;
return true;
}
if (!upb_fielddef_isseq(p->top->f)) {
upb_status_seterrf(&p->status,
@ -1762,6 +1789,7 @@ static bool start_array(upb_json_parser *p) {
inner->f = p->top->f;
inner->is_map = false;
inner->is_mapentry = false;
inner->is_unknown_field = false;
p->top = inner;
return true;
@ -1773,6 +1801,11 @@ static void end_array(upb_json_parser *p) {
UPB_ASSERT(p->top > p->stack);
p->top--;
if (p->top->is_unknown_field) {
return;
}
sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSEQ);
upb_sink_endseq(&p->top->sink, sel);
@ -1792,13 +1825,13 @@ static void end_array(upb_json_parser *p) {
}
static void start_object(upb_json_parser *p) {
if (!p->top->is_map) {
if (!p->top->is_map && p->top->m != NULL) {
upb_sink_startmsg(&p->top->sink);
}
}
static void end_object(upb_json_parser *p) {
if (!p->top->is_map) {
if (!p->top->is_map && p->top->m != NULL) {
upb_status status;
upb_status_clear(&status);
upb_sink_endmsg(&p->top->sink, &status);
@ -1910,7 +1943,7 @@ static void end_structvalue_object(upb_json_parser *p) {
}
static bool is_top_level(upb_json_parser *p) {
return p->top == p->stack && p->top->f == NULL;
return p->top == p->stack && p->top->f == NULL && !p->top->is_unknown_field;
}
static bool is_wellknown_msg(upb_json_parser *p, upb_wellknowntype_t type) {
@ -2177,6 +2210,7 @@ static void json_parser_reset(upb_json_parser *p) {
p->top->f = NULL;
p->top->is_map = false;
p->top->is_mapentry = false;
p->top->is_unknown_field = false;
/* Emit Ragel initialization of the parser. */
%% write init;

Loading…
Cancel
Save