Fixed JSON parser issue: missing NUL byte in parse_number() in some code

paths. Also, fix unused-function warnings.
pull/13171/head
Chris Fallin 10 years ago
parent 17be1ef0f2
commit 099d57346a
  1. 316
      upb/json/parser.c
  2. 13
      upb/json/parser.rl
  3. 12
      upb/json/printer.c

@ -508,11 +508,17 @@ static void start_number(upb_json_parser *p, const char *ptr) {
capture_begin(p, ptr);
}
static bool parse_number(upb_json_parser *p);
static bool end_number(upb_json_parser *p, const char *ptr) {
if (!capture_end(p, ptr)) {
return false;
}
return parse_number(p);
}
static bool parse_number(upb_json_parser *p) {
// strtol() and friends unfortunately do not support specifying the length of
// the input string, so we need to force a copy into a NULL-terminated buffer.
if (!multipart_text(p, "\0", 1, false)) {
@ -522,8 +528,8 @@ static bool end_number(upb_json_parser *p, const char *ptr) {
size_t len;
const char *buf = accumulate_getptr(p, &len);
const char *myend = buf + len - 1; // One for NULL.
char *end;
char *end;
switch (upb_fielddef_type(p->top->f)) {
case UPB_TYPE_ENUM:
case UPB_TYPE_INT32: {
@ -579,6 +585,7 @@ static bool end_number(upb_json_parser *p, const char *ptr) {
}
multipart_end(p);
return true;
err:
@ -597,6 +604,7 @@ static bool parser_putbool(upb_json_parser *p, bool val) {
bool ok = upb_sink_putbool(&p->top->sink, parser_getsel(p), val);
UPB_ASSERT_VAR(ok, ok);
return true;
}
@ -613,6 +621,8 @@ static bool start_stringval(upb_json_parser *p) {
upb_sink_startstr(&p->top->sink, sel, 0, &inner->sink);
inner->m = p->top->m;
inner->f = p->top->f;
inner->is_map = false;
inner->is_mapentry = false;
p->top = inner;
if (upb_fielddef_type(p->top->f) == UPB_TYPE_STRING) {
@ -690,6 +700,7 @@ static bool end_stringval(upb_json_parser *p) {
}
multipart_end(p);
return ok;
}
@ -698,54 +709,217 @@ static void start_member(upb_json_parser *p) {
multipart_startaccum(p);
}
static bool end_member(upb_json_parser *p) {
assert(!p->top->f);
// Helper: invoked during parse_mapentry() to emit the mapentry message's key
// field based on the current contents of the accumulate buffer.
static bool parse_mapentry_key(upb_json_parser *p) {
size_t len;
const char *buf = accumulate_getptr(p, &len);
const upb_fielddef *f = upb_msgdef_ntof(p->top->m, buf, len);
// Emit the key field. We do a bit of ad-hoc parsing here because the
// parser state machine has already decided that this is a string field
// name, and we are reinterpreting it as some arbitrary key type. In
// particular, integer and bool keys are quoted, so we need to parse the
// quoted string contents here.
if (!f) {
// TODO(haberman): Ignore unknown fields if requested/configured to do so.
upb_status_seterrf(p->status, "No such field: %.*s\n", (int)len, buf);
p->top->f = upb_msgdef_itof(p->top->m, UPB_MAPENTRY_KEY);
if (p->top->f == NULL) {
upb_status_seterrmsg(p->status, "mapentry message has no key");
return false;
}
switch (upb_fielddef_type(p->top->f)) {
case UPB_TYPE_INT32:
case UPB_TYPE_INT64:
case UPB_TYPE_UINT32:
case UPB_TYPE_UINT64:
// Invoke end_number. The accum buffer has the number's text already.
if (!parse_number(p)) {
return false;
}
break;
case UPB_TYPE_BOOL:
if (len == 4 && !strncmp(buf, "true", 4)) {
if (!parser_putbool(p, true)) {
return false;
}
} else if (len == 5 && !strncmp(buf, "false", 5)) {
if (!parser_putbool(p, false)) {
return false;
}
} else {
upb_status_seterrmsg(p->status,
"Map bool key not 'true' or 'false'");
return false;
}
multipart_end(p);
break;
case UPB_TYPE_STRING:
case UPB_TYPE_BYTES: {
upb_sink subsink;
upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSTR);
upb_sink_startstr(&p->top->sink, sel, len, &subsink);
sel = getsel_for_handlertype(p, UPB_HANDLER_STRING);
upb_sink_putstring(&subsink, sel, buf, len, NULL);
sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSTR);
upb_sink_endstr(&subsink, sel);
multipart_end(p);
break;
}
default:
upb_status_seterrmsg(p->status, "Invalid field type for map key");
return false;
}
p->top->f = f;
multipart_end(p);
return true;
}
// Helper: emit one map entry (as a submessage in the map field sequence). This
// is invoked from end_membername(), at the end of the map entry's key string,
// with the map key in the accumulate buffer. It parses the key from that
// buffer, emits the handler calls to start the mapentry submessage (setting up
// its subframe in the process), and sets up state in the subframe so that the
// value parser (invoked next) will emit the mapentry's value field and then
// end the mapentry message.
static bool handle_mapentry(upb_json_parser *p) {
// Map entry: p->top->sink is the seq frame, so we need to start a frame
// for the mapentry itself, and then set |f| in that frame so that the map
// value field is parsed, and also set a flag to end the frame after the
// map-entry value is parsed.
if (!check_stack(p)) return false;
const upb_fielddef *mapfield = p->top->mapfield;
const upb_msgdef *mapentrymsg = upb_fielddef_msgsubdef(mapfield);
upb_jsonparser_frame *inner = p->top + 1;
p->top->f = mapfield;
upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSUBMSG);
upb_sink_startsubmsg(&p->top->sink, sel, &inner->sink);
inner->m = mapentrymsg;
inner->mapfield = mapfield;
inner->is_map = false;
// Don't set this to true *yet* -- we reuse parsing handlers below to push
// the key field value to the sink, and these handlers will pop the frame
// if they see is_mapentry (when invoked by the parser state machine, they
// would have just seen the map-entry value, not key).
inner->is_mapentry = false;
p->top = inner;
// send STARTMSG in submsg frame.
upb_sink_startmsg(&p->top->sink);
parse_mapentry_key(p);
// Set up the value field to receive the map-entry value.
p->top->f = upb_msgdef_itof(p->top->m, UPB_MAPENTRY_VALUE);
p->top->is_mapentry = true; // set up to pop frame after value is parsed.
p->top->mapfield = mapfield;
if (p->top->f == NULL) {
upb_status_seterrmsg(p->status, "mapentry message has no value");
return false;
}
return true;
}
static void clear_member(upb_json_parser *p) { p->top->f = NULL; }
static bool end_membername(upb_json_parser *p) {
assert(!p->top->f);
if (p->top->is_map) {
return handle_mapentry(p);
} else {
size_t len;
const char *buf = accumulate_getptr(p, &len);
const upb_fielddef *f = upb_msgdef_ntof(p->top->m, buf, len);
if (!f) {
// TODO(haberman): Ignore unknown fields if requested/configured to do so.
upb_status_seterrf(p->status, "No such field: %.*s\n", (int)len, buf);
return false;
}
p->top->f = f;
multipart_end(p);
return true;
}
}
static void end_member(upb_json_parser *p) {
// If we just parsed a map-entry value, end that frame too.
if (p->top->is_mapentry) {
assert(p->top > p->stack);
// send ENDMSG on submsg.
upb_status s = UPB_STATUS_INIT;
upb_sink_endmsg(&p->top->sink, &s);
const upb_fielddef* mapfield = p->top->mapfield;
// send ENDSUBMSG in repeated-field-of-mapentries frame.
p->top--;
upb_selector_t sel;
bool ok = upb_handlers_getselector(mapfield,
UPB_HANDLER_ENDSUBMSG, &sel);
UPB_ASSERT_VAR(ok, ok);
upb_sink_endsubmsg(&p->top->sink, sel);
}
p->top->f = NULL;
}
static bool start_subobject(upb_json_parser *p) {
assert(p->top->f);
if (!upb_fielddef_issubmsg(p->top->f)) {
if (upb_fielddef_ismap(p->top->f)) {
// Beginning of a map. Start a new parser frame in a repeated-field
// context.
if (!check_stack(p)) return false;
upb_jsonparser_frame *inner = p->top + 1;
upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSEQ);
upb_sink_startseq(&p->top->sink, sel, &inner->sink);
inner->m = upb_fielddef_msgsubdef(p->top->f);
inner->mapfield = p->top->f;
inner->f = NULL;
inner->is_map = true;
inner->is_mapentry = false;
p->top = inner;
return true;
} else if (upb_fielddef_issubmsg(p->top->f)) {
// Beginning of a subobject. Start a new parser frame in the submsg
// context.
if (!check_stack(p)) return false;
upb_jsonparser_frame *inner = p->top + 1;
upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSUBMSG);
upb_sink_startsubmsg(&p->top->sink, sel, &inner->sink);
inner->m = upb_fielddef_msgsubdef(p->top->f);
inner->f = NULL;
inner->is_map = false;
inner->is_mapentry = false;
p->top = inner;
return true;
} else {
upb_status_seterrf(p->status,
"Object specified for non-message/group field: %s",
upb_fielddef_name(p->top->f));
return false;
}
if (!check_stack(p)) return false;
upb_jsonparser_frame *inner = p->top + 1;
upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSUBMSG);
upb_sink_startsubmsg(&p->top->sink, sel, &inner->sink);
inner->m = upb_fielddef_msgsubdef(p->top->f);
inner->f = NULL;
p->top = inner;
return true;
}
static void end_subobject(upb_json_parser *p) {
p->top--;
upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSUBMSG);
upb_sink_endsubmsg(&p->top->sink, sel);
if (p->top->is_map) {
p->top--;
upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSEQ);
upb_sink_endseq(&p->top->sink, sel);
} else {
p->top--;
upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSUBMSG);
upb_sink_endsubmsg(&p->top->sink, sel);
}
}
static bool start_array(upb_json_parser *p) {
@ -765,6 +939,8 @@ static bool start_array(upb_json_parser *p) {
upb_sink_startseq(&p->top->sink, sel, &inner->sink);
inner->m = p->top->m;
inner->f = p->top->f;
inner->is_map = false;
inner->is_mapentry = false;
p->top = inner;
return true;
@ -779,12 +955,16 @@ static void end_array(upb_json_parser *p) {
}
static void start_object(upb_json_parser *p) {
upb_sink_startmsg(&p->top->sink);
if (!p->top->is_map) {
upb_sink_startmsg(&p->top->sink);
}
}
static void end_object(upb_json_parser *p) {
upb_status status;
upb_sink_endmsg(&p->top->sink, &status);
if (!p->top->is_map) {
upb_status status;
upb_sink_endmsg(&p->top->sink, &status);
}
}
@ -809,11 +989,11 @@ static void end_object(upb_json_parser *p) {
// final state once, when the closing '"' is seen.
#line 905 "upb/json/parser.rl"
#line 1085 "upb/json/parser.rl"
#line 817 "upb/json/parser.c"
#line 997 "upb/json/parser.c"
static const char _json_actions[] = {
0, 1, 0, 1, 2, 1, 3, 1,
5, 1, 6, 1, 7, 1, 8, 1,
@ -964,7 +1144,7 @@ static const int json_en_value_machine = 27;
static const int json_en_main = 1;
#line 908 "upb/json/parser.rl"
#line 1088 "upb/json/parser.rl"
size_t parse(void *closure, const void *hd, const char *buf, size_t size,
const upb_bufhandle *handle) {
@ -984,7 +1164,7 @@ size_t parse(void *closure, const void *hd, const char *buf, size_t size,
capture_resume(parser, buf);
#line 988 "upb/json/parser.c"
#line 1168 "upb/json/parser.c"
{
int _klen;
unsigned int _trans;
@ -1059,118 +1239,118 @@ _match:
switch ( *_acts++ )
{
case 0:
#line 820 "upb/json/parser.rl"
#line 1000 "upb/json/parser.rl"
{ p--; {cs = stack[--top]; goto _again;} }
break;
case 1:
#line 821 "upb/json/parser.rl"
#line 1001 "upb/json/parser.rl"
{ p--; {stack[top++] = cs; cs = 10; goto _again;} }
break;
case 2:
#line 825 "upb/json/parser.rl"
#line 1005 "upb/json/parser.rl"
{ start_text(parser, p); }
break;
case 3:
#line 826 "upb/json/parser.rl"
#line 1006 "upb/json/parser.rl"
{ CHECK_RETURN_TOP(end_text(parser, p)); }
break;
case 4:
#line 832 "upb/json/parser.rl"
#line 1012 "upb/json/parser.rl"
{ start_hex(parser); }
break;
case 5:
#line 833 "upb/json/parser.rl"
#line 1013 "upb/json/parser.rl"
{ hexdigit(parser, p); }
break;
case 6:
#line 834 "upb/json/parser.rl"
#line 1014 "upb/json/parser.rl"
{ CHECK_RETURN_TOP(end_hex(parser)); }
break;
case 7:
#line 840 "upb/json/parser.rl"
#line 1020 "upb/json/parser.rl"
{ CHECK_RETURN_TOP(escape(parser, p)); }
break;
case 8:
#line 846 "upb/json/parser.rl"
#line 1026 "upb/json/parser.rl"
{ p--; {cs = stack[--top]; goto _again;} }
break;
case 9:
#line 849 "upb/json/parser.rl"
#line 1029 "upb/json/parser.rl"
{ {stack[top++] = cs; cs = 19; goto _again;} }
break;
case 10:
#line 851 "upb/json/parser.rl"
#line 1031 "upb/json/parser.rl"
{ p--; {stack[top++] = cs; cs = 27; goto _again;} }
break;
case 11:
#line 856 "upb/json/parser.rl"
#line 1036 "upb/json/parser.rl"
{ start_member(parser); }
break;
case 12:
#line 857 "upb/json/parser.rl"
{ CHECK_RETURN_TOP(end_member(parser)); }
#line 1037 "upb/json/parser.rl"
{ CHECK_RETURN_TOP(end_membername(parser)); }
break;
case 13:
#line 860 "upb/json/parser.rl"
{ clear_member(parser); }
#line 1040 "upb/json/parser.rl"
{ end_member(parser); }
break;
case 14:
#line 866 "upb/json/parser.rl"
#line 1046 "upb/json/parser.rl"
{ start_object(parser); }
break;
case 15:
#line 869 "upb/json/parser.rl"
#line 1049 "upb/json/parser.rl"
{ end_object(parser); }
break;
case 16:
#line 875 "upb/json/parser.rl"
#line 1055 "upb/json/parser.rl"
{ CHECK_RETURN_TOP(start_array(parser)); }
break;
case 17:
#line 879 "upb/json/parser.rl"
#line 1059 "upb/json/parser.rl"
{ end_array(parser); }
break;
case 18:
#line 884 "upb/json/parser.rl"
#line 1064 "upb/json/parser.rl"
{ start_number(parser, p); }
break;
case 19:
#line 885 "upb/json/parser.rl"
#line 1065 "upb/json/parser.rl"
{ CHECK_RETURN_TOP(end_number(parser, p)); }
break;
case 20:
#line 887 "upb/json/parser.rl"
#line 1067 "upb/json/parser.rl"
{ CHECK_RETURN_TOP(start_stringval(parser)); }
break;
case 21:
#line 888 "upb/json/parser.rl"
#line 1068 "upb/json/parser.rl"
{ CHECK_RETURN_TOP(end_stringval(parser)); }
break;
case 22:
#line 890 "upb/json/parser.rl"
#line 1070 "upb/json/parser.rl"
{ CHECK_RETURN_TOP(parser_putbool(parser, true)); }
break;
case 23:
#line 892 "upb/json/parser.rl"
#line 1072 "upb/json/parser.rl"
{ CHECK_RETURN_TOP(parser_putbool(parser, false)); }
break;
case 24:
#line 894 "upb/json/parser.rl"
#line 1074 "upb/json/parser.rl"
{ /* null value */ }
break;
case 25:
#line 896 "upb/json/parser.rl"
#line 1076 "upb/json/parser.rl"
{ CHECK_RETURN_TOP(start_subobject(parser)); }
break;
case 26:
#line 897 "upb/json/parser.rl"
#line 1077 "upb/json/parser.rl"
{ end_subobject(parser); }
break;
case 27:
#line 902 "upb/json/parser.rl"
#line 1082 "upb/json/parser.rl"
{ p--; {cs = stack[--top]; goto _again;} }
break;
#line 1174 "upb/json/parser.c"
#line 1354 "upb/json/parser.c"
}
}
@ -1183,7 +1363,7 @@ _again:
_out: {}
}
#line 927 "upb/json/parser.rl"
#line 1107 "upb/json/parser.rl"
if (p != pe) {
upb_status_seterrf(parser->status, "Parse error at %s\n", p);
@ -1227,18 +1407,20 @@ void upb_json_parser_uninit(upb_json_parser *p) {
void upb_json_parser_reset(upb_json_parser *p) {
p->top = p->stack;
p->top->f = NULL;
p->top->is_map = false;
p->top->is_mapentry = false;
int cs;
int top;
// Emit Ragel initialization of the parser.
#line 1236 "upb/json/parser.c"
#line 1418 "upb/json/parser.c"
{
cs = json_start;
top = 0;
}
#line 975 "upb/json/parser.rl"
#line 1157 "upb/json/parser.rl"
p->current_state = cs;
p->parser_top = top;
accumulate_clear(p);

@ -221,6 +221,7 @@ badpadding:
// the true value in a contiguous buffer.
static void assert_accumulate_empty(upb_json_parser *p) {
UPB_UNUSED(p);
assert(p->accumulated == NULL);
assert(p->accumulated_len == 0);
}
@ -505,13 +506,17 @@ static void start_number(upb_json_parser *p, const char *ptr) {
capture_begin(p, ptr);
}
static bool parse_number(upb_json_parser *p, const char *buf, const char *end);
static bool parse_number(upb_json_parser *p);
static bool end_number(upb_json_parser *p, const char *ptr) {
if (!capture_end(p, ptr)) {
return false;
}
return parse_number(p);
}
static bool parse_number(upb_json_parser *p) {
// strtol() and friends unfortunately do not support specifying the length of
// the input string, so we need to force a copy into a NULL-terminated buffer.
if (!multipart_text(p, "\0", 1, false)) {
@ -521,11 +526,7 @@ static bool end_number(upb_json_parser *p, const char *ptr) {
size_t len;
const char *buf = accumulate_getptr(p, &len);
const char *myend = buf + len - 1; // One for NULL.
return parse_number(p, buf, myend);
}
static bool parse_number(upb_json_parser *p, const char *buf,
const char *myend) {
char *end;
switch (upb_fielddef_type(p->top->f)) {
case UPB_TYPE_ENUM:
@ -730,7 +731,7 @@ static bool parse_mapentry_key(upb_json_parser *p) {
case UPB_TYPE_UINT32:
case UPB_TYPE_UINT64:
// Invoke end_number. The accum buffer has the number's text already.
if (!parse_number(p, buf, buf + len)) {
if (!parse_number(p)) {
return false;
}
break;

@ -187,7 +187,9 @@ static bool putkey(void *closure, const void *handler_data) {
print_comma(p); \
CHK(put##type(closure, handler_data, val)); \
return true; \
} \
}
#define TYPE_HANDLERS_MAPKEY(type, fmt_func) \
static bool putmapkey_##type(void *closure, const void *handler_data, \
type val) { \
upb_json_printer *p = closure; \
@ -205,7 +207,15 @@ TYPE_HANDLERS(uint32_t, fmt_int64);
TYPE_HANDLERS(int64_t, fmt_int64);
TYPE_HANDLERS(uint64_t, fmt_uint64);
// double and float are not allowed to be map keys.
TYPE_HANDLERS_MAPKEY(bool, fmt_bool);
TYPE_HANDLERS_MAPKEY(int32_t, fmt_int64);
TYPE_HANDLERS_MAPKEY(uint32_t, fmt_int64);
TYPE_HANDLERS_MAPKEY(int64_t, fmt_int64);
TYPE_HANDLERS_MAPKEY(uint64_t, fmt_uint64);
#undef TYPE_HANDLERS
#undef TYPE_HANDLERS_MAPKEY
typedef struct {
void *keyname;

Loading…
Cancel
Save