Merge pull request #46 from haberman/jsoncamel

Changed JSON parser/printer to correctly camelCase names.
pull/13171/head
Joshua Haberman 9 years ago
commit 32236c9cbc
  1. 56
      tests/json/test_json.cc
  2. 40
      upb/def.c
  3. 46
      upb/def.h
  4. BIN
      upb/descriptor/descriptor.pb
  5. 214
      upb/json/parser.c
  6. 61
      upb/json/parser.h
  7. 144
      upb/json/parser.rl
  8. 21
      upb/json/printer.c

@ -30,83 +30,83 @@ bool verbose = false;
static TestCase kTestRoundtripMessages[] = {
// Test most fields here.
{
TEST("{\"optional_int32\":-42,\"optional_string\":\"Test\\u0001Message\","
"\"optional_msg\":{\"foo\":42},"
"\"optional_bool\":true,\"repeated_msg\":[{\"foo\":1},"
TEST("{\"optionalInt32\":-42,\"optionalString\":\"Test\\u0001Message\","
"\"optionalMsg\":{\"foo\":42},"
"\"optionalBool\":true,\"repeatedMsg\":[{\"foo\":1},"
"{\"foo\":2}]}"),
EXPECT_SAME
},
// Test special escapes in strings.
{
TEST("{\"repeated_string\":[\"\\b\",\"\\r\",\"\\n\",\"\\f\",\"\\t\","
TEST("{\"repeatedString\":[\"\\b\",\"\\r\",\"\\n\",\"\\f\",\"\\t\","
"\"\uFFFF\"]}"),
EXPECT_SAME
},
// Test enum symbolic names.
{
// The common case: parse and print the symbolic name.
TEST("{\"optional_enum\":\"A\"}"),
TEST("{\"optionalEnum\":\"A\"}"),
EXPECT_SAME
},
{
// Unknown enum value: will be printed as an integer.
TEST("{\"optional_enum\":42}"),
TEST("{\"optionalEnum\":42}"),
EXPECT_SAME
},
{
// Known enum value: we're happy to parse an integer but we will re-emit the
// symbolic name.
TEST("{\"optional_enum\":1}"),
EXPECT("{\"optional_enum\":\"B\"}")
TEST("{\"optionalEnum\":1}"),
EXPECT("{\"optionalEnum\":\"B\"}")
},
// UTF-8 tests: escapes -> literal UTF8 in output.
{
// Note double escape on \uXXXX: we want the escape to be processed by the
// JSON parser, not by the C++ compiler!
TEST("{\"optional_string\":\"\\u007F\"}"),
EXPECT("{\"optional_string\":\"\x7F\"}")
TEST("{\"optionalString\":\"\\u007F\"}"),
EXPECT("{\"optionalString\":\"\x7F\"}")
},
{
TEST("{\"optional_string\":\"\\u0080\"}"),
EXPECT("{\"optional_string\":\"\xC2\x80\"}")
TEST("{\"optionalString\":\"\\u0080\"}"),
EXPECT("{\"optionalString\":\"\xC2\x80\"}")
},
{
TEST("{\"optional_string\":\"\\u07FF\"}"),
EXPECT("{\"optional_string\":\"\xDF\xBF\"}")
TEST("{\"optionalString\":\"\\u07FF\"}"),
EXPECT("{\"optionalString\":\"\xDF\xBF\"}")
},
{
TEST("{\"optional_string\":\"\\u0800\"}"),
EXPECT("{\"optional_string\":\"\xE0\xA0\x80\"}")
TEST("{\"optionalString\":\"\\u0800\"}"),
EXPECT("{\"optionalString\":\"\xE0\xA0\x80\"}")
},
{
TEST("{\"optional_string\":\"\\uFFFF\"}"),
EXPECT("{\"optional_string\":\"\xEF\xBF\xBF\"}")
TEST("{\"optionalString\":\"\\uFFFF\"}"),
EXPECT("{\"optionalString\":\"\xEF\xBF\xBF\"}")
},
// map-field tests
{
TEST("{\"map_string_string\":{\"a\":\"value1\",\"b\":\"value2\","
TEST("{\"mapStringString\":{\"a\":\"value1\",\"b\":\"value2\","
"\"c\":\"value3\"}}"),
EXPECT_SAME
},
{
TEST("{\"map_int32_string\":{\"1\":\"value1\",\"-1\":\"value2\","
TEST("{\"mapInt32String\":{\"1\":\"value1\",\"-1\":\"value2\","
"\"1234\":\"value3\"}}"),
EXPECT_SAME
},
{
TEST("{\"map_bool_string\":{\"false\":\"value1\",\"true\":\"value2\"}}"),
TEST("{\"mapBoolString\":{\"false\":\"value1\",\"true\":\"value2\"}}"),
EXPECT_SAME
},
{
TEST("{\"map_string_int32\":{\"asdf\":1234,\"jkl;\":-1}}"),
TEST("{\"mapStringInt32\":{\"asdf\":1234,\"jkl;\":-1}}"),
EXPECT_SAME
},
{
TEST("{\"map_string_bool\":{\"asdf\":true,\"jkl;\":false}}"),
TEST("{\"mapStringBool\":{\"asdf\":true,\"jkl;\":false}}"),
EXPECT_SAME
},
{
TEST("{\"map_string_msg\":{\"asdf\":{\"foo\":42},\"jkl;\":{\"foo\":84}}}"),
TEST("{\"mapStringMsg\":{\"asdf\":{\"foo\":42},\"jkl;\":{\"foo\":84}}}"),
EXPECT_SAME
},
TEST_SENTINEL
@ -287,13 +287,14 @@ class StringSink {
void test_json_roundtrip_message(const char* json_src,
const char* json_expected,
const upb::Handlers* serialize_handlers,
const upb::json::ParserMethod* parser_method,
int seam) {
VerboseParserEnvironment env(verbose);
StringSink data_sink;
upb::json::Printer* printer = upb::json::Printer::Create(
env.env(), serialize_handlers, data_sink.Sink());
upb::json::Parser* parser =
upb::json::Parser::Create(env.env(), printer->input());
upb::json::Parser::Create(env.env(), parser_method, printer->input());
env.ResetBytesSink(parser->input());
env.Reset(json_src, strlen(json_src), false, false);
@ -323,6 +324,8 @@ void test_json_roundtrip() {
const upb::MessageDef* md = BuildTestMessage(symtab.get());
upb::reffed_ptr<const upb::Handlers> serialize_handlers(
upb::json::Printer::NewHandlers(md));
upb::reffed_ptr<const upb::json::ParserMethod> parser_method(
upb::json::ParserMethod::New(md));
for (const TestCase* test_case = kTestRoundtripMessages;
test_case->input != NULL; test_case++) {
@ -333,7 +336,8 @@ void test_json_roundtrip() {
for (size_t i = 0; i < strlen(test_case->input); i++) {
test_json_roundtrip_message(test_case->input, expected,
serialize_handlers.get(), i);
serialize_handlers.get(), parser_method.get(),
i);
}
}
}

@ -1,6 +1,7 @@
#include "upb/def.h"
#include <ctype.h>
#include <stdlib.h>
#include <string.h>
#include "upb/structdefs.int.h"
@ -721,6 +722,45 @@ const char *upb_fielddef_name(const upb_fielddef *f) {
return upb_def_fullname(upb_fielddef_upcast(f));
}
/* Copies the JSON (camelCase) form of the name of |f| into |buf|, writing at
 * most |len| bytes.  The output is always NUL-terminated when len > 0.
 *
 * Returns the number of bytes the complete JSON name requires, including the
 * NUL terminator; a return value greater than |len| therefore means the output
 * was truncated.  Returns 0 if the field has no name.  |buf| is never
 * dereferenced when |len| is 0, so (NULL, 0) can be used to query the size. */
size_t upb_fielddef_getjsonname(const upb_fielddef *f, char *buf, size_t len) {
  const char *name = upb_fielddef_name(f);
  size_t src, dst = 0;
  bool ucase_next = false;

/* Appends one byte to the logical output.  |dst| always advances so that it
 * ends up as the untruncated size; bytes that do not fit are dropped and the
 * final byte of |buf| is forced to NUL.  Wrapped in do/while so the macro
 * behaves as a single statement wherever it is used. */
#define WRITE(byte) \
  do { \
    ++dst; \
    if (dst < len) { \
      buf[dst - 1] = (char)(byte); \
    } else if (dst == len) { \
      buf[dst - 1] = '\0'; \
    } \
  } while (0)

  if (!name) {
    WRITE('\0');
    return 0;
  }

  /* Implement the transformation as described in the spec:
   *   1. upper case all letters after an underscore.
   *   2. remove all underscores.
   */
  for (src = 0; name[src]; src++) {
    if (name[src] == '_') {
      ucase_next = true;
      continue;
    }

    if (ucase_next) {
      /* toupper() is only defined for values representable as unsigned char
       * (or EOF); a plain char may be negative, so convert first. */
      WRITE(toupper((unsigned char)name[src]));
      ucase_next = false;
    } else {
      WRITE(name[src]);
    }
  }

  WRITE('\0');
  return dst;

#undef WRITE
}
/* Returns the message def stored in f->msg.def, or NULL when
 * f->msg_is_symbolic is set (in which case msg.def is not consulted). */
const upb_msgdef *upb_fielddef_containingtype(const upb_fielddef *f) {
  if (f->msg_is_symbolic) {
    return NULL;
  }
  return f->msg.def;
}

@ -307,6 +307,27 @@ class upb::FieldDef {
uint32_t number() const; /* Returns 0 if uninitialized. */
bool is_extension() const;
/* Copies the JSON name for this field into the given buffer. Returns the
* actual size of the JSON name, including the NUL terminator. If the
* return value is 0, the JSON name is unset. If the return value is
* greater than len, the JSON name was truncated. The buffer is always
* NUL-terminated if len > 0.
*
* The JSON name always defaults to a camelCased version of the regular
* name. However, if the regular name is unset, the JSON name will be unset
* also.
*/
size_t GetJsonName(char* buf, size_t len) const;
/* Convenience version of the above function which copies the JSON name
* into the given string, returning false if the name is not set. */
template <class T>
bool GetJsonName(T* str) {
str->resize(GetJsonName(NULL, 0));
GetJsonName(&(*str)[0], str->size());
return str->size() > 0;
}
/* For UPB_TYPE_MESSAGE fields only where is_tag_delimited() == false,
* indicates whether this field should have lazy parsing handlers that yield
* the unparsed string for the submessage.
@ -472,6 +493,16 @@ class upb::FieldDef {
bool set_name(const char* name, upb::Status* s);
bool set_name(const std::string& name, upb::Status* s);
/* Sets the JSON name to the given string. */
/* TODO(haberman): implement. Right now only default json_name (camelCase)
* is supported. */
bool set_json_name(const char* json_name, upb::Status* s);
bool set_json_name(const std::string& name, upb::Status* s);
/* Clears the JSON name. This will make it revert to its default, which is
* a camelCased version of the regular field name. */
void clear_json_name();
void set_integer_format(IntegerFormat format);
bool set_tag_delimited(bool tag_delimited, upb::Status* s);
@ -536,6 +567,7 @@ const char *upb_fielddef_name(const upb_fielddef *f);
bool upb_fielddef_isextension(const upb_fielddef *f);
bool upb_fielddef_lazy(const upb_fielddef *f);
bool upb_fielddef_packed(const upb_fielddef *f);
size_t upb_fielddef_getjsonname(const upb_fielddef *f, char *buf, size_t len);
const upb_msgdef *upb_fielddef_containingtype(const upb_fielddef *f);
const upb_oneofdef *upb_fielddef_containingoneof(const upb_fielddef *f);
upb_msgdef *upb_fielddef_containingtype_mutable(upb_fielddef *f);
@ -570,6 +602,8 @@ void upb_fielddef_setdescriptortype(upb_fielddef *f, int type);
void upb_fielddef_setlabel(upb_fielddef *f, upb_label_t label);
bool upb_fielddef_setnumber(upb_fielddef *f, uint32_t number, upb_status *s);
bool upb_fielddef_setname(upb_fielddef *f, const char *name, upb_status *s);
bool upb_fielddef_setjsonname(upb_fielddef *f, const char *name, upb_status *s);
bool upb_fielddef_clearjsonname(upb_fielddef *f);
bool upb_fielddef_setcontainingtypename(upb_fielddef *f, const char *name,
upb_status *s);
void upb_fielddef_setisextension(upb_fielddef *f, bool is_extension);
@ -1316,6 +1350,9 @@ inline const char* FieldDef::name() const { return upb_fielddef_name(this); }
/* C++ wrapper: forwards to upb_fielddef_isextension() for this field. */
inline bool FieldDef::is_extension() const {
return upb_fielddef_isextension(this);
}
/* C++ wrapper: forwards buf/len unchanged to upb_fielddef_getjsonname() and
 * returns its result. */
inline size_t FieldDef::GetJsonName(char* buf, size_t len) const {
return upb_fielddef_getjsonname(this, buf, len);
}
/* C++ wrapper: forwards to upb_fielddef_lazy() for this field. */
inline bool FieldDef::lazy() const {
return upb_fielddef_lazy(this);
}
@ -1346,6 +1383,15 @@ inline bool FieldDef::set_name(const char *name, Status* s) {
/* std::string overload of set_name(): the string is passed through
 * upb_safecstr() to obtain a C string, which is then handed to
 * upb_fielddef_setname() together with the status object. */
inline bool FieldDef::set_name(const std::string& name, Status* s) {
return upb_fielddef_setname(this, upb_safecstr(name), s);
}
/* C++ wrapper: forwards to upb_fielddef_setjsonname() and returns its result.
 * NOTE(review): the class declaration carries a TODO stating that only the
 * default (camelCase) JSON name is supported so far -- confirm that
 * upb_fielddef_setjsonname() is actually defined before relying on this. */
inline bool FieldDef::set_json_name(const char *name, Status* s) {
return upb_fielddef_setjsonname(this, name, s);
}
/* std::string overload of set_json_name(): the string is passed through
 * upb_safecstr() to obtain a C string, which is then handed to
 * upb_fielddef_setjsonname() together with the status object. */
inline bool FieldDef::set_json_name(const std::string& name, Status* s) {
return upb_fielddef_setjsonname(this, upb_safecstr(name), s);
}
/* C++ wrapper: calls upb_fielddef_clearjsonname() for this field.  The C
 * function is declared as returning bool; that result is discarded here. */
inline void FieldDef::clear_json_name() {
upb_fielddef_clearjsonname(this);
}
/* C++ wrapper: forwards name and status to
 * upb_fielddef_setcontainingtypename() and returns its result. */
inline bool FieldDef::set_containing_type_name(const char *name, Status* s) {
return upb_fielddef_setcontainingtypename(this, name, s);
}

Binary file not shown.

@ -40,6 +40,9 @@ typedef struct {
const upb_msgdef *m;
const upb_fielddef *f;
/* The table mapping json name to fielddef for this message. */
upb_strtable *name_table;
/* We are in a repeated-field context, ready to emit mapentries as
* submessages. This flag alters the start-of-object (open-brace) behavior to
* begin a sequence of mapentry messages rather than a single submessage. */
@ -60,7 +63,7 @@ typedef struct {
struct upb_json_parser {
upb_env *env;
upb_byteshandler input_handler_;
const upb_json_parsermethod *method;
upb_bytessink input_;
/* Stack to track the JSON scopes we are in. */
@ -95,6 +98,19 @@ struct upb_json_parser {
uint32_t digit;
};
/* Per-schema state for the JSON parser: built once from a msgdef by
 * upb_json_parsermethod_new() and then referenced (read-only) by each
 * upb_json_parser through its |method| pointer. */
struct upb_json_parsermethod {
/* Refcounting base; this struct is cast to/from upb_refcounted. */
upb_refcounted base;
/* Bytes handler exposed via upb_json_parsermethod_inputhandler(). */
upb_byteshandler input_handler_;
/* Mainly for the purposes of refcounting, so all the fielddefs we point
* to stay alive. */
const upb_msgdef *msg;
/* Keys are upb_msgdef*, values are pointers to malloc'd upb_strtable
* (json_name -> fielddef); the tables are freed in free_json_parsermethod(). */
upb_inttable name_tables;
};
#define PARSER_CHECK_RETURN(x) if (!(x)) return false
/* Used to signal that a capture has been suspended. */
@ -123,6 +139,13 @@ static bool check_stack(upb_json_parser *p) {
return true;
}
/* Points frame->name_table at the JSON-name table that the parser's method
 * holds for frame->m.  frame->m must already be set; the lookup is asserted
 * to succeed. */
static void set_name_table(upb_json_parser *p, upb_jsonparser_frame *frame) {
  upb_value entry;
  bool found =
      upb_inttable_lookupptr(&p->method->name_tables, frame->m, &entry);
  UPB_ASSERT_VAR(found, found);
  frame->name_table = upb_value_getptr(entry);
}
/* There are GCC/Clang built-ins for overflow checking which we could start
* using if there was any performance benefit to it. */
@ -719,6 +742,7 @@ static bool start_stringval(upb_json_parser *p) {
upb_sink_startstr(&p->top->sink, sel, 0, &inner->sink);
inner->m = p->top->m;
inner->f = p->top->f;
inner->name_table = NULL;
inner->is_map = false;
inner->is_mapentry = false;
p->top = inner;
@ -905,6 +929,7 @@ static bool handle_mapentry(upb_json_parser *p) {
sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSUBMSG);
upb_sink_startsubmsg(&p->top->sink, sel, &inner->sink);
inner->m = mapentrymsg;
inner->name_table = NULL;
inner->mapfield = mapfield;
inner->is_map = false;
@ -941,20 +966,20 @@ static bool end_membername(upb_json_parser *p) {
} else {
size_t len;
const char *buf = accumulate_getptr(p, &len);
const upb_fielddef *f = upb_msgdef_ntof(p->top->m, buf, len);
upb_value v;
if (upb_strtable_lookup2(p->top->name_table, buf, len, &v)) {
p->top->f = upb_value_getconstptr(v);
multipart_end(p);
if (!f) {
return true;
} else {
/* TODO(haberman): Ignore unknown fields if requested/configured to do
* so. */
upb_status_seterrf(&p->status, "No such field: %.*s\n", (int)len, buf);
upb_env_reporterror(p->env, &p->status);
return false;
}
p->top->f = f;
multipart_end(p);
return true;
}
}
@ -996,6 +1021,7 @@ static bool start_subobject(upb_json_parser *p) {
sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSEQ);
upb_sink_startseq(&p->top->sink, sel, &inner->sink);
inner->m = upb_fielddef_msgsubdef(p->top->f);
inner->name_table = NULL;
inner->mapfield = p->top->f;
inner->f = NULL;
inner->is_map = true;
@ -1016,6 +1042,7 @@ static bool start_subobject(upb_json_parser *p) {
sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSUBMSG);
upb_sink_startsubmsg(&p->top->sink, sel, &inner->sink);
inner->m = upb_fielddef_msgsubdef(p->top->f);
set_name_table(p, inner);
inner->f = NULL;
inner->is_map = false;
inner->is_mapentry = false;
@ -1065,6 +1092,7 @@ static bool start_array(upb_json_parser *p) {
sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSEQ);
upb_sink_startseq(&p->top->sink, sel, &inner->sink);
inner->m = p->top->m;
inner->name_table = NULL;
inner->f = p->top->f;
inner->is_map = false;
inner->is_mapentry = false;
@ -1122,11 +1150,11 @@ static void end_object(upb_json_parser *p) {
* final state once, when the closing '"' is seen. */
#line 1218 "upb/json/parser.rl"
#line 1246 "upb/json/parser.rl"
#line 1130 "upb/json/parser.c"
#line 1158 "upb/json/parser.c"
static const char _json_actions[] = {
0, 1, 0, 1, 2, 1, 3, 1,
5, 1, 6, 1, 7, 1, 8, 1,
@ -1275,7 +1303,7 @@ static const int json_en_value_machine = 27;
static const int json_en_main = 1;
#line 1221 "upb/json/parser.rl"
#line 1249 "upb/json/parser.rl"
size_t parse(void *closure, const void *hd, const char *buf, size_t size,
const upb_bufhandle *handle) {
@ -1297,7 +1325,7 @@ size_t parse(void *closure, const void *hd, const char *buf, size_t size,
capture_resume(parser, buf);
#line 1301 "upb/json/parser.c"
#line 1329 "upb/json/parser.c"
{
int _klen;
unsigned int _trans;
@ -1372,118 +1400,118 @@ _match:
switch ( *_acts++ )
{
case 0:
#line 1133 "upb/json/parser.rl"
#line 1161 "upb/json/parser.rl"
{ p--; {cs = stack[--top]; goto _again;} }
break;
case 1:
#line 1134 "upb/json/parser.rl"
#line 1162 "upb/json/parser.rl"
{ p--; {stack[top++] = cs; cs = 10; goto _again;} }
break;
case 2:
#line 1138 "upb/json/parser.rl"
#line 1166 "upb/json/parser.rl"
{ start_text(parser, p); }
break;
case 3:
#line 1139 "upb/json/parser.rl"
#line 1167 "upb/json/parser.rl"
{ CHECK_RETURN_TOP(end_text(parser, p)); }
break;
case 4:
#line 1145 "upb/json/parser.rl"
#line 1173 "upb/json/parser.rl"
{ start_hex(parser); }
break;
case 5:
#line 1146 "upb/json/parser.rl"
#line 1174 "upb/json/parser.rl"
{ hexdigit(parser, p); }
break;
case 6:
#line 1147 "upb/json/parser.rl"
#line 1175 "upb/json/parser.rl"
{ CHECK_RETURN_TOP(end_hex(parser)); }
break;
case 7:
#line 1153 "upb/json/parser.rl"
#line 1181 "upb/json/parser.rl"
{ CHECK_RETURN_TOP(escape(parser, p)); }
break;
case 8:
#line 1159 "upb/json/parser.rl"
#line 1187 "upb/json/parser.rl"
{ p--; {cs = stack[--top]; goto _again;} }
break;
case 9:
#line 1162 "upb/json/parser.rl"
#line 1190 "upb/json/parser.rl"
{ {stack[top++] = cs; cs = 19; goto _again;} }
break;
case 10:
#line 1164 "upb/json/parser.rl"
#line 1192 "upb/json/parser.rl"
{ p--; {stack[top++] = cs; cs = 27; goto _again;} }
break;
case 11:
#line 1169 "upb/json/parser.rl"
#line 1197 "upb/json/parser.rl"
{ start_member(parser); }
break;
case 12:
#line 1170 "upb/json/parser.rl"
#line 1198 "upb/json/parser.rl"
{ CHECK_RETURN_TOP(end_membername(parser)); }
break;
case 13:
#line 1173 "upb/json/parser.rl"
#line 1201 "upb/json/parser.rl"
{ end_member(parser); }
break;
case 14:
#line 1179 "upb/json/parser.rl"
#line 1207 "upb/json/parser.rl"
{ start_object(parser); }
break;
case 15:
#line 1182 "upb/json/parser.rl"
#line 1210 "upb/json/parser.rl"
{ end_object(parser); }
break;
case 16:
#line 1188 "upb/json/parser.rl"
#line 1216 "upb/json/parser.rl"
{ CHECK_RETURN_TOP(start_array(parser)); }
break;
case 17:
#line 1192 "upb/json/parser.rl"
#line 1220 "upb/json/parser.rl"
{ end_array(parser); }
break;
case 18:
#line 1197 "upb/json/parser.rl"
#line 1225 "upb/json/parser.rl"
{ start_number(parser, p); }
break;
case 19:
#line 1198 "upb/json/parser.rl"
#line 1226 "upb/json/parser.rl"
{ CHECK_RETURN_TOP(end_number(parser, p)); }
break;
case 20:
#line 1200 "upb/json/parser.rl"
#line 1228 "upb/json/parser.rl"
{ CHECK_RETURN_TOP(start_stringval(parser)); }
break;
case 21:
#line 1201 "upb/json/parser.rl"
#line 1229 "upb/json/parser.rl"
{ CHECK_RETURN_TOP(end_stringval(parser)); }
break;
case 22:
#line 1203 "upb/json/parser.rl"
#line 1231 "upb/json/parser.rl"
{ CHECK_RETURN_TOP(parser_putbool(parser, true)); }
break;
case 23:
#line 1205 "upb/json/parser.rl"
#line 1233 "upb/json/parser.rl"
{ CHECK_RETURN_TOP(parser_putbool(parser, false)); }
break;
case 24:
#line 1207 "upb/json/parser.rl"
#line 1235 "upb/json/parser.rl"
{ /* null value */ }
break;
case 25:
#line 1209 "upb/json/parser.rl"
#line 1237 "upb/json/parser.rl"
{ CHECK_RETURN_TOP(start_subobject(parser)); }
break;
case 26:
#line 1210 "upb/json/parser.rl"
#line 1238 "upb/json/parser.rl"
{ end_subobject(parser); }
break;
case 27:
#line 1215 "upb/json/parser.rl"
#line 1243 "upb/json/parser.rl"
{ p--; {cs = stack[--top]; goto _again;} }
break;
#line 1487 "upb/json/parser.c"
#line 1515 "upb/json/parser.c"
}
}
@ -1496,7 +1524,7 @@ _again:
_out: {}
}
#line 1242 "upb/json/parser.rl"
#line 1270 "upb/json/parser.rl"
if (p != pe) {
upb_status_seterrf(&parser->status, "Parse error at %s\n", p);
@ -1537,13 +1565,13 @@ static void json_parser_reset(upb_json_parser *p) {
/* Emit Ragel initialization of the parser. */
#line 1541 "upb/json/parser.c"
#line 1569 "upb/json/parser.c"
{
cs = json_start;
top = 0;
}
#line 1282 "upb/json/parser.rl"
#line 1310 "upb/json/parser.rl"
p->current_state = cs;
p->parser_top = top;
accumulate_clear(p);
@ -1553,10 +1581,73 @@ static void json_parser_reset(upb_json_parser *p) {
upb_status_clear(&p->status);
}
/* Refcounted "visit" callback: reports the msgdef held in |msg| as the one
 * object this parser method references. */
static void visit_json_parsermethod(const upb_refcounted *r,
                                    upb_refcounted_visit *visit,
                                    void *closure) {
  const upb_msgdef *msg = ((const upb_json_parsermethod*)r)->msg;
  visit(r, upb_msgdef_upcast2(msg), closure);
}
/* Refcounted "free" callback: uninitializes and frees every per-message name
 * table, then the table of tables, then the method object itself. */
static void free_json_parsermethod(upb_refcounted *r) {
  upb_json_parsermethod *m = (upb_json_parsermethod*)r;
  upb_inttable_iter it;

  upb_inttable_begin(&it, &m->name_tables);
  while (!upb_inttable_done(&it)) {
    /* Each value is a malloc'd upb_strtable owned by this method. */
    upb_strtable *names = upb_value_getptr(upb_inttable_iter_value(&it));
    upb_strtable_uninit(names);
    free(names);
    upb_inttable_next(&it);
  }

  upb_inttable_uninit(&m->name_tables);
  free(r);
}
/* Builds the (json_name -> fielddef) table for |md| and stores it in
 * m->name_tables keyed by |md|, then does the same for every submessage type
 * reachable through its fields.  Does nothing if |md| already has a table. */
static void add_jsonname_table(upb_json_parsermethod *m, const upb_msgdef* md) {
  upb_msg_field_iter i;
  upb_strtable *t;

  /* It would be nice to stack-allocate this, but protobufs do not limit the
   * length of fields to any reasonable limit. */
  char *buf = NULL;
  size_t len = 0;

  if (upb_inttable_lookupptr(&m->name_tables, md, NULL)) {
    return;
  }

  /* TODO(haberman): handle malloc failure. */
  t = malloc(sizeof(*t));
  upb_strtable_init(t, UPB_CTYPE_CONSTPTR);
  /* Registered before the loop so that the lookup above ends the recursion
   * when a message type is reached again (e.g. recursive messages). */
  upb_inttable_insertptr(&m->name_tables, md, upb_value_ptr(t));

  for(upb_msg_field_begin(&i, md);
      !upb_msg_field_done(&i);
      upb_msg_field_next(&i)) {
    const upb_fielddef *f = upb_msg_iter_field(&i);
    /* First attempt reuses the scratch buffer from earlier fields; the return
     * value says how much room the name really needs. */
    size_t field_len = upb_fielddef_getjsonname(f, buf, len);
    if (field_len > len) {
      size_t len2;
      buf = realloc(buf, field_len);
      len = field_len;
      len2 = upb_fielddef_getjsonname(f, buf, len);
      /* The retry must report exactly the size we just grew to. */
      UPB_ASSERT_VAR(len2, len == len2);
    }
    upb_strtable_insert(t, buf, upb_value_constptr(f));

    if (upb_fielddef_issubmsg(f)) {
      add_jsonname_table(m, upb_fielddef_msgsubdef(f));
    }
  }

  free(buf);
}
/* Public API *****************************************************************/
upb_json_parser *upb_json_parser_create(upb_env *env, upb_sink *output) {
upb_json_parser *upb_json_parser_create(upb_env *env,
const upb_json_parsermethod *method,
upb_sink *output) {
#ifndef NDEBUG
const size_t size_before = upb_env_bytesallocated(env);
#endif
@ -1564,17 +1655,16 @@ upb_json_parser *upb_json_parser_create(upb_env *env, upb_sink *output) {
if (!p) return false;
p->env = env;
p->method = method;
p->limit = p->stack + UPB_JSON_MAX_DEPTH;
p->accumulate_buf = NULL;
p->accumulate_buf_size = 0;
upb_byteshandler_init(&p->input_handler_);
upb_byteshandler_setstring(&p->input_handler_, parse, NULL);
upb_byteshandler_setendstr(&p->input_handler_, end, NULL);
upb_bytessink_reset(&p->input_, &p->input_handler_, p);
upb_bytessink_reset(&p->input_, &method->input_handler_, p);
json_parser_reset(p);
upb_sink_reset(&p->top->sink, output->handlers, output->closure);
p->top->m = upb_handlers_msgdef(output->handlers);
set_name_table(p, p->top);
/* If this fails, uncomment and increase the value in parser.h. */
/* fprintf(stderr, "%zd\n", upb_env_bytesallocated(env) - size_before); */
@ -1585,3 +1675,29 @@ upb_json_parser *upb_json_parser_create(upb_env *env, upb_sink *output) {
/* Returns a pointer to the parser's embedded input bytes sink (p->input_). */
upb_bytessink *upb_json_parser_input(upb_json_parser *p) {
return &p->input_;
}
/* Creates a parser method for message type |md|.  The new object is
 * initialized as a refcounted object with |owner| as the initial owner, takes
 * a ref on |md|, installs the parse/end byte handlers and builds the JSON-name
 * tables for |md| and its submessages.
 *
 * Returns NULL if the method object itself cannot be allocated. */
upb_json_parsermethod *upb_json_parsermethod_new(const upb_msgdef* md,
                                                 const void* owner) {
  static const struct upb_refcounted_vtbl vtbl = {visit_json_parsermethod,
                                                  free_json_parsermethod};
  upb_json_parsermethod *ret = malloc(sizeof(*ret));
  if (!ret) {
    /* Nothing has been initialized yet, so there is nothing to undo. */
    return NULL;
  }
  upb_refcounted_init(upb_json_parsermethod_upcast_mutable(ret), &vtbl, owner);

  ret->msg = md;
  upb_ref2(md, ret);

  upb_byteshandler_init(&ret->input_handler_);
  upb_byteshandler_setstring(&ret->input_handler_, parse, ret);
  upb_byteshandler_setendstr(&ret->input_handler_, end, ret);

  upb_inttable_init(&ret->name_tables, UPB_CTYPE_PTR);

  add_jsonname_table(ret, md);

  return ret;
}
/* Returns a pointer to the bytes handler embedded in |m| (m->input_handler_),
 * which upb_json_parsermethod_new() sets up with the parse/end callbacks. */
const upb_byteshandler *upb_json_parsermethod_inputhandler(
const upb_json_parsermethod *m) {
return &m->input_handler_;
}

@ -15,11 +15,14 @@
namespace upb {
namespace json {
class Parser;
class ParserMethod;
} /* namespace json */
} /* namespace upb */
#endif
UPB_DECLARE_TYPE(upb::json::Parser, upb_json_parser)
UPB_DECLARE_DERIVED_TYPE(upb::json::ParserMethod, upb::RefCounted,
upb_json_parsermethod, upb_refcounted)
/* upb::json::Parser **********************************************************/
@ -27,7 +30,7 @@ UPB_DECLARE_TYPE(upb::json::Parser, upb_json_parser)
* constructed. This hint may be an overestimate for some build configurations.
* But if the parser library is upgraded without recompiling the application,
* it may be an underestimate. */
#define UPB_JSON_PARSER_SIZE 3704
#define UPB_JSON_PARSER_SIZE 4104
#ifdef __cplusplus
@ -35,7 +38,8 @@ UPB_DECLARE_TYPE(upb::json::Parser, upb_json_parser)
* sink. */
class upb::json::Parser {
public:
static Parser* Create(Environment* env, Sink* output);
static Parser* Create(Environment* env, const ParserMethod* method,
Sink* output);
BytesSink* input();
@ -43,25 +47,72 @@ class upb::json::Parser {
UPB_DISALLOW_POD_OPS(Parser, upb::json::Parser)
};
/* A reference-counted object created from a MessageDef.  It is passed to
 * Parser::Create() to construct a JSON parser for that message type. */
class upb::json::ParserMethod {
public:
/* Include base methods from upb::RefCounted. */
UPB_REFCOUNTED_CPPMETHODS
/* Returns a new parser method for parsing according to the specified
* schema. */
static reffed_ptr<const ParserMethod> New(const upb::MessageDef* md);
/* The destination handlers that are statically bound to this method.
* This method is only capable of outputting to a sink that uses these
* handlers.
* NOTE(review): forwards to upb_json_parsermethod_desthandlers(); no
* definition of that function is visible alongside the other parser method
* functions -- confirm it exists before calling this. */
const Handlers* dest_handlers() const;
/* The input handlers for this parser method. */
const BytesHandler* input_handler() const;
private:
UPB_DISALLOW_POD_OPS(ParserMethod, upb::json::ParserMethod)
};
#endif
UPB_BEGIN_EXTERN_C
upb_json_parser *upb_json_parser_create(upb_env *e, upb_sink *output);
upb_json_parser* upb_json_parser_create(upb_env* e,
const upb_json_parsermethod* m,
upb_sink* output);
upb_bytessink *upb_json_parser_input(upb_json_parser *p);
upb_json_parsermethod* upb_json_parsermethod_new(const upb_msgdef* md,
const void* owner);
const upb_handlers *upb_json_parsermethod_desthandlers(
const upb_json_parsermethod *m);
const upb_byteshandler *upb_json_parsermethod_inputhandler(
const upb_json_parsermethod *m);
/* Include refcounted methods like upb_json_parsermethod_ref(). */
UPB_REFCOUNTED_CMETHODS(upb_json_parsermethod, upb_json_parsermethod_upcast)
UPB_END_EXTERN_C
#ifdef __cplusplus
namespace upb {
namespace json {
inline Parser* Parser::Create(Environment* env, Sink* output) {
return upb_json_parser_create(env, output);
/* C++ wrapper: forwards env, method and output unchanged to
 * upb_json_parser_create() and returns the parser it produces. */
inline Parser* Parser::Create(Environment* env, const ParserMethod* method,
Sink* output) {
return upb_json_parser_create(env, method, output);
}
/* C++ wrapper: returns the result of upb_json_parser_input() for this
 * parser. */
inline BytesSink* Parser::input() {
return upb_json_parser_input(this);
}
/* C++ wrapper: forwards to upb_json_parsermethod_desthandlers().
 * NOTE(review): only a declaration of that C function is visible here --
 * confirm a definition exists, otherwise using this will fail to link. */
inline const Handlers* ParserMethod::dest_handlers() const {
return upb_json_parsermethod_desthandlers(this);
}
/* C++ wrapper: forwards to upb_json_parsermethod_inputhandler(). */
inline const BytesHandler* ParserMethod::input_handler() const {
return upb_json_parsermethod_inputhandler(this);
}
/* static */
/* Creates a parser method for |md| via upb_json_parsermethod_new().  The
 * address of the local pointer |m| serves as the owner token for the initial
 * reference, and the same token is passed to reffed_ptr together with the
 * object.
 * NOTE(review): presumably reffed_ptr takes over the reference held by that
 * owner -- confirm against reffed_ptr's constructor. */
inline reffed_ptr<const ParserMethod> ParserMethod::New(
const MessageDef* md) {
const upb_json_parsermethod *m = upb_json_parsermethod_new(md, &m);
return reffed_ptr<const ParserMethod>(m, &m);
}
} /* namespace json */
} /* namespace upb */

@ -38,6 +38,9 @@ typedef struct {
const upb_msgdef *m;
const upb_fielddef *f;
/* The table mapping json name to fielddef for this message. */
upb_strtable *name_table;
/* We are in a repeated-field context, ready to emit mapentries as
* submessages. This flag alters the start-of-object (open-brace) behavior to
* begin a sequence of mapentry messages rather than a single submessage. */
@ -58,7 +61,7 @@ typedef struct {
struct upb_json_parser {
upb_env *env;
upb_byteshandler input_handler_;
const upb_json_parsermethod *method;
upb_bytessink input_;
/* Stack to track the JSON scopes we are in. */
@ -93,6 +96,19 @@ struct upb_json_parser {
uint32_t digit;
};
/* Per-schema state for the JSON parser: built once from a msgdef by
 * upb_json_parsermethod_new() and then referenced (read-only) by each
 * upb_json_parser through its |method| pointer. */
struct upb_json_parsermethod {
/* Refcounting base; this struct is cast to/from upb_refcounted. */
upb_refcounted base;
/* Bytes handler exposed via upb_json_parsermethod_inputhandler(). */
upb_byteshandler input_handler_;
/* Mainly for the purposes of refcounting, so all the fielddefs we point
* to stay alive. */
const upb_msgdef *msg;
/* Keys are upb_msgdef*, values are pointers to malloc'd upb_strtable
* (json_name -> fielddef); the tables are freed in free_json_parsermethod(). */
upb_inttable name_tables;
};
#define PARSER_CHECK_RETURN(x) if (!(x)) return false
/* Used to signal that a capture has been suspended. */
@ -121,6 +137,13 @@ static bool check_stack(upb_json_parser *p) {
return true;
}
/* Points frame->name_table at the JSON-name table that the parser's method
 * holds for frame->m.  frame->m must already be set; the lookup is asserted
 * to succeed. */
static void set_name_table(upb_json_parser *p, upb_jsonparser_frame *frame) {
  upb_value entry;
  bool found =
      upb_inttable_lookupptr(&p->method->name_tables, frame->m, &entry);
  UPB_ASSERT_VAR(found, found);
  frame->name_table = upb_value_getptr(entry);
}
/* There are GCC/Clang built-ins for overflow checking which we could start
* using if there was any performance benefit to it. */
@ -717,6 +740,7 @@ static bool start_stringval(upb_json_parser *p) {
upb_sink_startstr(&p->top->sink, sel, 0, &inner->sink);
inner->m = p->top->m;
inner->f = p->top->f;
inner->name_table = NULL;
inner->is_map = false;
inner->is_mapentry = false;
p->top = inner;
@ -903,6 +927,7 @@ static bool handle_mapentry(upb_json_parser *p) {
sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSUBMSG);
upb_sink_startsubmsg(&p->top->sink, sel, &inner->sink);
inner->m = mapentrymsg;
inner->name_table = NULL;
inner->mapfield = mapfield;
inner->is_map = false;
@ -939,20 +964,20 @@ static bool end_membername(upb_json_parser *p) {
} else {
size_t len;
const char *buf = accumulate_getptr(p, &len);
const upb_fielddef *f = upb_msgdef_ntof(p->top->m, buf, len);
upb_value v;
if (!f) {
if (upb_strtable_lookup2(p->top->name_table, buf, len, &v)) {
p->top->f = upb_value_getconstptr(v);
multipart_end(p);
return true;
} else {
/* TODO(haberman): Ignore unknown fields if requested/configured to do
* so. */
upb_status_seterrf(&p->status, "No such field: %.*s\n", (int)len, buf);
upb_env_reporterror(p->env, &p->status);
return false;
}
p->top->f = f;
multipart_end(p);
return true;
}
}
@ -994,6 +1019,7 @@ static bool start_subobject(upb_json_parser *p) {
sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSEQ);
upb_sink_startseq(&p->top->sink, sel, &inner->sink);
inner->m = upb_fielddef_msgsubdef(p->top->f);
inner->name_table = NULL;
inner->mapfield = p->top->f;
inner->f = NULL;
inner->is_map = true;
@ -1014,6 +1040,7 @@ static bool start_subobject(upb_json_parser *p) {
sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSUBMSG);
upb_sink_startsubmsg(&p->top->sink, sel, &inner->sink);
inner->m = upb_fielddef_msgsubdef(p->top->f);
set_name_table(p, inner);
inner->f = NULL;
inner->is_map = false;
inner->is_mapentry = false;
@ -1063,6 +1090,7 @@ static bool start_array(upb_json_parser *p) {
sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSEQ);
upb_sink_startseq(&p->top->sink, sel, &inner->sink);
inner->m = p->top->m;
inner->name_table = NULL;
inner->f = p->top->f;
inner->is_map = false;
inner->is_mapentry = false;
@ -1288,10 +1316,75 @@ static void json_parser_reset(upb_json_parser *p) {
upb_status_clear(&p->status);
}
/* Refcounted "visit" callback: reports the msgdef held in |msg| as the one
 * object this parser method references. */
static void visit_json_parsermethod(const upb_refcounted *r,
                                    upb_refcounted_visit *visit,
                                    void *closure) {
  const upb_msgdef *msg = ((const upb_json_parsermethod*)r)->msg;
  visit(r, upb_msgdef_upcast2(msg), closure);
}
/* Refcounted "free" callback: uninitializes and frees every per-message name
 * table, then the table of tables, then the method object itself. */
static void free_json_parsermethod(upb_refcounted *r) {
  upb_json_parsermethod *m = (upb_json_parsermethod*)r;
  upb_inttable_iter it;

  upb_inttable_begin(&it, &m->name_tables);
  while (!upb_inttable_done(&it)) {
    /* Each value is a malloc'd upb_strtable owned by this method. */
    upb_strtable *names = upb_value_getptr(upb_inttable_iter_value(&it));
    upb_strtable_uninit(names);
    free(names);
    upb_inttable_next(&it);
  }

  upb_inttable_uninit(&m->name_tables);
  free(r);
}
/* Builds the (json_name -> fielddef) table for |md| and stores it in
 * m->name_tables keyed by |md|, then does the same for every submessage type
 * reachable through its fields.  Does nothing if |md| already has a table. */
static void add_jsonname_table(upb_json_parsermethod *m, const upb_msgdef* md) {
  upb_msg_field_iter iter;
  upb_strtable *names;

  /* Heap scratch buffer, grown on demand: protobuf places no practical bound
   * on field-name length, so a fixed stack buffer is not an option. */
  char *scratch = NULL;
  size_t capacity = 0;

  if (upb_inttable_lookupptr(&m->name_tables, md, NULL)) return;

  /* TODO(haberman): handle malloc failure. */
  names = malloc(sizeof(*names));
  upb_strtable_init(names, UPB_CTYPE_CONSTPTR);
  /* Registered before walking the fields so that the lookup above stops the
   * recursion when this message type is reached again. */
  upb_inttable_insertptr(&m->name_tables, md, upb_value_ptr(names));

  upb_msg_field_begin(&iter, md);
  while (!upb_msg_field_done(&iter)) {
    const upb_fielddef *field = upb_msg_iter_field(&iter);
    size_t needed = upb_fielddef_getjsonname(field, scratch, capacity);

    if (needed > capacity) {
      /* Name did not fit: grow to the reported size and fetch it again. */
      size_t written;
      scratch = realloc(scratch, needed);
      capacity = needed;
      written = upb_fielddef_getjsonname(field, scratch, capacity);
      UPB_ASSERT_VAR(written, capacity == written);
    }
    upb_strtable_insert(names, scratch, upb_value_constptr(field));

    if (upb_fielddef_issubmsg(field)) {
      add_jsonname_table(m, upb_fielddef_msgsubdef(field));
    }

    upb_msg_field_next(&iter);
  }

  free(scratch);
}
/* Public API *****************************************************************/
upb_json_parser *upb_json_parser_create(upb_env *env, upb_sink *output) {
upb_json_parser *upb_json_parser_create(upb_env *env,
const upb_json_parsermethod *method,
upb_sink *output) {
#ifndef NDEBUG
const size_t size_before = upb_env_bytesallocated(env);
#endif
@ -1299,17 +1392,16 @@ upb_json_parser *upb_json_parser_create(upb_env *env, upb_sink *output) {
if (!p) return false;
p->env = env;
p->method = method;
p->limit = p->stack + UPB_JSON_MAX_DEPTH;
p->accumulate_buf = NULL;
p->accumulate_buf_size = 0;
upb_byteshandler_init(&p->input_handler_);
upb_byteshandler_setstring(&p->input_handler_, parse, NULL);
upb_byteshandler_setendstr(&p->input_handler_, end, NULL);
upb_bytessink_reset(&p->input_, &p->input_handler_, p);
upb_bytessink_reset(&p->input_, &method->input_handler_, p);
json_parser_reset(p);
upb_sink_reset(&p->top->sink, output->handlers, output->closure);
p->top->m = upb_handlers_msgdef(output->handlers);
set_name_table(p, p->top);
/* If this fails, uncomment and increase the value in parser.h. */
/* fprintf(stderr, "%zd\n", upb_env_bytesallocated(env) - size_before); */
@ -1320,3 +1412,29 @@ upb_json_parser *upb_json_parser_create(upb_env *env, upb_sink *output) {
/* Returns a pointer to the parser's embedded input bytes sink (p->input_). */
upb_bytessink *upb_json_parser_input(upb_json_parser *p) {
return &p->input_;
}
/* Creates a parser method for message type |md|.  The new object is
 * initialized as a refcounted object with |owner| as the initial owner, takes
 * a ref on |md|, installs the parse/end byte handlers and builds the JSON-name
 * tables for |md| and its submessages.
 *
 * Returns NULL if the method object itself cannot be allocated. */
upb_json_parsermethod *upb_json_parsermethod_new(const upb_msgdef* md,
                                                 const void* owner) {
  static const struct upb_refcounted_vtbl vtbl = {visit_json_parsermethod,
                                                  free_json_parsermethod};
  upb_json_parsermethod *ret = malloc(sizeof(*ret));
  if (!ret) {
    /* Nothing has been initialized yet, so there is nothing to undo. */
    return NULL;
  }
  upb_refcounted_init(upb_json_parsermethod_upcast_mutable(ret), &vtbl, owner);

  ret->msg = md;
  upb_ref2(md, ret);

  upb_byteshandler_init(&ret->input_handler_);
  upb_byteshandler_setstring(&ret->input_handler_, parse, ret);
  upb_byteshandler_setendstr(&ret->input_handler_, end, ret);

  upb_inttable_init(&ret->name_tables, UPB_CTYPE_PTR);

  add_jsonname_table(ret, md);

  return ret;
}
/* Returns a pointer to the bytes handler embedded in |m| (m->input_handler_),
 * which upb_json_parsermethod_new() sets up with the parse/end callbacks. */
const upb_byteshandler *upb_json_parsermethod_inputhandler(
const upb_json_parsermethod *m) {
return &m->input_handler_;
}

@ -33,15 +33,28 @@ struct upb_json_printer {
/* StringPiece; a pointer plus a length. */
typedef struct {
const char *ptr;
char *ptr;
size_t len;
} strpc;
void freestrpc(void *ptr) {
strpc *pc = ptr;
free(pc->ptr);
free(pc);
}
/* Convert fielddef name to JSON name and return as a string piece. */
strpc *newstrpc(upb_handlers *h, const upb_fielddef *f) {
/* TODO(haberman): handle malloc failure. */
strpc *ret = malloc(sizeof(*ret));
ret->ptr = upb_fielddef_name(f);
ret->len = strlen(ret->ptr);
upb_handlers_addcleanup(h, ret, free);
size_t len;
ret->len = upb_fielddef_getjsonname(f, NULL, 0);
ret->ptr = malloc(ret->len);
len = upb_fielddef_getjsonname(f, ret->ptr, ret->len);
UPB_ASSERT_VAR(len, len == ret->len);
ret->len--; /* NULL */
upb_handlers_addcleanup(h, ret, freestrpc);
return ret;
}

Loading…
Cancel
Save