Changes from Google-internal development.

* JSON parser expanded to handle split buffers.
 * bugfix to the protobuf decoder.
pull/13171/head
Josh Haberman 10 years ago
parent d18475ae57
commit 87fc2c516b
  1. 2
      Makefile
  2. 84
      tests/json/test_json.cc
  3. 98
      tests/pb/test_decoder.cc
  4. 17
      tests/test_table.cc
  5. 130
      tests/test_util.h
  6. 1086
      upb/json/parser.c
  7. 21
      upb/json/parser.h
  8. 788
      upb/json/parser.rl
  9. 17
      upb/pb/compile_decoder.c
  10. 3
      upb/pb/compile_decoder_x64.dasc
  11. 7
      upb/pb/compile_decoder_x64.h
  12. 6
      upb/pb/decoder.c
  13. 4
      upb/pb/decoder.int.h
  14. 6
      upb/upb.c

@ -15,7 +15,7 @@
# Threading:
# * -DUPB_THREAD_UNSAFE: remove all thread-safety.
.PHONY: all lib clean tests test benchmark descriptorgen amalgamate
.PHONY: all lib clean tests test descriptorgen amalgamate
.PHONY: clean_leave_profile
# Prevents the deletion of intermediate files.

@ -6,6 +6,7 @@
* A set of tests for JSON parsing and serialization.
*/
#include "tests/test_util.h"
#include "tests/upb_test.h"
#include "upb/handlers.h"
#include "upb/symtab.h"
@ -27,6 +28,8 @@ struct TestCase {
const char* expected;
};
bool verbose = false;
static TestCase kTestRoundtripMessages[] = {
// Test most fields here.
{
@ -190,6 +193,51 @@ class StringSink {
std::string s_;
};
// Parses json_src with the JSON parser, feeding the text as two separate
// buffers split at byte offset "seam", pipes the resulting events directly
// into the JSON printer, and verifies that the printed text is exactly
// json_expected.
//
// Fails the test (ASSERT / abort) if parsing reports an error, if not all of
// the input was consumed, or if the serialized output differs.
void test_json_roundtrip_message(const char* json_src,
                                 const char* json_expected,
                                 const upb::Handlers* serialize_handlers,
                                 int seam) {
  upb::Status st;
  upb::json::Parser parser(&st);
  upb::json::Printer printer(serialize_handlers);
  StringSink data_sink;

  // Wire parser -> printer -> string sink.
  parser.ResetOutput(printer.input());
  printer.ResetOutput(data_sink.Sink());

  upb::BytesSink* input = parser.input();
  void *sub;
  size_t len = strlen(json_src);
  size_t ofs = 0;

  // Feed [0, seam) and then [seam, len) so the parser sees a buffer seam.
  bool ok = input->Start(0, &sub) &&
            parse_buffer(input, sub, json_src, 0, seam, &ofs, &st, verbose) &&
            parse_buffer(input, sub, json_src, seam, len, &ofs, &st, verbose) &&
            ofs == len;

  if (ok) {
    if (verbose) {
      fprintf(stderr, "calling end()\n");
    }
    ok = input->End();
  }

  if (!ok) {
    fprintf(stderr, "upb parse error: %s\n", st.error_message());
  }
  ASSERT(ok);

  // Compare the lengths as well as the bytes.  A memcmp() bounded only by the
  // produced length would accept any output that is merely a prefix of the
  // expectation, including an empty output.
  size_t expected_len = strlen(json_expected);
  if (data_sink.Data().size() != expected_len ||
      memcmp(json_expected, data_sink.Data().data(), expected_len) != 0) {
    fprintf(stderr,
            "JSON parse/serialize roundtrip result differs:\n"
            "Original:\n%s\nParsed/Serialized:\n%s\n",
            json_src, data_sink.Data().c_str());
    abort();
  }
}
// Starts with a message in JSON format, parses and directly serializes again,
// and compares the result.
void test_json_roundtrip() {
@ -200,36 +248,14 @@ void test_json_roundtrip() {
for (const TestCase* test_case = kTestRoundtripMessages;
test_case->input != NULL; test_case++) {
const char *expected =
(test_case->expected == EXPECT_SAME) ?
test_case->input :
test_case->expected;
const char *json_src = test_case->input;
const char *json_expected = test_case->expected;
if (json_expected == EXPECT_SAME) {
json_expected = json_src;
}
upb::Status st;
upb::json::Parser parser(&st);
upb::json::Printer printer(serialize_handlers.get());
StringSink data_sink;
parser.ResetOutput(printer.input());
printer.ResetOutput(data_sink.Sink());
bool ok = upb::BufferSource::PutBuffer(json_src, strlen(json_src),
parser.input());
if (!ok) {
fprintf(stderr, "upb parse error: %s\n", st.error_message());
}
ASSERT(ok);
if (memcmp(json_expected,
data_sink.Data().data(),
data_sink.Data().size())) {
fprintf(stderr,
"JSON parse/serialize roundtrip result differs:\n"
"Original:\n%s\nParsed/Serialized:\n%s\n",
json_src, data_sink.Data().c_str());
abort();
for (int i = 0; i < strlen(test_case->input); i++) {
test_json_roundtrip_message(test_case->input, expected,
serialize_handlers.get(), i);
}
}
}

@ -36,11 +36,17 @@
#include <stdlib.h>
#include <string.h>
#include "tests/test_util.h"
#include "tests/upb_test.h"
#ifdef AMALGAMATED
#include "upb.h"
#else // AMALGAMATED
#include "upb/handlers.h"
#include "upb/pb/decoder.h"
#include "upb/pb/varint.int.h"
#include "upb/upb.h"
#endif // !AMALGAMATED
#undef PRINT_FAILURE
#define PRINT_FAILURE(expr) \
@ -62,7 +68,6 @@ uint32_t filter_hash = 0;
double completed;
double total;
double *count;
upb::BufferHandle global_handle;
enum TestMode {
COUNT_ONLY = 1,
@ -525,55 +530,16 @@ void CheckBytesParsed(const upb::pb::Decoder& decoder, size_t ofs) {
ASSERT(ofs <= (decoder.BytesParsed() + MAX_BUFFERED));
}
bool parse(upb::pb::Decoder* decoder, void* subc, const char* buf,
size_t start, size_t end, size_t* ofs, upb::Status* status) {
static bool parse(upb::pb::Decoder* decoder, void* subc, const char* buf,
size_t start, size_t end, size_t* ofs, upb::Status* status) {
CheckBytesParsed(*decoder, *ofs);
upb::BytesSink* s = decoder->input();
start = UPB_MAX(start, *ofs);
if (start <= end) {
size_t len = end - start;
if (filter_hash) {
fprintf(stderr, "Calling parse(%zu) for bytes %zu-%zu of the input\n",
len, start, end);
}
size_t parsed = s->PutBuffer(subc, buf + start, len, &global_handle);
if (filter_hash) {
if (parsed == len) {
fprintf(stderr,
"parse(%zu) = %zu, complete byte count indicates success\n",
len, len);
} else if (parsed > len) {
fprintf(stderr,
"parse(%zu) = %zu, long byte count indicates success and skip"
"of the next %zu bytes\n",
len, parsed, parsed - len);
} else {
fprintf(stderr,
"parse(%zu) = %zu, short byte count indicates failure; "
"last %zu bytes were not consumed\n",
len, parsed, len - parsed);
}
}
if (status->ok() != (parsed >= len)) {
if (status->ok()) {
fprintf(stderr,
"Error: decode function returned short byte count but set no "
"error status\n");
} else {
fprintf(stderr,
"Error: decode function returned complete byte count but set "
"error status\n");
}
fprintf(stderr, "Status: %s, parsed=%zu, len=%zu\n",
status->error_message(), parsed, len);
ASSERT(false);
}
if (!status->ok())
return false;
*ofs += parsed;
bool ret = parse_buffer(decoder->input(), subc, buf, start, end, ofs, status,
filter_hash != 0);
if (ret) {
CheckBytesParsed(*decoder, *ofs);
}
return true;
return ret;
}
#define LINE(x) x "\n"
@ -1148,7 +1114,41 @@ void test_emptyhandlers(bool allowjit) {
upb::reffed_ptr<upb::Handlers> h(upb::Handlers::New(md.get()));
bool ok = h->Freeze(NULL);
ASSERT(ok);
NewMethod(h.get(), allowjit);
upb::reffed_ptr<const upb::pb::DecoderMethod> method =
NewMethod(h.get(), allowjit);
ASSERT(method.get());
// TODO: also test the case where a message has fields, but the fields are
// submessage fields and have no handlers. This also results in a decoder
// method with no field-handling code.
// Ensure that the method can run with empty and non-empty input.
string test_unknown_field_msg =
cat(tag(1, UPB_WIRE_TYPE_VARINT), varint(42),
tag(2, UPB_WIRE_TYPE_DELIMITED), delim("My test data"));
const struct {
const char* data;
size_t length;
} testdata[] = {
{ "", 0 },
{ test_unknown_field_msg.data(), test_unknown_field_msg.size() },
{ NULL, 0 },
};
for (int i = 0; testdata[i].data; i++) {
upb::Status status;
upb::pb::Decoder decoder(method.get(), &status);
upb::Sink sink(global_handlers, &closures[0]);
decoder.ResetOutput(&sink);
upb::BytesSink* input = decoder.input();
void* subc;
ASSERT(input->Start(0, &subc));
size_t ofs = 0;
ASSERT(parse_buffer(input, subc,
testdata[i].data, 0, testdata[i].length,
&ofs, &status, false));
ASSERT(ofs == testdata[i].length);
ASSERT(input->End());
}
}
void run_tests(bool use_jit) {
@ -1166,7 +1166,7 @@ void run_tests(bool use_jit) {
test_invalid();
test_valid();
test_emptyhandlers(false);
test_emptyhandlers(use_jit);
}
void run_test_suite() {

@ -15,7 +15,6 @@
#include <set>
#include <string>
#include <vector>
#include "tests/test_util.h"
#include "tests/upb_test.h"
#include "upb/table.int.h"
@ -214,7 +213,8 @@ void test_inttable(int32_t *keys, uint16_t num_entries, const char *desc) {
x += (uintptr_t)ok;
}
double total = get_usertime() - before;
printf("%s/s\n", eng(i/total, 3, false));
printf("%ld/s\n", (long)(i/total));
double upb_seq_i = i / 100; // For later percentage calcuation.
printf("upb_inttable(rand): ");
fflush(stdout);
@ -227,7 +227,8 @@ void test_inttable(int32_t *keys, uint16_t num_entries, const char *desc) {
x += (uintptr_t)ok;
}
total = get_usertime() - before;
printf("%s/s\n", eng(i/total, 3, false));
printf("%ld/s\n", (long)(i/total));
double upb_rand_i = i / 100; // For later percentage calculation.
printf("std::map<int32_t, int32_t>(seq): ");
fflush(stdout);
@ -238,7 +239,7 @@ void test_inttable(int32_t *keys, uint16_t num_entries, const char *desc) {
x += m[key];
}
total = get_usertime() - before;
printf("%s/s\n", eng(i/total, 3, false));
printf("%ld/s (%0.1f%% of upb)\n", (long)(i/total), i / upb_seq_i);
printf("std::map<int32_t, int32_t>(rand): ");
fflush(stdout);
@ -249,7 +250,7 @@ void test_inttable(int32_t *keys, uint16_t num_entries, const char *desc) {
x += m[key];
}
total = get_usertime() - before;
printf("%s/s\n", eng(i/total, 3, false));
printf("%ld/s (%0.1f%% of upb)\n", (long)(i/total), i / upb_rand_i);
printf("__gnu_cxx::hash_map<uint32_t, uint32_t>(seq): ");
fflush(stdout);
@ -260,7 +261,7 @@ void test_inttable(int32_t *keys, uint16_t num_entries, const char *desc) {
x += hm[key];
}
total = get_usertime() - before;
printf("%s/s\n", eng(i/total, 3, false));
printf("%ld/s (%0.1f%% of upb)\n", (long)(i/total), i / upb_seq_i);
printf("__gnu_cxx::hash_map<uint32_t, uint32_t>(rand): ");
fflush(stdout);
@ -272,7 +273,7 @@ void test_inttable(int32_t *keys, uint16_t num_entries, const char *desc) {
}
total = get_usertime() - before;
if (x == INT_MAX) abort();
printf("%s/s\n\n", eng(i/total, 3, false));
printf("%ld/s (%0.1f%% of upb)\n\n", (long)(i/total), i / upb_rand_i);
upb_inttable_uninit(&table);
delete rand_order;
}
@ -308,7 +309,7 @@ extern "C" {
int run_tests(int argc, char *argv[]) {
for (int i = 1; i < argc; i++) {
if (strcmp(argv[i], "--benchmark") == 0) benchmark = true;
if (strcmp(argv[i], "benchmark") == 0) benchmark = true;
}
vector<std::string> keys;

@ -1,53 +1,89 @@
/* Function for printing numbers using si prefixes (k, M, G, etc.).
* From http://www.cs.tut.fi/~jkorpela/c/eng.html */
/*
* upb - a minimalist implementation of protocol buffers.
*
* Copyright (c) 2014 Google Inc. See LICENSE for details.
*
* Common functionality for tests.
*/
#define PREFIX_START (-24)
/* Smallest power of then for which there is a prefix defined.
If the set of prefixes will be extended, change this constant
and update the table "prefix". */
#ifndef UPB_TEST_UTIL_H_
#define UPB_TEST_UTIL_H_
#include <stdio.h>
#include <math.h>
#include "tests/upb_test.h"
#include "upb/sink.h"
static char *eng(double value, int digits, int numeric)
{
static const char *prefix[] = {
"y", "z", "a", "f", "p", "n", "u", "m", "",
"k", "M", "G", "T", "P", "E", "Z", "Y"
};
#define PREFIX_END (PREFIX_START+\
(int)((sizeof(prefix)/sizeof(char *)-1)*3))
int expof10;
static char result[100];
char *res = result;
if (value < 0.)
{
*res++ = '-';
value = -value;
}
expof10 = (int) log10(value);
if(expof10 > 0)
expof10 = (expof10/3)*3;
else
expof10 = (-expof10+3)/3*(-3);
value *= pow(10,-expof10);
if (value >= 1000.)
{ value /= 1000.0; expof10 += 3; }
else if(value >= 100.0)
digits -= 2;
else if(value >= 10.0)
digits -= 1;
if(numeric || (expof10 < PREFIX_START) ||
(expof10 > PREFIX_END))
sprintf(res, "%.*fe%d", digits-1, value, expof10);
else
sprintf(res, "%.*f %s", digits-1, value,
prefix[(expof10-PREFIX_START)/3]);
return result;
upb::BufferHandle global_handle;
// Puts a region of the given buffer [start, end) into the given sink (which
// probably represents a parser).  Can gracefully handle the case where the
// parser returns a "parsed" length that is less or greater than the input
// buffer length, and tracks the overall parse offset in *ofs.
//
// If *ofs is already beyond "start", the region begins at *ofs instead.  If
// that leaves start > end, the sink is not called and true is returned.
//
// Returns false if "status" holds an error after the sink call.  A byte count
// that contradicts the status (short count without an error, or a complete
// count with an error) fails the test via ASSERT.
//
// Pass verbose=true to print detailed diagnostics to stderr.
bool parse_buffer(upb::BytesSink* sink, void* subc, const char* buf,
                  size_t start, size_t end, size_t* ofs,
                  upb::Status* status, bool verbose) {
  start = UPB_MAX(start, *ofs);

  if (start <= end) {
    size_t len = end - start;

    // Copy buffer into a separate, temporary buffer.
    // This is necessary to verify that the parser is not erroneously
    // reading outside the specified bounds.
    //
    // malloc(0) is allowed to return NULL, which would trip the assert below
    // for an empty region, so never request fewer than one byte.
    char *buf2 = (char*)malloc(len ? len : 1);
    assert(buf2);
    memcpy(buf2, buf + start, len);

    if (verbose) {
      fprintf(stderr, "Calling parse(%zu) for bytes %zu-%zu of the input\n",
              len, start, end);
    }

    size_t parsed = sink->PutBuffer(subc, buf2, len, &global_handle);
    free(buf2);

    if (verbose) {
      if (parsed == len) {
        fprintf(stderr,
                "parse(%zu) = %zu, complete byte count indicates success\n",
                len, len);
      } else if (parsed > len) {
        fprintf(stderr,
                "parse(%zu) = %zu, long byte count indicates success and skip "
                "of the next %zu bytes\n",
                len, parsed, parsed - len);
      } else {
        fprintf(stderr,
                "parse(%zu) = %zu, short byte count indicates failure; "
                "last %zu bytes were not consumed\n",
                len, parsed, len - parsed);
      }
    }

    // The status and the returned byte count must agree with each other.
    if (status->ok() != (parsed >= len)) {
      if (status->ok()) {
        fprintf(stderr,
                "Error: decode function returned short byte count but set no "
                "error status\n");
      } else {
        fprintf(stderr,
                "Error: decode function returned complete byte count but set "
                "error status\n");
      }
      fprintf(stderr, "Status: %s, parsed=%zu, len=%zu\n",
              status->error_message(), parsed, len);
      ASSERT(false);
    }

    if (!status->ok())
      return false;

    // "parsed" may exceed len; the next call then starts at the later offset.
    *ofs += parsed;
  }
  return true;
}
#endif

File diff suppressed because it is too large Load Diff

@ -69,15 +69,24 @@ UPB_DEFINE_STRUCT0(upb_json_parser,
int parser_stack[UPB_JSON_MAX_DEPTH];
int parser_top;
// A pointer to the beginning of whatever text we are currently parsing.
const char *text_begin;
// The handle for the current buffer.
const upb_bufhandle *handle;
// We have to accumulate text for member names, integers, unicode escapes, and
// base64 partial results.
// Accumulate buffer. See details in parser.rl.
const char *accumulated;
size_t accumulated_len;
// TODO: add members and code for allocating a buffer when necessary (when the
// member spans input buffers or contains escapes).
char *accumulate_buf;
size_t accumulate_buf_size;
// Multi-part text data. See details in parser.rl.
int multipart_state;
upb_selector_t string_selector;
// Input capture. See details in parser.rl.
const char *capture;
// Intermediate result of parsing a unicode escape sequence.
uint32_t digit;
));
UPB_BEGIN_EXTERN_C

@ -33,6 +33,9 @@
#define PARSER_CHECK_RETURN(x) if (!(x)) return false
// Used to signal that a capture has been suspended.
static char suspend_capture;
static upb_selector_t getsel_for_handlertype(upb_json_parser *p,
upb_handlertype_t type) {
upb_selector_t sel;
@ -46,41 +49,6 @@ static upb_selector_t parser_getsel(upb_json_parser *p) {
p, upb_handlers_getprimitivehandlertype(p->top->f));
}
static void start_member(upb_json_parser *p) {
assert(!p->top->f);
assert(!p->accumulated);
p->accumulated_len = 0;
}
static bool end_member(upb_json_parser *p) {
// TODO(haberman): support keys that span buffers or have escape sequences.
assert(!p->top->f);
assert(p->accumulated);
const upb_fielddef *f =
upb_msgdef_ntof(p->top->m, p->accumulated, p->accumulated_len);
if (!f) {
// TODO(haberman): Ignore unknown fields if requested/configured to do so.
upb_status_seterrf(p->status, "No such field: %.*s\n",
(int)p->accumulated_len, p->accumulated);
return false;
}
p->top->f = f;
p->accumulated = NULL;
return true;
}
static void start_object(upb_json_parser *p) {
upb_sink_startmsg(&p->top->sink);
}
static void end_object(upb_json_parser *p) {
upb_status status;
upb_sink_endmsg(&p->top->sink, &status);
}
static bool check_stack(upb_json_parser *p) {
if ((p->top + 1) == p->limit) {
upb_status_seterrmsg(p->status, "Nesting too deep");
@ -90,83 +58,28 @@ static bool check_stack(upb_json_parser *p) {
return true;
}
static bool start_subobject(upb_json_parser *p) {
assert(p->top->f);
if (!upb_fielddef_issubmsg(p->top->f)) {
upb_status_seterrf(p->status,
"Object specified for non-message/group field: %s",
upb_fielddef_name(p->top->f));
return false;
}
if (!check_stack(p)) return false;
upb_jsonparser_frame *inner = p->top + 1;
upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSUBMSG);
upb_sink_startsubmsg(&p->top->sink, sel, &inner->sink);
inner->m = upb_fielddef_msgsubdef(p->top->f);
inner->f = NULL;
p->top = inner;
// Overflow-checked size_t addition.  On success stores a + b in *c and
// returns true; if the sum would exceed SIZE_MAX, returns false and leaves *c
// untouched.
//
// GCC/Clang offer overflow-checking built-ins that could replace this if there
// were ever a measurable performance benefit.
static bool checked_add(size_t a, size_t b, size_t *c) {
  const size_t headroom = SIZE_MAX - a;
  if (b <= headroom) {
    *c = a + b;
    return true;
  }
  return false;
}
static void end_subobject(upb_json_parser *p) {
p->top--;
upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSUBMSG);
upb_sink_endsubmsg(&p->top->sink, sel);
}
static bool start_array(upb_json_parser *p) {
assert(p->top->f);
if (!upb_fielddef_isseq(p->top->f)) {
upb_status_seterrf(p->status,
"Array specified for non-repeated field: %s",
upb_fielddef_name(p->top->f));
return false;
static size_t saturating_multiply(size_t a, size_t b) {
// size_t is unsigned, so this is defined behavior even on overflow.
size_t ret = a * b;
if (b != 0 && ret / b != a) {
ret = SIZE_MAX;
}
if (!check_stack(p)) return false;
upb_jsonparser_frame *inner = p->top + 1;
upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSEQ);
upb_sink_startseq(&p->top->sink, sel, &inner->sink);
inner->m = p->top->m;
inner->f = p->top->f;
p->top = inner;
return true;
return ret;
}
static void end_array(upb_json_parser *p) {
assert(p->top > p->stack);
p->top--;
upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSEQ);
upb_sink_endseq(&p->top->sink, sel);
}
/* Base64 decoding ************************************************************/
static void clear_member(upb_json_parser *p) { p->top->f = NULL; }
static bool parser_putbool(upb_json_parser *p, bool val) {
if (upb_fielddef_type(p->top->f) != UPB_TYPE_BOOL) {
upb_status_seterrf(p->status,
"Boolean value specified for non-bool field: %s",
upb_fielddef_name(p->top->f));
return false;
}
bool ok = upb_sink_putbool(&p->top->sink, parser_getsel(p), val);
UPB_ASSERT_VAR(ok, ok);
return true;
}
static void start_text(upb_json_parser *p, const char *ptr) {
p->text_begin = ptr;
}
// TODO(haberman): make this streaming.
static const signed char b64table[] = {
-1, -1, -1, -1, -1, -1, -1, -1,
@ -286,89 +199,323 @@ badpadding:
return false;
}
static bool end_text(upb_json_parser *p, const char *ptr, bool is_num) {
assert(!p->accumulated); // TODO: handle this case.
p->accumulated = p->text_begin;
p->accumulated_len = ptr - p->text_begin;
if (p->top->f && upb_fielddef_isstring(p->top->f)) {
// This is a string field (as opposed to a member name).
upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STRING);
if (upb_fielddef_type(p->top->f) == UPB_TYPE_BYTES) {
PARSER_CHECK_RETURN(base64_push(p, sel, p->accumulated,
p->accumulated_len));
} else {
upb_sink_putstring(&p->top->sink, sel, p->accumulated, p->accumulated_len, NULL);
/* Accumulate buffer **********************************************************/
// Functionality for accumulating a buffer.
//
// Some parts of the parser need an entire value as a contiguous string. For
// example, to look up a member name in a hash table, or to turn a string into
// a number, the relevant library routines need the input string to be in
// contiguous memory, even if the value spanned two or more buffers in the
// input. These routines handle that.
//
// In the common case we can just point to the input buffer to get this
// contiguous string and avoid any actual copy. So we optimistically begin
// this way. But there are a few cases where we must instead copy into a
// separate buffer:
//
// 1. The string was not contiguous in the input (it spanned buffers).
//
// 2. The string included escape sequences that need to be interpreted to get
// the true value in a contiguous buffer.
// Debug-only sanity check: asserts that no accumulated data is pending, i.e.
// that p->accumulated is NULL and p->accumulated_len is zero.
static void assert_accumulate_empty(upb_json_parser *p) {
// NOTE(review): presumably keeps "p" from being reported as unused when the
// assert() calls below compile to nothing -- confirm against UPB_UNUSED.
UPB_UNUSED(p);
assert(p->accumulated == NULL);
assert(p->accumulated_len == 0);
}
// Forgets any accumulated data: afterwards the accumulate pointer is NULL and
// the accumulated length is zero.  Does not free or shrink accumulate_buf.
static void accumulate_clear(upb_json_parser *p) {
  p->accumulated_len = 0;
  p->accumulated = NULL;
}
// Helper for accumulate_append(): grows p->accumulate_buf so that it can hold
// at least "need" bytes.  The capacity starts at no less than 128 bytes and
// doubles (saturating at SIZE_MAX) until it is large enough.
//
// Returns false and sets an error on p->status if the allocation fails; the
// existing buffer and its recorded size are left unchanged in that case.
static bool accumulate_realloc(upb_json_parser *p, size_t need) {
  size_t capacity = UPB_MAX(p->accumulate_buf_size, 128);
  for (; capacity < need; capacity = saturating_multiply(capacity, 2)) {
    // Keep doubling until the request fits.
  }

  void *grown = realloc(p->accumulate_buf, capacity);
  if (grown == NULL) {
    upb_status_seterrmsg(p->status, "Out of memory allocating buffer.");
    return false;
  }

  p->accumulate_buf_size = capacity;
  p->accumulate_buf = grown;
  return true;
}
// Logically appends the given data to the append buffer.
// If "can_alias" is true, we will try to avoid actually copying, but the buffer
// must be valid until the next accumulate_append() call (if any).
static bool accumulate_append(upb_json_parser *p, const char *buf, size_t len,
bool can_alias) {
if (!p->accumulated && can_alias) {
p->accumulated = buf;
p->accumulated_len = len;
return true;
}
if (p->accumulate_buf_size - p->accumulated_len < len) {
size_t need;
if (!checked_add(p->accumulated_len, len, &need)) {
upb_status_seterrmsg(p->status, "Integer overflow.");
return false;
}
p->accumulated = NULL;
} else if (p->top->f &&
upb_fielddef_type(p->top->f) == UPB_TYPE_ENUM &&
!is_num) {
// Enum case: resolve enum symbolic name to integer value.
const upb_enumdef *enumdef =
(const upb_enumdef*)upb_fielddef_subdef(p->top->f);
int32_t int_val = 0;
if (upb_enumdef_ntoi(enumdef, p->accumulated, p->accumulated_len,
&int_val)) {
upb_selector_t sel = parser_getsel(p);
upb_sink_putint32(&p->top->sink, sel, int_val);
} else {
upb_status_seterrmsg(p->status, "Enum value name unknown");
if (!accumulate_realloc(p, need)) {
return false;
}
p->accumulated = NULL;
}
if (p->accumulated != p->accumulate_buf) {
memcpy(p->accumulate_buf, p->accumulated, p->accumulated_len);
p->accumulated = p->accumulate_buf;
}
memcpy(p->accumulate_buf + p->accumulated_len, buf, len);
p->accumulated_len += len;
return true;
}
static bool start_stringval(upb_json_parser *p) {
assert(p->top->f);
// Returns a pointer to the data accumulated since the last accumulate_clear()
// call, and writes the length to *len.  This will point either to the input
// buffer or to a temporary accumulate buffer.
//
// Something must have been accumulated before calling this (asserted).
static const char *accumulate_getptr(upb_json_parser *p, size_t *len) {
  const char *data = p->accumulated;
  assert(data);
  *len = p->accumulated_len;
  return data;
}
if (upb_fielddef_isstring(p->top->f)) {
if (!check_stack(p)) return false;
// Start a new parser frame: parser frames correspond one-to-one with
// handler frames, and string events occur in a sub-frame.
upb_jsonparser_frame *inner = p->top + 1;
upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSTR);
upb_sink_startstr(&p->top->sink, sel, 0, &inner->sink);
inner->m = p->top->m;
inner->f = p->top->f;
p->top = inner;
/* Multi-part text data *******************************************************/
// When we have text data in the input, it can often come in multiple segments.
// For example, there may be some raw string data followed by an escape
// sequence. The two segments are processed with different logic. Also buffer
// seams in the input can cause multiple segments.
//
// As we see segments, there are two main cases for how we want to process them:
//
// 1. we want to push the captured input directly to string handlers.
//
// 2. we need to accumulate all the parts into a contiguous buffer for further
// processing (field name lookup, string->number conversion, etc).
// The possible values of p->multipart_state.
enum {
  // No multipart data is currently being processed.
  MULTIPART_INACTIVE = 0,

  // Multipart data is being collected into one contiguous buffer.
  MULTIPART_ACCUMULATE,

  // Each part of the multipart data is pushed straight to the current string
  // handlers as it arrives.
  MULTIPART_PUSHEAGERLY
};
return true;
} else if (upb_fielddef_type(p->top->f) == UPB_TYPE_ENUM) {
// Do nothing -- symbolic enum names in quotes remain in the
// current parser frame.
// Start a multi-part text value where we accumulate the data for processing at
// the end.
//
// May only be called while no multipart value is active and nothing has been
// accumulated yet; both conditions are asserted.
static void multipart_startaccum(upb_json_parser *p) {
assert_accumulate_empty(p);
assert(p->multipart_state == MULTIPART_INACTIVE);
p->multipart_state = MULTIPART_ACCUMULATE;
}
// Begins a multi-part text value whose parts are pushed immediately, as they
// are seen, to the string handler identified by "sel".
//
// May only be called while no multipart value is active and nothing has been
// accumulated yet; both conditions are asserted.
static void multipart_start(upb_json_parser *p, upb_selector_t sel) {
  assert_accumulate_empty(p);
  assert(p->multipart_state == MULTIPART_INACTIVE);
  p->string_selector = sel;
  p->multipart_state = MULTIPART_PUSHEAGERLY;
}
// Handles one segment [buf, buf + len) of the current multi-part text value
// according to p->multipart_state:
//
//   * MULTIPART_ACCUMULATE: appends the segment to the accumulate buffer.
//   * MULTIPART_PUSHEAGERLY: pushes the segment to the current string handler,
//     passing the input buffer handle only when "can_alias" is true.
//   * MULTIPART_INACTIVE: reports an internal error.
//
// Returns false (with p->status set) on error, true otherwise.
static bool multipart_text(upb_json_parser *p, const char *buf, size_t len,
                           bool can_alias) {
  const int state = p->multipart_state;

  if (state == MULTIPART_INACTIVE) {
    upb_status_seterrmsg(
        p->status, "Internal error: unexpected state MULTIPART_INACTIVE");
    return false;
  }

  if (state == MULTIPART_ACCUMULATE) {
    return accumulate_append(p, buf, len, can_alias);
  }

  if (state == MULTIPART_PUSHEAGERLY) {
    const upb_bufhandle *handle = NULL;
    if (can_alias) {
      handle = p->handle;
    }
    upb_sink_putstring(&p->top->sink, p->string_selector, buf, len, handle);
  }

  return true;
}
// Finishes the current multi-part text value and returns to the inactive
// state.
//
// Warning: this also clears the accumulate buffer, so read the accumulated
// contents before calling it.
static void multipart_end(upb_json_parser *p) {
  assert(p->multipart_state != MULTIPART_INACTIVE);
  accumulate_clear(p);
  p->multipart_state = MULTIPART_INACTIVE;
}
/* Input capture **************************************************************/
// Functionality for capturing a region of the input as text. Gracefully
// handles the case where a buffer seam occurs in the middle of the captured
// region.
// Starts capturing input text at "ptr".
//
// A multipart value must already be active and no capture may be in progress;
// both conditions are asserted.
static void capture_begin(upb_json_parser *p, const char *ptr) {
assert(p->multipart_state != MULTIPART_INACTIVE);
assert(p->capture == NULL);
p->capture = ptr;
}
static bool capture_end(upb_json_parser *p, const char *ptr) {
assert(p->capture);
if (multipart_text(p, p->capture, ptr - p->capture, true)) {
p->capture = NULL;
return true;
} else {
upb_status_seterrf(p->status,
"String specified for non-string/non-enum field: %s",
upb_fielddef_name(p->top->f));
return false;
}
}
// Called at the end of every input buffer (i.e. at a buffer seam).  When a
// capture is in progress, the not-yet-processed part of the captured region,
// [p->capture, *ptr), is handed to multipart_text() without aliasing.
//
// On failure *ptr is moved back to the start of the capture, because the
// captured bytes could not be preserved.
static void capture_suspend(upb_json_parser *p, const char **ptr) {
  if (p->capture == NULL) {
    return;
  }

  if (!multipart_text(p, p->capture, *ptr - p->capture, false)) {
    // Need to back up the pointer to the beginning of the capture, since
    // we were not able to actually preserve it.
    *ptr = p->capture;
    return;
  }

  // Mark the capture as suspended so that it resumes at the start of the next
  // buffer.  A sentinel address is stored rather than *ptr: there is no
  // guarantee that *ptr is still valid when we resume (if the underlying
  // memory is freed, using the pointer at all, even to compare it to NULL, is
  // likely undefined behavior).
  p->capture = &suspend_capture;
}
static void end_stringval(upb_json_parser *p) {
if (upb_fielddef_isstring(p->top->f)) {
upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSTR);
upb_sink_endstr(&p->top->sink, sel);
p->top--;
// Resumes a capture that was suspended at the previous buffer seam, restarting
// it at "ptr".  Does nothing when no capture is pending.
static void capture_resume(upb_json_parser *p, const char *ptr) {
  if (!p->capture) {
    return;
  }
  // A pending capture at this point must be the suspended marker.
  assert(p->capture == &suspend_capture);
  p->capture = ptr;
}
/* Callbacks from the parser **************************************************/
// These are the functions called directly from the parser itself.
// We define these in the same order as their declarations in the parser.
// Maps the character that follows a backslash in a JSON escape sequence
// (r, t, n, f, b, /, ", \) to the character it denotes.  Any other input is a
// programming error: it asserts, and returns 'x' when asserts are disabled.
static char escape_char(char in) {
  // Escape letters and, at the same index, the characters they stand for.
  static const char letters[] = "rtnfb/\"\\";
  static const char values[] = "\r\t\n\f\b/\"\\";

  // sizeof - 1 leaves the terminating NUL out of the search.
  for (size_t i = 0; i < sizeof(letters) - 1; i++) {
    if (letters[i] == in) {
      return values[i];
    }
  }

  assert(0);
  return 'x';
}
// Parser callback for a single-character escape: translates the character at
// *ptr with escape_char() and emits the result as one byte of multipart text.
// The byte lives on the stack, so it is passed as non-aliasable.
static bool escape(upb_json_parser *p, const char *ptr) {
  char unescaped = escape_char(*ptr);
  return multipart_text(p, &unescaped, sizeof(unescaped), false);
}
// Parser callback: resets p->digit, the intermediate value that hexdigit()
// shifts hex digits into, to zero.
static void start_hex(upb_json_parser *p) {
p->digit = 0;
}
// Parser callback: folds the hex digit at *ptr into p->digit, shifting the
// previous value left by four bits.  Accepts 0-9, a-f and A-F; anything else
// is asserted against.
static void hexdigit(upb_json_parser *p, const char *ptr) {
  const char c = *ptr;
  uint32_t nibble;

  if (c >= '0' && c <= '9') {
    nibble = (c - '0');
  } else if (c >= 'a' && c <= 'f') {
    nibble = ((c - 'a') + 10);
  } else {
    assert(c >= 'A' && c <= 'F');
    nibble = ((c - 'A') + 10);
  }

  p->digit = (p->digit << 4) + nibble;
}
// Parser callback: encodes the code point collected in p->digit as UTF-8 and
// emits it as multipart text.  The bytes are in a stack buffer, so they are
// passed as non-aliasable.  Returns the result of multipart_text().
static bool end_hex(upb_json_parser *p) {
  const uint32_t cp = p->digit;

  // Only \u0000 -- \uFFFF is supported, so three bytes are always enough.
  char utf8[3];
  int length;

  if (cp <= 0x7F) {
    // One byte: 0xxxxxxx.
    utf8[0] = cp;
    length = 1;
  } else if (cp <= 0x07FF) {
    // Two bytes: 110xxxxx 10xxxxxx.
    utf8[0] = ((cp >> 6) & 0x1F) | 0xC0;
    utf8[1] = (cp & 0x3F) | 0x80;
    length = 2;
  } else /* cp <= 0xFFFF */ {
    // Three bytes: 1110xxxx 10xxxxxx 10xxxxxx.
    utf8[0] = ((cp >> 12) & 0x0F) | 0xE0;
    utf8[1] = ((cp >> 6) & 0x3F) | 0x80;
    utf8[2] = (cp & 0x3F) | 0x80;
    length = 3;
  }

  // TODO(haberman): Handle high surrogates: if codepoint is a high surrogate
  // we have to wait for the next escape to get the full code point).

  return multipart_text(p, utf8, length, false);
}
// Parser callback: begins capturing input text at "ptr" by delegating to
// capture_begin().
static void start_text(upb_json_parser *p, const char *ptr) {
capture_begin(p, ptr);
}
// Parser callback: ends the current capture at "ptr" by delegating to
// capture_end(), and returns its result.
static bool end_text(upb_json_parser *p, const char *ptr) {
return capture_end(p, ptr);
}
static void start_number(upb_json_parser *p, const char *ptr) {
start_text(p, ptr);
assert(p->accumulated == NULL);
multipart_startaccum(p);
capture_begin(p, ptr);
}
static void end_number(upb_json_parser *p, const char *ptr) {
end_text(p, ptr, true);
const char *myend = p->accumulated + p->accumulated_len;
static bool end_number(upb_json_parser *p, const char *ptr) {
if (!capture_end(p, ptr)) {
return false;
}
size_t len;
const char *buf = accumulate_getptr(p, &len);
const char *myend = buf + len;
char *end;
switch (upb_fielddef_type(p->top->f)) {
@ -376,7 +523,7 @@ static void end_number(upb_json_parser *p, const char *ptr) {
case UPB_TYPE_INT32: {
long val = strtol(p->accumulated, &end, 0);
if (val > INT32_MAX || val < INT32_MIN || errno == ERANGE || end != myend)
assert(false);
goto err;
else
upb_sink_putint32(&p->top->sink, parser_getsel(p), val);
break;
@ -384,7 +531,7 @@ static void end_number(upb_json_parser *p, const char *ptr) {
case UPB_TYPE_INT64: {
long long val = strtoll(p->accumulated, &end, 0);
if (val > INT64_MAX || val < INT64_MIN || errno == ERANGE || end != myend)
assert(false);
goto err;
else
upb_sink_putint64(&p->top->sink, parser_getsel(p), val);
break;
@ -392,7 +539,7 @@ static void end_number(upb_json_parser *p, const char *ptr) {
case UPB_TYPE_UINT32: {
unsigned long val = strtoul(p->accumulated, &end, 0);
if (val > UINT32_MAX || errno == ERANGE || end != myend)
assert(false);
goto err;
else
upb_sink_putuint32(&p->top->sink, parser_getsel(p), val);
break;
@ -400,7 +547,7 @@ static void end_number(upb_json_parser *p, const char *ptr) {
case UPB_TYPE_UINT64: {
unsigned long long val = strtoull(p->accumulated, &end, 0);
if (val > UINT64_MAX || errno == ERANGE || end != myend)
assert(false);
goto err;
else
upb_sink_putuint64(&p->top->sink, parser_getsel(p), val);
break;
@ -408,7 +555,7 @@ static void end_number(upb_json_parser *p, const char *ptr) {
case UPB_TYPE_DOUBLE: {
double val = strtod(p->accumulated, &end);
if (errno == ERANGE || end != myend)
assert(false);
goto err;
else
upb_sink_putdouble(&p->top->sink, parser_getsel(p), val);
break;
@ -416,7 +563,7 @@ static void end_number(upb_json_parser *p, const char *ptr) {
case UPB_TYPE_FLOAT: {
float val = strtof(p->accumulated, &end);
if (errno == ERANGE || end != myend)
assert(false);
goto err;
else
upb_sink_putfloat(&p->top->sink, parser_getsel(p), val);
break;
@ -425,84 +572,236 @@ static void end_number(upb_json_parser *p, const char *ptr) {
assert(false);
}
p->accumulated = NULL;
multipart_end(p);
return true;
err:
upb_status_seterrf(p->status, "error parsing number: %.*s", buf, len);
multipart_end(p);
return false;
}
static char escape_char(char in) {
switch (in) {
case 'r': return '\r';
case 't': return '\t';
case 'n': return '\n';
case 'f': return '\f';
case 'b': return '\b';
case '/': return '/';
case '"': return '"';
case '\\': return '\\';
// Delivers a JSON "true"/"false" literal to the sink of the current frame.
//
// Returns true when the current field is a bool field.  Otherwise records an
// error naming the field on p->status and returns false.
static bool parser_putbool(upb_json_parser *p, bool val) {
  if (upb_fielddef_type(p->top->f) == UPB_TYPE_BOOL) {
    // The sink's result is only inspected through UPB_ASSERT_VAR; it does not
    // influence this function's return value.
    bool ok = upb_sink_putbool(&p->top->sink, parser_getsel(p), val);
    UPB_ASSERT_VAR(ok, ok);
    return true;
  }

  upb_status_seterrf(p->status,
                     "Boolean value specified for non-bool field: %s",
                     upb_fielddef_name(p->top->f));
  return false;
}
// Called when a quoted JSON string begins in value position.
//
// * string/bytes fields: pushes a new parser frame (string events occur in a
//   sub-frame) and starts multipart handling of the text.
// * enum fields: stays in the current frame and accumulates the symbolic name.
// * anything else: records an error on p->status.
//
// Returns false on error (wrong field type, or check_stack() failing).
static bool start_stringval(upb_json_parser *p) {
  assert(p->top->f);

  if (!upb_fielddef_isstring(p->top->f)) {
    if (upb_fielddef_type(p->top->f) == UPB_TYPE_ENUM) {
      // No need to push a frame -- symbolic enum names in quotes remain in
      // the current parser frame.
      //
      // Enum string values must accumulate so we can look up the value in a
      // table once it is complete.
      multipart_startaccum(p);
      return true;
    }

    upb_status_seterrf(p->status,
                       "String specified for non-string/non-enum field: %s",
                       upb_fielddef_name(p->top->f));
    return false;
  }

  if (!check_stack(p)) return false;

  // Start a new parser frame: parser frames correspond one-to-one with
  // handler frames, and string events occur in a sub-frame.
  upb_jsonparser_frame *str_frame = p->top + 1;
  upb_selector_t start_sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSTR);
  upb_sink_startstr(&p->top->sink, start_sel, 0, &str_frame->sink);
  str_frame->m = p->top->m;
  str_frame->f = p->top->f;
  p->top = str_frame;

  if (upb_fielddef_type(p->top->f) != UPB_TYPE_STRING) {
    // BYTES fields are accumulated in full first, because our base64 decoder
    // is not streaming.
    //
    // TODO(haberman): make base64 decoding streaming also.
    multipart_startaccum(p);
    return true;
  }

  // For STRING fields we push data directly to the handlers as it is parsed.
  multipart_start(p, getsel_for_handlertype(p, UPB_HANDLER_STRING));
  return true;
}
// Called when the closing quote of a JSON string value has been seen.
//
// * bytes fields: base64-decodes the accumulated text and pushes it to the
//   string handler, then ends the string like a STRING field.
// * string fields: emits ENDSTR and pops the string sub-frame.
// * enum fields: resolves the accumulated symbolic name to its integer value
//   and emits it; an unknown name is reported on p->status.
//
// Returns true on success.  Returns false if base64 decoding fails, the enum
// name is unknown, or the field has an unexpected type.
static bool end_stringval(upb_json_parser *p) {
  bool ok = true;

  switch (upb_fielddef_type(p->top->f)) {
    case UPB_TYPE_BYTES:
      if (!base64_push(p, getsel_for_handlertype(p, UPB_HANDLER_STRING),
                       p->accumulated, p->accumulated_len)) {
        return false;
      }
      // Fall through.

    case UPB_TYPE_STRING: {
      upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSTR);
      upb_sink_endstr(&p->top->sink, sel);
      p->top--;
      break;
    }

    case UPB_TYPE_ENUM: {
      // Resolve enum symbolic name to integer value.
      const upb_enumdef *enumdef =
          (const upb_enumdef*)upb_fielddef_subdef(p->top->f);

      size_t len;
      const char *buf = accumulate_getptr(p, &len);

      int32_t int_val = 0;
      ok = upb_enumdef_ntoi(enumdef, buf, len, &int_val);

      if (ok) {
        upb_selector_t sel = parser_getsel(p);
        upb_sink_putint32(&p->top->sink, sel, int_val);
      } else {
        // "%.*s" consumes an int precision; passing a size_t through varargs
        // is undefined behavior, so cast explicitly (as end_member does).
        upb_status_seterrf(p->status, "Enum value unknown: '%.*s'", (int)len,
                           buf);
      }

      break;
    }

    default:
      // start_stringval() rejects every other field type, so reaching this
      // point indicates a parser bug.  Still report it when asserts are
      // compiled out.
      assert(false);
      upb_status_seterrmsg(p->status, "Internal error in JSON decoder");
      ok = false;
      break;
  }

  multipart_end(p);
  return ok;
}
static void escape(upb_json_parser *p, const char *ptr) {
char ch = escape_char(*ptr);
upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STRING);
upb_sink_putstring(&p->top->sink, sel, &ch, 1, NULL);
// Called when the quoted name of an object member begins.  No field may be
// selected in the current frame yet; the name is accumulated so that
// end_member() can look it up once it is complete.
static void start_member(upb_json_parser *p) {
  assert(!p->top->f);

  multipart_startaccum(p);
}
static uint8_t hexdigit(char ch) {
if (ch >= '0' && ch <= '9') {
return ch - '0';
} else if (ch >= 'a' && ch <= 'f') {
return ch - 'a' + 10;
} else {
assert(ch >= 'A' && ch <= 'F');
return ch - 'A' + 10;
// Called when the quoted name of an object member is complete.  Looks the
// accumulated name up in the current frame's message definition and, when
// found, makes that field the current one.
//
// Returns false and records "No such field" on p->status if the message has
// no field with that name.
static bool end_member(upb_json_parser *p) {
  assert(!p->top->f);

  size_t name_len;
  const char *name = accumulate_getptr(p, &name_len);
  const upb_fielddef *field = upb_msgdef_ntof(p->top->m, name, name_len);

  if (field) {
    p->top->f = field;
    multipart_end(p);
    return true;
  }

  // TODO(haberman): Ignore unknown fields if requested/configured to do so.
  upb_status_seterrf(p->status, "No such field: %.*s\n", (int)name_len, name);
  return false;
}
static void start_hex(upb_json_parser *p, const char *ptr) {
start_text(p, ptr);
// Deselects the current field of the top frame, so that the next member name
// starts from a frame with no field (start_member() asserts this).
static void clear_member(upb_json_parser *p) {
  p->top->f = NULL;
}
// Called when a JSON object begins in value position.  The current field must
// be a submessage field; a new parser frame is pushed for the submessage, with
// no field selected yet.
//
// Returns false (with an error on p->status) if the current field is not a
// message/group field, or if check_stack() fails.
static bool start_subobject(upb_json_parser *p) {
  assert(p->top->f);

  if (upb_fielddef_issubmsg(p->top->f)) {
    if (!check_stack(p)) return false;

    upb_jsonparser_frame *child = p->top + 1;
    upb_selector_t start_sel =
        getsel_for_handlertype(p, UPB_HANDLER_STARTSUBMSG);
    upb_sink_startsubmsg(&p->top->sink, start_sel, &child->sink);

    // The child frame parses the submessage type and starts between members.
    child->m = upb_fielddef_msgsubdef(p->top->f);
    child->f = NULL;
    p->top = child;
    return true;
  }

  upb_status_seterrf(p->status,
                     "Object specified for non-message/group field: %s",
                     upb_fielddef_name(p->top->f));
  return false;
}
static void hex(upb_json_parser *p, const char *end) {
const char *start = p->text_begin;
UPB_ASSERT_VAR(end, end - start == 4);
uint16_t codepoint =
(hexdigit(start[0]) << 12) |
(hexdigit(start[1]) << 8) |
(hexdigit(start[2]) << 4) |
hexdigit(start[3]);
// emit the codepoint as UTF-8.
char utf8[3]; // support \u0000 -- \uFFFF -- need only three bytes.
int length = 0;
if (codepoint <= 0x7F) {
utf8[0] = codepoint;
length = 1;
} else if (codepoint <= 0x07FF) {
utf8[1] = (codepoint & 0x3F) | 0x80;
codepoint >>= 6;
utf8[0] = (codepoint & 0x1F) | 0xC0;
length = 2;
} else /* codepoint <= 0xFFFF */ {
utf8[2] = (codepoint & 0x3F) | 0x80;
codepoint >>= 6;
utf8[1] = (codepoint & 0x3F) | 0x80;
codepoint >>= 6;
utf8[0] = (codepoint & 0x0F) | 0xE0;
length = 3;
// Called when a JSON object in value position ends: pops the submessage frame
// and emits ENDSUBMSG on the parent frame's sink.
static void end_subobject(upb_json_parser *p) {
  p->top--;  // Back to the frame that owns the submessage field.
  upb_sink_endsubmsg(&p->top->sink,
                     getsel_for_handlertype(p, UPB_HANDLER_ENDSUBMSG));
}
// Called when a JSON array begins in value position.  The current field must
// be a repeated field; a new parser frame is pushed for the sequence, which
// inherits both the message and the field of the enclosing frame.
//
// Returns false (with an error on p->status) if the current field is not
// repeated, or if check_stack() fails.
static bool start_array(upb_json_parser *p) {
  assert(p->top->f);

  if (!upb_fielddef_isseq(p->top->f)) {
    upb_status_seterrf(p->status,
                       "Array specified for non-repeated field: %s",
                       upb_fielddef_name(p->top->f));
    return false;
  }

  if (!check_stack(p)) return false;

  upb_jsonparser_frame *inner = p->top + 1;
  upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSEQ);
  upb_sink_startseq(&p->top->sink, sel, &inner->sink);

  // Elements of the array are values of the same field in the same message.
  inner->m = p->top->m;
  inner->f = p->top->f;
  p->top = inner;

  return true;
}
// Called when a JSON array ends: pops the sequence frame pushed by
// start_array() and emits ENDSEQ on the parent frame's sink.
static void end_array(upb_json_parser *p) {
  assert(p->top > p->stack);

  p->top--;  // Back to the frame that owns the repeated field.
  upb_sink_endseq(&p->top->sink,
                  getsel_for_handlertype(p, UPB_HANDLER_ENDSEQ));
}
// Called when a JSON object begins: emits STARTMSG on the sink of the current
// frame.  The result of upb_sink_startmsg() is not checked.
static void start_object(upb_json_parser *p) {
  upb_sink_startmsg(&p->top->sink);
}
// Called when a JSON object ends: emits ENDMSG on the sink of the current
// frame.
//
// The status handed to the sink is a scratch object; whatever the handler
// stores in it is discarded and is not propagated to p->status.
static void end_object(upb_json_parser *p) {
  upb_status status;
  // Put the scratch status into a well-defined "ok" state first, so the sink
  // never sees indeterminate contents.
  upb_status_clear(&status);
  upb_sink_endmsg(&p->top->sink, &status);
}
#define CHECK_RETURN_TOP(x) if (!(x)) goto error
/* The actual parser **********************************************************/
// What follows is the Ragel parser itself. The language is specified in Ragel
// and the actions call our C functions above.
//
// Ragel has an extensive set of functionality, and we use only a small part of
// it. There are many action types but we only use a few:
//
// ">" -- transition into a machine
// "%" -- transition out of a machine
// "@" -- transition into a final state of a machine.
//
// "@" transitions are tricky because a machine can transition into a final
// state repeatedly. But in some cases we know this can't happen, for example
// a string which is delimited by a final '"' can only transition into its
// final state once, when the closing '"' is seen.
%%{
machine json;
@ -520,24 +819,30 @@ static void hex(upb_json_parser *p, const char *end) {
text =
/[^\\"]/+
>{ start_text(parser, p); }
%{ CHECK_RETURN_TOP(end_text(parser, p, false)); }
%{ CHECK_RETURN_TOP(end_text(parser, p)); }
;
unicode_char =
"\\u"
/[0-9A-Fa-f]/{4}
>{ start_hex(parser, p); }
%{ hex(parser, p); }
>{ start_hex(parser); }
${ hexdigit(parser, p); }
%{ CHECK_RETURN_TOP(end_hex(parser)); }
;
escape_char =
"\\"
/[rtbfn"\/\\]/
>{ escape(parser, p); }
>{ CHECK_RETURN_TOP(escape(parser, p)); }
;
string_machine :=
(text | unicode_char | escape_char)**
'"'
@{ fhold; fret; }
;
string_machine := (text | unicode_char | escape_char)** '"' @{ fret; } ;
string = '"' @{ fcall string_machine; };
string = '"' @{ fcall string_machine; } '"';
value2 = ^(space | "]" | "}") >{ fhold; fcall value_machine; } ;
@ -545,7 +850,7 @@ static void hex(upb_json_parser *p, const char *end) {
ws
string
>{ start_member(parser); }
%{ CHECK_RETURN_TOP(end_member(parser)); }
@{ CHECK_RETURN_TOP(end_member(parser)); }
ws ":" ws
value2
%{ clear_member(parser); }
@ -573,10 +878,10 @@ static void hex(upb_json_parser *p, const char *end) {
value =
number
>{ start_number(parser, p); }
%{ end_number(parser, p); }
%{ CHECK_RETURN_TOP(end_number(parser, p)); }
| string
>{ CHECK_RETURN_TOP(start_stringval(parser)); }
%{ end_stringval(parser); }
@{ CHECK_RETURN_TOP(end_stringval(parser)); }
| "true"
%{ CHECK_RETURN_TOP(parser_putbool(parser, true)); }
| "false"
@ -602,6 +907,7 @@ size_t parse(void *closure, const void *hd, const char *buf, size_t size,
UPB_UNUSED(hd);
UPB_UNUSED(handle);
upb_json_parser *parser = closure;
parser->handle = handle;
// Variables used by Ragel's generated code.
int cs = parser->current_state;
@ -611,10 +917,14 @@ size_t parse(void *closure, const void *hd, const char *buf, size_t size,
const char *p = buf;
const char *pe = buf + size;
capture_resume(parser, buf);
%% write exec;
if (p != pe) {
upb_status_seterrf(parser->status, "Parse error at %s\n", p);
} else {
capture_suspend(parser, &p);
}
error:
@ -631,8 +941,13 @@ bool end(void *closure, const void *hd) {
return true;
}
/* Public API *****************************************************************/
void upb_json_parser_init(upb_json_parser *p, upb_status *status) {
p->limit = p->stack + UPB_JSON_MAX_DEPTH;
p->accumulate_buf = NULL;
p->accumulate_buf_size = 0;
upb_byteshandler_init(&p->input_handler_);
upb_byteshandler_setstring(&p->input_handler_, parse, NULL);
upb_byteshandler_setendstr(&p->input_handler_, end, NULL);
@ -642,6 +957,7 @@ void upb_json_parser_init(upb_json_parser *p, upb_status *status) {
// Releases the resources owned by a parser: uninitializes its input bytes
// handler and frees the accumulate buffer.  upb_json_parser_init() sets the
// buffer to NULL, so freeing is safe even if nothing was ever accumulated.
void upb_json_parser_uninit(upb_json_parser *p) {
  upb_byteshandler_uninit(&p->input_handler_);

  free(p->accumulate_buf);
}
void upb_json_parser_reset(upb_json_parser *p) {
@ -654,9 +970,9 @@ void upb_json_parser_reset(upb_json_parser *p) {
%% write init;
p->current_state = cs;
p->parser_top = top;
p->text_begin = NULL;
p->accumulated = NULL;
p->accumulated_len = 0;
accumulate_clear(p);
p->multipart_state = MULTIPART_INACTIVE;
p->capture = NULL;
}
void upb_json_parser_resetoutput(upb_json_parser *p, upb_sink *sink) {

@ -302,6 +302,7 @@ static void putop(compiler *c, opcode op, ...) {
case OP_SETDELIM:
case OP_HALT:
case OP_RET:
case OP_DISPATCH:
put32(c, op);
break;
case OP_PARSE_DOUBLE:
@ -382,7 +383,7 @@ const char *upb_pbdecoder_getopname(unsigned int op) {
OP(ENDSUBMSG), OP(STARTSTR), OP(STRING), OP(ENDSTR), OP(CALL), OP(RET),
OP(PUSHLENDELIM), OP(PUSHTAGDELIM), OP(SETDELIM), OP(CHECKDELIM),
OP(BRANCH), OP(TAG1), OP(TAG2), OP(TAGN), OP(SETDISPATCH), OP(POP),
OP(SETBIGGROUPNUM), OP(HALT),
OP(SETBIGGROUPNUM), OP(DISPATCH), OP(HALT),
};
return op > OP_HALT ? names[0] : names[op];
#undef OP
@ -414,6 +415,7 @@ static void dumpbc(uint32_t *p, uint32_t *end, FILE *f) {
upb_handlers_msgdef(method->dest_handlers_)));
break;
}
case OP_DISPATCH:
case OP_STARTMSG:
case OP_ENDMSG:
case OP_PUSHLENDELIM:
@ -759,6 +761,7 @@ static void compile_method(compiler *c, upb_pbdecodermethod *method) {
putop(c, OP_SETDISPATCH, &method->dispatch);
putsel(c, OP_STARTMSG, UPB_STARTMSG_SELECTOR, h);
label(c, LABEL_FIELD);
uint32_t* start_pc = c->pc;
upb_msg_iter i;
for(upb_msg_begin(&i, md); !upb_msg_done(&i); upb_msg_next(&i)) {
const upb_fielddef *f = upb_msg_iter_field(&i);
@ -774,8 +777,18 @@ static void compile_method(compiler *c, upb_pbdecodermethod *method) {
}
}
// If there were no fields, or if no handlers were defined, we need to
// generate a non-empty loop body so that we can at least dispatch for unknown
// fields and check for the end of the message.
if (c->pc == start_pc) {
// Check for end-of-message.
putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
// Unconditionally dispatch.
putop(c, OP_DISPATCH, 0);
}
// For now we just loop back to the last field of the message (or if none,
// the DISPATCH opcode for the message.
// the DISPATCH opcode for the message).
putop(c, OP_BRANCH, -LABEL_FIELD);
// Insert both a label and a dispatch table entry for this end-of-msg.

@ -1124,6 +1124,9 @@ static void jitbytecode(jitcompiler *jc) {
jittag(jc, tag, arg >> 8, (int8_t)arg, method);
break;
}
case OP_DISPATCH:
| call =>jmptarget(jc, &method->dispatch)
break;
case OP_HALT:
assert(false);
}

@ -1680,6 +1680,11 @@ static void jitbytecode(jitcompiler *jc) {
jittag(jc, tag, arg >> 8, (int8_t)arg, method);
break;
}
case OP_DISPATCH:
//| call =>jmptarget(jc, &method->dispatch)
dasm_put(Dst, 2151, jmptarget(jc, &method->dispatch));
# 1129 "upb/pb/compile_decoder_x64.dasc"
break;
case OP_HALT:
assert(false);
}
@ -1688,5 +1693,5 @@ static void jitbytecode(jitcompiler *jc) {
asmlabel(jc, "eof");
//| nop
dasm_put(Dst, 1909);
# 1134 "upb/pb/compile_decoder_x64.dasc"
# 1137 "upb/pb/compile_decoder_x64.dasc"
}

@ -801,6 +801,9 @@ size_t upb_pbdecoder_decode(void *closure, const void *hd, const char *buf,
if (result == DECODE_MISMATCH) goto badtag;
if (result >= 0) return result;
})
VMCASE(OP_DISPATCH, {
CHECK_RETURN(dispatch(d));
})
VMCASE(OP_HALT, {
return size;
})
@ -859,7 +862,8 @@ bool upb_pbdecoder_end(void *closure, const void *handler_data) {
// Rewind from OP_TAG* to OP_CHECKDELIM.
assert(getop(*d->pc) == OP_TAG1 ||
getop(*d->pc) == OP_TAG2 ||
getop(*d->pc) == OP_TAGN);
getop(*d->pc) == OP_TAGN ||
getop(*d->pc == OP_DISPATCH));
d->pc = p;
}
upb_pbdecoder_decode(closure, handler_data, &dummy, 0, NULL);

@ -66,7 +66,9 @@ typedef enum {
// | unused (24) | opc |
// | upb_inttable* (32 or 64) |
OP_HALT = 36, // No arg.
OP_DISPATCH = 36, // No arg.
OP_HALT = 37, // No arg.
} opcode;
#define OP_MAX OP_HALT

@ -32,8 +32,10 @@ static void nullz(upb_status *status) {
}
// Resets `status` to the "ok" state: ok flag set, code zero, empty message.
// A NULL `status` is accepted and ignored.
void upb_status_clear(upb_status *status) {
  // The guard must precede every access to *status.
  if (!status) return;

  status->ok_ = true;
  status->code_ = 0;
  status->msg[0] = '\0';
}
// Reports whether `status` is in the "ok" state.  `status` is dereferenced
// unconditionally, so it must not be NULL.
bool upb_ok(const upb_status *status) {
  return status->ok_;
}

Loading…
Cancel
Save