New encode/decode: most (171 / 192) conformance tests pass.

pull/13171/head
Joshua Haberman 8 years ago
parent 4da95f6213
commit be9094d91a
  1. 21
      tests/conformance_upb_failures.txt
  2. 6
      tools/make_c_api.lua
  3. 727
      upb/decode.c
  4. 88
      upb/encode.c
  5. 2
      upb/msg.c
  6. 7
      upb/msg.h
  7. 14
      upb/pb/varint.int.h
  8. 12
      upb/upb.h

@ -0,0 +1,21 @@
Recommended.ProtobufInput.OneofZeroBool.ProtobufOutput
Recommended.ProtobufInput.OneofZeroBytes.ProtobufOutput
Recommended.ProtobufInput.OneofZeroDouble.ProtobufOutput
Recommended.ProtobufInput.OneofZeroEnum.ProtobufOutput
Recommended.ProtobufInput.OneofZeroFloat.ProtobufOutput
Recommended.ProtobufInput.OneofZeroString.ProtobufOutput
Recommended.ProtobufInput.OneofZeroUint32.ProtobufOutput
Recommended.ProtobufInput.OneofZeroUint64.ProtobufOutput
Required.ProtobufInput.PrematureEofInSubmessageValue.MESSAGE
Required.ProtobufInput.RepeatedScalarSelectsLast.BOOL.ProtobufOutput
Required.ProtobufInput.RepeatedScalarSelectsLast.FIXED32.ProtobufOutput
Required.ProtobufInput.RepeatedScalarSelectsLast.FIXED64.ProtobufOutput
Required.ProtobufInput.RepeatedScalarSelectsLast.UINT64.ProtobufOutput
Required.ProtobufInput.ValidDataRepeated.BOOL.ProtobufOutput
Required.ProtobufInput.ValidDataRepeated.INT32.ProtobufOutput
Required.ProtobufInput.ValidDataRepeated.INT64.ProtobufOutput
Required.ProtobufInput.ValidDataRepeated.SINT32.ProtobufOutput
Required.ProtobufInput.ValidDataRepeated.SINT64.ProtobufOutput
Required.ProtobufInput.ValidDataRepeated.UINT32.ProtobufOutput
Required.ProtobufInput.ValidDataRepeated.UINT64.ProtobufOutput
Required.ProtobufInput.ValidDataScalar.BOOL.ProtobufOutput

@ -417,7 +417,7 @@ local function write_c_file(filedef, hfilename, append)
append('static const upb_msglayout_fieldinit_v1 %s[%s] = {\n', append('static const upb_msglayout_fieldinit_v1 %s[%s] = {\n',
fields_array_name, field_count) fields_array_name, field_count)
for _, field in ipairs(fields_number_order) do for _, field in ipairs(fields_number_order) do
local submsg_index = "-1" local submsg_index = "UPB_NO_SUBMSG"
local oneof_index = "UPB_NOT_IN_ONEOF" local oneof_index = "UPB_NOT_IN_ONEOF"
if field:type() == upb.TYPE_MESSAGE then if field:type() == upb.TYPE_MESSAGE then
submsg_index = submsg_indexes[field:subdef()] submsg_index = submsg_indexes[field:subdef()]
@ -430,7 +430,7 @@ local function write_c_file(filedef, hfilename, append)
field:number(), field:number(),
msgname, msgname,
(field:containing_oneof() and field:containing_oneof():name()) or field:name(), (field:containing_oneof() and field:containing_oneof():name()) or field:name(),
hasbit_indexes[field] or "-1", hasbit_indexes[field] or "UPB_NO_HASBIT",
oneof_index, oneof_index,
submsg_index, submsg_index,
field:descriptor_type(), field:descriptor_type(),
@ -448,7 +448,7 @@ local function write_c_file(filedef, hfilename, append)
msgname, field_count, msgname, field_count,
0, -- TODO: oneof_count 0, -- TODO: oneof_count
'false', -- TODO: extendable 'false', -- TODO: extendable
'true' -- TODO: is_proto2 msg:file():syntax() == upb.SYNTAX_PROTO2
) )
append('};\n\n') append('};\n\n')

@ -1,16 +1,32 @@
#include "upb/upb.h"
#include "upb/decode.h" #include "upb/decode.h"
#include "upb/structs.int.h" #include "upb/structs.int.h"
typedef enum { /* Maps descriptor type -> upb field type. */
UPB_WIRE_TYPE_VARINT = 0, static const uint8_t upb_desctype_to_fieldtype[] = {
UPB_WIRE_TYPE_64BIT = 1, UPB_WIRE_TYPE_END_GROUP, /* ENDGROUP */
UPB_WIRE_TYPE_DELIMITED = 2, UPB_TYPE_DOUBLE, /* DOUBLE */
UPB_WIRE_TYPE_START_GROUP = 3, UPB_TYPE_FLOAT, /* FLOAT */
UPB_WIRE_TYPE_END_GROUP = 4, UPB_TYPE_INT64, /* INT64 */
UPB_WIRE_TYPE_32BIT = 5 UPB_TYPE_UINT64, /* UINT64 */
} upb_wiretype_t; UPB_TYPE_INT32, /* INT32 */
UPB_TYPE_UINT64, /* FIXED64 */
UPB_TYPE_UINT32, /* FIXED32 */
UPB_TYPE_BOOL, /* BOOL */
UPB_TYPE_STRING, /* STRING */
UPB_TYPE_MESSAGE, /* GROUP */
UPB_TYPE_MESSAGE, /* MESSAGE */
UPB_TYPE_BYTES, /* BYTES */
UPB_TYPE_UINT32, /* UINT32 */
UPB_TYPE_ENUM, /* ENUM */
UPB_TYPE_INT32, /* SFIXED32 */
UPB_TYPE_INT64, /* SFIXED64 */
UPB_TYPE_INT32, /* SINT32 */
UPB_TYPE_INT64, /* SINT64 */
};
/* Data pertaining to the parse. */
typedef struct { typedef struct {
upb_env *env; upb_env *env;
/* Current decoding pointer. Points to the beginning of a field until we /* Current decoding pointer. Points to the beginning of a field until we
@ -18,20 +34,23 @@ typedef struct {
const char *ptr; const char *ptr;
} upb_decstate; } upb_decstate;
/* Data pertaining to a single message frame. */
typedef struct { typedef struct {
const char *limit;
int32_t group_number; /* 0 if we are not parsing a group. */ int32_t group_number; /* 0 if we are not parsing a group. */
/* These members are unset for an unknown group frame. */
char *msg; char *msg;
const upb_msglayout_msginit_v1 *m; const upb_msglayout_msginit_v1 *m;
const char *limit;
} upb_decframe; } upb_decframe;
#define CHK(x) if (!(x)) { return false; } #define CHK(x) if (!(x)) { return false; }
static void upb_decode_seterr(upb_env *env, const char *msg) { static bool upb_skip_unknowngroup(upb_decstate *d, int field_number,
upb_status status = UPB_STATUS_INIT; const char *limit);
upb_status_seterrmsg(&status, msg); static bool upb_decode_message(upb_decstate *d, const char *limit,
upb_env_reporterror(env, &status); int group_number, char *msg,
} const upb_msglayout_msginit_v1 *l);
static bool upb_decode_varint(const char **ptr, const char *limit, static bool upb_decode_varint(const char **ptr, const char *limit,
uint64_t *val) { uint64_t *val) {
@ -41,10 +60,7 @@ static bool upb_decode_varint(const char **ptr, const char *limit,
*val = 0; *val = 0;
while (byte & 0x80) { while (byte & 0x80) {
if (bitpos == 70 || p == limit) { CHK(bitpos < 70 && p < limit);
return false;
}
byte = *p; byte = *p;
*val |= (uint64_t)(byte & 0x7F) << bitpos; *val |= (uint64_t)(byte & 0x7F) << bitpos;
p++; p++;
@ -58,47 +74,34 @@ static bool upb_decode_varint(const char **ptr, const char *limit,
static bool upb_decode_varint32(const char **ptr, const char *limit, static bool upb_decode_varint32(const char **ptr, const char *limit,
uint32_t *val) { uint32_t *val) {
uint64_t u64; uint64_t u64;
if (!upb_decode_varint(ptr, limit, &u64) || u64 > UINT32_MAX) { CHK(upb_decode_varint(ptr, limit, &u64) && u64 <= UINT32_MAX);
return false; *val = u64;
} else { return true;
*val = u64;
return true;
}
}
static const upb_msglayout_fieldinit_v1 *upb_find_field(
const upb_msglayout_msginit_v1 *l, uint32_t field_number) {
/* Lots of optimization opportunities here. */
int i;
for (i = 0; i < l->field_count; i++) {
if (l->fields[i].number == field_number) {
return &l->fields[i];
}
}
return NULL; /* Unknown field. */
} }
static bool upb_decode_64bit(const char **ptr, const char *limit, static bool upb_decode_64bit(const char **ptr, const char *limit,
uint64_t *val) { uint64_t *val) {
if (limit - *ptr < 8) { CHK(limit - *ptr >= 8);
return false; memcpy(val, *ptr, 8);
} else { *ptr += 8;
memcpy(val, *ptr, 8); return true;
*ptr += 8;
return true;
}
} }
static bool upb_decode_32bit(const char **ptr, const char *limit, static bool upb_decode_32bit(const char **ptr, const char *limit,
uint32_t *val) { uint32_t *val) {
if (limit - *ptr < 4) { CHK(limit - *ptr >= 4);
return false; memcpy(val, *ptr, 4);
} else { *ptr += 4;
memcpy(val, *ptr, 4); return true;
*ptr += 4; }
return true;
} static bool upb_decode_tag(const char **ptr, const char *limit,
int *field_number, int *wire_type) {
uint32_t tag;
CHK(upb_decode_varint32(ptr, limit, &tag));
*field_number = tag >> 3;
*wire_type = tag & 7;
return true;
} }
static int32_t upb_zzdec_32(uint32_t n) { static int32_t upb_zzdec_32(uint32_t n) {
@ -113,10 +116,7 @@ static bool upb_decode_string(const char **ptr, const char *limit,
upb_stringview *val) { upb_stringview *val) {
uint32_t len; uint32_t len;
if (!upb_decode_varint32(ptr, limit, &len) || CHK(upb_decode_varint32(ptr, limit, &len) && limit - *ptr >= len);
limit - *ptr < len) {
return false;
}
*val = upb_stringview_make(*ptr, len); *val = upb_stringview_make(*ptr, len);
*ptr += len; *ptr += len;
@ -127,277 +127,436 @@ static void upb_set32(void *msg, size_t ofs, uint32_t val) {
memcpy((char*)msg + ofs, &val, sizeof(val)); memcpy((char*)msg + ofs, &val, sizeof(val));
} }
static bool upb_append_unknownfield(const char **ptr, const char *start, static bool upb_append_unknown(upb_decstate *d, upb_decframe *frame,
const char *limit, char *msg) { const char *start) {
UPB_UNUSED(limit); UPB_UNUSED(d);
UPB_UNUSED(msg); UPB_UNUSED(frame);
*ptr = limit; UPB_UNUSED(start);
return true; return true;
} }
static bool upb_decode_unknownfielddata(upb_decstate *d, const char *ptr, static bool upb_skip_unknownfielddata(upb_decstate *d, upb_decframe *frame,
const char *limit, char *msg, int field_number, int wire_type) {
const upb_msglayout_msginit_v1 *l) { switch (wire_type) {
do { case UPB_WIRE_TYPE_VARINT: {
switch (wire_type) { uint64_t val;
case UPB_WIRE_TYPE_VARINT: return upb_decode_varint(&d->ptr, frame->limit, &val);
CHK(upb_decode_varint(&ptr, limit, &val)); }
break; case UPB_WIRE_TYPE_32BIT: {
case UPB_WIRE_TYPE_32BIT: uint32_t val;
CHK(upb_decode_32bit(&ptr, limit, &val)); return upb_decode_32bit(&d->ptr, frame->limit, &val);
break; }
case UPB_WIRE_TYPE_64BIT: case UPB_WIRE_TYPE_64BIT: {
CHK(upb_decode_64bit(&ptr, limit, &val)); uint64_t val;
break; return upb_decode_64bit(&d->ptr, frame->limit, &val);
case UPB_WIRE_TYPE_DELIMITED: {
upb_stringview val;
CHK(upb_decode_string(&ptr, limit, &val));
}
case UPB_WIRE_TYPE_START_GROUP:
depth++;
continue;
case UPB_WIRE_TYPE_END_GROUP:
depth--;
continue;
} }
case UPB_WIRE_TYPE_DELIMITED: {
upb_stringview val;
return upb_decode_string(&d->ptr, frame->limit, &val);
}
case UPB_WIRE_TYPE_START_GROUP:
return upb_skip_unknowngroup(d, field_number, frame->limit);
case UPB_WIRE_TYPE_END_GROUP:
CHK(field_number == frame->group_number);
frame->limit = d->ptr;
return true;
}
return false;
}
UPB_ASSERT(depth == 0); static bool upb_array_grow(upb_array *arr, size_t elements) {
upb_append_unknown(msg, l, d->ptr, ptr); size_t needed = arr->len + elements;
d->ptr = ptr; size_t new_size = UPB_MAX(arr->size, 8);
return true; size_t new_bytes;
} while (true); size_t old_bytes;
void *new_data;
while (new_size < needed) {
new_size *= 2;
}
old_bytes = arr->len * arr->element_size;
new_bytes = new_size * arr->element_size;
new_data = upb_realloc(arr->alloc, arr->data, old_bytes, new_bytes);
CHK(new_data);
arr->data = new_data;
arr->size = new_size;
return true;
} }
static bool upb_decode_knownfield(upb_decstate *d, const char *ptr, static void *upb_array_reserve(upb_array *arr, size_t elements) {
const char *limit, char *msg, if (arr->size - arr->len < elements) {
const upb_msglayout_msginit_v1 *l, CHK(upb_array_grow(arr, elements));
const upb_msglayout_fieldinit_v1 *l) {
switch (wire_type) {
case UPB_WIRE_TYPE_VARINT:
return upb_decode_varintfield(d, ptr, limit, msg, l, f);
case UPB_WIRE_TYPE_32BIT:
return upb_decode_32bitfield(d, ptr, limit, msg, l, f);
case UPB_WIRE_TYPE_64BIT:
return upb_decode_64bitfield(d, ptr, limit, msg, l, f);
case UPB_WIRE_TYPE_DELIMITED:
return upb_decode_delimitedfield(d, ptr, limit, msg, l, f);
case UPB_WIRE_TYPE_START_GROUP:
case UPB_WIRE_TYPE_END_GROUP:
} }
return (char*)arr->data + (arr->len * arr->element_size);
} }
static void *upb_array_add(upb_array *arr, size_t elements) {
void *ret = upb_array_reserve(arr, elements);
arr->len += elements;
return ret;
}
static bool upb_decode_field(upb_decstate *d, const char *limit, char *msg, static upb_array *upb_getarr(upb_decframe *frame,
const upb_msglayout_msginit_v1 *l) { const upb_msglayout_fieldinit_v1 *field) {
uint32_t tag; UPB_ASSERT(field->label == UPB_LABEL_REPEATED);
uint32_t wire_type; return *(upb_array**)&frame->msg[field->offset];
uint32_t field_number; }
const char *ptr = d->ptr;
const upb_msglayout_fieldinit_v1 *f; static upb_array *upb_getorcreatearr(upb_decstate *d,
upb_decframe *frame,
if (!upb_decode_varint32(&ptr, limit, &tag)) { const upb_msglayout_fieldinit_v1 *field) {
upb_decode_seterr(env, "Error decoding tag.\n"); upb_array *arr = upb_getarr(frame, field);
return false;
if (!arr) {
arr = upb_env_malloc(d->env, sizeof(*arr));
if (!arr) {
return NULL;
}
upb_array_init(arr, upb_desctype_to_fieldtype[field->type],
upb_arena_alloc(upb_env_arena(d->env)));
*(upb_array**)&frame->msg[field->offset] = arr;
} }
wire_type = tag & 0x7; return arr;
field_number = tag >> 3; }
static void upb_sethasbit(upb_decframe *frame,
const upb_msglayout_fieldinit_v1 *field) {
UPB_ASSERT(field->hasbit != UPB_NO_HASBIT);
frame->msg[field->hasbit / 8] |= (1 << (field->hasbit % 8));
}
static void upb_setoneofcase(upb_decframe *frame,
const upb_msglayout_fieldinit_v1 *field) {
UPB_ASSERT(field->oneof_index != UPB_NOT_IN_ONEOF);
upb_set32(frame->msg, frame->m->oneofs[field->oneof_index].case_offset,
field->number);
}
static char *upb_decode_prepareslot(upb_decstate *d,
upb_decframe *frame,
const upb_msglayout_fieldinit_v1 *field) {
char *field_mem = frame->msg + field->offset;
upb_array *arr;
if (field_number == 0) { if (field->label == UPB_LABEL_REPEATED) {
return false; arr = upb_getorcreatearr(d, frame, field);
field_mem = upb_array_reserve(arr, 1);
} }
f = upb_find_field(l, field_number); return field_mem;
}
if (f) { static void upb_decode_setpresent(upb_decframe *frame,
return upb_decode_knownfield(d, ptr, limit, msg, l, f); const upb_msglayout_fieldinit_v1 *field) {
} else { if (field->label == UPB_LABEL_REPEATED) {
return upb_decode_unknownfield(d, ptr, limit, msg, l); upb_array *arr = upb_getarr(frame, field);
UPB_ASSERT(arr->len < arr->size);
arr->len++;
} else if (field->oneof_index != UPB_NOT_IN_ONEOF) {
upb_setoneofcase(frame, field);
} else if (field->hasbit != UPB_NO_HASBIT) {
upb_sethasbit(frame, field);
} }
}
if (f->label == UPB_LABEL_REPEATED) { static bool upb_decode_submsg(upb_decstate *d,
arr = upb_getarray(msg, f, env); upb_decframe *frame,
const char *limit,
const upb_msglayout_fieldinit_v1 *field,
int group_number) {
char *submsg = *(void**)&frame->msg[field->offset];
const upb_msglayout_msginit_v1 *subm;
UPB_ASSERT(field->submsg_index != UPB_NO_SUBMSG);
subm = frame->m->submsgs[field->submsg_index];
UPB_ASSERT(subm);
if (!submsg) {
submsg = upb_env_malloc(d->env, upb_msg_sizeof((upb_msglayout *)subm));
CHK(submsg);
submsg = upb_msg_init(
submsg, (upb_msglayout*)subm, upb_arena_alloc(upb_env_arena(d->env)));
*(void**)&frame->msg[field->offset] = submsg;
} }
switch (wire_type) { upb_decode_message(d, limit, group_number, submsg, subm);
case UPB_WIRE_TYPE_VARINT: {
uint64_t val;
if (!upb_decode_varint(&ptr, limit, &val)) {
upb_decode_seterr(env, "Error decoding varint value.\n");
return false;
}
if (!f) { return true;
return upb_append_unknown(ptr, field_start, ptr, msg); }
}
if (f->label == UPB_LABEL_REPEATED) { static bool upb_decode_varintfield(upb_decstate *d, upb_decframe *frame,
upb_array *arr = upb_getarray(msg, f, env); const char *field_start,
switch (f->type) { const upb_msglayout_fieldinit_v1 *field) {
case UPB_DESCRIPTOR_TYPE_INT64: uint64_t val;
case UPB_DESCRIPTOR_TYPE_UINT64: void *field_mem;
memcpy(arr->data, &val, sizeof(val));
arr->len++;
break;
case UPB_DESCRIPTOR_TYPE_INT32:
case UPB_DESCRIPTOR_TYPE_UINT32:
case UPB_DESCRIPTOR_TYPE_ENUM: {
uint32_t val32 = val;
memcpy(arr->data, &val32, sizeof(val32));
arr->len++;
break;
}
case UPB_DESCRIPTOR_TYPE_SINT32: {
int32_t decoded = upb_zzdec_32(val);
memcpy(arr->data, &decoded, sizeof(decoded));
arr->len++;
break;
}
case UPB_DESCRIPTOR_TYPE_SINT64: {
int64_t decoded = upb_zzdec_64(val);
memcpy(arr->data, &decoded, sizeof(decoded));
arr->len++;
break;
}
default:
return upb_append_unknown(ptr, field_start, ptr, msg);
}
} else {
switch (f->type) {
case UPB_DESCRIPTOR_TYPE_INT64:
case UPB_DESCRIPTOR_TYPE_UINT64:
memcpy(msg + f->offset, &val, sizeof(val));
break;
case UPB_DESCRIPTOR_TYPE_INT32:
case UPB_DESCRIPTOR_TYPE_UINT32:
case UPB_DESCRIPTOR_TYPE_ENUM: {
uint32_t val32 = val;
memcpy(msg + f->offset, &val32, sizeof(val32));
break;
}
case UPB_DESCRIPTOR_TYPE_SINT32: {
int32_t decoded = upb_zzdec_32(val);
memcpy(msg + f->offset, &decoded, sizeof(decoded));
break;
}
case UPB_DESCRIPTOR_TYPE_SINT64: {
int64_t decoded = upb_zzdec_64(val);
memcpy(msg + f->offset, &decoded, sizeof(decoded));
break;
}
default:
return upb_append_unknown(ptr, field_start, ptr, msg);
}
}
field_mem = upb_decode_prepareslot(d, frame, field);
CHK(field_mem);
CHK(upb_decode_varint(&d->ptr, frame->limit, &val));
switch ((upb_descriptortype_t)field->type) {
case UPB_DESCRIPTOR_TYPE_INT64:
case UPB_DESCRIPTOR_TYPE_UINT64:
memcpy(field_mem, &val, sizeof(val));
break;
case UPB_DESCRIPTOR_TYPE_INT32:
case UPB_DESCRIPTOR_TYPE_UINT32:
case UPB_DESCRIPTOR_TYPE_ENUM: {
uint32_t val32 = val;
memcpy(field_mem, &val32, sizeof(val32));
break; break;
} }
case UPB_WIRE_TYPE_64BIT: { case UPB_DESCRIPTOR_TYPE_SINT32: {
uint64_t val; int32_t decoded = upb_zzdec_32(val);
if (!upb_decode_64bit(&ptr, limit, &val)) { memcpy(field_mem, &decoded, sizeof(decoded));
upb_decode_seterr(env, "Error decoding 64bit value.\n"); break;
return false; }
} case UPB_DESCRIPTOR_TYPE_SINT64: {
int64_t decoded = upb_zzdec_64(val);
memcpy(field_mem, &decoded, sizeof(decoded));
break;
}
default:
return upb_append_unknown(d, frame, field_start);
}
if (!f) { upb_decode_setpresent(frame, field);
return upb_append_unknown(ptr, field_start, ptr, msg); return true;
} }
switch (f->type) { static bool upb_decode_64bitfield(upb_decstate *d, upb_decframe *frame,
case UPB_DESCRIPTOR_TYPE_DOUBLE: const char *field_start,
case UPB_DESCRIPTOR_TYPE_FIXED64: const upb_msglayout_fieldinit_v1 *field) {
case UPB_DESCRIPTOR_TYPE_SFIXED64: void *field_mem;
memcpy(msg + f->offset, &val, sizeof(val)); uint64_t val;
default:
return upb_append_unknown(ptr, field_start, ptr, msg); field_mem = upb_decode_prepareslot(d, frame, field);
} CHK(field_mem);
CHK(upb_decode_64bit(&d->ptr, frame->limit, &val));
switch ((upb_descriptortype_t)field->type) {
case UPB_DESCRIPTOR_TYPE_DOUBLE:
case UPB_DESCRIPTOR_TYPE_FIXED64:
case UPB_DESCRIPTOR_TYPE_SFIXED64:
memcpy(field_mem, &val, sizeof(val));
break;
default:
return upb_append_unknown(d, frame, field_start);
}
upb_decode_setpresent(frame, field);
return true;
}
static bool upb_decode_32bitfield(upb_decstate *d, upb_decframe *frame,
const char *field_start,
const upb_msglayout_fieldinit_v1 *field) {
void *field_mem;
uint32_t val;
field_mem = upb_decode_prepareslot(d, frame, field);
CHK(field_mem);
CHK(upb_decode_32bit(&d->ptr, frame->limit, &val));
switch ((upb_descriptortype_t)field->type) {
case UPB_DESCRIPTOR_TYPE_FLOAT:
case UPB_DESCRIPTOR_TYPE_FIXED32:
case UPB_DESCRIPTOR_TYPE_SFIXED32:
memcpy(field_mem, &val, sizeof(val));
break; break;
default:
return upb_append_unknown(d, frame, field_start);
}
upb_decode_setpresent(frame, field);
return true;
}
static bool upb_decode_fixedpacked(upb_array *arr, upb_stringview data,
int elem_size) {
int elements = data.size / elem_size;
void *field_mem;
CHK((data.size % elem_size) == 0);
field_mem = upb_array_add(arr, elements);
CHK(field_mem);
memcpy(field_mem, data.data, data.size);
return true;
}
static bool upb_decode_toarray(upb_decstate *d, upb_decframe *frame,
const char *field_start,
const upb_msglayout_fieldinit_v1 *field,
upb_stringview val) {
upb_array *arr = upb_getorcreatearr(d, frame, field);
#define VARINT_CASE(ctype, decode) { \
const char *ptr = val.data; \
const char *limit = ptr + val.size; \
while (ptr < limit) { \
uint64_t val; \
void *field_mem; \
ctype decoded; \
CHK(upb_decode_varint(&ptr, limit, &val)); \
decoded = (decode)(val); \
field_mem = upb_array_add(arr, 1); \
CHK(field_mem); \
memcpy(field_mem, &decoded, sizeof(ctype)); \
} \
return true; \
}
switch ((upb_descriptortype_t)field->type) {
case UPB_DESCRIPTOR_TYPE_STRING:
case UPB_DESCRIPTOR_TYPE_BYTES: {
void *field_mem = upb_array_add(arr, 1);
CHK(field_mem);
memcpy(field_mem, &val, sizeof(val));
return true;
} }
case UPB_WIRE_TYPE_32BIT: { case UPB_DESCRIPTOR_TYPE_FLOAT:
uint32_t val; case UPB_DESCRIPTOR_TYPE_FIXED32:
if (!upb_decode_32bit(&ptr, limit, &val)) { case UPB_DESCRIPTOR_TYPE_SFIXED32:
upb_decode_seterr(env, "Error decoding 32bit value.\n"); return upb_decode_fixedpacked(arr, val, sizeof(int32_t));
return false; case UPB_DESCRIPTOR_TYPE_DOUBLE:
} case UPB_DESCRIPTOR_TYPE_FIXED64:
case UPB_DESCRIPTOR_TYPE_SFIXED64:
return upb_decode_fixedpacked(arr, val, sizeof(int64_t));
case UPB_DESCRIPTOR_TYPE_INT32:
case UPB_DESCRIPTOR_TYPE_UINT32:
case UPB_DESCRIPTOR_TYPE_ENUM:
/* TODO: proto2 enum field that isn't in the enum. */
VARINT_CASE(uint32_t, uint32_t);
case UPB_DESCRIPTOR_TYPE_INT64:
case UPB_DESCRIPTOR_TYPE_UINT64:
VARINT_CASE(uint64_t, uint64_t);
case UPB_DESCRIPTOR_TYPE_BOOL:
VARINT_CASE(bool, bool);
case UPB_DESCRIPTOR_TYPE_SINT32:
VARINT_CASE(int32_t, upb_zzdec_32);
case UPB_DESCRIPTOR_TYPE_SINT64:
VARINT_CASE(int64_t, upb_zzdec_64);
case UPB_DESCRIPTOR_TYPE_MESSAGE:
CHK(val.size <= (size_t)(frame->limit - val.data));
return upb_decode_submsg(d, frame, val.data + val.size, field, 0);
case UPB_DESCRIPTOR_TYPE_GROUP:
return upb_append_unknown(d, frame, field_start);
}
#undef VARINT_CASE
}
if (!f) { static bool upb_decode_delimitedfield(upb_decstate *d, upb_decframe *frame,
return upb_append_unknown(ptr, field_start, ptr, msg); const char *field_start,
} const upb_msglayout_fieldinit_v1 *field) {
upb_stringview val;
CHK(upb_decode_string(&d->ptr, frame->limit, &val));
switch (f->type) { if (field->label == UPB_LABEL_REPEATED) {
case UPB_DESCRIPTOR_TYPE_FLOAT: return upb_decode_toarray(d, frame, field_start, field, val);
case UPB_DESCRIPTOR_TYPE_FIXED32: } else {
case UPB_DESCRIPTOR_TYPE_SFIXED32: switch ((upb_descriptortype_t)field->type) {
memcpy(msg + f->offset, &val, sizeof(val)); case UPB_DESCRIPTOR_TYPE_STRING:
default: case UPB_DESCRIPTOR_TYPE_BYTES: {
return upb_append_unknown(ptr, field_start, ptr, msg); void *field_mem = upb_decode_prepareslot(d, frame, field);
CHK(field_mem);
memcpy(field_mem, &val, sizeof(val));
break;
} }
case UPB_DESCRIPTOR_TYPE_MESSAGE:
CHK(val.size <= (size_t)(frame->limit - val.data));
CHK(upb_decode_submsg(d, frame, val.data + val.size, field, 0));
break;
default:
/* TODO(haberman): should we accept the last element of a packed? */
return upb_append_unknown(d, frame, field_start);
}
upb_decode_setpresent(frame, field);
return true;
}
}
break; static const upb_msglayout_fieldinit_v1 *upb_find_field(
const upb_msglayout_msginit_v1 *l, uint32_t field_number) {
/* Lots of optimization opportunities here. */
int i;
for (i = 0; i < l->field_count; i++) {
if (l->fields[i].number == field_number) {
return &l->fields[i];
} }
case UPB_WIRE_TYPE_DELIMITED: { }
upb_stringview val;
if (!upb_decode_string(&ptr, limit, &val)) {
upb_decode_seterr(env, "Error decoding delimited value.\n");
return false;
}
if (!f) { return NULL; /* Unknown field. */
return upb_append_unknown(ptr, field_start, ptr, msg); }
}
switch (f->type) { static bool upb_decode_field(upb_decstate *d, upb_decframe *frame) {
case UPB_DESCRIPTOR_TYPE_STRING: int field_number;
case UPB_DESCRIPTOR_TYPE_BYTES: int wire_type;
memcpy(msg + f->offset, &val, sizeof(val)); const char *field_start = d->ptr;
break; const upb_msglayout_fieldinit_v1 *field;
case UPB_DESCRIPTOR_TYPE_INT64:
case UPB_DESCRIPTOR_TYPE_UINT64: {
memcpy(msg + f->offset, &val, sizeof(val));
break;
case UPB_DESCRIPTOR_TYPE_INT32:
case UPB_DESCRIPTOR_TYPE_UINT32:
case UPB_DESCRIPTOR_TYPE_ENUM: {
uint32_t val32 = val;
memcpy(msg + f->offset, &val32, sizeof(val32));
break;
}
case UPB_DESCRIPTOR_TYPE_SINT32: {
int32_t decoded = upb_zzdec_32(val);
memcpy(msg + f->offset, &decoded, sizeof(decoded));
break;
}
case UPB_DESCRIPTOR_TYPE_SINT64:
case UPB_DESCRIPTOR_TYPE_FLOAT:
case UPB_DESCRIPTOR_TYPE_FIXED32:
case UPB_DESCRIPTOR_TYPE_SFIXED32:
/*
case UPB_DESCRIPTOR_TYPE_MESSAGE: {
upb_decode_message(val,
}
*/
default:
return upb_append_unknown(ptr, field_start, ptr, msg);
}
break; CHK(upb_decode_tag(&d->ptr, frame->limit, &field_number, &wire_type));
field = upb_find_field(frame->m, field_number);
if (field) {
switch (wire_type) {
case UPB_WIRE_TYPE_VARINT:
return upb_decode_varintfield(d, frame, field_start, field);
case UPB_WIRE_TYPE_32BIT:
return upb_decode_32bitfield(d, frame, field_start, field);
case UPB_WIRE_TYPE_64BIT:
return upb_decode_64bitfield(d, frame, field_start, field);
case UPB_WIRE_TYPE_DELIMITED:
return upb_decode_delimitedfield(d, frame, field_start, field);
case UPB_WIRE_TYPE_START_GROUP:
CHK(field->type == UPB_DESCRIPTOR_TYPE_GROUP);
return upb_decode_submsg(d, frame, frame->limit, field, field_number);
case UPB_WIRE_TYPE_END_GROUP:
CHK(frame->group_number == field_number)
frame->limit = d->ptr;
return true;
default:
return false;
} }
} else {
CHK(field_number != 0);
return upb_skip_unknownfielddata(d, frame, field_number, wire_type);
} }
}
static bool upb_skip_unknowngroup(upb_decstate *d, int field_number,
const char *limit) {
upb_decframe frame;
frame.msg = NULL;
frame.m = NULL;
frame.group_number = field_number;
frame.limit = limit;
if (f->oneof_index != UPB_NOT_IN_ONEOF) { while (d->ptr < frame.limit) {
upb_set32(msg, l->oneofs[f->oneof_index].case_offset, f->number); int wire_type;
int field_number;
CHK(upb_decode_tag(&d->ptr, frame.limit, &field_number, &wire_type));
CHK(upb_skip_unknownfielddata(d, &frame, field_number, wire_type));
} }
d->ptr = ptr;
return true; return true;
} }
static bool upb_decode_message(upb_decstate *d, upb_decframe *frame) { static bool upb_decode_message(upb_decstate *d, const char *limit,
while (d->ptr < frame->limit) { int group_number, char *msg,
if (!upb_decode_field(d, frame)) { const upb_msglayout_msginit_v1 *l) {
return false; upb_decframe frame;
} frame.group_number = group_number;
frame.limit = limit;
frame.msg = msg;
frame.m = l;
while (d->ptr < frame.limit) {
CHK(upb_decode_field(d, &frame));
} }
return true; return true;
@ -409,11 +568,5 @@ bool upb_decode(upb_stringview buf, void *msg,
state.ptr = buf.data; state.ptr = buf.data;
state.env = env; state.env = env;
upb_decframe frame; return upb_decode_message(&state, buf.data + buf.size, 0, msg, l);
frame.msg = msg;
frame.l = l;
frame.group_number = 0;
frame.limit = buf.data + buf.size
return upb_decode_message(&state, &frame);
} }

@ -1,10 +1,35 @@
/* We encode backwards, to avoid pre-computing lengths (one-pass encode). */
#include "upb/upb.h"
#include "upb/encode.h" #include "upb/encode.h"
#include "upb/structs.int.h" #include "upb/structs.int.h"
#define UPB_PB_VARINT_MAX_LEN 10 #define UPB_PB_VARINT_MAX_LEN 10
#define CHK(x) do { if (!(x)) { return false; } } while(0) #define CHK(x) do { if (!(x)) { return false; } } while(0)
/* Maps descriptor type -> upb field type. */
static const uint8_t upb_desctype_to_fieldtype[] = {
UPB_WIRE_TYPE_END_GROUP, /* ENDGROUP */
UPB_TYPE_DOUBLE, /* DOUBLE */
UPB_TYPE_FLOAT, /* FLOAT */
UPB_TYPE_INT64, /* INT64 */
UPB_TYPE_UINT64, /* UINT64 */
UPB_TYPE_INT32, /* INT32 */
UPB_TYPE_UINT64, /* FIXED64 */
UPB_TYPE_UINT32, /* FIXED32 */
UPB_TYPE_BOOL, /* BOOL */
UPB_TYPE_STRING, /* STRING */
UPB_TYPE_MESSAGE, /* GROUP */
UPB_TYPE_MESSAGE, /* MESSAGE */
UPB_TYPE_BYTES, /* BYTES */
UPB_TYPE_UINT32, /* UINT32 */
UPB_TYPE_ENUM, /* ENUM */
UPB_TYPE_INT32, /* SFIXED32 */
UPB_TYPE_INT64, /* SFIXED64 */
UPB_TYPE_INT32, /* SINT32 */
UPB_TYPE_INT64, /* SINT64 */
};
static size_t upb_encode_varint(uint64_t val, char *buf) { static size_t upb_encode_varint(uint64_t val, char *buf) {
size_t i; size_t i;
if (val == 0) { buf[0] = 0; return 1; } if (val == 0) { buf[0] = 0; return 1; }
@ -21,38 +46,6 @@ static size_t upb_encode_varint(uint64_t val, char *buf) {
static uint32_t upb_zzenc_32(int32_t n) { return (n << 1) ^ (n >> 31); } static uint32_t upb_zzenc_32(int32_t n) { return (n << 1) ^ (n >> 31); }
static uint64_t upb_zzenc_64(int64_t n) { return (n << 1) ^ (n >> 63); } static uint64_t upb_zzenc_64(int64_t n) { return (n << 1) ^ (n >> 63); }
typedef enum {
UPB_WIRE_TYPE_VARINT = 0,
UPB_WIRE_TYPE_64BIT = 1,
UPB_WIRE_TYPE_DELIMITED = 2,
UPB_WIRE_TYPE_START_GROUP = 3,
UPB_WIRE_TYPE_END_GROUP = 4,
UPB_WIRE_TYPE_32BIT = 5
} upb_wiretype_t;
/* Index is descriptor type. */
const uint8_t upb_native_wiretypes[] = {
UPB_WIRE_TYPE_END_GROUP, /* ENDGROUP */
UPB_WIRE_TYPE_64BIT, /* DOUBLE */
UPB_WIRE_TYPE_32BIT, /* FLOAT */
UPB_WIRE_TYPE_VARINT, /* INT64 */
UPB_WIRE_TYPE_VARINT, /* UINT64 */
UPB_WIRE_TYPE_VARINT, /* INT32 */
UPB_WIRE_TYPE_64BIT, /* FIXED64 */
UPB_WIRE_TYPE_32BIT, /* FIXED32 */
UPB_WIRE_TYPE_VARINT, /* BOOL */
UPB_WIRE_TYPE_DELIMITED, /* STRING */
UPB_WIRE_TYPE_START_GROUP, /* GROUP */
UPB_WIRE_TYPE_DELIMITED, /* MESSAGE */
UPB_WIRE_TYPE_DELIMITED, /* BYTES */
UPB_WIRE_TYPE_VARINT, /* UINT32 */
UPB_WIRE_TYPE_VARINT, /* ENUM */
UPB_WIRE_TYPE_32BIT, /* SFIXED32 */
UPB_WIRE_TYPE_64BIT, /* SFIXED64 */
UPB_WIRE_TYPE_VARINT, /* SINT32 */
UPB_WIRE_TYPE_VARINT, /* SINT64 */
};
typedef struct { typedef struct {
upb_env *env; upb_env *env;
char *buf, *ptr, *limit; char *buf, *ptr, *limit;
@ -165,21 +158,24 @@ static bool upb_encode_array(upb_encstate *e, const char *field_mem,
const upb_msglayout_fieldinit_v1 *f) { const upb_msglayout_fieldinit_v1 *f) {
const upb_array *arr = *(const upb_array**)field_mem; const upb_array *arr = *(const upb_array**)field_mem;
if (arr->len == 0) { if (arr == NULL || arr->len == 0) {
return true; return true;
} }
#define VARINT_CASE(ctype, encode) do { \ UPB_ASSERT(arr->type == upb_desctype_to_fieldtype[f->type]);
uint64_t *start = arr->data; \
uint64_t *ptr = start + arr->len; \ #define VARINT_CASE(ctype, encode) { \
ctype *start = arr->data; \
ctype *ptr = start + arr->len; \
char *buf_ptr = e->ptr; \ char *buf_ptr = e->ptr; \
do { \ do { \
ptr--; \ ptr--; \
CHK(upb_put_varint(e, encode)); \ CHK(upb_put_varint(e, encode)); \
} while (ptr != start); \ } while (ptr != start); \
CHK(upb_put_varint(e, buf_ptr - e->ptr)); \ CHK(upb_put_varint(e, buf_ptr - e->ptr)); \
break; \ } \
} while(0) break; \
do { ; } while(0)
switch (f->type) { switch (f->type) {
case UPB_DESCRIPTOR_TYPE_DOUBLE: case UPB_DESCRIPTOR_TYPE_DOUBLE:
@ -352,7 +348,7 @@ bool upb_encode_message(upb_encstate* e, const char *msg,
const upb_msglayout_fieldinit_v1 *f = &m->fields[i]; const upb_msglayout_fieldinit_v1 *f = &m->fields[i];
if (f->label == UPB_LABEL_REPEATED) { if (f->label == UPB_LABEL_REPEATED) {
CHK(upb_encode_array(e, msg, m, f)); CHK(upb_encode_array(e, msg + f->offset, m, f));
} else { } else {
if (upb_encode_hasscalarfield(msg, m, f)) { if (upb_encode_hasscalarfield(msg, m, f)) {
CHK(upb_encode_scalarfield(e, msg + f->offset, m, f, !m->is_proto2)); CHK(upb_encode_scalarfield(e, msg + f->offset, m, f, !m->is_proto2));
@ -372,10 +368,14 @@ char *upb_encode(const void *msg, const upb_msglayout_msginit_v1 *m,
e.limit = NULL; e.limit = NULL;
e.ptr = NULL; e.ptr = NULL;
if (!upb_encode_message(&e, msg, m, size)) { CHK(upb_encode_message(&e, msg, m, size));
return false;
}
*size = e.limit - e.ptr; *size = e.limit - e.ptr;
return e.ptr;
if (*size == 0) {
static char ch;
return &ch;
} else {
UPB_ASSERT(e.ptr);
return e.ptr;
}
} }

@ -68,7 +68,7 @@ static size_t upb_msgval_sizeof(upb_fieldtype_t type) {
case UPB_TYPE_MESSAGE: case UPB_TYPE_MESSAGE:
return sizeof(void*); return sizeof(void*);
case UPB_TYPE_STRING: case UPB_TYPE_STRING:
return sizeof(char*) + sizeof(size_t); return sizeof(upb_stringview);
} }
UPB_UNREACHABLE(); UPB_UNREACHABLE();
} }

@ -388,13 +388,14 @@ bool upb_msg_getscalarhandlerdata(const upb_handlers *h,
#define UPB_NOT_IN_ONEOF UINT16_MAX #define UPB_NOT_IN_ONEOF UINT16_MAX
#define UPB_NO_HASBIT UINT16_MAX #define UPB_NO_HASBIT UINT16_MAX
#define UPB_NO_SUBMSG UINT16_MAX
typedef struct { typedef struct {
uint32_t number; uint32_t number;
uint32_t offset; /* If in a oneof, offset of default in default_msg below. */ uint32_t offset; /* If in a oneof, offset of default in default_msg below. */
uint16_t hasbit; uint16_t hasbit; /* UPB_NO_HASBIT if no hasbit. */
uint16_t oneof_index; /* UPB_NOT_IN_ONEOF if not in a oneof. */ uint16_t oneof_index; /* UPB_NOT_IN_ONEOF if not in a oneof. */
uint16_t submsg_index; uint16_t submsg_index; /* UPB_NO_SUBMSG if no submsg. */
uint8_t type; uint8_t type;
uint8_t label; uint8_t label;
} upb_msglayout_fieldinit_v1; } upb_msglayout_fieldinit_v1;

@ -15,21 +15,9 @@
extern "C" { extern "C" {
#endif #endif
/* A list of types as they are encoded on-the-wire. */
typedef enum {
UPB_WIRE_TYPE_VARINT = 0,
UPB_WIRE_TYPE_64BIT = 1,
UPB_WIRE_TYPE_DELIMITED = 2,
UPB_WIRE_TYPE_START_GROUP = 3,
UPB_WIRE_TYPE_END_GROUP = 4,
UPB_WIRE_TYPE_32BIT = 5
} upb_wiretype_t;
#define UPB_MAX_WIRE_TYPE 5 #define UPB_MAX_WIRE_TYPE 5
/* The maximum number of bytes that it takes to encode a 64-bit varint. /* The maximum number of bytes that it takes to encode a 64-bit varint. */
* Note that with a better encoding this could be 9 (TODO: write up a
* wiki document about this). */
#define UPB_PB_VARINT_MAX_LEN 10 #define UPB_PB_VARINT_MAX_LEN 10
/* Array of the "native" (ie. non-packed-repeated) wire type for the given a /* Array of the "native" (ie. non-packed-repeated) wire type for the given a

@ -298,6 +298,16 @@ class PointerBase2 : public PointerBase<T, Base> {
#endif #endif
/* A list of types as they are encoded on-the-wire. */
typedef enum {
UPB_WIRE_TYPE_VARINT = 0,
UPB_WIRE_TYPE_64BIT = 1,
UPB_WIRE_TYPE_DELIMITED = 2,
UPB_WIRE_TYPE_START_GROUP = 3,
UPB_WIRE_TYPE_END_GROUP = 4,
UPB_WIRE_TYPE_32BIT = 5
} upb_wiretype_t;
/* upb::ErrorSpace ************************************************************/ /* upb::ErrorSpace ************************************************************/
@ -626,7 +636,7 @@ void upb_env_uninit(upb_env *e);
void upb_env_initonly(upb_env *e); void upb_env_initonly(upb_env *e);
upb_arena *upb_env_arena(upb_env *e); UPB_INLINE upb_arena *upb_env_arena(upb_env *e) { return (upb_arena*)e; }
bool upb_env_ok(const upb_env *e); bool upb_env_ok(const upb_env *e);
void upb_env_seterrorfunc(upb_env *e, upb_error_func *func, void *ud); void upb_env_seterrorfunc(upb_env *e, upb_error_func *func, void *ud);

Loading…
Cancel
Save