A good start on upb_encode and upb_decode.

pull/13171/head
Josh Haberman 7 years ago
parent dd536fd567
commit 1aafd4111b
  1. 3
      .gitmodules
  2. 19
      Makefile
  3. 23
      tests/conformance_upb.c
  4. 1
      third_party/protobuf
  5. 139
      tools/make_c_api.lua
  6. 247
      upb/decode.c
  7. 17
      upb/decode.h
  8. 512
      upb/encode.c
  9. 17
      upb/encode.h
  10. 10
      upb/msg.c
  11. 1
      upb/msg.h
  12. 18
      upb/structs.int.h
  13. 3
      upb/upb.h

3
.gitmodules vendored

@ -0,0 +1,3 @@
[submodule "third_party/protobuf"]
path = third_party/protobuf
url = https://github.com/google/protobuf.git

@ -110,7 +110,7 @@ clean_leave_profile:
@rm -rf obj lib
@rm -f tests/google_message?.h
@rm -f tests/json/test.upbdefs.o
@rm -f $(TESTS) tests/testmain.o tests/t.*
@rm -f $(TESTS) tests/testmain.o tests/t.* tests/conformance_upb
@rm -rf tools/upbc deps
@rm -rf upb/bindings/python/build
@rm -f upb/bindings/ruby/Makefile
@ -148,7 +148,9 @@ make_objs_cc = $$(patsubst upb/$$(pc).cc,obj/upb/$$(pc).$(1),$$($$(call to_srcs,
# Core libraries (ie. not bindings). ###############################################################
upb_SRCS = \
upb/decode.c \
upb/def.c \
upb/encode.c \
upb/handlers.c \
upb/msg.c \
upb/refcounted.c \
@ -361,6 +363,21 @@ test:
done;
@echo "All tests passed!"
obj/conformance_protos: obj/conformance_protos.pb tools/upbc
cd obj && ../tools/upbc conformance_protos.pb && touch conformance_protos
obj/conformance_protos.pb: third_party/protobuf/autogen.sh
protoc -Ithird_party/protobuf/conformance -Ithird_party/protobuf/src --include_imports \
third_party/protobuf/conformance/conformance.proto \
third_party/protobuf/src/google/protobuf/test_messages_proto3.proto \
-o obj/conformance_protos.pb
# Fetch the protobuf submodule; obj/conformance_protos.pb depends on this file.
third_party/protobuf/autogen.sh: .gitmodules
	git submodule init && git submodule update
tests/conformance_upb: tests/conformance_upb.c lib/libupb.a obj/conformance_protos
$(CC) -o tests/conformance_upb tests/conformance_upb.c -Iobj -I. $(CPPFLAGS) $(CFLAGS) obj/conformance.upb.c obj/google/protobuf/*.upb.c lib/libupb.a
# Google protobuf binding ######################################################

@ -4,6 +4,7 @@
#include <errno.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
@ -52,7 +53,7 @@ void DoTest(
if (!test_message) {
/* TODO(haberman): return details. */
static char msg[] = "Parse error (no more details available).";
static const char msg[] = "Parse error (no more details available).";
conformance_ConformanceResponse_set_parse_error(
response, upb_stringview_make(msg, sizeof(msg)));
return;
@ -60,20 +61,20 @@ void DoTest(
break;
case conformance_ConformanceRequest_payload_json_payload: {
static char msg[] = "JSON support not yet implemented.";
static const char msg[] = "JSON support not yet implemented.";
conformance_ConformanceResponse_set_skipped(
response, upb_stringview_make(msg, sizeof(msg)));
return;
}
case conformance_ConformanceRequest_payload_NOT_SET:
fprintf(stderr, "conformance_upb: Request didn't have payload.");
exit(1);
fprintf(stderr, "conformance_upb: Request didn't have payload.\n");
return;
}
switch (conformance_ConformanceRequest_requested_output_format(request)) {
case conformance_UNSPECIFIED:
fprintf(stderr, "conformance_upb: Unspecified output format.");
fprintf(stderr, "conformance_upb: Unspecified output format.\n");
exit(1);
case conformance_PROTOBUF: {
@ -81,8 +82,10 @@ void DoTest(
char *serialized = protobuf_test_messages_proto3_TestAllTypes_serialize(
test_message, env, &serialized_len);
if (!serialized) {
fprintf(stderr, "conformance_upb: Error serialiing.");
exit(1);
static const char msg[] = "Error serializing.";
conformance_ConformanceResponse_set_serialize_error(
response, upb_stringview_make(msg, sizeof(msg)));
return;
}
conformance_ConformanceResponse_set_protobuf_payload(
response, upb_stringview_make(serialized, serialized_len));
@ -90,14 +93,14 @@ void DoTest(
}
case conformance_JSON: {
static char msg[] = "JSON support not yet implemented.";
static const char msg[] = "JSON support not yet implemented.";
conformance_ConformanceResponse_set_skipped(
response, upb_stringview_make(msg, sizeof(msg)));
break;
}
default:
fprintf(stderr, "conformance_upb: Unknown output format: %d",
fprintf(stderr, "conformance_upb: Unknown output format: %d\n",
conformance_ConformanceRequest_requested_output_format(request));
exit(1);
}
@ -111,7 +114,7 @@ bool DoTestIo() {
char *serialized_input;
char *serialized_output;
uint32_t input_size;
size_t output_size;
size_t output_size = 0;
conformance_ConformanceRequest *request;
conformance_ConformanceResponse *response;

@ -0,0 +1 @@
Subproject commit 6bd51a59df41b99058ec8c2b03a177a218267ce5

@ -47,7 +47,6 @@ local function to_preproc(...)
return string.upper(to_cident(...))
end
-- Strips away last path element, ie:
-- foo.Bar.Baz -> foo.Bar
local function remove_name(name)
@ -60,6 +59,10 @@ local function remove_name(name)
return string.sub(name, 1, package_end)
end
-- Builds the C symbol for a single enum value: the C identifier of the
-- enum's enclosing scope, an underscore, then the value name.
local function enum_value_symbol(enumdef, name)
  local prefix = to_cident(remove_name(enumdef:full_name()))
  return prefix .. "_" .. name
end
local function dump_enum_vals(enumdef, append)
local enum_vals = {}
@ -96,7 +99,7 @@ local function dump_enum_vals(enumdef, append)
local cident = to_cident(remove_name(enumdef:full_name()))
for i, pair in ipairs(enum_vals) do
k, v = pair[1], pair[2]
append(' %s = %d', cident .. "_" .. k, v)
append(' %s = %d', enum_value_symbol(enumdef, k), v)
if i == #enum_vals then
append('\n')
else
@ -105,6 +108,20 @@ local function dump_enum_vals(enumdef, append)
end
end
-- Returns the C expression used as the default value of `field`.
--
--   * message fields    -> "NULL"
--   * string/bytes      -> a upb_stringview_make(...) expression; a missing
--                          default is treated as the empty string
--   * enum fields       -> the C symbol of the default enum value
--   * everything else   -> whatever field:default() returns
--
-- NOTE: the string default is emitted verbatim inside a C string literal; it
-- is not escaped, so defaults containing quotes or backslashes would generate
-- invalid C.
local function field_default(field)
  local ftype = field:type()
  if ftype == upb.TYPE_MESSAGE then
    return "NULL"
  elseif ftype == upb.TYPE_STRING or ftype == upb.TYPE_BYTES then
    -- Use the nil-safe local for both substitutions.
    local default = field:default() or ""
    return string.format('upb_stringview_make("%s", strlen("%s"))',
                         default, default)
  elseif ftype == upb.TYPE_ENUM then
    return enum_value_symbol(field:subdef(), field:default())
  else
    return field:default()
  end
end
local function ctype(field)
if field:label() == upb.LABEL_REPEATED then
return "upb_array*"
@ -134,19 +151,18 @@ end
local function field_layout_rank(field)
-- Order:
-- 1, 2, 3. primitive fields (8, 4, 1 byte)
-- 4. oneof fields
-- 5. string fields
-- 6. submessage fields
-- 7. repeated fields
-- 4. string fields
-- 5. submessage fields
-- 6. repeated fields
local rank
if field:containing_oneof() then
rank = 4
rank = 100 -- These go last (actually we skip them).
elseif field:label() == upb.LABEL_REPEATED then
rank = 7
elseif field:type() == upb.TYPE_MESSAGE then
rank = 6
elseif field:type() == upb.TYPE_STRING or field:type() == upb.TYPE_BYTES then
elseif field:type() == upb.TYPE_MESSAGE then
rank = 5
elseif field:type() == upb.TYPE_STRING or field:type() == upb.TYPE_BYTES then
rank = 4
elseif field:type() == upb.TYPE_BOOL then
rank = 3
elseif field:type() == upb.TYPE_FLOAT or
@ -257,6 +273,8 @@ local function write_c_file(filedef, hfilename, append)
emit_file_warning(filedef, append)
append('#include <stddef.h>\n')
append('#include "upb/decode.h"\n\n')
append('#include "upb/encode.h"\n\n')
append('#include "upb/msg.h"\n')
append('#include "upb/upb.h"\n')
append('#include "%s"\n\n', hfilename)
@ -273,13 +291,29 @@ local function write_c_file(filedef, hfilename, append)
local fields_array_ref = "NULL"
local submsgs_array_ref = "NULL"
local oneofs_array_ref = "NULL"
local field_count = 0
local submsg_count = 0
local submsg_set = {}
local submsg_indexes = {}
local hasbit_count = 0
local hasbit_indexes = {}
-- TODO(haberman): oneofs
local oneof_count = 0
local oneof_indexes = {}
-- Create a layout order for oneofs.
local oneofs_layout_order = {}
for oneof in msg:oneofs() do
table.insert(oneofs_layout_order, oneof)
end
table.sort(oneofs_layout_order, function(a, b)
return a:name() < b:name()
end)
for _, oneof in ipairs(oneofs_layout_order) do
oneof_indexes[oneof] = oneof_count
oneof_count = oneof_count + 1
end
-- Create a layout order for fields. We use this order for the struct and
-- for offsets, but our list of fields we keep in field number order.
@ -301,6 +335,8 @@ local function write_c_file(filedef, hfilename, append)
end)
append('struct %s {\n', msgname)
-- Non-oneof fields.
for _, field in ipairs(fields_layout_order) do
field_count = field_count + 1
@ -309,15 +345,46 @@ local function write_c_file(filedef, hfilename, append)
submsg_set[field:subdef()] = true
end
if has_hasbit(field) then
hasbit_indexes[field] = hasbit_count
hasbit_count = hasbit_count + 1
if field:containing_oneof() then
-- Do nothing now
else
if has_hasbit(field) then
hasbit_indexes[field] = hasbit_count
hasbit_count = hasbit_count + 1
end
append(' %s %s;\n', ctype(field), field:name())
end
end
append(' %s %s;\n', ctype(field), field:name())
local oneof_last_fields = {}
-- Oneof fields.
for oneof in msg:oneofs() do
local fullname = to_cident(oneof:containing_type():full_name() .. "." .. oneof:name())
append(' union {\n')
oneof_last_fields[oneof] = ""
for field in oneof:fields() do
oneof_last_fields[oneof] = field:name()
append(' %s %s;\n', ctype(field), field:name())
end
append(' } %s;\n', oneof:name())
append(' %s_oneofcases %s_case;\n', fullname, oneof:name())
end
append('};\n\n')
if oneof_count > 0 then
local oneofs_array_name = msgname .. "_oneofs"
oneofs_array_ref = "&" .. oneofs_array_name .. "[0]"
append('static const upb_msglayout_oneofinit_v1 %s[%s] = {\n',
oneofs_array_name, oneof_count)
for _, oneof in ipairs(oneofs_layout_order) do
append(' {offsetof(%s, %s), offsetof(%s, %s_case)},\n',
msgname, oneof:name(), msgname, oneof:name())
end
append('};\n\n')
end
if submsg_count > 0 then
-- TODO(haberman): could save a little bit of space by only generating a
-- "submsgs" array for every strongly-connected component.
@ -354,11 +421,14 @@ local function write_c_file(filedef, hfilename, append)
if field:type() == upb.TYPE_MESSAGE then
submsg_index = submsg_indexes[field:subdef()]
end
if field:containing_oneof() then
oneof_index = oneof_indexes[field:containing_oneof()]
end
-- TODO(haberman): oneofs.
append(' {%s, offsetof(%s, %s), %s, %s, %s, %s, %s},\n',
field:number(),
msgname,
field:name(),
(field:containing_oneof() and field:containing_oneof():name()) or field:name(),
hasbit_indexes[field] or "-1",
oneof_index,
submsg_index,
@ -371,7 +441,7 @@ local function write_c_file(filedef, hfilename, append)
append('const upb_msglayout_msginit_v1 %s_msginit = {\n', msgname)
append(' %s,\n', submsgs_array_ref)
append(' %s,\n', fields_array_ref)
append(' NULL, /* TODO. oneofs */\n')
append(' %s,\n', oneofs_array_ref)
append(' NULL, /* TODO. default_msg */\n')
append(' UPB_ALIGNED_SIZEOF(%s), %s, %s, %s, %s\n',
msgname, field_count,
@ -390,36 +460,49 @@ local function write_c_file(filedef, hfilename, append)
append('%s *%s_parsenew(upb_stringview buf, upb_env *env) {\n',
msgname, msgname)
append(' UPB_UNUSED(buf);\n')
append(' UPB_UNUSED(env);\n')
append(' return NULL;\n')
append(' %s *msg = %s_new(env);\n', msgname, msgname)
append(' if (upb_decode(buf, msg, &%s_msginit, env)) {\n', msgname)
append(' return msg;\n')
append(' } else {\n')
append(' return NULL;\n')
append(' }\n')
append('}\n')
append('char *%s_serialize(%s *msg, upb_env *env, size_t *size) {\n',
msgname, msgname)
append(' UPB_UNUSED(msg);\n')
append(' UPB_UNUSED(env);\n')
append(' UPB_UNUSED(size);\n')
append(' return NULL; /* TODO. */\n')
append(' return upb_encode(msg, &%s_msginit, env, size);\n', msgname)
append('}\n')
for field in msg:fields() do
local typename = ctype(field)
append('%s %s_%s(const %s *msg) {\n',
typename, msgname, field:name(), msgname);
append(' return msg->%s;\n', field:name())
if field:containing_oneof() then
local oneof = field:containing_oneof()
append(' return msg->%s_case == %s ? msg->%s.%s : %s;\n',
oneof:name(), field:number(), oneof:name(), field:name(),
field_default(field))
else
append(' return msg->%s;\n', field:name())
end
append('}\n')
append('void %s_set_%s(%s *msg, %s value) {\n',
msgname, field:name(), msgname, typename);
append(' msg->%s = value;\n', field:name())
if field:containing_oneof() then
local oneof = field:containing_oneof()
append(' msg->%s.%s = value;\n', oneof:name(), field:name())
append(' msg->%s_case = %s;\n', oneof:name(), field:number())
else
append(' msg->%s = value;\n', field:name())
end
append('}\n')
end
for oneof in msg:oneofs() do
local fullname = to_cident(oneof:containing_type():full_name() .. "." .. oneof:name())
append('%s_oneofcases %s_case(const %s *msg) {\n', fullname, fullname, msgname)
append(' return 0; /* TODO. */')
append('}')
append(' return msg->%s_case;\n', oneof:name())
append('}\n')
end
end
end

@ -0,0 +1,247 @@
#include "upb/decode.h"
/* Wire types as used by this decoder: upb_decode_field() extracts the value
 * from the low three bits of each tag (tag & 0x7) and switches on it. */
typedef enum {
UPB_WIRE_TYPE_VARINT = 0,
UPB_WIRE_TYPE_64BIT = 1,
UPB_WIRE_TYPE_DELIMITED = 2,
UPB_WIRE_TYPE_START_GROUP = 3,
UPB_WIRE_TYPE_END_GROUP = 4,
UPB_WIRE_TYPE_32BIT = 5
} upb_wiretype_t;
/* Wraps "msg" in a upb_status and reports it through "env". */
static void upb_decode_seterr(upb_env *env, const char *msg) {
  upb_status err = UPB_STATUS_INIT;

  upb_status_seterrmsg(&err, msg);
  upb_env_reporterror(env, &err);
}
/* Decodes a base-128 varint starting at *ptr into *val.
 *
 * On success, advances *ptr past the varint and returns true.  Returns false
 * (leaving *ptr untouched) if the input ends before the varint does or if the
 * varint is longer than 10 bytes. */
static bool upb_decode_varint(const char **ptr, const char *limit,
                              uint64_t *val) {
  const char *cur = *ptr;
  int shift = 0;

  *val = 0;
  for (;;) {
    uint8_t b;
    if (shift == 70 || cur == limit) {
      return false;
    }
    b = *cur++;
    *val |= (uint64_t)(b & 0x7F) << shift;
    shift += 7;
    if ((b & 0x80) == 0) {
      break;  /* No continuation bit: this was the last byte. */
    }
  }

  *ptr = cur;
  return true;
}
/* Like upb_decode_varint(), but fails if the decoded value does not fit in
 * 32 bits.  *val is only written on success. */
static bool upb_decode_varint32(const char **ptr, const char *limit,
                                uint32_t *val) {
  uint64_t wide;

  if (upb_decode_varint(ptr, limit, &wide) && wide <= UINT32_MAX) {
    *val = wide;
    return true;
  }
  return false;
}
static const upb_msglayout_fieldinit_v1 *upb_find_field(
const upb_msglayout_msginit_v1 *l, uint32_t field_number) {
/* Lots of optimization opportunities here. */
int i;
for (i = 0; i < l->field_count; i++) {
if (l->fields[i].number == field_number) {
return &l->fields[i];
}
}
return NULL; /* Unknown field. */
}
/* Copies the next 8 bytes of input into *val (host byte order) and advances
 * *ptr.  Returns false, changing nothing, if fewer than 8 bytes remain. */
static bool upb_decode_64bit(const char **ptr, const char *limit,
                             uint64_t *val) {
  const char *src = *ptr;

  if (limit - src >= 8) {
    memcpy(val, src, 8);
    *ptr = src + 8;
    return true;
  }
  return false;
}
/* Copies the next 4 bytes of input into *val (host byte order) and advances
 * *ptr.  Returns false, changing nothing, if fewer than 4 bytes remain. */
static bool upb_decode_32bit(const char **ptr, const char *limit,
                             uint32_t *val) {
  const char *src = *ptr;

  if (limit - src >= 4) {
    memcpy(val, src, 4);
    *ptr = src + 4;
    return true;
  }
  return false;
}
/* ZigZag-decodes a 32-bit value: 0, 1, 2, 3, ... -> 0, -1, 1, -2, ... */
static int32_t upb_zzdec_32(uint32_t n) {
  uint32_t magnitude = n >> 1;
  uint32_t sign_mask = (uint32_t)-(int32_t)(n & 1);  /* 0 or all ones. */
  return magnitude ^ sign_mask;
}
/* ZigZag-decodes a 64-bit value: 0, 1, 2, 3, ... -> 0, -1, 1, -2, ... */
static int64_t upb_zzdec_64(uint64_t n) {
  uint64_t magnitude = n >> 1;
  uint64_t sign_mask = (uint64_t)-(int64_t)(n & 1);  /* 0 or all ones. */
  return magnitude ^ sign_mask;
}
/* Decodes a length-delimited value: a varint32 length followed by that many
 * bytes.  On success *val refers to the payload in place (no copy is made)
 * and *ptr is advanced past it.  Returns false if the length cannot be decoded
 * or exceeds the remaining input. */
static bool upb_decode_string(const char **ptr, const char *limit,
                              upb_stringview *val) {
  uint32_t len;

  if (!upb_decode_varint32(ptr, limit, &len)) {
    return false;
  }
  if (limit - *ptr < len) {
    return false;
  }

  *val = upb_stringview_make(*ptr, len);
  *ptr += len;
  return true;
}
/* Stores the 32-bit value "val" at byte offset "ofs" inside "msg". */
static void upb_set32(void *msg, size_t ofs, uint32_t val) {
  char *dst = (char*)msg + ofs;
  memcpy(dst, &val, sizeof(val));
}
/* Handles a field that the layout does not know about.
 *
 * "start" and "limit" delimit the encoded field (tag included).  Unknown
 * fields are not preserved yet: the data is simply skipped by moving *ptr to
 * "limit".  Always returns true.
 *
 * TODO: actually append [start, limit) to the message's unknown-field
 * storage once that exists. */
bool upb_append_unknown(const char **ptr, const char *start, const char *limit,
                        char *msg) {
  UPB_UNUSED(start);
  UPB_UNUSED(msg);
  *ptr = limit;
  return true;
}
/* Decodes a single field (tag + value) starting at *ptr and stores it into
 * "msg" according to layout "l".
 *
 * On success advances *ptr past the field and returns true.  On malformed
 * input reports an error through "env" and returns false.
 *
 * Fields that are not in the layout, whose wire type does not match their
 * declared type, or whose storage this decoder cannot populate yet (repeated
 * fields and submessages) are routed to upb_append_unknown() instead of being
 * written into the message, so that we never scribble over storage of a
 * different shape.
 *
 * TODO: hasbits are not set here yet. */
bool upb_decode_field(const char **ptr, const char *limit, char *msg,
                      const upb_msglayout_msginit_v1 *l, upb_env *env) {
  uint32_t tag;
  uint32_t wire_type;
  uint32_t field_number;
  const char *p = *ptr;
  const char *field_start = p;
  const upb_msglayout_fieldinit_v1 *f;

  if (!upb_decode_varint32(&p, limit, &tag)) {
    upb_decode_seterr(env, "Error decoding tag.\n");
    return false;
  }

  wire_type = tag & 0x7;
  field_number = tag >> 3;

  if (field_number == 0) {
    upb_decode_seterr(env, "Invalid field number 0.\n");
    return false;
  }

  f = upb_find_field(l, field_number);

  /* A repeated field is stored as a upb_array*, which this decoder does not
   * know how to append to yet.  Writing a scalar over the pointer would
   * corrupt the message, so treat the field as unknown for now. */
  if (f && f->label == UPB_LABEL_REPEATED) {
    f = NULL;
  }

  switch (wire_type) {
    case UPB_WIRE_TYPE_VARINT: {
      uint64_t val;
      if (!upb_decode_varint(&p, limit, &val)) {
        upb_decode_seterr(env, "Error decoding varint value.\n");
        return false;
      }

      if (!f) {
        return upb_append_unknown(ptr, field_start, p, msg);
      }

      switch (f->type) {
        case UPB_DESCRIPTOR_TYPE_INT64:
        case UPB_DESCRIPTOR_TYPE_UINT64:
          memcpy(msg + f->offset, &val, sizeof(val));
          break;
        case UPB_DESCRIPTOR_TYPE_INT32:
        case UPB_DESCRIPTOR_TYPE_UINT32:
        case UPB_DESCRIPTOR_TYPE_ENUM: {
          uint32_t val32 = val;
          memcpy(msg + f->offset, &val32, sizeof(val32));
          break;
        }
        case UPB_DESCRIPTOR_TYPE_BOOL: {
          bool valbool = (val != 0);
          memcpy(msg + f->offset, &valbool, sizeof(valbool));
          break;
        }
        case UPB_DESCRIPTOR_TYPE_SINT32: {
          int32_t decoded = upb_zzdec_32(val);
          memcpy(msg + f->offset, &decoded, sizeof(decoded));
          break;
        }
        case UPB_DESCRIPTOR_TYPE_SINT64: {
          int64_t decoded = upb_zzdec_64(val);
          memcpy(msg + f->offset, &decoded, sizeof(decoded));
          break;
        }
        default:
          /* Wire type does not match the declared field type. */
          return upb_append_unknown(ptr, field_start, p, msg);
      }

      break;
    }
    case UPB_WIRE_TYPE_64BIT: {
      uint64_t val;
      if (!upb_decode_64bit(&p, limit, &val)) {
        upb_decode_seterr(env, "Error decoding 64bit value.\n");
        return false;
      }

      if (!f) {
        return upb_append_unknown(ptr, field_start, p, msg);
      }

      switch (f->type) {
        case UPB_DESCRIPTOR_TYPE_DOUBLE:
        case UPB_DESCRIPTOR_TYPE_FIXED64:
        case UPB_DESCRIPTOR_TYPE_SFIXED64:
          /* All three are stored as the raw 8 bytes. */
          memcpy(msg + f->offset, &val, sizeof(val));
          break;
        default:
          return upb_append_unknown(ptr, field_start, p, msg);
      }

      break;
    }
    case UPB_WIRE_TYPE_32BIT: {
      uint32_t val;
      if (!upb_decode_32bit(&p, limit, &val)) {
        upb_decode_seterr(env, "Error decoding 32bit value.\n");
        return false;
      }

      if (!f) {
        return upb_append_unknown(ptr, field_start, p, msg);
      }

      switch (f->type) {
        case UPB_DESCRIPTOR_TYPE_FLOAT:
        case UPB_DESCRIPTOR_TYPE_FIXED32:
        case UPB_DESCRIPTOR_TYPE_SFIXED32:
          /* All three are stored as the raw 4 bytes. */
          memcpy(msg + f->offset, &val, sizeof(val));
          break;
        default:
          return upb_append_unknown(ptr, field_start, p, msg);
      }

      break;
    }
    case UPB_WIRE_TYPE_DELIMITED: {
      upb_stringview val;
      if (!upb_decode_string(&p, limit, &val)) {
        upb_decode_seterr(env, "Error decoding delimited value.\n");
        return false;
      }

      if (!f) {
        return upb_append_unknown(ptr, field_start, p, msg);
      }

      switch (f->type) {
        case UPB_DESCRIPTOR_TYPE_STRING:
        case UPB_DESCRIPTOR_TYPE_BYTES:
          memcpy(msg + f->offset, &val, sizeof(val));
          break;
        default:
          /* Submessages are stored as a pointer, not a upb_stringview;
           * decoding them is not implemented yet, so skip the payload
           * rather than overrun the field. */
          return upb_append_unknown(ptr, field_start, p, msg);
      }

      break;
    }
    default:
      /* Groups (wire types 3 and 4) are not supported, and 6 and 7 are not
       * valid wire types.  We do not know how long the value is, so we
       * cannot skip it. */
      upb_decode_seterr(env, "Unsupported wire type.\n");
      return false;
  }

  /* Every path that reaches this point stored a value into a known field. */
  if (f->oneof_index != UPB_NOT_IN_ONEOF) {
    upb_set32(msg, l->oneofs[f->oneof_index].case_offset, f->number);
  }

  *ptr = p;
  return true;
}
/* Decodes every field in "buf" into the message at "msg_void", whose memory
 * layout is described by "l".  Stops and returns false at the first field
 * that fails to decode; returns true once all input has been consumed. */
bool upb_decode(upb_stringview buf, void *msg_void,
                const upb_msglayout_msginit_v1 *l, upb_env *env) {
  char *msg_bytes = msg_void;
  const char *cur = buf.data;
  const char *end = cur + buf.size;

  for (; cur < end; ) {
    if (!upb_decode_field(&cur, end, msg_bytes, l, env)) {
      return false;
    }
  }

  return true;
}

@ -0,0 +1,17 @@
/*
** upb_decode: parsing into a upb_msg using a upb_msglayout.
*/
#ifndef UPB_DECODE_H_
#define UPB_DECODE_H_
#include "upb/msg.h"
UPB_BEGIN_EXTERN_C
/* Decodes the serialized data in "buf" into "msg", using layout "l" to locate
 * each field.  Returns false if decoding fails; errors are reported through
 * "env". */
bool upb_decode(upb_stringview buf, void *msg,
const upb_msglayout_msginit_v1 *l, upb_env *env);
UPB_END_EXTERN_C
#endif /* UPB_DECODE_H_ */

@ -0,0 +1,512 @@
#include "upb/encode.h"
#include "upb/structs.int.h"
#define UPB_PB_VARINT_MAX_LEN 10
/* Writes "val" to "buf" as a base-128 varint and returns the number of bytes
 * written (between 1 and 10).  "buf" must have room for the result. */
static size_t upb_encode_varint(uint64_t val, char *buf) {
  size_t n = 0;

  do {
    uint8_t b = val & 0x7fU;
    val >>= 7;
    if (val) b |= 0x80U;  /* More bytes follow. */
    buf[n++] = b;
  } while (val);

  return n;
}
/* Returns how many bytes "val" occupies when varint-encoded, by encoding it
 * into a scratch buffer. */
static size_t upb_varint_size(uint64_t val) {
  char scratch[UPB_PB_VARINT_MAX_LEN];
  size_t len = upb_encode_varint(val, scratch);
  return len;
}
/* ZigZag-encodes a 32-bit value: 0, -1, 1, -2, ... -> 0, 1, 2, 3, ...
 * The left shift is done on the unsigned representation because shifting a
 * negative signed value left is undefined behavior in C. */
static uint32_t upb_zzenc_32(int32_t n) {
  return ((uint32_t)n << 1) ^ (uint32_t)(n >> 31);
}
/* ZigZag-encodes a 64-bit value: 0, -1, 1, -2, ... -> 0, 1, 2, 3, ...
 * The left shift is done on the unsigned representation because shifting a
 * negative signed value left is undefined behavior in C. */
static uint64_t upb_zzenc_64(int64_t n) {
  return ((uint64_t)n << 1) ^ (uint64_t)(n >> 63);
}
/* Wire types written by the encoder as the low three bits of each tag (see
 * upb_put_tag(), which computes (field_number << 3) | wire_type). */
typedef enum {
UPB_WIRE_TYPE_VARINT = 0,
UPB_WIRE_TYPE_64BIT = 1,
UPB_WIRE_TYPE_DELIMITED = 2,
UPB_WIRE_TYPE_START_GROUP = 3,
UPB_WIRE_TYPE_END_GROUP = 4,
UPB_WIRE_TYPE_32BIT = 5
} upb_wiretype_t;
/* Maps each descriptor type to the wire type used to encode it.
 * Index is descriptor type.
 * NOTE(review): the named types start at index 1 (DOUBLE), so slot 0 looks
 * like a placeholder rather than a real descriptor type -- confirm. */
const uint8_t upb_native_wiretypes[] = {
UPB_WIRE_TYPE_END_GROUP, /* ENDGROUP */
UPB_WIRE_TYPE_64BIT, /* DOUBLE */
UPB_WIRE_TYPE_32BIT, /* FLOAT */
UPB_WIRE_TYPE_VARINT, /* INT64 */
UPB_WIRE_TYPE_VARINT, /* UINT64 */
UPB_WIRE_TYPE_VARINT, /* INT32 */
UPB_WIRE_TYPE_64BIT, /* FIXED64 */
UPB_WIRE_TYPE_32BIT, /* FIXED32 */
UPB_WIRE_TYPE_VARINT, /* BOOL */
UPB_WIRE_TYPE_DELIMITED, /* STRING */
UPB_WIRE_TYPE_START_GROUP, /* GROUP */
UPB_WIRE_TYPE_DELIMITED, /* MESSAGE */
UPB_WIRE_TYPE_DELIMITED, /* BYTES */
UPB_WIRE_TYPE_VARINT, /* UINT32 */
UPB_WIRE_TYPE_VARINT, /* ENUM */
UPB_WIRE_TYPE_32BIT, /* SFIXED32 */
UPB_WIRE_TYPE_64BIT, /* SFIXED64 */
UPB_WIRE_TYPE_VARINT, /* SINT32 */
UPB_WIRE_TYPE_VARINT, /* SINT64 */
};
/* The output buffer is divided into segments; a segment is a string of data
* that is "ready to go" -- it does not need any varint lengths inserted into
* the middle. The seams between segments are where varints will be inserted
* once they are known.
*
* We also use the concept of a "run", which is a range of encoded bytes that
* occur at a single submessage level. Every segment contains one or more runs.
*
* A segment can span messages. Consider:
*
* .--Submessage lengths---------.
* | | |
* | V V
* V | |--------------- | |-----------------
* Submessages: | |-----------------------------------------------
* Top-level msg: ------------------------------------------------------------
*
* Segments: ----- ------------------- -----------------
* Runs: *---- *--------------*--- *----------------
* (* marks the start)
*
* Note that the top-level message is not in any segment because it does not
* have any length preceding it.
*
* A segment is only interrupted when another length needs to be inserted. So
* observe how the second segment spans both the inner submessage and part of
* the next enclosing message. */
/* Bookkeeping for one segment of buffered output: the length that must be
 * emitted in front of it, and how many already-encoded bytes it covers. */
typedef struct {
uint32_t msglen; /* The length to varint-encode before this segment. */
uint32_t seglen; /* Length of the segment. */
} upb_segment;
/* All state for one encoding operation. */
typedef struct {
/* Environment used for (re)allocating the buffers below. */
upb_env *env;
/* Output buffer: [buf, limit) is allocated, ptr is the write position. */
char *buf, *ptr, *limit;
/* The beginning of the current run, or undefined if we are at the top
* level. */
char *runbegin;
/* The list of segments we are accumulating. [segbuf, seglimit) is allocated,
* segptr is the segment currently being filled. */
upb_segment *segbuf, *segptr, *seglimit;
/* The stack of enclosing submessages. Each entry in the stack points to the
* segment where this submessage's length is being accumulated. Entries are
* indexes into segbuf; top is NULL while we are at the top level. */
int *stack, *top, *stacklimit;
} upb_encstate;
/* Returns the segment that the innermost open submessage (the top of the
 * stack) is accumulating its length into. */
static upb_segment *upb_encode_top(upb_encstate *e) {
  int seg_index = *e->top;
  return e->segbuf + seg_index;
}
/* Grows the output buffer so that at least "bytes" more bytes can be written
 * at e->ptr, preserving existing contents and re-basing the pointers into the
 * buffer.  Returns false if the required size overflows or the allocation
 * fails, in which case the state is left unchanged. */
static bool upb_encode_growbuffer(upb_encstate *e, size_t bytes) {
  char *new_buf;
  size_t used = e->buf ? (size_t)(e->ptr - e->buf) : 0;
  size_t old_size = e->buf ? (size_t)(e->limit - e->buf) : 0;
  size_t needed = used + bytes;
  size_t new_size;
  size_t run_ofs = 0;

  if (needed < used) {
    return false;  /* used + bytes overflowed size_t. */
  }

  /* Start from a non-zero size: doubling zero would never terminate. */
  new_size = old_size ? old_size : 128;
  while (new_size < needed) {
    if (new_size > (size_t)-1 / 2) {
      new_size = needed;  /* Doubling would overflow; take exactly enough. */
      break;
    }
    new_size *= 2;
  }

  /* runbegin is only meaningful while we are buffering a submessage. */
  if (e->top) {
    run_ofs = e->runbegin - e->buf;
  }

  new_buf = upb_env_realloc(e->env, e->buf, old_size, new_size);

  if (new_buf == NULL) {
    return false;
  }

  e->ptr = new_buf + used;
  if (e->top) {
    e->runbegin = new_buf + run_ofs;
  }
  e->limit = new_buf + new_size;
  e->buf = new_buf;
  return true;
}
/* Guarantees that "bytes" bytes can be written at e->ptr, growing the buffer
 * if the space is not already there.  Returns false if growing failed. */
static bool upb_encode_reserve(upb_encstate *e, size_t bytes) {
  size_t avail = (size_t)(e->limit - e->ptr);

  if (UPB_LIKELY(avail >= bytes)) {
    return true;
  }

  return upb_encode_growbuffer(e, bytes);
}
/* Call after "bytes" bytes have been written at e->ptr to move the write
 * position past them.  The caller *must* have previously called reserve()
 * with at least this many bytes. */
static void upb_encode_advance(upb_encstate *e, size_t bytes) {
  UPB_ASSERT((size_t)(e->limit - e->ptr) >= bytes);
  e->ptr = e->ptr + bytes;
}
/* Appends "len" bytes from "data" to the output, reserving space first.
 * Returns false if the space could not be reserved. */
static bool upb_put_bytes(upb_encstate *e, const void *data, size_t len) {
  if (upb_encode_reserve(e, len)) {
    memcpy(e->ptr, data, len);
    upb_encode_advance(e, len);
    return true;
  }
  return false;
}
/* Closes the current run: its byte count is added both to the current
 * segment's length and to the length of the innermost open submessage, and a
 * new run is started at the current write position. */
static void upb_encode_accumulate(upb_encstate *e) {
  size_t run_len;
  upb_segment *enclosing = upb_encode_top(e);

  UPB_ASSERT(e->ptr >= e->runbegin);
  run_len = e->ptr - e->runbegin;

  e->segptr->seglen += run_len;
  enclosing->msglen += run_len;

  e->runbegin = e->ptr;
}
/* Call to indicate the start of delimited region for which the full length is
* not yet known. The length will be inserted at the current position once it
* is known (and subsequent data moved if necessary).
*
* Returns false if the submessage stack is full or the segment buffer could
* not be grown. */
static bool upb_encode_startdelim(upb_encstate *e) {
if (e->top) {
/* We are already buffering, advance to the next segment and push it on the
* stack. */
upb_encode_accumulate(e);
if (++e->top == e->stacklimit) {
/* TODO(haberman): grow stack? */
return false;
}
if (++e->segptr == e->seglimit) {
/* Grow segment buffer. */
/* NOTE(review): if the segment buffer has zero capacity, doubling leaves
* it at zero -- confirm it is always allocated non-empty. */
size_t old_size =
(e->seglimit - e->segbuf) * sizeof(upb_segment);
size_t new_size = old_size * 2;
upb_segment *new_buf =
upb_env_realloc(e->env, e->segbuf, old_size, new_size);
if (new_buf == NULL) {
return false;
}
/* Re-base segptr before segbuf is overwritten below. */
e->segptr = new_buf + (e->segptr - e->segbuf);
e->seglimit = new_buf + (new_size / sizeof(upb_segment));
e->segbuf = new_buf;
}
} else {
/* We were previously at the top level, start buffering. */
e->segptr = e->segbuf;
e->top = e->stack;
e->runbegin = e->ptr;
}
/* The new stack entry records which segment accumulates this region's
* length; that segment starts out empty. */
*e->top = e->segptr - e->segbuf;
e->segptr->seglen = 0;
e->segptr->msglen = 0;
return true;
}
/* Call to indicate the end of a delimited region. We now know the length of
* the delimited region. If we are not nested inside any other delimited
* regions, we can now emit all of the buffered data we accumulated.
*
* Always returns true as written. */
static bool upb_encode_enddelim(upb_encstate *e) {
size_t msglen;
upb_encode_accumulate(e);
msglen = upb_encode_top(e)->msglen;
if (e->top == e->stack) {
/* All lengths are now available, emit all buffered data. */
/* NOTE(review): the emit step is not implemented. The putbuf() calls below
* are commented out, so lenbytes is computed but unused and no length
* prefixes are written; resetting e->ptr to e->buf afterwards discards
* what was buffered. */
char buf[UPB_PB_VARINT_MAX_LEN];
upb_segment *s;
const char *ptr = e->buf;
for (s = e->segbuf; s <= e->segptr; s++) {
size_t lenbytes = upb_encode_varint(s->msglen, buf);
//putbuf(e, buf, lenbytes);
//putbuf(e, ptr, s->seglen);
ptr += s->seglen;
}
e->ptr = e->buf;
/* Back at the top level: no longer buffering. */
e->top = NULL;
} else {
/* Need to keep buffering; propagate length info into enclosing
* submessages. */
--e->top;
/* The enclosing message contains both our payload and our length prefix. */
upb_encode_top(e)->msglen += msglen + upb_varint_size(msglen);
}
return true;
}
/* encoding of wire types *****************************************************/
/* Appends "val" to the output as 8 raw bytes in host byte order. */
static bool upb_put_fixed64(upb_encstate *e, uint64_t val) {
  /* TODO(haberman): byte-swap for big endian. */
  const void *raw = &val;
  return upb_put_bytes(e, raw, sizeof(val));
}
/* Appends "val" to the output as 4 raw bytes in host byte order. */
static bool upb_put_fixed32(upb_encstate *e, uint32_t val) {
  /* TODO(haberman): byte-swap for big endian. */
  const void *raw = &val;
  return upb_put_bytes(e, raw, sizeof(val));
}
/* Appends "val" to the output as a varint.  Space for the longest possible
 * varint is reserved up front; only the bytes actually used are consumed. */
static bool upb_put_varint(upb_encstate *e, uint64_t val) {
  size_t written;

  if (!upb_encode_reserve(e, UPB_PB_VARINT_MAX_LEN)) {
    return false;
  }

  written = upb_encode_varint(val, e->ptr);
  upb_encode_advance(e, written);
  return true;
}
/* Appends the bit pattern of "d" to the output as a fixed 64-bit value. */
static bool upb_put_double(upb_encstate *e, double d) {
  uint64_t bits;

  UPB_ASSERT(sizeof(double) == sizeof(uint64_t));
  memcpy(&bits, &d, sizeof(bits));
  return upb_put_fixed64(e, bits);
}
/* Appends the bit pattern of "d" to the output as a fixed 32-bit value. */
static bool upb_put_float(upb_encstate *e, float d) {
  uint32_t bits;

  UPB_ASSERT(sizeof(float) == sizeof(uint32_t));
  memcpy(&bits, &d, sizeof(bits));
  return upb_put_fixed32(e, bits);
}
/* Reads the 32-bit "case" value of the given oneof from "msg", using the
 * case offset recorded in layout "m". */
static uint32_t upb_readcase(const char *msg, const upb_msglayout_msginit_v1 *m,
                             int oneof_index) {
  uint32_t which;
  const char *case_ptr = msg + m->oneofs[oneof_index].case_offset;

  memcpy(&which, case_ptr, sizeof(which));
  return which;
}
/* Tests the hasbit of field "f" in "msg".  Hasbits are addressed as bit
 * (f->hasbit % 8) of byte (f->hasbit / 8) from the start of the message.
 * The field must have a hasbit. */
static bool upb_readhasbit(const char *msg,
                           const upb_msglayout_fieldinit_v1 *f) {
  int byte;
  int mask;

  UPB_ASSERT(f->hasbit != UPB_NO_HASBIT);
  byte = msg[f->hasbit / 8];
  mask = 1 << (f->hasbit % 8);
  return byte & mask;
}
/* Appends a field tag: (field_number << 3) | wire_type, varint-encoded.
 *
 * The tag is assembled in unsigned 64-bit arithmetic: shifting a large field
 * number left by 3 as a signed int can overflow, and the resulting negative
 * value would be sign-extended into a bogus 10-byte varint. */
static bool upb_put_tag(upb_encstate *e, int field_number, int wire_type) {
  uint64_t tag = ((uint64_t)(uint32_t)field_number << 3) | (uint32_t)wire_type;
  return upb_put_varint(e, tag);
}
/* Appends a packed array of fixed-width elements: the total byte length as a
 * varint, followed by the raw element data.  "size" is the width in bytes of
 * one element. */
static bool upb_put_fixedarray(upb_encstate *e, const upb_array *arr,
                               size_t size) {
  size_t payload = arr->len * size;

  if (!upb_put_varint(e, payload)) {
    return false;
  }
  return upb_put_bytes(e, arr->data, payload);
}
bool upb_encode_message(upb_encstate *e, const char *msg,
const upb_msglayout_msginit_v1 *m);
/* Encodes the repeated field "f" whose storage (a upb_array*) lives at
 * "field_mem".  "m" is the layout of the containing message and is used to
 * look up the layout of submessage elements.
 *
 * A NULL or empty array produces no output.  Returns false if any write
 * fails. */
static bool upb_encode_array(upb_encstate *e, const char *field_mem,
                             const upb_msglayout_msginit_v1 *m,
                             const upb_msglayout_fieldinit_v1 *f) {
  const upb_array *arr = *(const upb_array**)field_mem;

  if (arr == NULL || arr->len == 0) {
    return true;
  }

  /* We encode all primitive arrays as packed, regardless of what was specified
   * in the .proto file.  Could special case 1-sized arrays. */
  if (!upb_put_tag(e, f->number, UPB_WIRE_TYPE_DELIMITED)) {
    return false;
  }

/* Emits every element as a varint inside one delimited region.  "ctype" is
 * the in-memory element type (it determines the stride through the array) and
 * "encode" is the expression, in terms of *data, that yields the value to
 * write. */
#define VARINT_CASE(ctype, encode) { \
  const ctype *data = arr->data; \
  const ctype *limit = data + arr->len; \
  if (!upb_encode_startdelim(e)) { \
    return false; \
  } \
  for (; data < limit; data++) { \
    if (!upb_put_varint(e, encode)) { \
      return false; \
    } \
  } \
  return upb_encode_enddelim(e); \
}

  switch (f->type) {
    case UPB_DESCRIPTOR_TYPE_DOUBLE:
      return upb_put_fixedarray(e, arr, sizeof(double));
    case UPB_DESCRIPTOR_TYPE_FLOAT:
      return upb_put_fixedarray(e, arr, sizeof(float));
    case UPB_DESCRIPTOR_TYPE_SFIXED64:
    case UPB_DESCRIPTOR_TYPE_FIXED64:
      return upb_put_fixedarray(e, arr, sizeof(uint64_t));
    case UPB_DESCRIPTOR_TYPE_FIXED32:
    case UPB_DESCRIPTOR_TYPE_SFIXED32:
      return upb_put_fixedarray(e, arr, sizeof(uint32_t));
    case UPB_DESCRIPTOR_TYPE_INT64:
    case UPB_DESCRIPTOR_TYPE_UINT64:
      VARINT_CASE(uint64_t, *data);
    case UPB_DESCRIPTOR_TYPE_UINT32:
      VARINT_CASE(uint32_t, *data);
    case UPB_DESCRIPTOR_TYPE_INT32:
    case UPB_DESCRIPTOR_TYPE_ENUM:
      /* Negative values are sign-extended to 64 bits on the wire. */
      VARINT_CASE(int32_t, (int64_t)*data);
    case UPB_DESCRIPTOR_TYPE_BOOL:
      VARINT_CASE(bool, *data);
    case UPB_DESCRIPTOR_TYPE_SINT32:
      VARINT_CASE(int32_t, upb_zzenc_32(*data));
    case UPB_DESCRIPTOR_TYPE_SINT64:
      VARINT_CASE(int64_t, upb_zzenc_64(*data));
    case UPB_DESCRIPTOR_TYPE_STRING:
    case UPB_DESCRIPTOR_TYPE_BYTES: {
      const upb_stringview *data = arr->data;
      size_t i;
      for (i = 0; i < arr->len; i++) {
        /* The tag for element 0 was already written above. */
        if (i > 0 && !upb_put_tag(e, f->number, UPB_WIRE_TYPE_DELIMITED)) {
          return false;
        }
        if (!upb_put_varint(e, data[i].size) ||
            !upb_put_bytes(e, data[i].data, data[i].size)) {
          return false;
        }
      }
      return true;
    }
    case UPB_DESCRIPTOR_TYPE_GROUP:
    case UPB_DESCRIPTOR_TYPE_MESSAGE: {
      void *const *data = arr->data;
      const upb_msglayout_msginit_v1 *subm = m->submsgs[f->submsg_index];
      size_t i;
      for (i = 0; i < arr->len; i++) {
        /* The tag for element 0 was already written above. */
        if (i > 0 && !upb_put_tag(e, f->number, UPB_WIRE_TYPE_DELIMITED)) {
          return false;
        }
        if (!upb_encode_startdelim(e) ||
            !upb_encode_message(e, data[i], subm) ||
            !upb_encode_enddelim(e)) {
          return false;
        }
      }
      return true;
    }
  }
#undef VARINT_CASE
  UPB_UNREACHABLE();
}
/* Encodes the non-repeated field "f" whose storage lives at "field_mem".
 * "m" is the layout of the containing message and is used to look up the
 * layout of a submessage field.
 *
 * When "is_proto3" is true, fields holding their zero value (0, empty string)
 * are skipped.  An absent (NULL) submessage is always skipped.  Returns false
 * if any write fails. */
static bool upb_encode_scalarfield(upb_encstate *e, const char *field_mem,
                                   const upb_msglayout_msginit_v1 *m,
                                   const upb_msglayout_fieldinit_v1 *f,
                                   bool is_proto3) {
/* Reads a "ctype" from field_mem and writes it, preceded by its tag, using
 * upb_put_<type>() on the expression "encodeval" (written in terms of val). */
#define CASE(ctype, type, wire_type, encodeval) { \
  ctype val = *(const ctype*)field_mem; \
  if (is_proto3 && val == 0) { \
    return true; \
  } \
  return upb_put_tag(e, f->number, wire_type) && \
         upb_put_ ## type(e, encodeval); \
}

  switch (f->type) {
    case UPB_DESCRIPTOR_TYPE_DOUBLE:
      CASE(double, double, UPB_WIRE_TYPE_64BIT, val)
    case UPB_DESCRIPTOR_TYPE_FLOAT:
      CASE(float, float, UPB_WIRE_TYPE_32BIT, val)
    case UPB_DESCRIPTOR_TYPE_INT64:
    case UPB_DESCRIPTOR_TYPE_UINT64:
      CASE(uint64_t, varint, UPB_WIRE_TYPE_VARINT, val)
    case UPB_DESCRIPTOR_TYPE_UINT32:
      CASE(uint32_t, varint, UPB_WIRE_TYPE_VARINT, val)
    case UPB_DESCRIPTOR_TYPE_INT32:
    case UPB_DESCRIPTOR_TYPE_ENUM:
      /* Negative values are sign-extended to 64 bits on the wire. */
      CASE(int32_t, varint, UPB_WIRE_TYPE_VARINT, (int64_t)val)
    case UPB_DESCRIPTOR_TYPE_SFIXED64:
    case UPB_DESCRIPTOR_TYPE_FIXED64:
      CASE(uint64_t, fixed64, UPB_WIRE_TYPE_64BIT, val)
    case UPB_DESCRIPTOR_TYPE_FIXED32:
    case UPB_DESCRIPTOR_TYPE_SFIXED32:
      CASE(uint32_t, fixed32, UPB_WIRE_TYPE_32BIT, val)
    case UPB_DESCRIPTOR_TYPE_BOOL:
      CASE(bool, varint, UPB_WIRE_TYPE_VARINT, val)
    case UPB_DESCRIPTOR_TYPE_SINT32:
      CASE(int32_t, varint, UPB_WIRE_TYPE_VARINT, upb_zzenc_32(val))
    case UPB_DESCRIPTOR_TYPE_SINT64:
      CASE(int64_t, varint, UPB_WIRE_TYPE_VARINT, upb_zzenc_64(val))
    case UPB_DESCRIPTOR_TYPE_STRING:
    case UPB_DESCRIPTOR_TYPE_BYTES: {
      upb_stringview view = *(const upb_stringview*)field_mem;
      if (is_proto3 && view.size == 0) {
        return true;
      }
      return upb_put_tag(e, f->number, UPB_WIRE_TYPE_DELIMITED) &&
             upb_put_varint(e, view.size) &&
             upb_put_bytes(e, view.data, view.size);
    }
    case UPB_DESCRIPTOR_TYPE_GROUP:
    case UPB_DESCRIPTOR_TYPE_MESSAGE: {
      void *submsg = *(void *const *)field_mem;
      /* There is nothing to encode for an absent submessage, whichever syntax
       * the message uses; recursing into NULL would crash. */
      if (submsg == NULL) {
        return true;
      }
      return upb_put_tag(e, f->number, UPB_WIRE_TYPE_DELIMITED) &&
             upb_encode_startdelim(e) &&
             upb_encode_message(e, submsg, m->submsgs[f->submsg_index]) &&
             upb_encode_enddelim(e);
    }
  }
#undef CASE
  UPB_UNREACHABLE();
}
/* Decides whether the non-repeated field "f" of "msg" should be considered
 * for encoding:
 *   - a oneof member counts only if it is the oneof's current case;
 *   - otherwise a proto2 field counts if its hasbit is set;
 *   - otherwise (proto3) it always counts here. */
bool upb_encode_hasscalarfield(const char *msg,
                               const upb_msglayout_msginit_v1 *m,
                               const upb_msglayout_fieldinit_v1 *f) {
  if (f->oneof_index != UPB_NOT_IN_ONEOF) {
    return upb_readcase(msg, m, f->oneof_index) == f->number;
  }

  if (m->is_proto2) {
    return upb_readhasbit(msg, f);
  }

  /* For proto3, we'll test for the field being empty later. */
  return true;
}
/* Encodes every field of "msg", in the order the fields appear in layout "m".
 * Returns false as soon as any field fails to encode. */
bool upb_encode_message(upb_encstate* e, const char *msg,
                        const upb_msglayout_msginit_v1 *m) {
  int i;
  for (i = 0; i < m->field_count; i++) {
    const upb_msglayout_fieldinit_v1 *f = &m->fields[i];
    /* Both encoders expect a pointer to the field's own storage, not to the
     * start of the message. */
    const char *field_mem = msg + f->offset;

    if (f->label == UPB_LABEL_REPEATED) {
      if (!upb_encode_array(e, field_mem, m, f)) {
        return false;
      }
    } else {
      if (upb_encode_hasscalarfield(msg, m, f) &&
          !upb_encode_scalarfield(e, field_mem, m, f, !m->is_proto2)) {
        return false;
      }
    }
  }

  return true;
}
/* Serializes "msg", whose memory layout is described by "m".
 *
 * All memory is allocated from "env".  On success returns the output buffer
 * and stores its length in *size.  Returns NULL if allocation or encoding
 * fails, in which case *size is not written. */
char *upb_encode(const void *msg, const upb_msglayout_msginit_v1 *m,
                 upb_env *env, size_t *size) {
  /* Initial capacities; the output and segment buffers grow on demand, the
   * submessage stack bounds the supported nesting depth. */
  enum { INITIAL_BUF_BYTES = 256, INITIAL_SEGMENTS = 16, STACK_DEPTH = 64 };
  upb_encstate e;

  e.env = env;
  e.buf = upb_env_realloc(env, NULL, 0, INITIAL_BUF_BYTES);
  e.segbuf =
      upb_env_realloc(env, NULL, 0, INITIAL_SEGMENTS * sizeof(upb_segment));
  e.stack = upb_env_realloc(env, NULL, 0, STACK_DEPTH * sizeof(int));

  if (e.buf == NULL || e.segbuf == NULL || e.stack == NULL) {
    return NULL;
  }

  e.ptr = e.buf;
  e.limit = e.buf + INITIAL_BUF_BYTES;
  e.runbegin = e.buf;
  e.segptr = e.segbuf;
  e.seglimit = e.segbuf + INITIAL_SEGMENTS;
  e.top = NULL;  /* NULL means we are at the top level (not buffering). */
  e.stacklimit = e.stack + STACK_DEPTH;

  if (!upb_encode_message(&e, msg, m)) {
    return NULL;
  }

  *size = e.ptr - e.buf;
  return e.buf;
}

@ -0,0 +1,17 @@
/*
** upb_encode: serializing a upb_msg using a upb_msglayout.
*/
#ifndef UPB_ENCODE_H_
#define UPB_ENCODE_H_
#include "upb/msg.h"
UPB_BEGIN_EXTERN_C
/* Serializes "msg" using layout "l".  Returns the output buffer and stores
 * its length in *size. */
char *upb_encode(const void *msg, const upb_msglayout_msginit_v1 *l,
upb_env *env, size_t *size);
UPB_END_EXTERN_C
#endif /* UPB_ENCODE_H_ */

@ -1,5 +1,6 @@
#include "upb/msg.h"
#include "upb/structs.int.h"
static bool is_power_of_two(size_t val) {
return (val & (val - 1)) == 0;
@ -791,15 +792,6 @@ void upb_msg_set(upb_msg *msg, int field_index, upb_msgval val,
/** upb_array *****************************************************************/
struct upb_array {
upb_fieldtype_t type;
uint8_t element_size;
void *data; /* Each element is element_size. */
size_t len; /* Measured in elements. */
size_t size; /* Measured in elements. */
upb_alloc *alloc;
};
#define DEREF_ARR(arr, i, type) ((type*)arr->data)[i]
size_t upb_array_sizeof(upb_fieldtype_t type) {

@ -386,6 +386,7 @@ bool upb_msg_getscalarhandlerdata(const upb_handlers *h,
/** Interfaces for generated code *********************************************/
#define UPB_NOT_IN_ONEOF UINT16_MAX
#define UPB_NO_HASBIT UINT16_MAX
typedef struct {
uint32_t number;

@ -0,0 +1,18 @@
/*
** structs.int.h: structures definitions that are internal to upb.
*/
#ifndef UPB_STRUCTS_H_
#define UPB_STRUCTS_H_
/* Storage for a repeated field. */
struct upb_array {
upb_fieldtype_t type;
uint8_t element_size;
void *data; /* Each element is element_size. */
size_t len; /* Measured in elements. */
size_t size; /* Measured in elements. */
upb_alloc *alloc;
};
#endif /* UPB_STRUCTS_H_ */

@ -34,6 +34,9 @@ template <int N> class InlinedEnvironment;
#define UPB_INLINE static
#endif
/* Hints to the compiler about likely/unlikely branches. */
#define UPB_LIKELY(x) __builtin_expect((x),1)
/* Define UPB_BIG_ENDIAN manually if you're on big endian and your compiler
* doesn't provide these preprocessor symbols. */
#if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)

Loading…
Cancel
Save