diff --git a/.github/workflows/bazel_tests.yml b/.github/workflows/bazel_tests.yml index e184634684..be4dce05fe 100644 --- a/.github/workflows/bazel_tests.yml +++ b/.github/workflows/bazel_tests.yml @@ -24,7 +24,7 @@ jobs: include: - { NAME: "Fastbuild", BAZEL: bazel, CC: clang, os: ubuntu-20.04, flags: "" } - { NAME: "Optmized", BAZEL: bazel, CC: clang, os: ubuntu-20.04, flags: "-c opt" } # Some warnings only fire with -c opt - - { NAME: "GCC Optimized", BAZEL: bazel, CC: gcc, os: ubuntu-20.04, flags: "-c opt" } + - { NAME: "GCC Optimized", BAZEL: bazel, CC: gcc-12, os: ubuntu-22.04, flags: "-c opt" } - { NAME: "FastTable", BAZEL: bazel, CC: clang, os: ubuntu-20.04, flags: "--//:fasttable_enabled=true -- -cmake:test_generated_files" } - { NAME: "ASAN", BAZEL: bazel, CC: clang, os: ubuntu-20.04, flags: "--config=asan -c dbg -- -benchmarks:benchmark -python/..." } - { NAME: "UBSAN", BAZEL: bazel, CC: clang, os: ubuntu-20.04, flags: "--config=ubsan -c dbg -- -benchmarks:benchmark -python/... 
-lua/...", install: "libunwind-dev" } @@ -53,6 +53,10 @@ jobs: wget -O $FILENAME https://github.com/bazelbuild/bazel/releases/download/$VERSION/${{ matrix.BAZEL }} chmod a+x $FILENAME if: ${{ matrix.BAZEL != 'bazel' }} + - name: Check tool versions + run: | + ${{ matrix.CC }} --version + ${{ matrix.BAZEL }} --version - name: Set up Bazel read/write caching run: echo "BAZEL_CACHE_AUTH=--google_default_credentials" >> $GITHUB_ENV if: ${{ github.event.pull_request.head.repo.full_name == 'protocolbuffers/upb' }} diff --git a/BUILD b/BUILD index 743b438318..60f2d1302c 100644 --- a/BUILD +++ b/BUILD @@ -31,12 +31,14 @@ load( ) load( "//bazel:upb_proto_library.bzl", - "upb_fasttable_enabled", "upb_proto_library", "upb_proto_library_copts", "upb_proto_reflection_library", ) + load("@rules_pkg//:mappings.bzl", "pkg_files", "strip_prefix") +load("@bazel_skylib//rules:common_settings.bzl", "bool_flag") + # begin:google_only # load( @@ -70,7 +72,7 @@ config_setting( visibility = ["//visibility:public"], ) -upb_fasttable_enabled( +bool_flag( name = "fasttable_enabled", build_setting_default = False, visibility = ["//visibility:public"], @@ -267,12 +269,14 @@ cc_library( visibility = ["//visibility:public"], deps = [ ":collections_internal", + ":eps_copy_input_stream", ":hash", ":message_internal", ":mini_table_internal", ":port", ":upb", ":wire", + ":wire_reader", ], ) @@ -577,10 +581,13 @@ cc_library( visibility = ["//visibility:public"], deps = [ ":collections_internal", + ":eps_copy_input_stream", ":lex", ":port", ":reflection", ":wire", + ":wire_reader", + ":wire_types", ], ) @@ -909,7 +916,6 @@ cc_library( hdrs = [ "upb/wire/decode.h", "upb/wire/encode.h", - "upb/wire/types.h", ], copts = UPB_DEFAULT_COPTS, visibility = ["//visibility:public"], @@ -935,29 +941,63 @@ cc_library( "upb/wire/decode_fast.h", "upb/wire/decode_internal.h", "upb/wire/encode.h", - "upb/wire/eps_copy_input_stream.h", "upb/wire/swap_internal.h", - "upb/wire/types.h", ], copts = UPB_DEFAULT_COPTS, 
visibility = ["//:__subpackages__"], deps = [ ":base", ":collections_internal", + ":eps_copy_input_stream", ":hash", ":mem_internal", ":message_internal", ":mini_table_internal", ":port", + ":wire_reader", + ":wire_types", "@utf8_range", ], ) +cc_library( + name = "wire_types", + hdrs = ["upb/wire/types.h"], + visibility = ["//visibility:public"], +) + +cc_library( + name = "eps_copy_input_stream", + srcs = ["upb/wire/eps_copy_input_stream.c"], + hdrs = ["upb/wire/eps_copy_input_stream.h"], + visibility = ["//visibility:public"], + deps = [ + ":mem", + ":port", + ], +) + +cc_library( + name = "wire_reader", + srcs = [ + "upb/wire/reader.c", + "upb/wire/swap_internal.h", + ], + hdrs = ["upb/wire/reader.h"], + visibility = ["//visibility:public"], + deps = [ + ":eps_copy_input_stream", + ":port", + ":wire_types", + ], +) + cc_test( name = "eps_copy_input_stream_test", srcs = ["upb/wire/eps_copy_input_stream_test.cc"], deps = [ - ":wire_internal", + ":eps_copy_input_stream", + ":upb", "@com_google_googletest//:gtest_main", ], ) @@ -1014,6 +1054,7 @@ upb_amalgamation( ":base", ":collections_internal", ":descriptor_upb_proto", + ":eps_copy_input_stream", ":fastdecode", ":hash", ":lex", @@ -1026,6 +1067,8 @@ upb_amalgamation( ":reflection_internal", ":upb", ":wire_internal", + ":wire_reader", + ":wire_types", ], strip_import_prefix = ["src"], ) @@ -1049,6 +1092,7 @@ upb_amalgamation( ":collections_internal", ":descriptor_upb_proto", ":descriptor_upb_proto_reflection", + ":eps_copy_input_stream", ":fastdecode", ":hash", ":json", @@ -1062,6 +1106,8 @@ upb_amalgamation( ":reflection_internal", ":upb", ":wire_internal", + ":wire_reader", + ":wire_types", ], prefix = "php-", strip_import_prefix = ["src"], @@ -1086,6 +1132,7 @@ upb_amalgamation( ":base", ":collections_internal", ":descriptor_upb_proto", + ":eps_copy_input_stream", ":fastdecode", ":hash", ":json", @@ -1099,6 +1146,8 @@ upb_amalgamation( ":reflection_internal", ":upb", ":wire_internal", + ":wire_reader", + 
":wire_types", ], prefix = "ruby-", strip_import_prefix = ["src"], diff --git a/bazel/upb_proto_library.bzl b/bazel/upb_proto_library.bzl index e7ebb72d7f..88ef5a605f 100644 --- a/bazel/upb_proto_library.bzl +++ b/bazel/upb_proto_library.bzl @@ -167,29 +167,6 @@ def _cc_library_func(ctx, name, hdrs, srcs, copts, includes, dep_ccinfos): linking_context = linking_context, ) -# Build setting for whether fasttable code generation is enabled ############### - -_FastTableEnabledInfo = provider( - "Provides fasttable configuration", - fields = { - "enabled": "whether fasttable is enabled", - }, -) - -def fasttable_enabled_impl(ctx): - raw_setting = ctx.build_setting_value - - if raw_setting: - # TODO(haberman): check that the target CPU supports fasttable. - pass - - return _FastTableEnabledInfo(enabled = raw_setting) - -upb_fasttable_enabled = rule( - implementation = fasttable_enabled_impl, - build_setting = config.bool(flag = True), -) - # Dummy rule to expose select() copts to aspects ############################## UpbProtoLibraryCoptsInfo = provider( @@ -235,9 +212,6 @@ def _compile_upb_protos(ctx, generator, proto_info, proto_sources): srcs = [_generate_output_file(ctx, name, ext + ".c") for name in proto_sources] hdrs = [_generate_output_file(ctx, name, ext + ".h") for name in proto_sources] transitive_sets = proto_info.transitive_descriptor_sets.to_list() - fasttable_enabled = (hasattr(ctx.attr, "_fasttable_enabled") and - ctx.attr._fasttable_enabled[_FastTableEnabledInfo].enabled) - codegen_params = "fasttable:" if fasttable_enabled else "" ctx.actions.run( inputs = depset( direct = [proto_info.direct_descriptor_set], @@ -247,7 +221,7 @@ def _compile_upb_protos(ctx, generator, proto_info, proto_sources): outputs = srcs + hdrs, executable = ctx.executable._protoc, arguments = [ - "--" + generator + "_out=" + codegen_params + _get_real_root(ctx, srcs[0]), + "--" + generator + "_out=" + _get_real_root(ctx, srcs[0]), "--plugin=protoc-gen-" + generator + "=" + 
tool.path, "--descriptor_set_in=" + ctx.configuration.host_path_separator.join([f.path for f in transitive_sets]), ] + diff --git a/cmake/make_cmakelists.py b/cmake/make_cmakelists.py index e66b784a79..0b63de5fd9 100755 --- a/cmake/make_cmakelists.py +++ b/cmake/make_cmakelists.py @@ -200,6 +200,9 @@ class BuildFileFunctions(object): def package_group(self, **kwargs): pass + def bool_flag(self, **kwargs): + pass + class WorkspaceFileFunctions(object): def __init__(self, converter): diff --git a/cmake/staleness_test_lib.py b/cmake/staleness_test_lib.py index 171d2be0fc..4887f24855 100644 --- a/cmake/staleness_test_lib.py +++ b/cmake/staleness_test_lib.py @@ -33,6 +33,7 @@ This code is used by test scripts generated from staleness_test() rules. from __future__ import absolute_import from __future__ import print_function +import difflib import sys import os from shutil import copyfile @@ -171,7 +172,10 @@ def CheckFilesMatch(config): continue for pair in stale_files: - diff_errors.append("File %s is out of date" % pair.target) + with open(pair.generated) as g, open(pair.target) as t: + diff = ''.join(difflib.unified_diff(g.read().splitlines(keepends=True), + t.read().splitlines(keepends=True))) + diff_errors.append("File %s is out of date:\n%s" % (pair.target, diff)) if diff_errors: error_msg = "Files out of date!\n\n" diff --git a/lua/msg.c b/lua/msg.c index 010671b597..094ec570d6 100644 --- a/lua/msg.c +++ b/lua/msg.c @@ -579,7 +579,7 @@ static int lupb_Map_Newindex(lua_State* L) { upb_MessageValue key = lupb_tomsgval(L, lmap->key_type, 2, 1, LUPB_REF); if (lua_isnil(L, 3)) { - upb_Map_Delete(map, key); + upb_Map_Delete(map, key, NULL); } else { upb_MessageValue val = lupb_tomsgval(L, lmap->value_type, 3, 1, LUPB_COPY); upb_Map_Set(map, key, val, lupb_Arenaget(L, 1)); diff --git a/protos/protos_internal_test.cc b/protos/protos_internal_test.cc index 835601c549..99bfaa2402 100644 --- a/protos/protos_internal_test.cc +++ b/protos/protos_internal_test.cc @@ -40,9 +40,9 
@@ TEST(CppGeneratedCode, InternalMoveMessage) { upb_Arena* source_arena = upb_Arena_New(); protos_generator_test_TestModel* message = protos_generator_test_TestModel_new(source_arena); + ASSERT_NE(message, nullptr); protos_generator_test_TestModel_set_int_value_with_default(message, 123); - EXPECT_NE(message, nullptr); // Move ownership. TestModel model = protos::internal::MoveMessage(message, source_arena); diff --git a/protos_generator/tests/test_generated.cc b/protos_generator/tests/test_generated.cc index 558476c60a..416d9f466c 100644 --- a/protos_generator/tests/test_generated.cc +++ b/protos_generator/tests/test_generated.cc @@ -425,7 +425,9 @@ TEST(CppGeneratedCode, MessageMapStringKeyAndInt32Value) { test_model.clear_str_to_int_map(); EXPECT_EQ(0, test_model.str_to_int_map_size()); test_model.set_str_to_int_map("first", 10); + EXPECT_EQ(1, test_model.str_to_int_map_size()); test_model.set_str_to_int_map("second", 20); + EXPECT_EQ(2, test_model.str_to_int_map_size()); auto result = test_model.get_str_to_int_map("second"); EXPECT_EQ(true, result.ok()); EXPECT_EQ(20, result.value()); diff --git a/python/BUILD b/python/BUILD index 114346055a..8471eb7c99 100644 --- a/python/BUILD +++ b/python/BUILD @@ -233,12 +233,15 @@ py_extension( deps = [ "//:collections", "//:descriptor_upb_proto_reflection", + "//:eps_copy_input_stream", "//:hash", "//:port", "//:reflection", "//:textformat", "//:upb", "//:wire", + "//:wire_reader", + "//:wire_types", "//upb/util:compare", "//upb/util:def_to_proto", "//upb/util:required_fields", diff --git a/python/map.c b/python/map.c index b3a8fff11f..eea57e63d4 100644 --- a/python/map.c +++ b/python/map.c @@ -182,7 +182,7 @@ int PyUpb_MapContainer_AssignSubscript(PyObject* _self, PyObject* key, if (!PyUpb_PyToUpb(val, val_f, &u_val, arena)) return -1; if (!PyUpb_MapContainer_Set(self, map, u_key, u_val, arena)) return -1; } else { - if (!upb_Map_Delete(map, u_key)) { + if (!upb_Map_Delete(map, u_key, NULL)) { 
PyErr_Format(PyExc_KeyError, "Key not present in map"); return -1; } diff --git a/python/unknown_fields.c b/python/unknown_fields.c index 5dc96fb723..fd7716d732 100644 --- a/python/unknown_fields.c +++ b/python/unknown_fields.c @@ -29,21 +29,10 @@ #include "python/message.h" #include "python/protobuf.h" +#include "upb/wire/eps_copy_input_stream.h" +#include "upb/wire/reader.h" #include "upb/wire/types.h" -static const char* PyUpb_DecodeVarint(const char* ptr, const char* end, - uint64_t* val) { - *val = 0; - for (int i = 0; ptr < end && i < 10; i++, ptr++) { - uint64_t byte = (uint8_t)*ptr; - *val |= (byte & 0x7f) << (i * 7); - if ((byte & 0x80) == 0) { - return ptr + 1; - } - } - return NULL; -} - // ----------------------------------------------------------------------------- // UnknownFieldSet // ----------------------------------------------------------------------------- @@ -66,60 +55,6 @@ PyUpb_UnknownFieldSet* PyUpb_UnknownFieldSet_NewBare(void) { return self; } -// Generic functions to skip a value or group. 
- -static const char* PyUpb_UnknownFieldSet_SkipGroup(const char* ptr, - const char* end, - int group_number); - -static const char* PyUpb_UnknownFieldSet_SkipField(const char* ptr, - const char* end, - uint32_t tag) { - int field_number = tag >> 3; - int wire_type = tag & 7; - switch (wire_type) { - case kUpb_WireType_Varint: { - uint64_t val; - return PyUpb_DecodeVarint(ptr, end, &val); - } - case kUpb_WireType_64Bit: - if (end - ptr < 8) return NULL; - return ptr + 8; - case kUpb_WireType_32Bit: - if (end - ptr < 4) return NULL; - return ptr + 4; - case kUpb_WireType_Delimited: { - uint64_t size; - ptr = PyUpb_DecodeVarint(ptr, end, &size); - if (!ptr || end - ptr < size) return NULL; - return ptr + size; - } - case kUpb_WireType_StartGroup: - return PyUpb_UnknownFieldSet_SkipGroup(ptr, end, field_number); - case kUpb_WireType_EndGroup: - return NULL; - default: - assert(0); - return NULL; - } -} - -static const char* PyUpb_UnknownFieldSet_SkipGroup(const char* ptr, - const char* end, - int group_number) { - uint32_t end_tag = (group_number << 3) | kUpb_WireType_EndGroup; - while (true) { - if (ptr == end) return NULL; - uint64_t tag; - ptr = PyUpb_DecodeVarint(ptr, end, &tag); - if (!ptr) return NULL; - if (tag == end_tag) return ptr; - ptr = PyUpb_UnknownFieldSet_SkipField(ptr, end, tag); - if (!ptr) return NULL; - } - return ptr; -} - // For MessageSet the established behavior is for UnknownFieldSet to interpret // the MessageSet wire format: // message MessageSet { @@ -143,40 +78,43 @@ enum { }; static const char* PyUpb_UnknownFieldSet_BuildMessageSetItem( - PyUpb_UnknownFieldSet* self, const char* ptr, const char* end) { + PyUpb_UnknownFieldSet* self, upb_EpsCopyInputStream* stream, + const char* ptr) { PyUpb_ModuleState* s = PyUpb_ModuleState_Get(); int type_id = 0; PyObject* msg = NULL; - while (true) { - if (ptr == end) goto err; - uint64_t tag; - ptr = PyUpb_DecodeVarint(ptr, end, &tag); + while (!upb_EpsCopyInputStream_IsDone(stream, &ptr)) { + 
uint32_t tag; + ptr = upb_WireReader_ReadTag(ptr, &tag); if (!ptr) goto err; switch (tag) { case kUpb_MessageSet_EndItemTag: goto done; case kUpb_MessageSet_TypeIdTag: { uint64_t tmp; - ptr = PyUpb_DecodeVarint(ptr, end, &tmp); + ptr = upb_WireReader_ReadVarint(ptr, &tmp); if (!ptr) goto err; if (!type_id) type_id = tmp; break; } case kUpb_MessageSet_MessageTag: { - uint64_t size; - ptr = PyUpb_DecodeVarint(ptr, end, &size); - if (!ptr || end - ptr < size) goto err; + int size; + ptr = upb_WireReader_ReadSize(ptr, &size); + if (!ptr || !upb_EpsCopyInputStream_CheckDataSizeAvailable(stream, ptr, size)) { + goto err; + } + const char* str = ptr; + ptr = upb_EpsCopyInputStream_ReadStringAliased(stream, &str, size); if (!msg) { - msg = PyBytes_FromStringAndSize(ptr, size); + msg = PyBytes_FromStringAndSize(str, size); if (!msg) goto err; } else { // already saw a message here so deliberately skipping the duplicate } - ptr += size; break; } default: - ptr = PyUpb_UnknownFieldSet_SkipField(ptr, end, tag); + ptr = upb_WireReader_SkipValue(ptr, tag, stream); if (!ptr) goto err; } } @@ -198,19 +136,21 @@ err: } static const char* PyUpb_UnknownFieldSet_BuildMessageSet( - PyUpb_UnknownFieldSet* self, const char* ptr, const char* end) { + PyUpb_UnknownFieldSet* self, upb_EpsCopyInputStream* stream, + const char* ptr) { self->fields = PyList_New(0); - while (ptr < end) { - uint64_t tag; - ptr = PyUpb_DecodeVarint(ptr, end, &tag); + while (!upb_EpsCopyInputStream_IsDone(stream, &ptr)) { + uint32_t tag; + ptr = upb_WireReader_ReadTag(ptr, &tag); if (!ptr) goto err; if (tag == kUpb_MessageSet_StartItemTag) { - ptr = PyUpb_UnknownFieldSet_BuildMessageSetItem(self, ptr, end); + ptr = PyUpb_UnknownFieldSet_BuildMessageSetItem(self, stream, ptr); } else { - ptr = PyUpb_UnknownFieldSet_SkipField(ptr, end, tag); + ptr = upb_WireReader_SkipValue(ptr, tag, stream); } if (!ptr) goto err; } + if (upb_EpsCopyInputStream_IsError(stream)) goto err; return ptr; err: @@ -220,46 +160,50 @@ err: } static
const char* PyUpb_UnknownFieldSet_Build(PyUpb_UnknownFieldSet* self, - const char* ptr, const char* end, + upb_EpsCopyInputStream* stream, + const char* ptr, int group_number); static const char* PyUpb_UnknownFieldSet_BuildValue( - PyUpb_UnknownFieldSet* self, const char* ptr, const char* end, - int field_number, int wire_type, int group_number, PyObject** data) { + PyUpb_UnknownFieldSet* self, upb_EpsCopyInputStream* stream, + const char* ptr, int field_number, int wire_type, int group_number, + PyObject** data) { switch (wire_type) { case kUpb_WireType_Varint: { uint64_t val; - ptr = PyUpb_DecodeVarint(ptr, end, &val); + ptr = upb_WireReader_ReadVarint(ptr, &val); if (!ptr) return NULL; *data = PyLong_FromUnsignedLongLong(val); return ptr; } case kUpb_WireType_64Bit: { - if (end - ptr < 8) return NULL; uint64_t val; - memcpy(&val, ptr, 8); + ptr = upb_WireReader_ReadFixed64(ptr, &val); *data = PyLong_FromUnsignedLongLong(val); - return ptr + 8; + return ptr; } case kUpb_WireType_32Bit: { - if (end - ptr < 4) return NULL; uint32_t val; - memcpy(&val, ptr, 4); + ptr = upb_WireReader_ReadFixed32(ptr, &val); *data = PyLong_FromUnsignedLongLong(val); - return ptr + 4; + return ptr; } case kUpb_WireType_Delimited: { - uint64_t size; - ptr = PyUpb_DecodeVarint(ptr, end, &size); - if (!ptr || end - ptr < size) return NULL; - *data = PyBytes_FromStringAndSize(ptr, size); - return ptr + size; + int size; + ptr = upb_WireReader_ReadSize(ptr, &size); + if (!ptr || !upb_EpsCopyInputStream_CheckDataSizeAvailable(stream, ptr, size)) { + return NULL; + } + const char* str = ptr; + ptr = upb_EpsCopyInputStream_ReadStringAliased(stream, &str, size); + *data = PyBytes_FromStringAndSize(str, size); + return ptr; } case kUpb_WireType_StartGroup: { PyUpb_UnknownFieldSet* sub = PyUpb_UnknownFieldSet_NewBare(); if (!sub) return NULL; *data = &sub->ob_base; - return PyUpb_UnknownFieldSet_Build(sub, ptr, end, field_number); + return PyUpb_UnknownFieldSet_Build(sub, stream, ptr, field_number); }
default: assert(0); @@ -271,22 +215,23 @@ static const char* PyUpb_UnknownFieldSet_BuildValue( // For non-MessageSet we just build the unknown fields exactly as they exist on // the wire. static const char* PyUpb_UnknownFieldSet_Build(PyUpb_UnknownFieldSet* self, - const char* ptr, const char* end, + upb_EpsCopyInputStream* stream, + const char* ptr, int group_number) { PyUpb_ModuleState* s = PyUpb_ModuleState_Get(); self->fields = PyList_New(0); - while (ptr < end) { - uint64_t tag; - ptr = PyUpb_DecodeVarint(ptr, end, &tag); + while (!upb_EpsCopyInputStream_IsDone(stream, &ptr)) { + uint32_t tag; + ptr = upb_WireReader_ReadTag(ptr, &tag); if (!ptr) goto err; PyObject* data = NULL; - int field_number = tag >> 3; - int wire_type = tag & 7; + int field_number = upb_WireReader_GetFieldNumber(tag); + int wire_type = upb_WireReader_GetWireType(tag); if (wire_type == kUpb_WireType_EndGroup) { if (field_number != group_number) return NULL; return ptr; } - ptr = PyUpb_UnknownFieldSet_BuildValue(self, ptr, end, field_number, + ptr = PyUpb_UnknownFieldSet_BuildValue(self, stream, ptr, field_number, wire_type, group_number, &data); if (!ptr) { Py_XDECREF(data); @@ -298,6 +243,7 @@ static const char* PyUpb_UnknownFieldSet_Build(PyUpb_UnknownFieldSet* self, PyList_Append(self->fields, field); Py_DECREF(field); } + if (upb_EpsCopyInputStream_IsError(stream)) goto err; return ptr; err: @@ -324,14 +270,15 @@ static PyObject* PyUpb_UnknownFieldSet_New(PyTypeObject* type, PyObject* args, const char* ptr = upb_Message_GetUnknown(msg, &size); if (size == 0) return &self->ob_base; - const char* end = ptr + size; + upb_EpsCopyInputStream stream; + upb_EpsCopyInputStream_Init(&stream, &ptr, size, true); const upb_MessageDef* msgdef = PyUpb_Message_GetMsgdef(py_msg); bool ok; if (upb_MessageDef_IsMessageSet(msgdef)) { - ok = PyUpb_UnknownFieldSet_BuildMessageSet(self, ptr, end) == end; + ok = PyUpb_UnknownFieldSet_BuildMessageSet(self, &stream, ptr) != NULL; } else { - ok = 
PyUpb_UnknownFieldSet_Build(self, ptr, end, -1) == end; + ok = PyUpb_UnknownFieldSet_Build(self, &stream, ptr, -1) != NULL; } if (!ok) { diff --git a/upb/collections/array.c b/upb/collections/array.c index a957f26ab8..fdb1bd206c 100644 --- a/upb/collections/array.c +++ b/upb/collections/array.c @@ -32,23 +32,22 @@ // Must be last. #include "upb/port/def.inc" -static const char _upb_CTypeo_sizelg2[12] = { - 0, - 0, /* kUpb_CType_Bool */ - 2, /* kUpb_CType_Float */ - 2, /* kUpb_CType_Int32 */ - 2, /* kUpb_CType_UInt32 */ - 2, /* kUpb_CType_Enum */ - UPB_SIZE(2, 3), /* kUpb_CType_Message */ - 3, /* kUpb_CType_Double */ - 3, /* kUpb_CType_Int64 */ - 3, /* kUpb_CType_UInt64 */ - UPB_SIZE(3, 4), /* kUpb_CType_String */ - UPB_SIZE(3, 4), /* kUpb_CType_Bytes */ +const char _upb_Array_CTypeSizeLg2Table[] = { + [kUpb_CType_Bool] = 0, + [kUpb_CType_Float] = 2, + [kUpb_CType_Int32] = 2, + [kUpb_CType_UInt32] = 2, + [kUpb_CType_Enum] = 2, + [kUpb_CType_Message] = UPB_SIZE(2, 3), + [kUpb_CType_Double] = 3, + [kUpb_CType_Int64] = 3, + [kUpb_CType_UInt64] = 3, + [kUpb_CType_String] = UPB_SIZE(3, 4), + [kUpb_CType_Bytes] = UPB_SIZE(3, 4), }; upb_Array* upb_Array_New(upb_Arena* a, upb_CType type) { - return _upb_Array_New(a, 4, _upb_CTypeo_sizelg2[type]); + return _upb_Array_New(a, 4, _upb_Array_CTypeSizeLg2(type)); } size_t upb_Array_Size(const upb_Array* arr) { return arr->size; } @@ -142,35 +141,3 @@ bool _upb_array_realloc(upb_Array* arr, size_t min_capacity, upb_Arena* arena) { arr->capacity = new_capacity; return true; } - -static upb_Array* getorcreate_array(upb_Array** arr_ptr, int elem_size_lg2, - upb_Arena* arena) { - upb_Array* arr = *arr_ptr; - if (!arr) { - arr = _upb_Array_New(arena, 4, elem_size_lg2); - if (!arr) return NULL; - *arr_ptr = arr; - } - return arr; -} - -void* _upb_Array_Resize_fallback(upb_Array** arr_ptr, size_t size, - int elem_size_lg2, upb_Arena* arena) { - upb_Array* arr = getorcreate_array(arr_ptr, elem_size_lg2, arena); - return arr && 
_upb_Array_ResizeUninitialized(arr, size, arena) - ? _upb_array_ptr(arr) - : NULL; -} - -bool _upb_Array_Append_fallback(upb_Array** arr_ptr, const void* value, - int elem_size_lg2, upb_Arena* arena) { - upb_Array* arr = getorcreate_array(arr_ptr, elem_size_lg2, arena); - if (!arr) return false; - - size_t elems = arr->size; - if (!_upb_Array_ResizeUninitialized(arr, elems + 1, arena)) return false; - - char* data = _upb_array_ptr(arr); - memcpy(data + (elems << elem_size_lg2), value, 1 << elem_size_lg2); - return true; -} diff --git a/upb/collections/array_internal.h b/upb/collections/array_internal.h index e1a4cefde2..0c126eb642 100644 --- a/upb/collections/array_internal.h +++ b/upb/collections/array_internal.h @@ -48,8 +48,14 @@ struct upb_Array { }; // LINT.ThenChange(GoogleInternalName1) +UPB_INLINE size_t _upb_Array_ElementSizeLg2(const upb_Array* arr) { + size_t ret = arr->data & 7; + UPB_ASSERT(ret <= 4); + return ret; +} + UPB_INLINE const void* _upb_array_constptr(const upb_Array* arr) { - UPB_ASSERT((arr->data & 7) <= 4); + _upb_Array_ElementSizeLg2(arr); // Check assertion. return (void*)(arr->data & ~(uintptr_t)7); } @@ -68,8 +74,15 @@ UPB_INLINE uintptr_t _upb_tag_arrptr(void* ptr, int elem_size_lg2) { return (uintptr_t)ptr | (unsigned)elem_size_lg2; } +extern const char _upb_Array_CTypeSizeLg2Table[]; + +UPB_INLINE size_t _upb_Array_CTypeSizeLg2(upb_CType ctype) { + return _upb_Array_CTypeSizeLg2Table[ctype]; +} + UPB_INLINE upb_Array* _upb_Array_New(upb_Arena* a, size_t init_capacity, int elem_size_lg2) { + UPB_ASSERT(elem_size_lg2 <= 4); const size_t arr_size = UPB_ALIGN_UP(sizeof(upb_Array), UPB_MALLOC_ALIGN); const size_t bytes = arr_size + (init_capacity << elem_size_lg2); upb_Array* arr = (upb_Array*)upb_Arena_Malloc(a, bytes); @@ -83,12 +96,6 @@ UPB_INLINE upb_Array* _upb_Array_New(upb_Arena* a, size_t init_capacity, // Resizes the capacity of the array to be at least min_size. 
bool _upb_array_realloc(upb_Array* arr, size_t min_size, upb_Arena* arena); -// Fallback functions for when the accessors require a resize. -void* _upb_Array_Resize_fallback(upb_Array** arr_ptr, size_t size, - int elem_size_lg2, upb_Arena* arena); -bool _upb_Array_Append_fallback(upb_Array** arr_ptr, const void* value, - int elem_size_lg2, upb_Arena* arena); - UPB_INLINE bool _upb_array_reserve(upb_Array* arr, size_t size, upb_Arena* arena) { if (arr->capacity < size) return _upb_array_realloc(arr, size, arena); @@ -103,98 +110,19 @@ UPB_INLINE bool _upb_Array_ResizeUninitialized(upb_Array* arr, size_t size, return true; } -UPB_INLINE void _upb_array_detach(const void* msg, size_t ofs) { - *UPB_PTR_AT(msg, ofs, upb_Array*) = NULL; -} - -UPB_INLINE const void* _upb_array_accessor(const void* msg, size_t ofs, - size_t* size) { - const upb_Array* arr = *UPB_PTR_AT(msg, ofs, const upb_Array*); - if (arr) { - if (size) *size = arr->size; - return _upb_array_constptr(arr); - } else { - if (size) *size = 0; - return NULL; - } -} - -UPB_INLINE void* _upb_array_mutable_accessor(void* msg, size_t ofs, - size_t* size) { - upb_Array* arr = *UPB_PTR_AT(msg, ofs, upb_Array*); - if (arr) { - if (size) *size = arr->size; - return _upb_array_ptr(arr); - } else { - if (size) *size = 0; - return NULL; - } +// This function is intended for situations where elem_size is compile-time +// constant or a known expression of the form (1 << lg2), so that the expression +// i*elem_size does not result in an actual multiplication. 
+UPB_INLINE void _upb_Array_Set(upb_Array* arr, size_t i, const void* data, + size_t elem_size) { + UPB_ASSERT(i < arr->size); + UPB_ASSERT(elem_size == 1U << _upb_Array_ElementSizeLg2(arr)); + char* arr_data = (char*)_upb_array_ptr(arr); + memcpy(arr_data + (i * elem_size), data, elem_size); } -UPB_INLINE void* _upb_Array_Resize_accessor2(void* msg, size_t ofs, size_t size, - int elem_size_lg2, - upb_Arena* arena) { - upb_Array** arr_ptr = UPB_PTR_AT(msg, ofs, upb_Array*); - upb_Array* arr = *arr_ptr; - if (!arr || arr->capacity < size) { - return _upb_Array_Resize_fallback(arr_ptr, size, elem_size_lg2, arena); - } - arr->size = size; - return _upb_array_ptr(arr); -} - -UPB_INLINE bool _upb_Array_Append_accessor2(void* msg, size_t ofs, - int elem_size_lg2, - const void* value, - upb_Arena* arena) { - upb_Array** arr_ptr = UPB_PTR_AT(msg, ofs, upb_Array*); - size_t elem_size = 1 << elem_size_lg2; - upb_Array* arr = *arr_ptr; - void* ptr; - if (!arr || arr->size == arr->capacity) { - return _upb_Array_Append_fallback(arr_ptr, value, elem_size_lg2, arena); - } - ptr = _upb_array_ptr(arr); - memcpy(UPB_PTR_AT(ptr, arr->size * elem_size, char), value, elem_size); - arr->size++; - return true; -} - -// Used by old generated code, remove once all code has been regenerated. 
-UPB_INLINE int _upb_sizelg2(upb_CType type) { - switch (type) { - case kUpb_CType_Bool: - return 0; - case kUpb_CType_Float: - case kUpb_CType_Int32: - case kUpb_CType_UInt32: - case kUpb_CType_Enum: - return 2; - case kUpb_CType_Message: - return UPB_SIZE(2, 3); - case kUpb_CType_Double: - case kUpb_CType_Int64: - case kUpb_CType_UInt64: - return 3; - case kUpb_CType_String: - case kUpb_CType_Bytes: - return UPB_SIZE(3, 4); - } - UPB_UNREACHABLE(); -} - -UPB_INLINE void* _upb_Array_Resize_accessor(void* msg, size_t ofs, size_t size, - upb_CType type, upb_Arena* arena) { - return _upb_Array_Resize_accessor2(msg, ofs, size, _upb_sizelg2(type), arena); -} - -UPB_INLINE bool _upb_Array_Append_accessor(void* msg, size_t ofs, - size_t elem_size, upb_CType type, - const void* value, - upb_Arena* arena) { - (void)elem_size; - return _upb_Array_Append_accessor2(msg, ofs, _upb_sizelg2(type), value, - arena); +UPB_INLINE void _upb_array_detach(const void* msg, size_t ofs) { + *UPB_PTR_AT(msg, ofs, upb_Array*) = NULL; } #ifdef __cplusplus diff --git a/upb/collections/map.c b/upb/collections/map.c index e27e8b8980..fb6aedba4b 100644 --- a/upb/collections/map.c +++ b/upb/collections/map.c @@ -35,25 +35,24 @@ // Must be last. #include "upb/port/def.inc" -/* Strings/bytes are special-cased in maps. */ -static char _upb_CTypeo_mapsize[12] = { - 0, - 1, /* kUpb_CType_Bool */ - 4, /* kUpb_CType_Float */ - 4, /* kUpb_CType_Int32 */ - 4, /* kUpb_CType_UInt32 */ - 4, /* kUpb_CType_Enum */ - sizeof(void*), /* kUpb_CType_Message */ - 8, /* kUpb_CType_Double */ - 8, /* kUpb_CType_Int64 */ - 8, /* kUpb_CType_UInt64 */ - 0, /* kUpb_CType_String */ - 0, /* kUpb_CType_Bytes */ +// Strings/bytes are special-cased in maps. 
+char _upb_Map_CTypeSizeTable[12] = { + [kUpb_CType_Bool] = 1, + [kUpb_CType_Float] = 4, + [kUpb_CType_Int32] = 4, + [kUpb_CType_UInt32] = 4, + [kUpb_CType_Enum] = 4, + [kUpb_CType_Message] = sizeof(void*), + [kUpb_CType_Double] = 8, + [kUpb_CType_Int64] = 8, + [kUpb_CType_UInt64] = 8, + [kUpb_CType_String] = UPB_MAPTYPE_STRING, + [kUpb_CType_Bytes] = UPB_MAPTYPE_STRING, }; upb_Map* upb_Map_New(upb_Arena* a, upb_CType key_type, upb_CType value_type) { - return _upb_Map_New(a, _upb_CTypeo_mapsize[key_type], - _upb_CTypeo_mapsize[value_type]); + return _upb_Map_New(a, _upb_Map_CTypeSize(key_type), + _upb_Map_CTypeSize(value_type)); } size_t upb_Map_Size(const upb_Map* map) { return _upb_Map_Size(map); } @@ -71,8 +70,13 @@ upb_MapInsertStatus upb_Map_Insert(upb_Map* map, upb_MessageValue key, map->val_size, arena); } -bool upb_Map_Delete(upb_Map* map, upb_MessageValue key) { - return _upb_Map_Delete(map, &key, map->key_size); +bool upb_Map_Delete(upb_Map* map, upb_MessageValue key, upb_MessageValue* val) { + upb_value v; + const bool ok = _upb_Map_Delete(map, &key, map->key_size, &v); + // |v| has no initializer, so only read it when the delete reported that the + // key was present; otherwise |*val| is left untouched. + if (ok && val) val->uint64_val = v.val; + return ok; } bool upb_Map_Next(const upb_Map* map, upb_MessageValue* key, diff --git a/upb/collections/map.h b/upb/collections/map.h index ae2cf05d23..920a5019c6 100644 --- a/upb/collections/map.h +++ b/upb/collections/map.h @@ -39,20 +39,21 @@ extern "C" { #endif -/* Creates a new map on the given arena with the given key/value size. */ -upb_Map* upb_Map_New(upb_Arena* a, upb_CType key_type, upb_CType value_type); +// Creates a new map on the given arena with the given key/value size. +UPB_API upb_Map* upb_Map_New(upb_Arena* a, upb_CType key_type, + upb_CType value_type); -/* Returns the number of entries in the map. */ -size_t upb_Map_Size(const upb_Map* map); +// Returns the number of entries in the map.
+UPB_API size_t upb_Map_Size(const upb_Map* map); -/* Stores a value for the given key into |*val| (or the zero value if the key is - * not present). Returns whether the key was present. The |val| pointer may be - * NULL, in which case the function tests whether the given key is present. */ -bool upb_Map_Get(const upb_Map* map, upb_MessageValue key, - upb_MessageValue* val); +// Stores a value for the given key into |*val| (or the zero value if the key is +// not present). Returns whether the key was present. The |val| pointer may be +// NULL, in which case the function tests whether the given key is present. +UPB_API bool upb_Map_Get(const upb_Map* map, upb_MessageValue key, + upb_MessageValue* val); -/* Removes all entries in the map. */ -void upb_Map_Clear(upb_Map* map); +// Removes all entries in the map. +UPB_API void upb_Map_Clear(upb_Map* map); typedef enum { kUpb_MapInsertStatus_Inserted = 0, @@ -60,23 +61,32 @@ typedef enum { kUpb_MapInsertStatus_OutOfMemory = 2, } upb_MapInsertStatus; -/* Sets the given key to the given value, returning whether the key was inserted - * or replaced. If the key was inserted, then any existing iterators will be - * invalidated. */ -upb_MapInsertStatus upb_Map_Insert(upb_Map* map, upb_MessageValue key, - upb_MessageValue val, upb_Arena* arena); - -/* Sets the given key to the given value. Returns false if memory allocation - * failed. If the key is newly inserted, then any existing iterators will be - * invalidated. */ -UPB_INLINE bool upb_Map_Set(upb_Map* map, upb_MessageValue key, - upb_MessageValue val, upb_Arena* arena) { +// Sets the given key to the given value, returning whether the key was inserted +// or replaced. If the key was inserted, then any existing iterators will be +// invalidated. +UPB_API upb_MapInsertStatus upb_Map_Insert(upb_Map* map, upb_MessageValue key, + upb_MessageValue val, + upb_Arena* arena); + +// Sets the given key to the given value. Returns false if memory allocation +// failed. 
If the key is newly inserted, then any existing iterators will be +// invalidated. +UPB_API_INLINE bool upb_Map_Set(upb_Map* map, upb_MessageValue key, + upb_MessageValue val, upb_Arena* arena) { return upb_Map_Insert(map, key, val, arena) != kUpb_MapInsertStatus_OutOfMemory; } // Deletes this key from the table. Returns true if the key was present. -bool upb_Map_Delete(upb_Map* map, upb_MessageValue key); +// If present and |val| is non-NULL, stores the deleted value. +UPB_API bool upb_Map_Delete(upb_Map* map, upb_MessageValue key, + upb_MessageValue* val); + +// (DEPRECATED and going away soon. Do not use.) +UPB_INLINE bool upb_Map_Delete2(upb_Map* map, upb_MessageValue key, + upb_MessageValue* val) { + return upb_Map_Delete(map, key, val); +} // Map iteration: // @@ -90,8 +100,8 @@ bool upb_Map_Delete(upb_Map* map, upb_MessageValue key); // Advances to the next entry. Returns false if no more entries are present. // Otherwise returns true and populates both *key and *value. -bool upb_Map_Next(const upb_Map* map, upb_MessageValue* key, - upb_MessageValue* val, size_t* iter); +UPB_API bool upb_Map_Next(const upb_Map* map, upb_MessageValue* key, + upb_MessageValue* val, size_t* iter); // DEPRECATED iterator, slated for removal. @@ -107,12 +117,12 @@ bool upb_Map_Next(const upb_Map* map, upb_MessageValue* key, // Advances to the next entry. Returns false if no more entries are present. bool upb_MapIterator_Next(const upb_Map* map, size_t* iter); -/* Returns true if the iterator still points to a valid entry, or false if the - * iterator is past the last element. It is an error to call this function with - * kUpb_Map_Begin (you must call next() at least once first). */ +// Returns true if the iterator still points to a valid entry, or false if the +// iterator is past the last element. It is an error to call this function with +// kUpb_Map_Begin (you must call next() at least once first). 
bool upb_MapIterator_Done(const upb_Map* map, size_t iter); -/* Returns the key and value for this entry of the map. */ +// Returns the key and value for this entry of the map. upb_MessageValue upb_MapIterator_Key(const upb_Map* map, size_t iter); upb_MessageValue upb_MapIterator_Value(const upb_Map* map, size_t iter); diff --git a/upb/collections/map_gencode_util.h b/upb/collections/map_gencode_util.h index 59764c96b2..794d76b9f4 100644 --- a/upb/collections/map_gencode_util.h +++ b/upb/collections/map_gencode_util.h @@ -41,50 +41,6 @@ extern "C" { // Message map operations, these get the map from the message first. -UPB_INLINE size_t _upb_msg_map_size(const upb_Message* msg, size_t ofs) { - upb_Map* map = *UPB_PTR_AT(msg, ofs, upb_Map*); - return map ? _upb_Map_Size(map) : 0; -} - -UPB_INLINE bool _upb_msg_map_get(const upb_Message* msg, size_t ofs, - const void* key, size_t key_size, void* val, - size_t val_size) { - upb_Map* map = *UPB_PTR_AT(msg, ofs, upb_Map*); - if (!map) return false; - return _upb_Map_Get(map, key, key_size, val, val_size); -} - -UPB_INLINE void* _upb_msg_map_next(const upb_Message* msg, size_t ofs, - size_t* iter) { - upb_Map* map = *UPB_PTR_AT(msg, ofs, upb_Map*); - if (!map) return NULL; - return _upb_map_next(map, iter); -} - -UPB_INLINE bool _upb_msg_map_set(upb_Message* msg, size_t ofs, const void* key, - size_t key_size, void* val, size_t val_size, - upb_Arena* arena) { - upb_Map** map = UPB_PTR_AT(msg, ofs, upb_Map*); - if (!*map) { - *map = _upb_Map_New(arena, key_size, val_size); - } - return _upb_Map_Insert(*map, key, key_size, val, val_size, arena) != - kUpb_MapInsertStatus_OutOfMemory; -} - -UPB_INLINE bool _upb_msg_map_delete(upb_Message* msg, size_t ofs, - const void* key, size_t key_size) { - upb_Map* map = *UPB_PTR_AT(msg, ofs, upb_Map*); - if (!map) return false; - return _upb_Map_Delete(map, key, key_size); -} - -UPB_INLINE void _upb_msg_map_clear(upb_Message* msg, size_t ofs) { - upb_Map* map = *UPB_PTR_AT(msg, ofs, 
upb_Map*); - if (!map) return; - _upb_Map_Clear(map); -} - UPB_INLINE void _upb_msg_map_key(const void* msg, void* key, size_t size) { const upb_tabent* ent = (const upb_tabent*)msg; uint32_t u32len; diff --git a/upb/collections/map_internal.h b/upb/collections/map_internal.h index f235cb3ecd..ceadf83359 100644 --- a/upb/collections/map_internal.h +++ b/upb/collections/map_internal.h @@ -111,10 +111,10 @@ UPB_INLINE void _upb_Map_Clear(upb_Map* map) { upb_strtable_clear(&map->table); } -UPB_INLINE bool _upb_Map_Delete(upb_Map* map, const void* key, - size_t key_size) { +UPB_INLINE bool _upb_Map_Delete(upb_Map* map, const void* key, size_t key_size, + upb_value* val) { upb_StringView k = _upb_map_tokey(key, key_size); - return upb_strtable_remove2(&map->table, k.data, k.size, NULL); + return upb_strtable_remove2(&map->table, k.data, k.size, val); } UPB_INLINE bool _upb_Map_Get(const upb_Map* map, const void* key, @@ -151,6 +151,13 @@ UPB_INLINE size_t _upb_Map_Size(const upb_Map* map) { return map->table.t.count; } +// Strings/bytes are special-cased in maps. +extern char _upb_Map_CTypeSizeTable[12]; + +UPB_INLINE size_t _upb_Map_CTypeSize(upb_CType ctype) { + return _upb_Map_CTypeSizeTable[ctype]; +} + // Creates a new map on the given arena with this key/value type. upb_Map* _upb_Map_New(upb_Arena* a, size_t key_size, size_t value_size); diff --git a/upb/message/accessors.c b/upb/message/accessors.c index 83ca4040ec..43a79b9a9a 100644 --- a/upb/message/accessors.c +++ b/upb/message/accessors.c @@ -33,96 +33,12 @@ #include "upb/message/message.h" #include "upb/wire/decode.h" #include "upb/wire/encode.h" -#include "upb/wire/types.h" +#include "upb/wire/eps_copy_input_stream.h" +#include "upb/wire/reader.h" // Must be last. #include "upb/port/def.inc" -// Maps descriptor type to elem_size_lg2. 
-static int _upb_MiniTableField_CTypeLg2Size(const upb_MiniTableField* f) { - static const uint8_t sizes[] = { - -1, /* invalid descriptor type */ - 3, /* DOUBLE */ - 2, /* FLOAT */ - 3, /* INT64 */ - 3, /* UINT64 */ - 2, /* INT32 */ - 3, /* FIXED64 */ - 2, /* FIXED32 */ - 0, /* BOOL */ - UPB_SIZE(3, 4), /* STRING */ - UPB_SIZE(2, 3), /* GROUP */ - UPB_SIZE(2, 3), /* MESSAGE */ - UPB_SIZE(3, 4), /* BYTES */ - 2, /* UINT32 */ - 2, /* ENUM */ - 2, /* SFIXED32 */ - 3, /* SFIXED64 */ - 2, /* SINT32 */ - 3, /* SINT64 */ - }; - return sizes[f->descriptortype]; -} - -void* upb_Message_ResizeArray(upb_Message* msg, const upb_MiniTableField* field, - size_t len, upb_Arena* arena) { - return _upb_Array_Resize_accessor2( - msg, field->offset, len, _upb_MiniTableField_CTypeLg2Size(field), arena); -} - -typedef struct { - const char* ptr; - uint64_t val; -} decode_vret; - -UPB_NOINLINE -static decode_vret decode_longvarint64(const char* ptr, uint64_t val) { - decode_vret ret = {NULL, 0}; - uint64_t byte; - int i; - for (i = 1; i < 10; i++) { - byte = (uint8_t)ptr[i]; - val += (byte - 1) << (i * 7); - if (!(byte & 0x80)) { - ret.ptr = ptr + i + 1; - ret.val = val; - return ret; - } - } - return ret; -} - -UPB_FORCEINLINE -static const char* decode_varint64(const char* ptr, uint64_t* val) { - uint64_t byte = (uint8_t)*ptr; - if (UPB_LIKELY((byte & 0x80) == 0)) { - *val = byte; - return ptr + 1; - } else { - decode_vret res = decode_longvarint64(ptr, byte); - if (!res.ptr) return NULL; - *val = res.val; - return res.ptr; - } -} - -UPB_FORCEINLINE -static const char* decode_tag(const char* ptr, uint32_t* val) { - uint64_t byte = (uint8_t)*ptr; - if (UPB_LIKELY((byte & 0x80) == 0)) { - *val = (uint32_t)byte; - return ptr + 1; - } else { - const char* start = ptr; - decode_vret res = decode_longvarint64(ptr, byte); - if (!res.ptr || res.ptr - start > 5 || res.val > UINT32_MAX) { - return NULL; // Malformed. 
- } - *val = (uint32_t)res.val; - return res.ptr; - } -} - // Parses unknown data by merging into existing base_message or creating a // new message usingg mini_table. static upb_UnknownToMessageRet upb_MiniTable_ParseUnknownMessage( @@ -140,8 +56,8 @@ static upb_UnknownToMessageRet upb_MiniTable_ParseUnknownMessage( const char* data = unknown_data; uint32_t tag; uint64_t message_len = 0; - data = decode_tag(data, &tag); - data = decode_varint64(data, &message_len); + data = upb_WireReader_ReadTag(data, &tag); + data = upb_WireReader_ReadVarint(data, &message_len); upb_DecodeStatus status = upb_Decode(data, message_len, ret.message, mini_table, NULL, decode_options, arena); if (status == kUpb_DecodeStatus_OutOfMemory) { @@ -224,133 +140,44 @@ upb_GetExtensionAsBytes_Status upb_MiniTable_GetExtensionAsBytes( const char* data = result.ptr; uint32_t tag; uint64_t message_len = 0; - data = decode_tag(data, &tag); - data = decode_varint64(data, &message_len); + data = upb_WireReader_ReadTag(data, &tag); + data = upb_WireReader_ReadVarint(data, &message_len); *extension_data = data; *len = message_len; return kUpb_GetExtensionAsBytes_Ok; } -static const char* UnknownFieldSet_SkipGroup(const char* ptr, const char* end, - int group_number); - -static const char* UnknownFieldSet_SkipField(const char* ptr, const char* end, - uint32_t tag) { - int field_number = tag >> 3; - int wire_type = tag & 7; - switch (wire_type) { - case kUpb_WireType_Varint: { - uint64_t val; - return decode_varint64(ptr, &val); - } - case kUpb_WireType_64Bit: - if (end - ptr < 8) return NULL; - return ptr + 8; - case kUpb_WireType_32Bit: - if (end - ptr < 4) return NULL; - return ptr + 4; - case kUpb_WireType_Delimited: { - uint64_t size; - ptr = decode_varint64(ptr, &size); - if (!ptr || end - ptr < size) return NULL; - return ptr + size; - } - case kUpb_WireType_StartGroup: - return UnknownFieldSet_SkipGroup(ptr, end, field_number); - case kUpb_WireType_EndGroup: - return NULL; - default: - 
assert(0); - return NULL; - } -} - -static const char* UnknownFieldSet_SkipGroup(const char* ptr, const char* end, - int group_number) { - uint32_t end_tag = (group_number << 3) | kUpb_WireType_EndGroup; - while (true) { - if (ptr == end) return NULL; - uint64_t tag; - ptr = decode_varint64(ptr, &tag); - if (!ptr) return NULL; - if (tag == end_tag) return ptr; - ptr = UnknownFieldSet_SkipField(ptr, end, (uint32_t)tag); - if (!ptr) return NULL; - } - return ptr; +upb_FindUnknownRet upb_FindUnknownRet_ParseError() { + return (upb_FindUnknownRet){.status = kUpb_FindUnknown_ParseError}; } -enum { - kUpb_MessageSet_StartItemTag = (1 << 3) | kUpb_WireType_StartGroup, - kUpb_MessageSet_EndItemTag = (1 << 3) | kUpb_WireType_EndGroup, - kUpb_MessageSet_TypeIdTag = (2 << 3) | kUpb_WireType_Varint, - kUpb_MessageSet_MessageTag = (3 << 3) | kUpb_WireType_Delimited, -}; - upb_FindUnknownRet upb_MiniTable_FindUnknown(const upb_Message* msg, uint32_t field_number) { + const int depth_limit = 100; // TODO: this should be a parameter size_t size; upb_FindUnknownRet ret; const char* ptr = upb_Message_GetUnknown(msg, &size); - if (size == 0) { - ret.status = kUpb_FindUnknown_NotPresent; - ret.ptr = NULL; - ret.len = 0; - return ret; - } - const char* end = ptr + size; - uint64_t uint64_val; + upb_EpsCopyInputStream stream; + upb_EpsCopyInputStream_Init(&stream, &ptr, size, true); - while (ptr < end) { - uint32_t tag = 0; - int field; - int wire_type; + while (!upb_EpsCopyInputStream_IsDone(&stream, &ptr)) { + uint32_t tag; const char* unknown_begin = ptr; - ptr = decode_tag(ptr, &tag); - field = tag >> 3; - wire_type = tag & 7; - switch (wire_type) { - case kUpb_WireType_EndGroup: - ret.status = kUpb_FindUnknown_ParseError; - return ret; - case kUpb_WireType_Varint: - ptr = decode_varint64(ptr, &uint64_val); - if (!ptr) { - ret.status = kUpb_FindUnknown_ParseError; - return ret; - } - break; - case kUpb_WireType_32Bit: - ptr += 4; - break; - case kUpb_WireType_64Bit: - ptr += 8; - 
break; - case kUpb_WireType_Delimited: - // Read size. - ptr = decode_varint64(ptr, &uint64_val); - if (uint64_val >= INT32_MAX || !ptr) { - ret.status = kUpb_FindUnknown_ParseError; - return ret; - } - ptr += uint64_val; - break; - case kUpb_WireType_StartGroup: - // tag >> 3 specifies the group number, recurse and skip - // until we see group end tag. - ptr = UnknownFieldSet_SkipGroup(ptr, end, field_number); - break; - default: - ret.status = kUpb_FindUnknown_ParseError; - return ret; - } - if (field_number == field) { + ptr = upb_WireReader_ReadTag(ptr, &tag); + if (!ptr) return upb_FindUnknownRet_ParseError(); + if (field_number == upb_WireReader_GetFieldNumber(tag)) { ret.status = kUpb_FindUnknown_Ok; - ret.ptr = unknown_begin; - ret.len = ptr - unknown_begin; + ret.ptr = upb_EpsCopyInputStream_GetAliasedPtr(&stream, unknown_begin); + ptr = _upb_WireReader_SkipValue(ptr, tag, depth_limit, &stream); + // Because we know that the input is a flat buffer, it is safe to perform + // pointer arithmetic on aliased pointers. + ret.len = upb_EpsCopyInputStream_GetAliasedPtr(&stream, ptr) - ret.ptr; return ret; } + + ptr = _upb_WireReader_SkipValue(ptr, tag, depth_limit, &stream); + if (!ptr) return upb_FindUnknownRet_ParseError(); } ret.status = kUpb_FindUnknown_NotPresent; ret.ptr = NULL; @@ -486,8 +313,8 @@ upb_UnknownToMessage_Status upb_MiniTable_PromoteUnknownToMap( /* base_message= */ NULL, decode_options, arena); if (ret.status != kUpb_UnknownToMessage_Ok) return ret.status; // Allocate map on demand before append. 
- upb_Map* map = - upb_MiniTable_GetMutableMap(msg, map_entry_mini_table, field, arena); + upb_Map* map = upb_Message_GetOrCreateMutableMap(msg, map_entry_mini_table, + field, arena); upb_Message* map_entry_message = ret.message; upb_MapInsertStatus insert_status = upb_Message_InsertMapEntry( map, mini_table, field, map_entry_message, arena); diff --git a/upb/message/accessors.h b/upb/message/accessors.h index 697b5ed777..9a26fd9f63 100644 --- a/upb/message/accessors.h +++ b/upb/message/accessors.h @@ -30,6 +30,7 @@ #include "upb/base/descriptor_constants.h" #include "upb/collections/array.h" +#include "upb/collections/array_internal.h" #include "upb/collections/map.h" #include "upb/collections/map_internal.h" #include "upb/message/extension_internal.h" @@ -41,6 +42,22 @@ // Must be last. #include "upb/port/def.inc" +#if defined(__GNUC__) && !defined(__clang__) +// GCC raises incorrect warnings in these functions. It thinks that we are +// overrunning buffers, but we carefully write the functions in this file to +// guarantee that this is impossible. GCC gets this wrong due to its failure +// to perform constant propagation as we expect: +// - https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108217 +// - https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108226 +// +// Unfortunately this also indicates that GCC is not optimizing away the +// switch() in cases where it should be, compromising the performance. 
+#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Warray-bounds" +#pragma GCC diagnostic ignored "-Wstringop-overflow" +#pragma GCC diagnostic ignored "-Wstringop-overread" +#endif + #ifdef __cplusplus extern "C" { #endif @@ -106,6 +123,32 @@ UPB_INLINE void _upb_MiniTable_CopyFieldData(void* to, const void* from, UPB_UNREACHABLE(); } +UPB_INLINE size_t +_upb_MiniTable_ElementSizeLg2(const upb_MiniTableField* field) { + const unsigned char table[] = { + 0, + 3, // kUpb_FieldType_Double = 1, + 2, // kUpb_FieldType_Float = 2, + 3, // kUpb_FieldType_Int64 = 3, + 3, // kUpb_FieldType_UInt64 = 4, + 2, // kUpb_FieldType_Int32 = 5, + 3, // kUpb_FieldType_Fixed64 = 6, + 2, // kUpb_FieldType_Fixed32 = 7, + 0, // kUpb_FieldType_Bool = 8, + UPB_SIZE(3, 4), // kUpb_FieldType_String = 9, + UPB_SIZE(2, 3), // kUpb_FieldType_Group = 10, + UPB_SIZE(2, 3), // kUpb_FieldType_Message = 11, + UPB_SIZE(3, 4), // kUpb_FieldType_Bytes = 12, + 2, // kUpb_FieldType_UInt32 = 13, + 2, // kUpb_FieldType_Enum = 14, + 2, // kUpb_FieldType_SFixed32 = 15, + 3, // kUpb_FieldType_SFixed64 = 16, + 2, // kUpb_FieldType_SInt32 = 17, + 3, // kUpb_FieldType_SInt64 = 18, + }; + return table[field->descriptortype]; +} + // Here we define universal getter/setter functions for message fields. 
// These look very branchy and inefficient, but as long as the MiniTableField // values are known at compile time, all the branches are optimized away and @@ -251,6 +294,22 @@ UPB_INLINE void _upb_Message_ClearNonExtensionField( field); } +UPB_INLINE upb_Map* _upb_Message_GetOrCreateMutableMap( + upb_Message* msg, const upb_MiniTableField* field, size_t key_size, + size_t val_size, upb_Arena* arena) { + _upb_MiniTableField_CheckIsMap(field); + upb_Map* map = NULL; + upb_Map* default_map_value = NULL; + _upb_Message_GetNonExtensionField(msg, field, &default_map_value, &map); + if (!map) { + map = _upb_Map_New(arena, key_size, val_size); + // Check again due to: https://godbolt.org/z/7WfaoKG1r + _upb_MiniTableField_CheckIsMap(field); + _upb_Message_SetNonExtensionField(msg, field, &map); + } + return map; +} + // EVERYTHING ABOVE THIS LINE IS INTERNAL - DO NOT USE ///////////////////////// UPB_API_INLINE void upb_Message_ClearField(upb_Message* msg, @@ -510,6 +569,7 @@ UPB_API_INLINE upb_Message* upb_MiniTable_GetMutableMessage( UPB_API_INLINE const upb_Array* upb_Message_GetArray( const upb_Message* msg, const upb_MiniTableField* field) { + _upb_MiniTableField_CheckIsArray(field); const upb_Array* ret; const upb_Array* default_val = NULL; _upb_Message_GetNonExtensionField(msg, field, &default_val, &ret); @@ -518,49 +578,72 @@ UPB_API_INLINE const upb_Array* upb_Message_GetArray( UPB_API_INLINE upb_Array* upb_Message_GetMutableArray( upb_Message* msg, const upb_MiniTableField* field) { + _upb_MiniTableField_CheckIsArray(field); return (upb_Array*)upb_Message_GetArray(msg, field); } UPB_API_INLINE upb_Array* upb_Message_GetOrCreateMutableArray( - upb_Message* msg, const upb_MiniTableField* field, upb_CType ctype, - upb_Arena* arena) { + upb_Message* msg, const upb_MiniTableField* field, upb_Arena* arena) { + _upb_MiniTableField_CheckIsArray(field); upb_Array* array = upb_Message_GetMutableArray(msg, field); if (!array) { - array = upb_Array_New(arena, ctype); + array 
= _upb_Array_New(arena, 4, _upb_MiniTable_ElementSizeLg2(field)); + // Check again due to: https://godbolt.org/z/7WfaoKG1r + _upb_MiniTableField_CheckIsArray(field); _upb_Message_SetField(msg, field, &array, arena); } return array; } -void* upb_Message_ResizeArray(upb_Message* msg, const upb_MiniTableField* field, - size_t len, upb_Arena* arena); +UPB_INLINE upb_Array* upb_Message_ResizeArrayUninitialized( + upb_Message* msg, const upb_MiniTableField* field, size_t size, + upb_Arena* arena) { + _upb_MiniTableField_CheckIsArray(field); + upb_Array* arr = upb_Message_GetOrCreateMutableArray(msg, field, arena); + if (!arr || !_upb_Array_ResizeUninitialized(arr, size, arena)) return NULL; + return arr; +} + +// TODO: remove, migrate users to upb_Message_ResizeArrayUninitialized(), which +// has the same semantics but a clearer name. Alternatively, if users want an +// initialized variant, we can also offer that. +UPB_API_INLINE void* upb_Message_ResizeArray(upb_Message* msg, + const upb_MiniTableField* field, + size_t size, upb_Arena* arena) { + _upb_MiniTableField_CheckIsArray(field); + upb_Array* arr = + upb_Message_ResizeArrayUninitialized(msg, field, size, arena); + return _upb_array_ptr(arr); +} UPB_API_INLINE bool upb_MiniTableField_IsClosedEnum( const upb_MiniTableField* field) { return field->descriptortype == kUpb_FieldType_Enum; } -UPB_API_INLINE upb_Map* upb_MiniTable_GetMutableMap( +UPB_API_INLINE const upb_Map* upb_Message_GetMap( + const upb_Message* msg, const upb_MiniTableField* field) { + _upb_MiniTableField_CheckIsMap(field); + const upb_Map* ret; + const upb_Map* default_val = NULL; + _upb_Message_GetNonExtensionField(msg, field, &default_val, &ret); + return ret; +} + +UPB_API_INLINE upb_Map* upb_Message_GetOrCreateMutableMap( upb_Message* msg, const upb_MiniTable* map_entry_mini_table, const upb_MiniTableField* field, upb_Arena* arena) { - UPB_ASSERT(map_entry_mini_table != NULL); - UPB_ASSUME(upb_IsRepeatedOrMap(field)); - upb_Map* map = NULL; - 
upb_Map* default_map_value = NULL; - _upb_Message_GetNonExtensionField(msg, field, &default_map_value, &map); - if (!map) { - // Allocate map. - UPB_ASSERT(field->descriptortype == kUpb_FieldType_Message || - field->descriptortype == kUpb_FieldType_Group); - const upb_MiniTableField* map_entry_key_field = - &map_entry_mini_table->fields[0]; - const upb_MiniTableField* map_entry_value_field = - &map_entry_mini_table->fields[1]; - map = upb_Map_New(arena, upb_MiniTableField_CType(map_entry_key_field), - upb_MiniTableField_CType(map_entry_value_field)); - _upb_Message_SetNonExtensionField(msg, field, &map); - } - return map; + UPB_ASSERT(field->descriptortype == kUpb_FieldType_Message || + field->descriptortype == kUpb_FieldType_Group); + const upb_MiniTableField* map_entry_key_field = + &map_entry_mini_table->fields[0]; + const upb_MiniTableField* map_entry_value_field = + &map_entry_mini_table->fields[1]; + return _upb_Message_GetOrCreateMutableMap( + msg, field, + _upb_Map_CTypeSize(upb_MiniTableField_CType(map_entry_key_field)), + _upb_Map_CTypeSize(upb_MiniTableField_CType(map_entry_value_field)), + arena); } // Updates a map entry given an entry message. 
@@ -665,6 +748,10 @@ upb_UnknownToMessage_Status upb_MiniTable_PromoteUnknownToMap( } /* extern "C" */ #endif +#if defined(__GNUC__) && !defined(__clang__) +#pragma GCC diagnostic pop +#endif + #include "upb/port/undef.inc" #endif // UPB_MESSAGE_ACCESSORS_H_ diff --git a/upb/message/accessors_test.cc b/upb/message/accessors_test.cc index a4c21a5ae9..edb9cd8a3a 100644 --- a/upb/message/accessors_test.cc +++ b/upb/message/accessors_test.cc @@ -756,8 +756,8 @@ TEST(GeneratedCode, PromoteUnknownToMap) { decode_options, arena); EXPECT_EQ(promote_result, kUpb_UnknownToMessage_Ok); - upb_Map* map = upb_MiniTable_GetMutableMap(msg, map_entry_mini_table, - &mini_table->fields[1], arena); + upb_Map* map = upb_Message_GetOrCreateMutableMap( + msg, map_entry_mini_table, &mini_table->fields[1], arena); EXPECT_NE(map, nullptr); // Lookup in map. upb_MessageValue key; diff --git a/upb/mini_table/common.c b/upb/mini_table/common.c index 2b84af1a7f..19e9006b01 100644 --- a/upb/mini_table/common.c +++ b/upb/mini_table/common.c @@ -54,12 +54,30 @@ const int8_t _kUpb_FromBase92[] = { }; const upb_MiniTableField* upb_MiniTable_FindFieldByNumber( - const upb_MiniTable* table, uint32_t number) { - int n = table->field_count; - for (int i = 0; i < n; i++) { - if (table->fields[i].number == number) { - return &table->fields[i]; + const upb_MiniTable* t, uint32_t number) { + const size_t i = ((size_t)number) - 1; // 0 wraps to SIZE_MAX + + // Ideal case: index into dense fields + if (i < t->dense_below) { + UPB_ASSERT(t->fields[i].number == number); + return &t->fields[i]; + } + + // Slow case: binary search + int lo = t->dense_below; + int hi = t->field_count - 1; + while (lo <= hi) { + int mid = (lo + hi) / 2; + int num = t->fields[mid].number; + if (num < number) { + lo = mid + 1; + continue; + } + if (num > number) { + hi = mid - 1; + continue; } + return &t->fields[mid]; } return NULL; } @@ -76,38 +94,3 @@ upb_FieldType upb_MiniTableField_Type(const upb_MiniTableField* field) { } 
return field->descriptortype; } - -upb_CType upb_MiniTableField_CType(const upb_MiniTableField* f) { - switch (f->descriptortype) { - case kUpb_FieldType_Double: - return kUpb_CType_Double; - case kUpb_FieldType_Float: - return kUpb_CType_Float; - case kUpb_FieldType_Int64: - case kUpb_FieldType_SInt64: - case kUpb_FieldType_SFixed64: - return kUpb_CType_Int64; - case kUpb_FieldType_Int32: - case kUpb_FieldType_SFixed32: - case kUpb_FieldType_SInt32: - return kUpb_CType_Int32; - case kUpb_FieldType_UInt64: - case kUpb_FieldType_Fixed64: - return kUpb_CType_UInt64; - case kUpb_FieldType_UInt32: - case kUpb_FieldType_Fixed32: - return kUpb_CType_UInt32; - case kUpb_FieldType_Enum: - return kUpb_CType_Enum; - case kUpb_FieldType_Bool: - return kUpb_CType_Bool; - case kUpb_FieldType_String: - return kUpb_CType_String; - case kUpb_FieldType_Bytes: - return kUpb_CType_Bytes; - case kUpb_FieldType_Group: - case kUpb_FieldType_Message: - return kUpb_CType_Message; - } - UPB_UNREACHABLE(); -} diff --git a/upb/mini_table/common.h b/upb/mini_table/common.h index 0f94cbc2fd..bb632e3bed 100644 --- a/upb/mini_table/common.h +++ b/upb/mini_table/common.h @@ -58,7 +58,40 @@ UPB_API const upb_MiniTableField* upb_MiniTable_FindFieldByNumber( UPB_API upb_FieldType upb_MiniTableField_Type(const upb_MiniTableField* field); -UPB_API upb_CType upb_MiniTableField_CType(const upb_MiniTableField* field); +UPB_API_INLINE upb_CType upb_MiniTableField_CType(const upb_MiniTableField* f) { + switch (f->descriptortype) { + case kUpb_FieldType_Double: + return kUpb_CType_Double; + case kUpb_FieldType_Float: + return kUpb_CType_Float; + case kUpb_FieldType_Int64: + case kUpb_FieldType_SInt64: + case kUpb_FieldType_SFixed64: + return kUpb_CType_Int64; + case kUpb_FieldType_Int32: + case kUpb_FieldType_SFixed32: + case kUpb_FieldType_SInt32: + return kUpb_CType_Int32; + case kUpb_FieldType_UInt64: + case kUpb_FieldType_Fixed64: + return kUpb_CType_UInt64; + case kUpb_FieldType_UInt32: + case 
kUpb_FieldType_Fixed32: + return kUpb_CType_UInt32; + case kUpb_FieldType_Enum: + return kUpb_CType_Enum; + case kUpb_FieldType_Bool: + return kUpb_CType_Bool; + case kUpb_FieldType_String: + return kUpb_CType_String; + case kUpb_FieldType_Bytes: + return kUpb_CType_Bytes; + case kUpb_FieldType_Group: + case kUpb_FieldType_Message: + return kUpb_CType_Message; + } + UPB_UNREACHABLE(); +} UPB_API_INLINE bool upb_MiniTableField_IsExtension( const upb_MiniTableField* field) { diff --git a/upb/mini_table/field_internal.h b/upb/mini_table/field_internal.h index f58531a5a6..e2b8f6bcc2 100644 --- a/upb/mini_table/field_internal.h +++ b/upb/mini_table/field_internal.h @@ -75,6 +75,8 @@ typedef enum { kUpb_FieldRep_StringView = 2, kUpb_FieldRep_8Byte = 3, + kUpb_FieldRep_NativePointer = + UPB_SIZE(kUpb_FieldRep_4Byte, kUpb_FieldRep_8Byte), kUpb_FieldRep_Max = kUpb_FieldRep_8Byte, } upb_FieldRep; @@ -93,6 +95,20 @@ UPB_INLINE upb_FieldMode upb_FieldMode_Get(const upb_MiniTableField* field) { return (upb_FieldMode)(field->mode & 3); } +UPB_INLINE void _upb_MiniTableField_CheckIsArray( + const upb_MiniTableField* field) { + UPB_ASSUME(_upb_MiniTableField_GetRep(field) == kUpb_FieldRep_NativePointer); + UPB_ASSUME(upb_FieldMode_Get(field) == kUpb_FieldMode_Array); + UPB_ASSUME(field->presence == 0); +} + +UPB_INLINE void _upb_MiniTableField_CheckIsMap( + const upb_MiniTableField* field) { + UPB_ASSUME(_upb_MiniTableField_GetRep(field) == kUpb_FieldRep_NativePointer); + UPB_ASSUME(upb_FieldMode_Get(field) == kUpb_FieldMode_Map); + UPB_ASSUME(field->presence == 0); +} + UPB_INLINE bool upb_IsRepeatedOrMap(const upb_MiniTableField* field) { // This works because upb_FieldMode has no value 3. 
return !(field->mode & kUpb_FieldMode_Scalar); diff --git a/upb/port/def.inc b/upb/port/def.inc index b507b18769..b23b406992 100644 --- a/upb/port/def.inc +++ b/upb/port/def.inc @@ -243,8 +243,8 @@ #endif /* UPB_FASTTABLE_INIT() allows protos compiled for fasttable to gracefully - * degrade to non-fasttable if we are using UPB_TRY_ENABLE_FASTTABLE. */ -#if !UPB_FASTTABLE && defined(UPB_TRY_ENABLE_FASTTABLE) + * degrade to non-fasttable if the runtime or platform do not support it. */ +#if !UPB_FASTTABLE #define UPB_FASTTABLE_INIT(...) #else #define UPB_FASTTABLE_INIT(...) __VA_ARGS__ diff --git a/upb/text/encode.c b/upb/text/encode.c index 830f813775..c694088fc2 100644 --- a/upb/text/encode.c +++ b/upb/text/encode.c @@ -38,6 +38,8 @@ #include "upb/lex/round_trip.h" #include "upb/port/vsnprintf_compat.h" #include "upb/reflection/message.h" +#include "upb/wire/eps_copy_input_stream.h" +#include "upb/wire/reader.h" #include "upb/wire/types.h" // Must be last. @@ -310,23 +312,6 @@ static void txtenc_map(txtenc* e, const upb_Map* map, const upb_FieldDef* f) { } \ } while (0) -static const char* txtenc_parsevarint(const char* ptr, const char* limit, - uint64_t* val) { - uint8_t byte; - int bitpos = 0; - *val = 0; - - do { - CHK(bitpos < 70 && ptr < limit); - byte = *ptr; - *val |= (uint64_t)(byte & 0x7F) << bitpos; - ptr++; - bitpos += 7; - } while (byte & 0x80); - - return ptr; -} - /* * Unknown fields are printed by number. * @@ -337,89 +322,95 @@ static const char* txtenc_parsevarint(const char* ptr, const char* limit, * 1: 111 * } */ -static const char* txtenc_unknown(txtenc* e, const char* ptr, const char* end, +static const char* txtenc_unknown(txtenc* e, const char* ptr, + upb_EpsCopyInputStream* stream, int groupnum) { - while (ptr < end) { - uint64_t tag_64; + // We are guaranteed that the unknown data is valid wire format, and will not + // contain tag zero. + uint32_t end_group = groupnum > 0 + ? 
((groupnum << kUpb_WireReader_WireTypeBits) | + kUpb_WireType_EndGroup) + : 0; + + while (!upb_EpsCopyInputStream_IsDone(stream, &ptr)) { uint32_t tag; - CHK(ptr = txtenc_parsevarint(ptr, end, &tag_64)); - CHK(tag_64 < UINT32_MAX); - tag = (uint32_t)tag_64; - - if ((tag & 7) == kUpb_WireType_EndGroup) { - CHK((tag >> 3) == (uint32_t)groupnum); - return ptr; - } + CHK(ptr = upb_WireReader_ReadTag(ptr, &tag)); + if (tag == end_group) return ptr; txtenc_indent(e); - txtenc_printf(e, "%d: ", (int)(tag >> 3)); + txtenc_printf(e, "%d: ", (int)upb_WireReader_GetFieldNumber(tag)); - switch (tag & 7) { + switch (upb_WireReader_GetWireType(tag)) { case kUpb_WireType_Varint: { uint64_t val; - CHK(ptr = txtenc_parsevarint(ptr, end, &val)); + CHK(ptr = upb_WireReader_ReadVarint(ptr, &val)); txtenc_printf(e, "%" PRIu64, val); break; } case kUpb_WireType_32Bit: { uint32_t val; - CHK(end - ptr >= 4); - memcpy(&val, ptr, 4); - ptr += 4; + ptr = upb_WireReader_ReadFixed32(ptr, &val); txtenc_printf(e, "0x%08" PRIu32, val); break; } case kUpb_WireType_64Bit: { uint64_t val; - CHK(end - ptr >= 8); - memcpy(&val, ptr, 8); - ptr += 8; + ptr = upb_WireReader_ReadFixed64(ptr, &val); txtenc_printf(e, "0x%016" PRIu64, val); break; } case kUpb_WireType_Delimited: { - uint64_t len; - size_t avail = end - ptr; + int size; char* start = e->ptr; size_t start_overflow = e->overflow; - CHK(ptr = txtenc_parsevarint(ptr, end, &len)); - CHK(avail >= len); + CHK(ptr = upb_WireReader_ReadSize(ptr, &size)); + CHK(upb_EpsCopyInputStream_CheckDataSizeAvailable(stream, ptr, size)); - /* Speculatively try to parse as message. */ + // Speculatively try to parse as message. txtenc_putstr(e, "{"); txtenc_endfield(e); + + // EpsCopyInputStream can't back up, so create a sub-stream for the + // speculative parse. 
+ upb_EpsCopyInputStream sub_stream; + const char* sub_ptr = upb_EpsCopyInputStream_GetAliasedPtr(stream, ptr); + upb_EpsCopyInputStream_Init(&sub_stream, &sub_ptr, size, true); + e->indent_depth++; - if (txtenc_unknown(e, ptr, end, -1)) { + if (txtenc_unknown(e, sub_ptr, &sub_stream, -1)) { + ptr = upb_EpsCopyInputStream_Skip(stream, ptr, size); e->indent_depth--; txtenc_indent(e); txtenc_putstr(e, "}"); } else { - /* Didn't work out, print as raw bytes. */ - upb_StringView str; + // Didn't work out, print as raw bytes. e->indent_depth--; e->ptr = start; e->overflow = start_overflow; - str.data = ptr; - str.size = len; - txtenc_string(e, str, true); + const char* str = ptr; + ptr = upb_EpsCopyInputStream_ReadString(stream, &str, size, NULL); + assert(ptr); + txtenc_string(e, (upb_StringView){.data = str, .size = size}, true); } - ptr += len; break; } case kUpb_WireType_StartGroup: txtenc_putstr(e, "{"); txtenc_endfield(e); e->indent_depth++; - CHK(ptr = txtenc_unknown(e, ptr, end, tag >> 3)); + CHK(ptr = txtenc_unknown(e, ptr, stream, + upb_WireReader_GetFieldNumber(tag))); e->indent_depth--; txtenc_indent(e); txtenc_putstr(e, "}"); break; + default: + return NULL; } txtenc_endfield(e); } - return groupnum == -1 ? ptr : NULL; + return end_group == 0 && !upb_EpsCopyInputStream_IsError(stream) ? ptr : NULL; } #undef CHK @@ -441,11 +432,13 @@ static void txtenc_msg(txtenc* e, const upb_Message* msg, } if ((e->options & UPB_TXTENC_SKIPUNKNOWN) == 0) { - size_t len; - const char* ptr = upb_Message_GetUnknown(msg, &len); - char* start = e->ptr; - if (ptr) { - if (!txtenc_unknown(e, ptr, ptr + len, -1)) { + size_t size; + const char* ptr = upb_Message_GetUnknown(msg, &size); + if (size != 0) { + char* start = e->ptr; + upb_EpsCopyInputStream stream; + upb_EpsCopyInputStream_Init(&stream, &ptr, size, true); + if (!txtenc_unknown(e, ptr, &stream, -1)) { /* Unknown failed to parse, back up and don't print it at all. 
*/ e->ptr = start; } diff --git a/upb/util/BUILD b/upb/util/BUILD index 56afe3373a..159731ade0 100644 --- a/upb/util/BUILD +++ b/upb/util/BUILD @@ -108,9 +108,11 @@ cc_library( hdrs = ["compare.h"], visibility = ["//visibility:public"], deps = [ + "//:eps_copy_input_stream", "//:port", "//:reflection", - "//:wire", + "//:wire_reader", + "//:wire_types", ], ) @@ -125,6 +127,7 @@ cc_test( deps = [ ":compare", "//:wire_internal", + "//:wire_types", "@com_google_absl//absl/strings", "@com_google_googletest//:gtest_main", ], diff --git a/upb/util/compare.c b/upb/util/compare.c index 60fd5c12e2..4215411594 100644 --- a/upb/util/compare.c +++ b/upb/util/compare.c @@ -27,6 +27,8 @@ #include "upb/util/compare.h" +#include "upb/wire/eps_copy_input_stream.h" +#include "upb/wire/reader.h" #include "upb/wire/types.h" // Must be last. @@ -53,7 +55,7 @@ struct upb_UnknownFields { }; typedef struct { - const char* end; + upb_EpsCopyInputStream stream; upb_Arena* arena; upb_UnknownField* tmp; size_t tmp_size; @@ -76,25 +78,6 @@ static void upb_UnknownFields_Grow(upb_UnknownField_Context* ctx, *end = *base + new; } -static const char* upb_UnknownFields_ParseVarint(const char* ptr, - const char* limit, - uint64_t* val) { - uint8_t byte; - int bitpos = 0; - *val = 0; - - do { - // Unknown field data must be valid. - UPB_ASSERT(bitpos < 70 && ptr < limit); - byte = *ptr; - *val |= (uint64_t)(byte & 0x7F) << bitpos; - ptr++; - bitpos += 7; - } while (byte & 0x80); - - return ptr; -} - // We have to implement our own sort here, since qsort() is not an in-order // sort. Here we use merge sort, the simplest in-order sort. 
static void upb_UnknownFields_Merge(upb_UnknownField* arr, size_t start, @@ -151,11 +134,11 @@ static upb_UnknownFields* upb_UnknownFields_DoBuild( const char* ptr = *buf; uint32_t last_tag = 0; bool sorted = true; - while (ptr < ctx->end) { - uint64_t tag; - ptr = upb_UnknownFields_ParseVarint(ptr, ctx->end, &tag); + while (!upb_EpsCopyInputStream_IsDone(&ctx->stream, &ptr)) { + uint32_t tag; + ptr = upb_WireReader_ReadTag(ptr, &tag); UPB_ASSERT(tag <= UINT32_MAX); - int wire_type = tag & 7; + int wire_type = upb_WireReader_GetWireType(tag); if (wire_type == kUpb_WireType_EndGroup) break; if (tag < last_tag) sorted = false; last_tag = tag; @@ -169,25 +152,22 @@ static upb_UnknownFields* upb_UnknownFields_DoBuild( switch (wire_type) { case kUpb_WireType_Varint: - ptr = upb_UnknownFields_ParseVarint(ptr, ctx->end, &field->data.varint); + ptr = upb_WireReader_ReadVarint(ptr, &field->data.varint); break; case kUpb_WireType_64Bit: - UPB_ASSERT(ctx->end - ptr >= 8); - memcpy(&field->data.uint64, ptr, 8); - ptr += 8; + ptr = upb_WireReader_ReadFixed64(ptr, &field->data.uint64); break; case kUpb_WireType_32Bit: - UPB_ASSERT(ctx->end - ptr >= 4); - memcpy(&field->data.uint32, ptr, 4); - ptr += 4; + ptr = upb_WireReader_ReadFixed32(ptr, &field->data.uint32); break; case kUpb_WireType_Delimited: { - uint64_t size; - ptr = upb_UnknownFields_ParseVarint(ptr, ctx->end, &size); - UPB_ASSERT(ctx->end - ptr >= size); - field->data.delimited.data = ptr; + int size; + ptr = upb_WireReader_ReadSize(ptr, &size); + const char* s_ptr = ptr; + ptr = upb_EpsCopyInputStream_ReadStringAliased(&ctx->stream, &s_ptr, + size); + field->data.delimited.data = s_ptr; field->data.delimited.size = size; - ptr += size; break; } case kUpb_WireType_StartGroup: @@ -216,11 +196,12 @@ static upb_UnknownFields* upb_UnknownFields_DoBuild( // Builds a upb_UnknownFields data structure from the binary data in buf. 
static upb_UnknownFields* upb_UnknownFields_Build(upb_UnknownField_Context* ctx, - const char* buf, + const char* ptr, size_t size) { - ctx->end = buf + size; - upb_UnknownFields* fields = upb_UnknownFields_DoBuild(ctx, &buf); - UPB_ASSERT(buf == ctx->end); + upb_EpsCopyInputStream_Init(&ctx->stream, &ptr, size, true); + upb_UnknownFields* fields = upb_UnknownFields_DoBuild(ctx, &ptr); + UPB_ASSERT(upb_EpsCopyInputStream_IsDone(&ctx->stream, &ptr) && + !upb_EpsCopyInputStream_IsError(&ctx->stream)); return fields; } diff --git a/upb/wire/decode.c b/upb/wire/decode.c index 67979f807d..6ef1a7aa25 100644 --- a/upb/wire/decode.c +++ b/upb/wire/decode.c @@ -35,6 +35,7 @@ #include "upb/wire/common_internal.h" #include "upb/wire/decode_internal.h" #include "upb/wire/eps_copy_input_stream.h" +#include "upb/wire/reader.h" #include "upb/wire/swap_internal.h" #include "upb/wire/types.h" @@ -221,16 +222,12 @@ static upb_Message* _upb_Decoder_NewSubMessage( static const char* _upb_Decoder_ReadString(upb_Decoder* d, const char* ptr, int size, upb_StringView* str) { - if (d->options & kUpb_DecodeOption_AliasString) { - str->data = ptr; - } else { - char* data = upb_Arena_Malloc(&d->arena, size); - if (!data) _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory); - memcpy(data, ptr, size); - str->data = data; - } + const char* str_ptr = ptr; + ptr = upb_EpsCopyInputStream_ReadString(&d->input, &str_ptr, size, &d->arena); + if (!ptr) _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory); + str->data = str_ptr; str->size = size; - return ptr + size; + return ptr; } UPB_FORCEINLINE @@ -374,27 +371,21 @@ static const char* _upb_Decoder_DecodeFixedPacked( // Note: if/when the decoder supports multi-buffer input, we will need to // handle buffer seams here. 
if (_upb_IsLittleEndian()) { - memcpy(mem, ptr, val->size); - ptr += val->size; + ptr = upb_EpsCopyInputStream_Copy(&d->input, ptr, mem, val->size); } else { - const char* end = ptr + val->size; + int delta = upb_EpsCopyInputStream_PushLimit(&d->input, ptr, val->size); char* dst = mem; - while (ptr < end) { + while (!_upb_Decoder_IsDone(d, &ptr)) { if (lg2 == 2) { - uint32_t val; - memcpy(&val, ptr, sizeof(val)); - val = _upb_BigEndian_Swap32(val); - memcpy(dst, &val, sizeof(val)); + ptr = upb_WireReader_ReadFixed32(ptr, dst); + dst += 4; } else { UPB_ASSERT(lg2 == 3); - uint64_t val; - memcpy(&val, ptr, sizeof(val)); - val = _upb_BigEndian_Swap64(val); - memcpy(dst, &val, sizeof(val)); + ptr = upb_WireReader_ReadFixed64(ptr, dst); + dst += 8; } - ptr += 1 << lg2; - dst += 1 << lg2; } + upb_EpsCopyInputStream_PopLimit(&d->input, ptr, delta); } return ptr; @@ -1031,21 +1022,17 @@ static const char* _upb_Decoder_DecodeWireValue(upb_Decoder* d, const char* ptr, _upb_Decoder_Munge(field->descriptortype, val); return ptr; case kUpb_WireType_32Bit: - memcpy(&val->uint32_val, ptr, 4); - val->uint32_val = _upb_BigEndian_Swap32(val->uint32_val); *op = kUpb_DecodeOp_Scalar4Byte; if (((1 << field->descriptortype) & kFixed32OkMask) == 0) { *op = kUpb_DecodeOp_UnknownField; } - return ptr + 4; + return upb_WireReader_ReadFixed32(ptr, &val->uint32_val); case kUpb_WireType_64Bit: - memcpy(&val->uint64_val, ptr, 8); - val->uint64_val = _upb_BigEndian_Swap64(val->uint64_val); *op = kUpb_DecodeOp_Scalar8Byte; if (((1 << field->descriptortype) & kFixed64OkMask) == 0) { *op = kUpb_DecodeOp_UnknownField; } - return ptr + 8; + return upb_WireReader_ReadFixed64(ptr, &val->uint64_val); case kUpb_WireType_Delimited: ptr = upb_Decoder_DecodeSize(d, ptr, &val->size); *op = _upb_Decoder_GetDelimitedOp(mt, field); @@ -1264,9 +1251,8 @@ upb_DecodeStatus upb_Decode(const char* buf, size_t size, void* msg, upb_Decoder state; unsigned depth = (unsigned)options >> 16; - if 
(upb_EpsCopyInputStream_Init(&state.input, &buf, size)) { - options &= ~kUpb_DecodeOption_AliasString; // Can't alias patch buf. - } + upb_EpsCopyInputStream_Init(&state.input, &buf, size, + options & kUpb_DecodeOption_AliasString); state.extreg = extreg; state.unknown = NULL; diff --git a/upb/wire/decode_fast.c b/upb/wire/decode_fast.c index 690f364fdd..6b048a0937 100644 --- a/upb/wire/decode_fast.c +++ b/upb/wire/decode_fast.c @@ -639,26 +639,17 @@ static const char* fastdecode_verifyutf8(upb_Decoder* d, const char* ptr, ptr = fastdecode_longsize(ptr, &size); \ } \ \ - if (UPB_UNLIKELY(!upb_EpsCopyInputStream_CheckDataSizeAvailable( \ - &d->input, ptr, size))) { \ + if (UPB_UNLIKELY(!upb_EpsCopyInputStream_CheckSize(&d->input, ptr, size))) { \ dst->size = 0; \ _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); \ } \ \ - if (d->options & kUpb_DecodeOption_AliasString) { \ - dst->data = ptr; \ - dst->size = size; \ - } else { \ - char* data = upb_Arena_Malloc(&d->arena, size); \ - if (!data) { \ - _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory); \ - } \ - memcpy(data, ptr, size); \ - dst->data = data; \ - dst->size = size; \ - } \ + const char* s_ptr = ptr; \ + ptr = upb_EpsCopyInputStream_ReadString(&d->input, &s_ptr, size, &d->arena); \ + if (!ptr) _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory); \ + dst->data = s_ptr; \ + dst->size = size; \ \ - ptr += size; \ if (validate_utf8) { \ data = (uint64_t)dst; \ UPB_MUSTTAIL return fastdecode_verifyutf8(UPB_PARSE_ARGS); \ @@ -702,7 +693,7 @@ static void fastdecode_docopy(upb_Decoder* d, const char* ptr, uint32_t size, size_t common_has; \ char* buf; \ \ - UPB_ASSERT((d->options & kUpb_DecodeOption_AliasString) == 0); \ + UPB_ASSERT(!upb_EpsCopyInputStream_AliasingAvailable(&d->input, ptr, 0)); \ UPB_ASSERT(fastdecode_checktag(data, tagbytes)); \ \ dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, &farr, \ @@ -781,79 +772,73 @@ static void fastdecode_docopy(upb_Decoder* d, const 
char* ptr, uint32_t size, hasbits, (uint64_t)dst); \ } -#define FASTDECODE_STRING(d, ptr, msg, table, hasbits, data, tagbytes, card, \ - copyfunc, validate_utf8) \ - upb_StringView* dst; \ - fastdecode_arr farr; \ - int64_t size; \ - \ - if (UPB_UNLIKELY(!fastdecode_checktag(data, tagbytes))) { \ - RETURN_GENERIC("string field tag mismatch\n"); \ - } \ - \ - if (UPB_UNLIKELY((d->options & kUpb_DecodeOption_AliasString) == 0)) { \ - UPB_MUSTTAIL return copyfunc(UPB_PARSE_ARGS); \ - } \ - \ - dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, &farr, \ - sizeof(upb_StringView), card); \ - \ - again: \ - if (card == CARD_r) { \ - dst = fastdecode_resizearr(d, dst, &farr, sizeof(upb_StringView)); \ - } \ - \ - size = (int8_t)ptr[tagbytes]; \ - ptr += tagbytes + 1; \ - dst->data = ptr; \ - dst->size = size; \ - \ - if (UPB_UNLIKELY(!upb_EpsCopyInputStream_CheckDataSizeAvailable( \ - &d->input, ptr, size))) { \ - ptr--; \ - if (validate_utf8) { \ - return fastdecode_longstring_utf8(d, ptr, msg, table, hasbits, \ - (uint64_t)dst); \ - } else { \ - return fastdecode_longstring_noutf8(d, ptr, msg, table, hasbits, \ - (uint64_t)dst); \ - } \ - } \ - \ - ptr += size; \ - \ - if (card == CARD_r) { \ - if (validate_utf8 && \ - !_upb_Decoder_VerifyUtf8Inline(dst->data, dst->size)) { \ - _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_BadUtf8); \ - } \ - fastdecode_nextret ret = fastdecode_nextrepeated( \ - d, dst, &ptr, &farr, data, tagbytes, sizeof(upb_StringView)); \ - switch (ret.next) { \ - case FD_NEXT_SAMEFIELD: \ - dst = ret.dst; \ - if (UPB_UNLIKELY((d->options & kUpb_DecodeOption_AliasString) == 0)) { \ - /* Buffer flipped and we can't alias any more. Bounce to */ \ - /* copyfunc(), but via dispatch since we need to reload table */ \ - /* data also. 
*/ \ - fastdecode_commitarr(dst, &farr, sizeof(upb_StringView)); \ - data = ret.tag; \ - UPB_MUSTTAIL return _upb_FastDecoder_TagDispatch(UPB_PARSE_ARGS); \ - } \ - goto again; \ - case FD_NEXT_OTHERFIELD: \ - data = ret.tag; \ - UPB_MUSTTAIL return _upb_FastDecoder_TagDispatch(UPB_PARSE_ARGS); \ - case FD_NEXT_ATLIMIT: \ - return ptr; \ - } \ - } \ - \ - if (card != CARD_r && validate_utf8) { \ - data = (uint64_t)dst; \ - UPB_MUSTTAIL return fastdecode_verifyutf8(UPB_PARSE_ARGS); \ - } \ - \ +#define FASTDECODE_STRING(d, ptr, msg, table, hasbits, data, tagbytes, card, \ + copyfunc, validate_utf8) \ + upb_StringView* dst; \ + fastdecode_arr farr; \ + int64_t size; \ + \ + if (UPB_UNLIKELY(!fastdecode_checktag(data, tagbytes))) { \ + RETURN_GENERIC("string field tag mismatch\n"); \ + } \ + \ + if (UPB_UNLIKELY( \ + !upb_EpsCopyInputStream_AliasingAvailable(&d->input, ptr, 0))) { \ + UPB_MUSTTAIL return copyfunc(UPB_PARSE_ARGS); \ + } \ + \ + dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, &farr, \ + sizeof(upb_StringView), card); \ + \ + again: \ + if (card == CARD_r) { \ + dst = fastdecode_resizearr(d, dst, &farr, sizeof(upb_StringView)); \ + } \ + \ + size = (int8_t)ptr[tagbytes]; \ + ptr += tagbytes + 1; \ + \ + if (UPB_UNLIKELY( \ + !upb_EpsCopyInputStream_AliasingAvailable(&d->input, ptr, size))) { \ + ptr--; \ + if (validate_utf8) { \ + return fastdecode_longstring_utf8(d, ptr, msg, table, hasbits, \ + (uint64_t)dst); \ + } else { \ + return fastdecode_longstring_noutf8(d, ptr, msg, table, hasbits, \ + (uint64_t)dst); \ + } \ + } \ + \ + dst->data = ptr; \ + dst->size = size; \ + ptr = upb_EpsCopyInputStream_ReadStringAliased(&d->input, &dst->data, \ + dst->size); \ + \ + if (card == CARD_r) { \ + if (validate_utf8 && \ + !_upb_Decoder_VerifyUtf8Inline(dst->data, dst->size)) { \ + _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_BadUtf8); \ + } \ + fastdecode_nextret ret = fastdecode_nextrepeated( \ + d, dst, &ptr, &farr, data, tagbytes, 
sizeof(upb_StringView)); \ + switch (ret.next) { \ + case FD_NEXT_SAMEFIELD: \ + dst = ret.dst; \ + goto again; \ + case FD_NEXT_OTHERFIELD: \ + data = ret.tag; \ + UPB_MUSTTAIL return _upb_FastDecoder_TagDispatch(UPB_PARSE_ARGS); \ + case FD_NEXT_ATLIMIT: \ + return ptr; \ + } \ + } \ + \ + if (card != CARD_r && validate_utf8) { \ + data = (uint64_t)dst; \ + UPB_MUSTTAIL return fastdecode_verifyutf8(UPB_PARSE_ARGS); \ + } \ + \ UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS); /* Generate all combinations: diff --git a/upb/wire/decode_internal.h b/upb/wire/decode_internal.h index e1941b275f..14cc518c2e 100644 --- a/upb/wire/decode_internal.h +++ b/upb/wire/decode_internal.h @@ -115,8 +115,8 @@ const char* _upb_Decoder_IsDoneFallback(upb_EpsCopyInputStream* e, const char* ptr, int overrun); UPB_INLINE bool _upb_Decoder_IsDone(upb_Decoder* d, const char** ptr) { - return upb_EpsCopyInputStream_IsDone(&d->input, ptr, - &_upb_Decoder_IsDoneFallback); + return upb_EpsCopyInputStream_IsDoneWithCallback( + &d->input, ptr, &_upb_Decoder_IsDoneFallback); } UPB_INLINE const char* _upb_Decoder_BufferFlipCallback( @@ -131,8 +131,6 @@ UPB_INLINE const char* _upb_Decoder_BufferFlipCallback( } d->unknown = new_start; } - - d->options &= ~kUpb_DecodeOption_AliasString; return new_start; } diff --git a/upb/wire/eps_copy_input_stream.c b/upb/wire/eps_copy_input_stream.c new file mode 100644 index 0000000000..ebbe40adf8 --- /dev/null +++ b/upb/wire/eps_copy_input_stream.c @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2009-2021, Google LLC + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Google LLC nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "upb/wire/eps_copy_input_stream.h" + +static const char* _upb_EpsCopyInputStream_NoOpCallback( + upb_EpsCopyInputStream* e, const char* old_end, const char* new_start) { + return new_start; +} + +const char* _upb_EpsCopyInputStream_IsDoneFallbackNoCallback( + upb_EpsCopyInputStream* e, const char* ptr, int overrun) { + return _upb_EpsCopyInputStream_IsDoneFallbackInline( + e, ptr, overrun, _upb_EpsCopyInputStream_NoOpCallback); +} diff --git a/upb/wire/eps_copy_input_stream.h b/upb/wire/eps_copy_input_stream.h index ea6d530e10..35d66162f4 100644 --- a/upb/wire/eps_copy_input_stream.h +++ b/upb/wire/eps_copy_input_stream.h @@ -30,9 +30,15 @@ #include +#include "upb/mem/arena.h" + // Must be last. 
#include "upb/port/def.inc" +#ifdef __cplusplus +extern "C" { +#endif + // The maximum number of bytes a single protobuf field can take up in the // wire format. We only want to do one bounds check per field, so the input // stream guarantees that after upb_EpsCopyInputStream_IsDone() is called, @@ -41,13 +47,28 @@ // this invariant. #define kUpb_EpsCopyInputStream_SlopBytes 16 +enum { + kUpb_EpsCopyInputStream_NoAliasing = 0, + kUpb_EpsCopyInputStream_OnPatch = 1, + kUpb_EpsCopyInputStream_NoDelta = 2 +}; + typedef struct { const char* end; // Can read up to SlopBytes bytes beyond this. const char* limit_ptr; // For bounds checks, = end + UPB_MIN(limit, 0) + uintptr_t aliasing; int limit; // Submessage limit relative to end + bool error; // To distinguish between EOF and error. char patch[kUpb_EpsCopyInputStream_SlopBytes * 2]; } upb_EpsCopyInputStream; +// Returns true if the stream is in the error state. A stream enters the error +// state when the user reads past a limit (caught in IsDone()) or the +// ZeroCopyInputStream returns an error. +UPB_INLINE bool upb_EpsCopyInputStream_IsError(upb_EpsCopyInputStream* e) { + return e->error; +} + typedef const char* upb_EpsCopyInputStream_BufferFlipCallback( upb_EpsCopyInputStream* e, const char* old_end, const char* new_start); @@ -56,25 +77,26 @@ typedef const char* upb_EpsCopyInputStream_IsDoneFallbackFunc( // Initializes a upb_EpsCopyInputStream using the contents of the buffer // [*ptr, size]. Updates `*ptr` as necessary to guarantee that at least -// kUpb_EpsCopyInputStream_SlopBytes, and returns true if the pointer has been -// updated. -UPB_INLINE bool upb_EpsCopyInputStream_Init(upb_EpsCopyInputStream* e, - const char** ptr, size_t size) { - bool ret; +// kUpb_EpsCopyInputStream_SlopBytes are available to read. 
+UPB_INLINE void upb_EpsCopyInputStream_Init(upb_EpsCopyInputStream* e, + const char** ptr, size_t size, + bool enable_aliasing) { if (size <= kUpb_EpsCopyInputStream_SlopBytes) { memset(&e->patch, 0, 32); if (size) memcpy(&e->patch, *ptr, size); + e->aliasing = enable_aliasing ? (uintptr_t)*ptr - (uintptr_t)e->patch + : kUpb_EpsCopyInputStream_NoAliasing; *ptr = e->patch; e->end = *ptr + size; e->limit = 0; - ret = true; } else { e->end = *ptr + size - kUpb_EpsCopyInputStream_SlopBytes; e->limit = kUpb_EpsCopyInputStream_SlopBytes; - ret = false; + e->aliasing = enable_aliasing ? kUpb_EpsCopyInputStream_NoDelta + : kUpb_EpsCopyInputStream_NoAliasing; } e->limit_ptr = e->end; - return ret; + e->error = false; } typedef enum { @@ -110,7 +132,7 @@ UPB_INLINE upb_IsDoneStatus upb_EpsCopyInputStream_IsDoneStatus( // // Postcondition: if the function returns false, there are at least // kUpb_EpsCopyInputStream_SlopBytes of data available to read at *ptr. -UPB_INLINE bool upb_EpsCopyInputStream_IsDone( +UPB_INLINE bool upb_EpsCopyInputStream_IsDoneWithCallback( upb_EpsCopyInputStream* e, const char** ptr, upb_EpsCopyInputStream_IsDoneFallbackFunc* func) { int overrun; @@ -123,6 +145,22 @@ UPB_INLINE bool upb_EpsCopyInputStream_IsDone( *ptr = func(e, *ptr, overrun); return *ptr == NULL; } + UPB_UNREACHABLE(); +} + +const char* _upb_EpsCopyInputStream_IsDoneFallbackNoCallback( + upb_EpsCopyInputStream* e, const char* ptr, int overrun); + +// A simpler version of IsDoneWithCallback() that does not support a buffer flip +// callback. Useful in cases where we do not need to insert custom logic at +// every buffer flip. +// +// If this returns true, the user must call upb_EpsCopyInputStream_IsError() +// to distinguish between EOF and error. 
+UPB_INLINE bool upb_EpsCopyInputStream_IsDone(upb_EpsCopyInputStream* e, + const char** ptr) { + return upb_EpsCopyInputStream_IsDoneWithCallback( + e, ptr, _upb_EpsCopyInputStream_IsDoneFallbackNoCallback); } // Returns the total number of bytes that are safe to read from the current @@ -195,6 +233,98 @@ UPB_INLINE bool upb_EpsCopyInputStream_CheckSubMessageSizeAvailable( return _upb_EpsCopyInputStream_CheckSizeAvailable(e, ptr, size, true); } +// Returns true if aliasing_enabled=true was passed to +// upb_EpsCopyInputStream_Init() when this stream was initialized. +UPB_INLINE bool upb_EpsCopyInputStream_AliasingEnabled( + upb_EpsCopyInputStream* e) { + return e->aliasing != kUpb_EpsCopyInputStream_NoAliasing; +} + +// Returns true if aliasing_enabled=true was passed to +// upb_EpsCopyInputStream_Init() when this stream was initialized *and* we can +// alias into the region [ptr, size] in an input buffer. +UPB_INLINE bool upb_EpsCopyInputStream_AliasingAvailable( + upb_EpsCopyInputStream* e, const char* ptr, size_t size) { + // When EpsCopyInputStream supports streaming, this will need to become a + // runtime check. + return upb_EpsCopyInputStream_CheckDataSizeAvailable(e, ptr, size) && + e->aliasing >= kUpb_EpsCopyInputStream_NoDelta; +} + +// Returns a pointer into an input buffer that corresponds to the parsing +// pointer `ptr`. The returned pointer may be the same as `ptr`, but also may +// be different if we are currently parsing out of the patch buffer. +// +// REQUIRES: Aliasing must be available for the given pointer. If the input is a +// flat buffer and aliasing is enabled, then aliasing will always be available. +UPB_INLINE const char* upb_EpsCopyInputStream_GetAliasedPtr( + upb_EpsCopyInputStream* e, const char* ptr) { + UPB_ASSUME(upb_EpsCopyInputStream_AliasingAvailable(e, ptr, 0)); + uintptr_t delta = + e->aliasing == kUpb_EpsCopyInputStream_NoDelta ? 
0 : e->aliasing; + return (const char*)((uintptr_t)ptr + delta); +} + +// Reads string data from the input, aliasing into the input buffer instead of +// copying. The parsing pointer is passed in `*ptr`, and will be updated if +// necessary to point to the actual input buffer. Returns the new parsing +// pointer, which will be advanced past the string data. +// +// REQUIRES: Aliasing must be available for this data region (test with +// upb_EpsCopyInputStream_AliasingAvailable(). +UPB_INLINE const char* upb_EpsCopyInputStream_ReadStringAliased( + upb_EpsCopyInputStream* e, const char** ptr, size_t size) { + UPB_ASSUME(upb_EpsCopyInputStream_AliasingAvailable(e, *ptr, size)); + const char* ret = *ptr + size; + *ptr = upb_EpsCopyInputStream_GetAliasedPtr(e, *ptr); + UPB_ASSUME(ret != NULL); + return ret; +} + +// Skips `size` bytes of data from the input and returns a pointer past the end. +// Returns NULL on end of stream or error. +UPB_INLINE const char* upb_EpsCopyInputStream_Skip(upb_EpsCopyInputStream* e, + const char* ptr, int size) { + if (!upb_EpsCopyInputStream_CheckDataSizeAvailable(e, ptr, size)) return NULL; + return ptr + size; +} + +// Copies `size` bytes of data from the input `ptr` into the buffer `to`, and +// returns a pointer past the end. Returns NULL on end of stream or error. +UPB_INLINE const char* upb_EpsCopyInputStream_Copy(upb_EpsCopyInputStream* e, + const char* ptr, void* to, + int size) { + if (!upb_EpsCopyInputStream_CheckDataSizeAvailable(e, ptr, size)) return NULL; + memcpy(to, ptr, size); + return ptr + size; +} + +// Reads string data from the stream and advances the pointer accordingly. +// If aliasing was enabled when the stream was initialized, then the returned +// pointer will point into the input buffer if possible, otherwise new data +// will be allocated from arena and copied into. We may be forced to copy even +// if aliasing was enabled if the input data spans input buffers. 
+// +// Returns NULL if memory allocation failed, or we reached a premature EOF. +UPB_INLINE const char* upb_EpsCopyInputStream_ReadString( + upb_EpsCopyInputStream* e, const char** ptr, size_t size, + upb_Arena* arena) { + if (upb_EpsCopyInputStream_AliasingAvailable(e, *ptr, size)) { + return upb_EpsCopyInputStream_ReadStringAliased(e, ptr, size); + } else { + // We need to allocate and copy. + if (!upb_EpsCopyInputStream_CheckDataSizeAvailable(e, *ptr, size)) { + return NULL; + } + UPB_ASSERT(arena); + char* data = (char*)upb_Arena_Malloc(arena, size); + if (!data) return NULL; + const char* ret = upb_EpsCopyInputStream_Copy(e, *ptr, data, size); + *ptr = data; + return ret; + } +} + UPB_INLINE void _upb_EpsCopyInputStream_CheckLimit(upb_EpsCopyInputStream* e) { UPB_ASSERT(e->limit_ptr == e->end + UPB_MIN(0, e->limit)); } @@ -245,8 +375,13 @@ UPB_INLINE const char* _upb_EpsCopyInputStream_IsDoneFallbackInline( e->limit -= kUpb_EpsCopyInputStream_SlopBytes; e->limit_ptr = e->end + e->limit; UPB_ASSERT(ptr < e->limit_ptr); + if (e->aliasing != kUpb_EpsCopyInputStream_NoAliasing) { + e->aliasing = (uintptr_t)old_end - (uintptr_t)new_start; + } return callback(e, old_end, new_start); } else { + UPB_ASSERT(overrun > e->limit); + e->error = true; return callback(e, NULL, NULL); } } @@ -280,6 +415,10 @@ static UPB_FORCEINLINE bool upb_EpsCopyInputStream_TryParseDelimitedFast( return true; } +#ifdef __cplusplus +} /* extern "C" */ +#endif + #include "upb/port/undef.inc" #endif // UPB_WIRE_EPS_COPY_INPUT_STREAM_H_ diff --git a/upb/wire/eps_copy_input_stream_test.cc b/upb/wire/eps_copy_input_stream_test.cc index 1249a64dc2..3b90064229 100644 --- a/upb/wire/eps_copy_input_stream_test.cc +++ b/upb/wire/eps_copy_input_stream_test.cc @@ -5,6 +5,7 @@ #include #include "gtest/gtest.h" +#include "upb/upb.hpp" // begin:google_only // #include "testing/fuzzing/fuzztest.h" // end:google_only @@ -14,8 +15,8 @@ namespace { TEST(EpsCopyInputStreamTest, ZeroSize) { 
upb_EpsCopyInputStream stream; const char* ptr = NULL; - upb_EpsCopyInputStream_Init(&stream, &ptr, 0); - EXPECT_TRUE(upb_EpsCopyInputStream_IsDone(&stream, &ptr, NULL)); + upb_EpsCopyInputStream_Init(&stream, &ptr, 0, false); + EXPECT_TRUE(upb_EpsCopyInputStream_IsDoneWithCallback(&stream, &ptr, NULL)); } // begin:google_only @@ -81,20 +82,59 @@ TEST(EpsCopyInputStreamTest, ZeroSize) { // // class EpsStream { // public: -// EpsStream(const std::string& data) : data_(data) { +// EpsStream(const std::string& data, bool enable_aliasing) +// : data_(data), enable_aliasing_(enable_aliasing) { // ptr_ = data_.data(); -// upb_EpsCopyInputStream_Init(&eps_, &ptr_, data_.size()); +// upb_EpsCopyInputStream_Init(&eps_, &ptr_, data_.size(), enable_aliasing); // } // // // Returns false at EOF or error. // int ReadData(int n, std::string* data) { +// EXPECT_LE(n, kUpb_EpsCopyInputStream_SlopBytes); +// if (enable_aliasing_) { +// EXPECT_TRUE(upb_EpsCopyInputStream_AliasingAvailable(&eps_, ptr_, n)); +// } // // We want to verify that we can read kUpb_EpsCopyInputStream_SlopBytes // // safely, even if we haven't actually been requested to read that much. // // We copy to a global buffer so the copy can't be optimized away. 
// memcpy(&tmp_buf, ptr_, kUpb_EpsCopyInputStream_SlopBytes); // data->assign(tmp_buf, n); // ptr_ += n; +// if (enable_aliasing_) { +// EXPECT_TRUE(upb_EpsCopyInputStream_AliasingAvailable(&eps_, ptr_, 0)); +// } +// return PopLimits(); +// } +// +// int ReadString(int n, std::string* data) { +// if (!upb_EpsCopyInputStream_CheckSize(&eps_, ptr_, n)) return -1; +// const char* str_data = ptr_; +// if (enable_aliasing_) { +// EXPECT_TRUE(upb_EpsCopyInputStream_AliasingAvailable(&eps_, ptr_, n)); +// } +// ptr_ = upb_EpsCopyInputStream_ReadString(&eps_, &str_data, n, arena_.ptr()); +// if (!ptr_) return -1; +// if (enable_aliasing_ && n) { +// EXPECT_GE(reinterpret_cast(str_data), +// reinterpret_cast(data_.data())); +// EXPECT_LT(reinterpret_cast(str_data), +// reinterpret_cast(data_.data() + data_.size())); +// EXPECT_TRUE(upb_EpsCopyInputStream_AliasingAvailable(&eps_, ptr_, 0)); +// } +// data->assign(str_data, n); +// return PopLimits(); +// } +// +// bool TryPushLimit(int limit) { +// if (!upb_EpsCopyInputStream_CheckSize(&eps_, ptr_, limit)) return false; +// deltas_.push_back(upb_EpsCopyInputStream_PushLimit(&eps_, ptr_, limit)); +// return true; +// } +// +// bool IsEof() const { return eof_; } // +// private: +// int PopLimits() { // int end_limit_count = 0; // // while (IsAtLimit()) { @@ -110,18 +150,9 @@ TEST(EpsCopyInputStreamTest, ZeroSize) { // return error_ ? -1 : end_limit_count; // } // -// bool TryPushLimit(int limit) { -// if (!upb_EpsCopyInputStream_CheckSize(&eps_, ptr_, limit)) return false; -// deltas_.push_back(upb_EpsCopyInputStream_PushLimit(&eps_, ptr_, limit)); -// return true; -// } -// -// bool IsEof() const { return eof_; } -// -// private: // bool IsAtLimit() { -// return upb_EpsCopyInputStream_IsDone(&eps_, &ptr_, -// &EpsStream::IsDoneFallback); +// return upb_EpsCopyInputStream_IsDoneWithCallback( +// &eps_, &ptr_, &EpsStream::IsDoneFallback); // } // // // Return false on EOF. 
@@ -150,12 +181,18 @@ TEST(EpsCopyInputStreamTest, ZeroSize) { // std::string data_; // const char* ptr_; // std::vector deltas_; +// upb::Arena arena_; // bool error_ = false; // bool eof_ = false; +// bool enable_aliasing_; // }; // // // Reads N bytes from the given position. // struct ReadOp { +// int bytes; // Must be <= kUpb_EpsCopyInputStream_SlopBytes. +// }; +// +// struct ReadStringOp { // int bytes; // }; // @@ -164,14 +201,16 @@ TEST(EpsCopyInputStreamTest, ZeroSize) { // int bytes; // }; // -// typedef std::variant Op; +// typedef std::variant Op; // // struct EpsCopyTestScript { // int data_size; +// bool enable_aliasing; // std::vector ops; // }; // // auto ArbitraryEpsCopyTestScript() { +// using ::fuzztest::Arbitrary; // using ::fuzztest::InRange; // using ::fuzztest::NonNegative; // using ::fuzztest::StructOf; @@ -182,9 +221,12 @@ TEST(EpsCopyInputStreamTest, ZeroSize) { // // return StructOf( // InRange(0, max_data_size), // data_size +// Arbitrary(), // enable_aliasing // VectorOf(VariantOf( // // ReadOp // StructOf(InRange(0, kUpb_EpsCopyInputStream_SlopBytes)), +// // ReadStringOp +// StructOf(NonNegative()), // // PushLimitOp // StructOf(NonNegative())))); // } @@ -198,7 +240,7 @@ TEST(EpsCopyInputStreamTest, ZeroSize) { // } // // FakeStream fake_stream(data); -// EpsStream eps_stream(data); +// EpsStream eps_stream(data, script.enable_aliasing); // // for (const auto& op : script.ops) { // if (const ReadOp* read_op = std::get_if(&op)) { @@ -211,24 +253,82 @@ TEST(EpsCopyInputStreamTest, ZeroSize) { // EXPECT_EQ(data_fake, data_eps); // EXPECT_EQ(fake_stream.IsEof(), eps_stream.IsEof()); // if (fake_stream.IsEof()) break; +// } else if (const ReadStringOp* read_op = std::get_if(&op)) { +// std::string data_fake; +// std::string data_eps; +// int fake_result = fake_stream.ReadData(read_op->bytes, &data_fake); +// int eps_result = eps_stream.ReadString(read_op->bytes, &data_eps); +// EXPECT_EQ(fake_result, eps_result); +// if (fake_result == 
-1) break; // Error +// EXPECT_EQ(data_fake, data_eps); +// EXPECT_EQ(fake_stream.IsEof(), eps_stream.IsEof()); +// if (fake_stream.IsEof()) break; // } else if (const PushLimitOp* push = std::get_if(&op)) { // EXPECT_EQ(fake_stream.TryPushLimit(push->bytes), // eps_stream.TryPushLimit(push->bytes)); +// } else { +// EXPECT_TRUE(false); // Unknown op. // } // } // } // +// // Test with: +// // $ blaze run --config=fuzztest third_party/upb:eps_copy_input_stream_test \ +// // -- --gunit_fuzz= // FUZZ_TEST(EpsCopyFuzzTest, TestAgainstFakeStream) // .WithDomains(ArbitraryEpsCopyTestScript()); // // TEST(EpsCopyFuzzTest, TestAgainstFakeStreamRegression) { // TestAgainstFakeStream({299, +// false, // { // PushLimitOp{2}, // ReadOp{14}, // }}); // } // +// TEST(EpsCopyFuzzTest, AliasingEnabledZeroSizeReadString) { +// TestAgainstFakeStream({510, true, {ReadStringOp{0}}}); +// } +// +// TEST(EpsCopyFuzzTest, AliasingDisabledZeroSizeReadString) { +// TestAgainstFakeStream({510, false, {ReadStringOp{0}}}); +// } +// +// TEST(EpsCopyFuzzTest, ReadStringZero) { +// TestAgainstFakeStream({0, true, {ReadStringOp{0}}}); +// } +// +// TEST(EpsCopyFuzzTest, ReadZero) { +// TestAgainstFakeStream({0, true, {ReadOp{0}}}); +// } +// +// TEST(EpsCopyFuzzTest, ReadZeroTwice) { +// TestAgainstFakeStream({0, true, {ReadOp{0}, ReadOp{0}}}); +// } +// +// TEST(EpsCopyFuzzTest, ReadStringZeroThenRead) { +// TestAgainstFakeStream({0, true, {ReadStringOp{0}, ReadOp{0}}}); +// } +// +// TEST(EpsCopyFuzzTest, ReadStringOverflowsBufferButNotLimit) { +// TestAgainstFakeStream({351, +// false, +// { +// ReadOp{7}, +// PushLimitOp{2147483647}, +// ReadStringOp{344}, +// }}); +// } +// +// TEST(EpsCopyFuzzTest, LastBufferAliasing) { +// TestAgainstFakeStream({27, true, {ReadOp{12}, ReadStringOp{3}}}); +// } +// +// TEST(EpsCopyFuzzTest, FirstBufferAliasing) { +// TestAgainstFakeStream({7, true, {ReadStringOp{3}}}); +// } +// // end:google_only } // namespace diff --git a/upb/wire/reader.c 
b/upb/wire/reader.c new file mode 100644 index 0000000000..a84fb0b912 --- /dev/null +++ b/upb/wire/reader.c @@ -0,0 +1,67 @@ +/* + * Copyright (c) 2009-2021, Google LLC + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Google LLC nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "upb/wire/reader.h" + +#include "upb/wire/eps_copy_input_stream.h" +#include "upb/wire/types.h" + +// Must be last. 
+#include "upb/port/def.inc" + +UPB_NOINLINE _upb_WireReader_ReadLongVarintRet +_upb_WireReader_ReadLongVarint(const char* ptr, uint64_t val) { + _upb_WireReader_ReadLongVarintRet ret = {NULL, 0}; + uint64_t byte; + int i; + for (i = 1; i < 10; i++) { + byte = (uint8_t)ptr[i]; + val += (byte - 1) << (i * 7); + if (!(byte & 0x80)) { + ret.ptr = ptr + i + 1; + ret.val = val; + return ret; + } + } + return ret; +} + +const char* _upb_WireReader_SkipGroup(const char* ptr, uint32_t tag, + int depth_limit, + upb_EpsCopyInputStream* stream) { + if (--depth_limit == 0) return NULL; + uint32_t end_group_tag = (tag & ~7ULL) | kUpb_WireType_EndGroup; + while (!upb_EpsCopyInputStream_IsDone(stream, &ptr)) { + uint32_t tag; + ptr = upb_WireReader_ReadTag(ptr, &tag); + if (!ptr) return NULL; + if (tag == end_group_tag) return ptr; + ptr = _upb_WireReader_SkipValue(ptr, tag, depth_limit, stream); + if (!ptr) return NULL; + } + return ptr; +} diff --git a/upb/wire/reader.h b/upb/wire/reader.h new file mode 100644 index 0000000000..b959744027 --- /dev/null +++ b/upb/wire/reader.h @@ -0,0 +1,227 @@ +/* + * Copyright (c) 2009-2021, Google LLC + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Google LLC nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef UPB_WIRE_READER_H_ +#define UPB_WIRE_READER_H_ + +#include "upb/wire/eps_copy_input_stream.h" +#include "upb/wire/swap_internal.h" +#include "upb/wire/types.h" + +// Must be last. +#include "upb/port/def.inc" + +#ifdef __cplusplus +extern "C" { +#endif + +// The upb_WireReader interface is suitable for general-purpose parsing of +// protobuf binary wire format. It is designed to be used along with +// upb_EpsCopyInputStream for buffering, and all parsing routines in this file +// assume that at least kUpb_EpsCopyInputStream_SlopBytes worth of data is +// available to read without any bounds checks. 
+ +#define kUpb_WireReader_WireTypeMask 7 +#define kUpb_WireReader_WireTypeBits 3 + +typedef struct { + const char* ptr; + uint64_t val; +} _upb_WireReader_ReadLongVarintRet; + +_upb_WireReader_ReadLongVarintRet _upb_WireReader_ReadLongVarint( + const char* ptr, uint64_t val); + +static UPB_FORCEINLINE const char* _upb_WireReader_ReadVarint(const char* ptr, + uint64_t* val, + int maxlen, + uint64_t maxval) { + uint64_t byte = (uint8_t)*ptr; + if (UPB_LIKELY((byte & 0x80) == 0)) { + *val = (uint32_t)byte; + return ptr + 1; + } + const char* start = ptr; + _upb_WireReader_ReadLongVarintRet res = + _upb_WireReader_ReadLongVarint(ptr, byte); + if (!res.ptr || (maxlen < 10 && res.ptr - start > maxlen) || + res.val > maxval) { + return NULL; // Malformed. + } + *val = res.val; + return res.ptr; +} + +// Parses a tag into `tag`, and returns a pointer past the end of the tag, or +// NULL if there was an error in the tag data. +// +// REQUIRES: there must be at least 10 bytes of data available at `ptr`. +// Bounds checks must be performed before calling this function, preferably +// by calling upb_EpsCopyInputStream_IsDone(). +static UPB_FORCEINLINE const char* upb_WireReader_ReadTag(const char* ptr, + uint32_t* tag) { + uint64_t val; + ptr = _upb_WireReader_ReadVarint(ptr, &val, 5, UINT32_MAX); + if (!ptr) return NULL; + *tag = val; + return ptr; +} + +// Given a tag, returns the field number. +UPB_INLINE uint32_t upb_WireReader_GetFieldNumber(uint32_t tag) { + return tag >> kUpb_WireReader_WireTypeBits; +} + +// Given a tag, returns the wire type. +UPB_INLINE uint8_t upb_WireReader_GetWireType(uint32_t tag) { + return tag & kUpb_WireReader_WireTypeMask; +} + +UPB_INLINE const char* upb_WireReader_ReadVarint(const char* ptr, + uint64_t* val) { + return _upb_WireReader_ReadVarint(ptr, val, 10, UINT64_MAX); +} + +// Skips data for a varint, returning a pointer past the end of the varint, or +// NULL if there was an error in the varint data. 
+// +// REQUIRES: there must be at least 10 bytes of data available at `ptr`. +// Bounds checks must be performed before calling this function, preferably +// by calling upb_EpsCopyInputStream_IsDone(). +UPB_INLINE const char* upb_WireReader_SkipVarint(const char* ptr) { + uint64_t val; + return upb_WireReader_ReadVarint(ptr, &val); +} + +// Reads a varint indicating the size of a delimited field into `size`. Returns +// a pointer past the varint, or NULL if there was an error in the varint data. +// +// REQUIRES: there must be at least 10 bytes of data available at `ptr`. +// Bounds checks must be performed before calling this function, preferably +// by calling upb_EpsCopyInputStream_IsDone(). +UPB_INLINE const char* upb_WireReader_ReadSize(const char* ptr, int* size) { + uint64_t size64; + ptr = upb_WireReader_ReadVarint(ptr, &size64); + if (!ptr || size64 >= INT32_MAX) return NULL; + *size = size64; + return ptr; +} + +// Reads a fixed32 field, performing byte swapping if necessary. +// +// REQUIRES: there must be at least 4 bytes of data available at `ptr`. +// Bounds checks must be performed before calling this function, preferably +// by calling upb_EpsCopyInputStream_IsDone(). +UPB_INLINE const char* upb_WireReader_ReadFixed32(const char* ptr, void* val) { + uint32_t uval; + memcpy(&uval, ptr, 4); + uval = _upb_BigEndian_Swap32(uval); + memcpy(val, &uval, 4); + return ptr + 4; +} + +// Reads a fixed64 field, performing byte swapping if necessary. +// +// REQUIRES: there must be at least 8 bytes of data available at `ptr`. +// Bounds checks must be performed before calling this function, preferably +// by calling upb_EpsCopyInputStream_IsDone().
+UPB_INLINE const char* upb_WireReader_ReadFixed64(const char* ptr, void* val) { + uint64_t uval; + memcpy(&uval, ptr, 8); + uval = _upb_BigEndian_Swap64(uval); + memcpy(val, &uval, 8); + return ptr + 8; +} + +const char* _upb_WireReader_SkipGroup(const char* ptr, uint32_t tag, + int depth_limit, + upb_EpsCopyInputStream* stream); + +// Skips data for a group, returning a pointer past the end of the group, or +// NULL if there was an error parsing the group. The `tag` argument should be +// the start group tag that begins the group. The internal depth limit +// (currently 100) bounds how many levels of recursion the group may have +// before a parse error is reported (this limit exists to protect against +// stack overflow). +// +// TODO: evaluate how the depth_limit should be specified. Do users need +// control over this? +UPB_INLINE const char* upb_WireReader_SkipGroup( + const char* ptr, uint32_t tag, upb_EpsCopyInputStream* stream) { + return _upb_WireReader_SkipGroup(ptr, tag, 100, stream); +} + +UPB_INLINE const char* _upb_WireReader_SkipValue( + const char* ptr, uint32_t tag, int depth_limit, + upb_EpsCopyInputStream* stream) { + switch (upb_WireReader_GetWireType(tag)) { + case kUpb_WireType_Varint: + return upb_WireReader_SkipVarint(ptr); + case kUpb_WireType_32Bit: + return ptr + 4; + case kUpb_WireType_64Bit: + return ptr + 8; + case kUpb_WireType_Delimited: { + int size; + ptr = upb_WireReader_ReadSize(ptr, &size); + if (!ptr) return NULL; + ptr += size; + return ptr; + } + case kUpb_WireType_StartGroup: + return _upb_WireReader_SkipGroup(ptr, tag, depth_limit, stream); + case kUpb_WireType_EndGroup: + return NULL; // Should be handled before now. + default: + return NULL; // Unknown wire type. + } +} + +// Skips data for a wire value of any type, returning a pointer past the end of +// the data, or NULL if there was an error parsing the value. The `tag` argument +// should be the tag that was just parsed.
The `depth_limit` argument indicates +// how many levels of recursion a group is allowed to have before reporting a +// parse error (this limit exists to protect against stack overflow). +// +// REQUIRES: there must be at least 10 bytes of data available at `ptr`. +// Bounds checks must be performed before calling this function, preferably +// by calling upb_EpsCopyInputStream_IsDone(). +// +// TODO: evaluate how the depth_limit should be specified. Do users need +// control over this? +UPB_INLINE const char* upb_WireReader_SkipValue( + const char* ptr, uint32_t tag, upb_EpsCopyInputStream* stream) { + return _upb_WireReader_SkipValue(ptr, tag, 100, stream); +} + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#include "upb/port/undef.inc" + +#endif // UPB_WIRE_READER_H_ diff --git a/upbc/file_layout.h b/upbc/file_layout.h index 51ea2b72c0..265dfe3c83 100644 --- a/upbc/file_layout.h +++ b/upbc/file_layout.h @@ -180,23 +180,6 @@ class FileLayout { return layout64_.GetEnumTable(d); } - std::string GetFieldOffset(const protobuf::FieldDescriptor* f) const { - const upb_MiniTableField* f_32 = upb_MiniTable_FindFieldByNumber( - GetMiniTable32(f->containing_type()), f->number()); - const upb_MiniTableField* f_64 = upb_MiniTable_FindFieldByNumber( - GetMiniTable64(f->containing_type()), f->number()); - return UpbSize(f_32->offset, f_64->offset); - } - - std::string GetOneofCaseOffset(const protobuf::OneofDescriptor* o) const { - const protobuf::FieldDescriptor* f = o->field(0); - const upb_MiniTableField* f_32 = upb_MiniTable_FindFieldByNumber( - GetMiniTable32(f->containing_type()), f->number()); - const upb_MiniTableField* f_64 = upb_MiniTable_FindFieldByNumber( - GetMiniTable64(f->containing_type()), f->number()); - return UpbSize(~f_32->presence, ~f_64->presence); - } - std::string GetMessageSize(const protobuf::Descriptor* d) const { return UpbSize(GetMiniTable32(d)->size, GetMiniTable64(d)->size); } diff --git a/upbc/protoc-gen-upb.cc b/upbc/protoc-gen-upb.cc 
index 6ce24beb8f..c20d00210d 100644 --- a/upbc/protoc-gen-upb.cc +++ b/upbc/protoc-gen-upb.cc @@ -137,34 +137,6 @@ std::string CTypeInternal(const protobuf::FieldDescriptor* field, } } -std::string SizeLg2(const protobuf::FieldDescriptor* field) { - switch (field->cpp_type()) { - case protobuf::FieldDescriptor::CPPTYPE_MESSAGE: - return "UPB_SIZE(2, 3)"; - case protobuf::FieldDescriptor::CPPTYPE_ENUM: - return std::to_string(2); - case protobuf::FieldDescriptor::CPPTYPE_BOOL: - return std::to_string(0); - case protobuf::FieldDescriptor::CPPTYPE_FLOAT: - return std::to_string(2); - case protobuf::FieldDescriptor::CPPTYPE_INT32: - return std::to_string(2); - case protobuf::FieldDescriptor::CPPTYPE_UINT32: - return std::to_string(2); - case protobuf::FieldDescriptor::CPPTYPE_DOUBLE: - return std::to_string(3); - case protobuf::FieldDescriptor::CPPTYPE_INT64: - return std::to_string(3); - case protobuf::FieldDescriptor::CPPTYPE_UINT64: - return std::to_string(3); - case protobuf::FieldDescriptor::CPPTYPE_STRING: - return "UPB_SIZE(3, 4)"; - default: - fprintf(stderr, "Unexpected type"); - abort(); - } -} - std::string FloatToCLiteral(float value) { if (value == std::numeric_limits::infinity()) { return "kUpb_FltInfinity"; @@ -227,6 +199,30 @@ std::string CTypeConst(const protobuf::FieldDescriptor* field) { return CTypeInternal(field, true); } +std::string MapKeyCType(const protobuf::FieldDescriptor* map_field) { + return CType(map_field->message_type()->map_key()); +} + +std::string MapValueCType(const protobuf::FieldDescriptor* map_field) { + return CType(map_field->message_type()->map_value()); +} + +std::string MapKeySize(const protobuf::FieldDescriptor* map_field, + absl::string_view expr) { + return map_field->message_type()->map_key()->cpp_type() == + protobuf::FieldDescriptor::CPPTYPE_STRING + ? 
"0" + : absl::StrCat("sizeof(", expr, ")"); +} + +std::string MapValueSize(const protobuf::FieldDescriptor* map_field, + absl::string_view expr) { + return map_field->message_type()->map_value()->cpp_type() == + protobuf::FieldDescriptor::CPPTYPE_STRING + ? "0" + : absl::StrCat("sizeof(", expr, ")"); +} + std::string FieldInitializer(const FileLayout& layout, const protobuf::FieldDescriptor* field); @@ -436,38 +432,39 @@ void GenerateMapGetters(const protobuf::FieldDescriptor* field, const FileLayout& layout, absl::string_view msg_name, const NameToFieldDescriptorMap& field_names, Output& output) { - const protobuf::Descriptor* entry = field->message_type(); - const protobuf::FieldDescriptor* key = entry->FindFieldByNumber(1); - const protobuf::FieldDescriptor* val = entry->FindFieldByNumber(2); std::string resolved_name = ResolveFieldName(field, field_names); output( R"cc( UPB_INLINE size_t $0_$1_size(const $0* msg) { - return _upb_msg_map_size(msg, $2); + const upb_MiniTableField field = $2; + const upb_Map* map = upb_Message_GetMap(msg, &field); + return map ? _upb_Map_Size(map) : 0; } )cc", - msg_name, resolved_name, layout.GetFieldOffset(field)); + msg_name, resolved_name, FieldInitializer(layout, field)); output( R"cc( UPB_INLINE bool $0_$1_get(const $0* msg, $2 key, $3* val) { - return _upb_msg_map_get(msg, $4, &key, $5, val, $6); + const upb_MiniTableField field = $4; + const upb_Map* map = upb_Message_GetMap(msg, &field); + if (!map) return false; + return _upb_Map_Get(map, &key, $5, val, $6); } )cc", - msg_name, resolved_name, CType(key), CType(val), - layout.GetFieldOffset(field), - key->cpp_type() == protobuf::FieldDescriptor::CPPTYPE_STRING - ? "0" - : "sizeof(key)", - val->cpp_type() == protobuf::FieldDescriptor::CPPTYPE_STRING - ? 
"0" - : "sizeof(*val)"); + msg_name, resolved_name, MapKeyCType(field), MapValueCType(field), + FieldInitializer(layout, field), MapKeySize(field, "key"), + MapValueSize(field, "*val")); output( R"cc( UPB_INLINE $0 $1_$2_next(const $1* msg, size_t* iter) { - return ($0)_upb_msg_map_next(msg, $3, iter); + const upb_MiniTableField field = $3; + const upb_Map* map = upb_Message_GetMap(msg, &field); + if (!map) return NULL; + return ($0)_upb_map_next(map, iter); } )cc", - CTypeConst(field), msg_name, resolved_name, layout.GetFieldOffset(field)); + CTypeConst(field), msg_name, resolved_name, + FieldInitializer(layout, field)); } void GenerateMapEntryGetters(const protobuf::FieldDescriptor* field, @@ -493,12 +490,20 @@ void GenerateRepeatedGetters(const protobuf::FieldDescriptor* field, Output& output) { output( R"cc( - UPB_INLINE $0 const* $1_$2(const $1* msg, size_t* len) { - return ($0 const*)_upb_array_accessor(msg, $3, len); + UPB_INLINE $0 const* $1_$2(const $1* msg, size_t* size) { + const upb_MiniTableField field = $3; + const upb_Array* arr = upb_Message_GetArray(msg, &field); + if (arr) { + if (size) *size = arr->size; + return ($0 const*)_upb_array_constptr(arr); + } else { + if (size) *size = 0; + return NULL; + } } )cc", CTypeConst(field), msg_name, ResolveFieldName(field, field_names), - layout.GetFieldOffset(field)); + FieldInitializer(layout, field)); } void GenerateScalarGetters(const protobuf::FieldDescriptor* field, @@ -539,46 +544,50 @@ void GenerateMapSetters(const protobuf::FieldDescriptor* field, const FileLayout& layout, absl::string_view msg_name, const NameToFieldDescriptorMap& field_names, Output& output) { - const protobuf::Descriptor* entry = field->message_type(); - const protobuf::FieldDescriptor* key = entry->FindFieldByNumber(1); - const protobuf::FieldDescriptor* val = entry->FindFieldByNumber(2); std::string resolved_name = ResolveFieldName(field, field_names); output( R"cc( - UPB_INLINE void $0_$1_clear($0* msg) { 
_upb_msg_map_clear(msg, $2); } + UPB_INLINE void $0_$1_clear($0* msg) { + const upb_MiniTableField field = $2; + upb_Map* map = (upb_Map*)upb_Message_GetMap(msg, &field); + if (!map) return; + _upb_Map_Clear(map); + } )cc", - msg_name, resolved_name, layout.GetFieldOffset(field)); + msg_name, resolved_name, FieldInitializer(layout, field)); output( R"cc( UPB_INLINE bool $0_$1_set($0* msg, $2 key, $3 val, upb_Arena* a) { - return _upb_msg_map_set(msg, $4, &key, $5, &val, $6, a); + const upb_MiniTableField field = $4; + upb_Map* map = _upb_Message_GetOrCreateMutableMap(msg, &field, $5, $6, a); + return _upb_Map_Insert(map, &key, $5, &val, $6, a) != + kUpb_MapInsertStatus_OutOfMemory; } )cc", - msg_name, resolved_name, CType(key), CType(val), - layout.GetFieldOffset(field), - key->cpp_type() == protobuf::FieldDescriptor::CPPTYPE_STRING - ? "0" - : "sizeof(key)", - val->cpp_type() == protobuf::FieldDescriptor::CPPTYPE_STRING - ? "0" - : "sizeof(val)"); + msg_name, resolved_name, MapKeyCType(field), MapValueCType(field), + FieldInitializer(layout, field), MapKeySize(field, "key"), + MapValueSize(field, "val")); output( R"cc( UPB_INLINE bool $0_$1_delete($0* msg, $2 key) { - return _upb_msg_map_delete(msg, $3, &key, $4); + const upb_MiniTableField field = $3; + upb_Map* map = (upb_Map*)upb_Message_GetMap(msg, &field); + if (!map) return false; + return _upb_Map_Delete(map, &key, $4, NULL); } )cc", - msg_name, resolved_name, CType(key), layout.GetFieldOffset(field), - key->cpp_type() == protobuf::FieldDescriptor::CPPTYPE_STRING - ? 
"0" - : "sizeof(key)"); + msg_name, resolved_name, MapKeyCType(field), + FieldInitializer(layout, field), MapKeySize(field, "key")); output( R"cc( UPB_INLINE $0 $1_$2_nextmutable($1* msg, size_t* iter) { - return ($0)_upb_msg_map_next(msg, $3, iter); + const upb_MiniTableField field = $3; + upb_Map* map = (upb_Map*)upb_Message_GetMap(msg, &field); + if (!map) return NULL; + return ($0)_upb_map_next(map, iter); } )cc", - CType(field), msg_name, resolved_name, layout.GetFieldOffset(field)); + CType(field), msg_name, resolved_name, FieldInitializer(layout, field)); } void GenerateRepeatedSetters(const protobuf::FieldDescriptor* field, @@ -589,41 +598,58 @@ void GenerateRepeatedSetters(const protobuf::FieldDescriptor* field, std::string resolved_name = ResolveFieldName(field, field_names); output( R"cc( - UPB_INLINE $0* $1_mutable_$2($1* msg, size_t* len) { - return ($0*)_upb_array_mutable_accessor(msg, $3, len); + UPB_INLINE $0* $1_mutable_$2($1* msg, size_t* size) { + upb_MiniTableField field = $3; + upb_Array* arr = upb_Message_GetMutableArray(msg, &field); + if (arr) { + if (size) *size = arr->size; + return ($0*)_upb_array_ptr(arr); + } else { + if (size) *size = 0; + return NULL; + } } )cc", - CType(field), msg_name, resolved_name, layout.GetFieldOffset(field)); + CType(field), msg_name, resolved_name, FieldInitializer(layout, field)); output( R"cc( - UPB_INLINE $0* $1_resize_$2($1* msg, size_t len, upb_Arena* arena) { - return ($0*)_upb_Array_Resize_accessor2(msg, $3, len, $4, arena); + UPB_INLINE $0* $1_resize_$2($1* msg, size_t size, upb_Arena* arena) { + upb_MiniTableField field = $3; + return ($0*)upb_Message_ResizeArray(msg, &field, size, arena); } )cc", - CType(field), msg_name, resolved_name, layout.GetFieldOffset(field), - SizeLg2(field)); + CType(field), msg_name, resolved_name, FieldInitializer(layout, field)); if (field->cpp_type() == protobuf::FieldDescriptor::CPPTYPE_MESSAGE) { output( R"cc( UPB_INLINE struct $0* $1_add_$2($1* msg, upb_Arena* arena) 
{ + upb_MiniTableField field = $4; + upb_Array* arr = upb_Message_GetOrCreateMutableArray(msg, &field, arena); + if (!arr || !_upb_Array_ResizeUninitialized(arr, arr->size + 1, arena)) { + return NULL; + } struct $0* sub = (struct $0*)_upb_Message_New(&$3, arena); - bool ok = _upb_Array_Append_accessor2(msg, $4, $5, &sub, arena); - if (!ok) return NULL; + if (!arr || !sub) return NULL; + _upb_Array_Set(arr, arr->size - 1, &sub, sizeof(sub)); return sub; } )cc", MessageName(field->message_type()), msg_name, resolved_name, - MessageInit(field->message_type()), layout.GetFieldOffset(field), - SizeLg2(field)); + MessageInit(field->message_type()), FieldInitializer(layout, field)); } else { output( R"cc( UPB_INLINE bool $1_add_$2($1* msg, $0 val, upb_Arena* arena) { - return _upb_Array_Append_accessor2(msg, $3, $4, &val, arena); + upb_MiniTableField field = $3; + upb_Array* arr = upb_Message_GetOrCreateMutableArray(msg, &field, arena); + if (!arr || !_upb_Array_ResizeUninitialized(arr, arr->size + 1, arena)) { + return false; + } + _upb_Array_Set(arr, arr->size - 1, &val, sizeof(val)); + return true; } )cc", - CType(field), msg_name, resolved_name, layout.GetFieldOffset(field), - SizeLg2(field)); + CType(field), msg_name, resolved_name, FieldInitializer(layout, field)); } } @@ -1156,7 +1182,7 @@ void WriteMessageField(const upb_MiniTableField* field64, // Writes a single message into a .upb.c source file. 
void WriteMessage(const protobuf::Descriptor* message, const FileLayout& layout, - Output& output, bool fasttable_enabled) { + Output& output) { std::string msg_name = ToCIdent(message->full_name()); std::string fields_array_ref = "NULL"; std::string submsgs_array_ref = "NULL"; @@ -1199,9 +1225,7 @@ void WriteMessage(const protobuf::Descriptor* message, const FileLayout& layout, std::vector table; uint8_t table_mask = -1; - if (fasttable_enabled) { - table = FastDecodeTable(message, layout); - } + table = FastDecodeTable(message, layout); if (table.size() > 1) { assert((table.size() & (table.size() - 1)) == 0); @@ -1230,7 +1254,7 @@ void WriteMessage(const protobuf::Descriptor* message, const FileLayout& layout, output(" {0x$1, &$0},\n", ent.first, absl::StrCat(absl::Hex(ent.second, absl::kZeroPad16))); } - output(" }),\n"); + output(" })\n"); } output("};\n\n"); } @@ -1284,15 +1308,14 @@ int WriteEnums(const FileLayout& layout, Output& output) { return this_file_enums.size(); } -int WriteMessages(const FileLayout& layout, Output& output, - bool fasttable_enabled) { +int WriteMessages(const FileLayout& layout, Output& output) { const protobuf::FileDescriptor* file = layout.descriptor(); std::vector file_messages = SortedMessages(file); if (file_messages.empty()) return 0; for (auto message : file_messages) { - WriteMessage(message, layout, output, fasttable_enabled); + WriteMessage(message, layout, output); } output("static const upb_MiniTable *$0[$1] = {\n", kMessagesInit, @@ -1357,8 +1380,7 @@ int WriteExtensions(const FileLayout& layout, Output& output) { } // Writes a .upb.cc source file. 
-void WriteSource(const FileLayout& layout, Output& output, - bool fasttable_enabled) { +void WriteSource(const FileLayout& layout, Output& output) { const protobuf::FileDescriptor* file = layout.descriptor(); EmitFileWarning(file, output); @@ -1379,7 +1401,7 @@ void WriteSource(const FileLayout& layout, Output& output, "#include \"upb/port/def.inc\"\n" "\n"); - int msg_count = WriteMessages(layout, output, fasttable_enabled); + int msg_count = WriteMessages(layout, output); int ext_count = WriteExtensions(layout, output); int enum_count = WriteEnums(layout, output); @@ -1410,17 +1432,12 @@ bool Generator::Generate(const protobuf::FileDescriptor* file, const std::string& parameter, protoc::GeneratorContext* context, std::string* error) const { - bool fasttable_enabled = false; std::vector<std::pair<std::string, std::string>> params; google::protobuf::compiler::ParseGeneratorParameter(parameter, &params); for (const auto& pair : params) { - if (pair.first == "fasttable") { - fasttable_enabled = true; - } else { - *error = "Unknown parameter: " + pair.first; - return false; - } + *error = "Unknown parameter: " + pair.first; + return false; } FileLayout layout(file); @@ -1433,7 +1450,7 @@ bool Generator::Generate(const protobuf::FileDescriptor* file, std::unique_ptr c_output_stream( context->Open(SourceFilename(file))); Output c_output(c_output_stream.get()); - WriteSource(layout, c_output, fasttable_enabled); + WriteSource(layout, c_output); return true; } diff --git a/upbc/upbc_so.c b/upbc/upbc_so.c index 16df62e7e1..6e498865be 100644 --- a/upbc/upbc_so.c +++ b/upbc/upbc_so.c @@ -32,6 +32,7 @@ #endif #include "upb/collections/array.h" +#include "upb/collections/map.h" #include "upb/message/accessors.h" #include "upb/message/message.h" #include "upb/mini_table/decode.h" @@ -39,60 +40,3 @@ // Must be last.
#include "upb/port/def.inc" - -UPB_API bool upb_Array_AppendBool(upb_Array* array, bool val, - upb_Arena* arena) { - const upb_MessageValue mv = {.bool_val = val}; - return upb_Array_Append(array, mv, arena); -} - -UPB_API bool upb_Array_AppendDouble(upb_Array* array, double val, - upb_Arena* arena) { - const upb_MessageValue mv = {.double_val = val}; - return upb_Array_Append(array, mv, arena); -} - -UPB_API bool upb_Array_AppendFloat(upb_Array* array, float val, - upb_Arena* arena) { - const upb_MessageValue mv = {.float_val = val}; - return upb_Array_Append(array, mv, arena); -} - -UPB_API bool upb_Array_AppendInt32(upb_Array* array, int32_t val, - upb_Arena* arena) { - const upb_MessageValue mv = {.int32_val = val}; - return upb_Array_Append(array, mv, arena); -} - -UPB_API bool upb_Array_AppendUInt32(upb_Array* array, uint32_t val, - upb_Arena* arena) { - const upb_MessageValue mv = {.uint32_val = val}; - return upb_Array_Append(array, mv, arena); -} - -//////////////////////////////////////////////////////////////////////////////// - -UPB_API void upb_Array_SetBool(upb_Array* array, size_t i, bool val) { - const upb_MessageValue mv = {.bool_val = val}; - upb_Array_Set(array, i, mv); -} - -UPB_API void upb_Array_SetDouble(upb_Array* array, size_t i, double val) { - const upb_MessageValue mv = {.double_val = val}; - upb_Array_Set(array, i, mv); -} - -UPB_API void upb_Array_SetFloat(upb_Array* array, size_t i, float val) { - const upb_MessageValue mv = {.float_val = val}; - upb_Array_Set(array, i, mv); -} - -UPB_API void upb_Array_SetInt32(upb_Array* array, size_t i, int32_t val) { - const upb_MessageValue mv = {.int32_val = val}; - upb_Array_Set(array, i, mv); -} - -UPB_API void upb_Array_SetUInt32(upb_Array* array, size_t i, uint32_t val) { - const upb_MessageValue mv = {.uint32_val = val}; - upb_Array_Set(array, i, mv); -}