diff --git a/CHANGES.txt b/CHANGES.txt index ec9d0723e3..cb5bff5da6 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -11,6 +11,16 @@ Unreleased Changes (C++/Java/Python/PHP/Objective-C/C#/Ruby/JavaScript) * Change the API of FieldAccessListener to support callbacks for info extraction * make field_access_injector private +2021-05-07 version 3.17.1 (C++/Java/Python/PHP/Objective-C/C#/Ruby/JavaScript) + PHP + * Fixed JSON parser to allow multiple values from the same oneof as long as + all but one are null. + + Ruby + * Fixed JSON parser to allow multiple values from the same oneof as long as + all but one are null. + + 2021-05-07 version 3.17.0 (C++/Java/Python/PHP/Objective-C/C#/Ruby/JavaScript) Protocol Compiler diff --git a/autogen.sh b/autogen.sh index d00d21727c..105bf09b67 100755 --- a/autogen.sh +++ b/autogen.sh @@ -30,8 +30,12 @@ set -ex # The absence of a m4 directory in googletest causes autoreconf to fail when # building under the CentOS docker image. It's a warning in regular build on -# Ubuntu/gLinux as well. -mkdir -p third_party/googletest/m4 +# Ubuntu/gLinux as well. (This is only needed if git submodules have been +# initialized, which is typically only needed for testing; see the installation +# instructions for details.) +if test -d third_party/googletest; then + mkdir -p third_party/googletest/m4 +fi # TODO(kenton): Remove the ",no-obsolete" part and fix the resulting warnings. autoreconf -f -i -Wall,no-obsolete diff --git a/configure.ac b/configure.ac index c36328cd8d..c8a56cf7c8 100644 --- a/configure.ac +++ b/configure.ac @@ -125,7 +125,7 @@ AC_LINK_IFELSE( [have_ld_version_script=yes; AC_MSG_RESULT(yes)], [have_ld_version_script=no; AC_MSG_RESULT(no)]) LDFLAGS=$save_LDFLAGS -AM_CONDITIONAL([HAVE_LD_VERSION_SCRIPT], [test "$have_ld_version_script" == "yes"]) +AM_CONDITIONAL([HAVE_LD_VERSION_SCRIPT], [test "$have_ld_version_script" = "yes"]) # Checks for header files. AC_HEADER_STDC diff --git a/conformance/failure_list_php_c.txt b/conformance/failure_list_php_c.txt index 1982029112..63c7e8a024 100644 --- a/conformance/failure_list_php_c.txt +++ b/conformance/failure_list_php_c.txt @@ -1,4 +1,2 @@ Recommended.Proto2.JsonInput.FieldNameExtension.Validator Required.Proto2.JsonInput.StoresDefaultPrimitive.Validator -Required.Proto3.JsonInput.OneofFieldNullSecond.JsonOutput -Required.Proto3.JsonInput.OneofFieldNullSecond.ProtobufOutput diff --git a/conformance/failure_list_ruby.txt b/conformance/failure_list_ruby.txt index ea5de36609..4938202ad7 100644 --- a/conformance/failure_list_ruby.txt +++ b/conformance/failure_list_ruby.txt @@ -56,5 +56,3 @@ Recommended.Proto3.ProtobufInput.ValidDataRepeated.UINT32.PackedInput.UnpackedOu Recommended.Proto3.ProtobufInput.ValidDataRepeated.UINT32.UnpackedInput.UnpackedOutput.ProtobufOutput Recommended.Proto3.ProtobufInput.ValidDataRepeated.UINT64.PackedInput.UnpackedOutput.ProtobufOutput Recommended.Proto3.ProtobufInput.ValidDataRepeated.UINT64.UnpackedInput.UnpackedOutput.ProtobufOutput -Required.Proto3.JsonInput.OneofFieldNullSecond.JsonOutput -Required.Proto3.JsonInput.OneofFieldNullSecond.ProtobufOutput diff --git a/csharp/src/Google.Protobuf.Test/CodedInputStreamTest.cs b/csharp/src/Google.Protobuf.Test/CodedInputStreamTest.cs index 0ad286f378..5e72525fc9 100644 --- a/csharp/src/Google.Protobuf.Test/CodedInputStreamTest.cs +++ b/csharp/src/Google.Protobuf.Test/CodedInputStreamTest.cs @@ -161,12 +161,21 @@ namespace Google.Protobuf private static void AssertReadFromParseContext(ReadOnlySequence input, ParseContextAssertAction assertAction, bool assertIsAtEnd) { + // Check as ReadOnlySequence ParseContext.Initialize(input, out ParseContext parseCtx); assertAction(ref parseCtx); if (assertIsAtEnd) { Assert.IsTrue(SegmentedBufferHelper.IsAtEnd(ref parseCtx.buffer, ref parseCtx.state)); } + + // Check as ReadOnlySpan + ParseContext.Initialize(input.ToArray().AsSpan(), out ParseContext spanParseContext); + assertAction(ref spanParseContext); + if (assertIsAtEnd) + { + Assert.IsTrue(SegmentedBufferHelper.IsAtEnd(ref spanParseContext.buffer, ref spanParseContext.state)); + } } [Test] diff --git a/csharp/src/Google.Protobuf.Test/MessageParsingHelpers.cs b/csharp/src/Google.Protobuf.Test/MessageParsingHelpers.cs index 65d2fe0395..05f1e36f96 100644 --- a/csharp/src/Google.Protobuf.Test/MessageParsingHelpers.cs +++ b/csharp/src/Google.Protobuf.Test/MessageParsingHelpers.cs @@ -41,32 +41,38 @@ namespace Google.Protobuf { public static void AssertReadingMessage(MessageParser parser, byte[] bytes, Action assert) where T : IMessage { - var parsedStream = parser.ParseFrom(bytes); + var parsedMsg = parser.ParseFrom(bytes); + assert(parsedMsg); // Load content as single segment - var parsedBuffer = parser.ParseFrom(new ReadOnlySequence(bytes)); - assert(parsedBuffer); + parsedMsg = parser.ParseFrom(new ReadOnlySequence(bytes)); + assert(parsedMsg); // Load content as multiple segments - parsedBuffer = parser.ParseFrom(ReadOnlySequenceFactory.CreateWithContent(bytes)); - assert(parsedBuffer); + parsedMsg = parser.ParseFrom(ReadOnlySequenceFactory.CreateWithContent(bytes)); + assert(parsedMsg); - assert(parsedStream); + // Load content as ReadOnlySpan + parsedMsg = parser.ParseFrom(new ReadOnlySpan(bytes)); + assert(parsedMsg); } public static void AssertReadingMessage(MessageParser parser, byte[] bytes, Action assert) { - var parsedStream = parser.ParseFrom(bytes); + var parsedMsg = parser.ParseFrom(bytes); + assert(parsedMsg); // Load content as single segment - var parsedBuffer = parser.ParseFrom(new ReadOnlySequence(bytes)); - assert(parsedBuffer); + parsedMsg = parser.ParseFrom(new ReadOnlySequence(bytes)); + assert(parsedMsg); // Load content as multiple segments - parsedBuffer = parser.ParseFrom(ReadOnlySequenceFactory.CreateWithContent(bytes)); - assert(parsedBuffer); + parsedMsg = parser.ParseFrom(ReadOnlySequenceFactory.CreateWithContent(bytes)); + assert(parsedMsg); - assert(parsedStream); + // Load content as ReadOnlySpan + parsedMsg = parser.ParseFrom(new ReadOnlySpan(bytes)); + assert(parsedMsg); } public static void AssertReadingMessageThrows(MessageParser parser, byte[] bytes) @@ -76,6 +82,8 @@ namespace Google.Protobuf Assert.Throws(() => parser.ParseFrom(bytes)); Assert.Throws(() => parser.ParseFrom(new ReadOnlySequence(bytes))); + + Assert.Throws(() => parser.ParseFrom(new ReadOnlySpan(bytes))); } public static void AssertRoundtrip(MessageParser parser, T message, Action additionalAssert = null) where T : IMessage @@ -87,20 +95,24 @@ namespace Google.Protobuf message.WriteTo(bufferWriter); Assert.AreEqual(bytes, bufferWriter.WrittenSpan.ToArray(), "Both serialization approaches need to result in the same data."); + var parsedMsg = parser.ParseFrom(bytes); + Assert.AreEqual(message, parsedMsg); + additionalAssert?.Invoke(parsedMsg); + // Load content as single segment - var parsedBuffer = parser.ParseFrom(new ReadOnlySequence(bytes)); - Assert.AreEqual(message, parsedBuffer); - additionalAssert?.Invoke(parsedBuffer); + parsedMsg = parser.ParseFrom(new ReadOnlySequence(bytes)); + Assert.AreEqual(message, parsedMsg); + additionalAssert?.Invoke(parsedMsg); // Load content as multiple segments - parsedBuffer = parser.ParseFrom(ReadOnlySequenceFactory.CreateWithContent(bytes)); - Assert.AreEqual(message, parsedBuffer); - additionalAssert?.Invoke(parsedBuffer); - - var parsedStream = parser.ParseFrom(bytes); - - Assert.AreEqual(message, parsedStream); - additionalAssert?.Invoke(parsedStream); + parsedMsg = parser.ParseFrom(ReadOnlySequenceFactory.CreateWithContent(bytes)); + Assert.AreEqual(message, parsedMsg); + additionalAssert?.Invoke(parsedMsg); + + // Load content as ReadOnlySpan + parsedMsg = parser.ParseFrom(new ReadOnlySpan(bytes)); + Assert.AreEqual(message, parsedMsg); + additionalAssert?.Invoke(parsedMsg); } public static void AssertWritingMessage(IMessage message) diff --git a/csharp/src/Google.Protobuf/CodedInputStream.cs b/csharp/src/Google.Protobuf/CodedInputStream.cs index b09f96ce28..27b23c0d9c 100644 --- a/csharp/src/Google.Protobuf/CodedInputStream.cs +++ b/csharp/src/Google.Protobuf/CodedInputStream.cs @@ -435,8 +435,7 @@ namespace Google.Protobuf // we will need to switch back again to CodedInputStream-based parsing (which involves copying and storing the state) to be able to // invoke the legacy MergeFrom(CodedInputStream) method. // For now, this inefficiency is fine, considering this is only a backward-compatibility scenario (and regenerating the code fixes it). - var span = new ReadOnlySpan(buffer); - ParseContext.Initialize(ref span, ref state, out ParseContext ctx); + ParseContext.Initialize(buffer.AsSpan(), ref state, out ParseContext ctx); try { ParsingPrimitivesMessages.ReadMessage(ref ctx, builder); diff --git a/csharp/src/Google.Protobuf/MessageExtensions.cs b/csharp/src/Google.Protobuf/MessageExtensions.cs index 36a9df7286..c4b3f82343 100644 --- a/csharp/src/Google.Protobuf/MessageExtensions.cs +++ b/csharp/src/Google.Protobuf/MessageExtensions.cs @@ -79,6 +79,15 @@ namespace Google.Protobuf public static void MergeFrom(this IMessage message, Stream input) => MergeFrom(message, input, false, null); + /// + /// Merges data from the given span into an existing message. + /// + /// The message to merge the data into. + /// Span containing the data to merge, which must be protobuf-encoded binary data. + [SecuritySafeCritical] + public static void MergeFrom(this IMessage message, ReadOnlySpan span) => + MergeFrom(message, span, false, null); + /// /// Merges length-delimited data from the given stream into an existing message. /// @@ -294,6 +303,16 @@ namespace Google.Protobuf ParsingPrimitivesMessages.CheckReadEndOfStreamTag(ref ctx.state); } + [SecuritySafeCritical] + internal static void MergeFrom(this IMessage message, ReadOnlySpan data, bool discardUnknownFields, ExtensionRegistry registry) + { + ParseContext.Initialize(data, out ParseContext ctx); + ctx.DiscardUnknownFields = discardUnknownFields; + ctx.ExtensionRegistry = registry; + ParsingPrimitivesMessages.ReadRawMessage(ref ctx, message); + ParsingPrimitivesMessages.CheckReadEndOfStreamTag(ref ctx.state); + } + internal static void MergeDelimitedFrom(this IMessage message, Stream input, bool discardUnknownFields, ExtensionRegistry registry) { ProtoPreconditions.CheckNotNull(message, "message"); diff --git a/csharp/src/Google.Protobuf/MessageParser.cs b/csharp/src/Google.Protobuf/MessageParser.cs index f8b26c2348..30a25a8698 100644 --- a/csharp/src/Google.Protobuf/MessageParser.cs +++ b/csharp/src/Google.Protobuf/MessageParser.cs @@ -128,6 +128,19 @@ namespace Google.Protobuf return message; } + /// + /// Parses a message from the given span. + /// + /// The data to parse. + /// The parsed message. + [SecuritySafeCritical] + public IMessage ParseFrom(ReadOnlySpan data) + { + IMessage message = factory(); + message.MergeFrom(data, DiscardUnknownFields, Extensions); + return message; + } + /// /// Parses a length-delimited message from the given stream. /// @@ -315,6 +328,19 @@ namespace Google.Protobuf return message; } + /// + /// Parses a message from the given span. + /// + /// The data to parse. + /// The parsed message. + [SecuritySafeCritical] + public new T ParseFrom(ReadOnlySpan data) + { + T message = factory(); + message.MergeFrom(data, DiscardUnknownFields, Extensions); + return message; + } + /// /// Parses a length-delimited message from the given stream. /// diff --git a/csharp/src/Google.Protobuf/ParseContext.cs b/csharp/src/Google.Protobuf/ParseContext.cs index bf46236565..7b278b5a92 100644 --- a/csharp/src/Google.Protobuf/ParseContext.cs +++ b/csharp/src/Google.Protobuf/ParseContext.cs @@ -58,8 +58,27 @@ namespace Google.Protobuf internal ReadOnlySpan buffer; internal ParserInternalState state; + /// + /// Initialize a , building all from defaults and + /// the given . + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + internal static void Initialize(ReadOnlySpan buffer, out ParseContext ctx) + { + ParserInternalState state = default; + state.sizeLimit = DefaultSizeLimit; + state.recursionLimit = DefaultRecursionLimit; + state.currentLimit = int.MaxValue; + state.bufferSize = buffer.Length; + + Initialize(buffer, ref state, out ctx); + } + + /// + /// Initialize a using existing , e.g. from . + /// [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal static void Initialize(ref ReadOnlySpan buffer, ref ParserInternalState state, out ParseContext ctx) + internal static void Initialize(ReadOnlySpan buffer, ref ParserInternalState state, out ParseContext ctx) { ctx.buffer = buffer; ctx.state = state; diff --git a/java/kotlin/pom.xml b/java/kotlin/pom.xml index e1c1c7f4ab..f5de8cd3bd 100644 --- a/java/kotlin/pom.xml +++ b/java/kotlin/pom.xml @@ -18,6 +18,7 @@ 1.5.0 + 1.4.32 @@ -234,7 +235,46 @@ + + org.jetbrains.dokka + dokka-maven-plugin + ${dokka.version} + + + pre-site + + dokka + + + + + gcode/kotlin + ${project.basedir}/src/main/kotlin/com/google/protobuf + + + + https://developers.google.com/protocol-buffers/docs/reference/java/ + + + + + + org.jetbrains.dokka + gfm-plugin + ${dokka.version} + + + + + + + + jcenter + JCenter + https://jcenter.bintray.com/ + + diff --git a/js/README.md b/js/README.md index 2f5490b108..dcc9e2b698 100644 --- a/js/README.md +++ b/js/README.md @@ -39,9 +39,8 @@ If you want, you can compile `protoc` from source instead. To do this follow the instructions in [the top-level README](https://github.com/protocolbuffers/protobuf/blob/master/src/README.md). -Once you have `protoc` compiled, you can run the tests by typing: +Once you have `protoc` compiled, you can run the tests provided along with our project to examine whether it can run successfully. In order to do this, you should download the Protocol Buffer source code from the release page with the link above. Then extract the source code and navigate to the folder named `js` containing a `package.json` file and a series of test files. In this folder, you can run the commands below to run the tests automatically. - $ cd js $ npm install $ npm test diff --git a/objectivec/README.md b/objectivec/README.md index 2583779d38..bbe5726d45 100644 --- a/objectivec/README.md +++ b/objectivec/README.md @@ -194,4 +194,4 @@ Documentation The complete documentation for Protocol Buffers is available via the web at: - https://developers.google.com/protocol-buffers/ +https://developers.google.com/protocol-buffers/ diff --git a/php/ext/google/protobuf/message.c b/php/ext/google/protobuf/message.c index 2d9f9b4cc8..7cd7d23208 100644 --- a/php/ext/google/protobuf/message.c +++ b/php/ext/google/protobuf/message.c @@ -149,6 +149,9 @@ static bool Message_set(Message *intern, const upb_fielddef *f, zval *val) { } else if (upb_fielddef_isseq(f)) { msgval.array_val = RepeatedField_GetUpbArray(val, TypeInfo_Get(f), arena); if (!msgval.array_val) return false; + } else if (upb_fielddef_issubmsg(f) && Z_TYPE_P(val) == IS_NULL) { + upb_msg_clearfield(intern->msg, f); + return true; } else { if (!Convert_PhpToUpb(val, &msgval, TypeInfo_Get(f), arena)) return false; } @@ -198,8 +201,6 @@ static bool MessageEq(const upb_msg *m1, const upb_msg *m2, const upb_msgdef *m) !upb_msg_field_done(&i); upb_msg_field_next(&i)) { const upb_fielddef *f = upb_msg_iter_field(&i); - upb_msgval val1 = upb_msg_get(m1, f); - upb_msgval val2 = upb_msg_get(m2, f); if (upb_fielddef_haspresence(f)) { if (upb_msg_has(m1, f) != upb_msg_has(m2, f)) { @@ -208,6 +209,9 @@ static bool MessageEq(const upb_msg *m1, const upb_msg *m2, const upb_msgdef *m) if (!upb_msg_has(m1, f)) continue; } + upb_msgval val1 = upb_msg_get(m1, f); + upb_msgval val2 = upb_msg_get(m2, f); + if (upb_fielddef_ismap(f)) { if (!MapEq(val1.map_val, val2.map_val, MapType_Get(f))) return false; } else if (upb_fielddef_isseq(f)) { @@ -454,11 +458,6 @@ bool Message_GetUpbMessage(zval *val, const Descriptor *desc, upb_arena *arena, ZVAL_DEREF(val); } - if (Z_TYPE_P(val) == IS_NULL) { - *msg = NULL; - return true; - } - if (Z_TYPE_P(val) == IS_OBJECT && instanceof_function(Z_OBJCE_P(val), desc->class_entry)) { Message *intern = (Message*)Z_OBJ_P(val); @@ -466,7 +465,8 @@ bool Message_GetUpbMessage(zval *val, const Descriptor *desc, upb_arena *arena, *msg = intern->msg; return true; } else { - zend_throw_exception_ex(NULL, 0, "Given value is not an instance of %s.", + zend_throw_exception_ex(zend_ce_type_error, 0, + "Given value is not an instance of %s.", ZSTR_VAL(desc->class_entry->name)); return false; } @@ -1051,7 +1051,10 @@ PHP_METHOD(Message, writeOneof) { f = upb_msgdef_itof(intern->desc->msgdef, field_num); - if (!Convert_PhpToUpb(val, &msgval, TypeInfo_Get(f), arena)) { + if (upb_fielddef_issubmsg(f) && Z_TYPE_P(val) == IS_NULL) { + upb_msg_clearfield(intern->msg, f); + return; + } else if (!Convert_PhpToUpb(val, &msgval, TypeInfo_Get(f), arena)) { return; } diff --git a/php/ext/google/protobuf/php-upb.c b/php/ext/google/protobuf/php-upb.c index 913dfad7e9..774c8d22eb 100644 --- a/php/ext/google/protobuf/php-upb.c +++ b/php/ext/google/protobuf/php-upb.c @@ -1,27 +1,54 @@ /* Amalgamated source file */ #include "php-upb.h" /* -* This is where we define macros used across upb. -* -* All of these macros are undef'd in port_undef.inc to avoid leaking them to -* users. -* -* The correct usage is: -* -* #include "upb/foobar.h" -* #include "upb/baz.h" -* -* // MUST be last included header. -* #include "upb/port_def.inc" -* -* // Code for this file. -* // <...> -* -* // Can be omitted for .c files, required for .h. -* #include "upb/port_undef.inc" -* -* This file is private and must not be included by users! -*/ + * Copyright (c) 2009-2021, Google LLC + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Google LLC nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * This is where we define macros used across upb. + * + * All of these macros are undef'd in port_undef.inc to avoid leaking them to + * users. + * + * The correct usage is: + * + * #include "upb/foobar.h" + * #include "upb/baz.h" + * + * // MUST be last included header. + * #include "upb/port_def.inc" + * + * // Code for this file. + * // <...> + * + * // Can be omitted for .c files, required for .h. + * #include "upb/port_undef.inc" + * + * This file is private and must not be included by users! + */ #if !((defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L) || \ (defined(__cplusplus) && __cplusplus >= 201103L) || \ @@ -137,9 +164,40 @@ #define UPB_LONGJMP(buf, val) longjmp(buf, val) #endif +/* UPB_PTRADD(ptr, ofs): add pointer while avoiding "NULL + 0" UB */ +#define UPB_PTRADD(ptr, ofs) ((ofs) ? (ptr) + (ofs) : (ptr)) + /* Configure whether fasttable is switched on or not. *************************/ -#if defined(__x86_64__) && defined(__GNUC__) +#if defined(__has_attribute) +#define UPB_HAS_ATTRIBUTE(x) __has_attribute(x) +#else +#define UPB_HAS_ATTRIBUTE(x) 0 +#endif + +#if UPB_HAS_ATTRIBUTE(musttail) +#define UPB_MUSTTAIL __attribute__((musttail)) +#else +#define UPB_MUSTTAIL +#endif + +#undef UPB_HAS_ATTRIBUTE + +/* This check is not fully robust: it does not require that we have "musttail" + * support available. We need tail calls to avoid consuming arbitrary amounts + * of stack space. + * + * GCC/Clang can mostly be trusted to generate tail calls as long as + * optimization is enabled, but, debug builds will not generate tail calls + * unless "musttail" is available. + * + * We should probably either: + * 1. require that the compiler supports musttail. + * 2. add some fallback code for when musttail isn't available (ie. return + * instead of tail calling). This is safe and portable, but this comes at + * a CPU cost. + */ +#if (defined(__x86_64__) || defined(__aarch64__)) && defined(__GNUC__) #define UPB_FASTTABLE_SUPPORTED 1 #else #define UPB_FASTTABLE_SUPPORTED 0 @@ -150,7 +208,7 @@ * for example for testing or benchmarking. */ #if defined(UPB_ENABLE_FASTTABLE) #if !UPB_FASTTABLE_SUPPORTED -#error fasttable is x86-64 + Clang/GCC only +#error fasttable is x86-64/ARM64 only and requires GCC or Clang. #endif #define UPB_FASTTABLE 1 /* Define UPB_TRY_ENABLE_FASTTABLE to use fasttable if possible. @@ -194,8 +252,9 @@ void __asan_unpoison_memory_region(void const volatile *addr, size_t size); ((void)(addr), (void)(size)) #define UPB_UNPOISON_MEMORY_REGION(addr, size) \ ((void)(addr), (void)(size)) -#endif +#endif +/** upb/decode.c ************************************************************/ #include #include @@ -891,7 +950,7 @@ bool _upb_decode(const char *buf, size_t size, void *msg, state.end_group = DECODE_NOGROUP; state.arena.head = arena->head; state.arena.last_size = arena->last_size; - state.arena.cleanups = arena->cleanups; + state.arena.cleanup_metadata = arena->cleanup_metadata; state.arena.parent = arena; if (UPB_UNLIKELY(UPB_SETJMP(state.err))) { @@ -902,7 +961,7 @@ bool _upb_decode(const char *buf, size_t size, void *msg, arena->head.ptr = state.arena.head.ptr; arena->head.end = state.arena.head.end; - arena->cleanups = state.arena.cleanups; + arena->cleanup_metadata = state.arena.cleanup_metadata; return ok; } @@ -911,6 +970,8 @@ bool _upb_decode(const char *buf, size_t size, void *msg, #undef OP_VARPCK_LG2 #undef OP_STRING #undef OP_SUBMSG + +/** upb/encode.c ************************************************************/ /* We encode backwards, to avoid pre-computing lengths (one-pass encode). */ @@ -1386,7 +1447,7 @@ char *upb_encode_ex(const void *msg, const upb_msglayout *l, int options, return ret; } - +/** upb/msg.c ************************************************************/ /** upb_msg *******************************************************************/ @@ -1517,7 +1578,7 @@ upb_map *_upb_map_new(upb_arena *a, size_t key_size, size_t value_size) { return NULL; } - upb_strtable_init2(&map->table, UPB_CTYPE_INT32, 4, upb_arena_alloc(a)); + upb_strtable_init(&map->table, 4, a); map->key_size = key_size; map->val_size = value_size; @@ -1638,11 +1699,13 @@ bool _upb_mapsorter_pushmap(_upb_mapsorter *s, upb_descriptortype_t key_type, qsort(&s->entries[sorted->start], map_size, sizeof(*s->entries), compar); return true; } + +/** upb/table.c ************************************************************/ /* -** upb_table Implementation -** -** Implementation is heavily inspired by Lua's ltable.c. -*/ + * upb_table Implementation + * + * Implementation is heavily inspired by Lua's ltable.c. + */ #include @@ -1663,9 +1726,15 @@ static const double MAX_LOAD = 0.85; * cache effects). The lower this is, the more memory we'll use. */ static const double MIN_DENSITY = 0.1; -bool is_pow2(uint64_t v) { return v == 0 || (v & (v - 1)) == 0; } +static bool is_pow2(uint64_t v) { return v == 0 || (v & (v - 1)) == 0; } -int log2ceil(uint64_t v) { +static upb_value _upb_value_val(uint64_t val) { + upb_value ret; + _upb_value_setval(&ret, val); + return ret; +} + +static int log2ceil(uint64_t v) { int ret = 0; bool pow2 = is_pow2(v); while (v >>= 1) ret++; @@ -1673,11 +1742,7 @@ int log2ceil(uint64_t v) { return UPB_MIN(UPB_MAXARRSIZE, ret); } -char *upb_strdup(const char *s, upb_alloc *a) { - return upb_strdup2(s, strlen(s), a); -} - -char *upb_strdup2(const char *s, size_t len, upb_alloc *a) { +char *upb_strdup2(const char *s, size_t len, upb_arena *a) { size_t n; char *p; @@ -1686,7 +1751,7 @@ char *upb_strdup2(const char *s, size_t len, upb_alloc *a) { /* Always null-terminate, even if binary data; but don't rely on the input to * have a null-terminating byte since it may be a raw binary buffer. */ n = len + 1; - p = upb_malloc(a, n); + p = upb_arena_malloc(a, n); if (p) { memcpy(p, s, len); p[len] = 0; @@ -1721,16 +1786,24 @@ typedef bool eqlfunc_t(upb_tabkey k1, lookupkey_t k2); /* Base table (shared code) ***************************************************/ -/* For when we need to cast away const. */ -static upb_tabent *mutable_entries(upb_table *t) { - return (upb_tabent*)t->entries; +static uint32_t upb_inthash(uintptr_t key) { + return (uint32_t)key; +} + +static const upb_tabent *upb_getentry(const upb_table *t, uint32_t hash) { + return t->entries + (hash & t->mask); +} + +static bool upb_arrhas(upb_tabval key) { + return key.val != (uint64_t)-1; } + static bool isfull(upb_table *t) { return t->count == t->max_count; } -static bool init(upb_table *t, uint8_t size_lg2, upb_alloc *a) { +static bool init(upb_table *t, uint8_t size_lg2, upb_arena *a) { size_t bytes; t->count = 0; @@ -1739,21 +1812,17 @@ static bool init(upb_table *t, uint8_t size_lg2, upb_alloc *a) { t->max_count = upb_table_size(t) * MAX_LOAD; bytes = upb_table_size(t) * sizeof(upb_tabent); if (bytes > 0) { - t->entries = upb_malloc(a, bytes); + t->entries = upb_arena_malloc(a, bytes); if (!t->entries) return false; - memset(mutable_entries(t), 0, bytes); + memset(t->entries, 0, bytes); } else { t->entries = NULL; } return true; } -static void uninit(upb_table *t, upb_alloc *a) { - upb_free(a, mutable_entries(t)); -} - static upb_tabent *emptyent(upb_table *t, upb_tabent *e) { - upb_tabent *begin = mutable_entries(t); + upb_tabent *begin = t->entries; upb_tabent *end = begin + upb_table_size(t); for (e = e + 1; e < end; e++) { if (upb_tabent_isempty(e)) return e; @@ -1903,9 +1972,9 @@ static size_t begin(const upb_table *t) { /* A simple "subclass" of upb_table that only adds a hash function for strings. */ -static upb_tabkey strcopy(lookupkey_t k2, upb_alloc *a) { +static upb_tabkey strcopy(lookupkey_t k2, upb_arena *a) { uint32_t len = (uint32_t) k2.str.len; - char *str = upb_malloc(a, k2.str.len + sizeof(uint32_t) + 1); + char *str = upb_arena_malloc(a, k2.str.len + sizeof(uint32_t) + 1); if (str == NULL) return 0; memcpy(str, &len, sizeof(uint32_t)); if (k2.str.len) memcpy(str + sizeof(uint32_t), k2.str.str, k2.str.len); @@ -1929,9 +1998,7 @@ static bool streql(upb_tabkey k1, lookupkey_t k2) { return len == k2.str.len && (len == 0 || memcmp(str, k2.str.str, len) == 0); } -bool upb_strtable_init2(upb_strtable *t, upb_ctype_t ctype, - size_t expected_size, upb_alloc *a) { - UPB_UNUSED(ctype); /* TODO(haberman): rm */ +bool upb_strtable_init(upb_strtable *t, size_t expected_size, upb_arena *a) { // Multiply by approximate reciprocal of MAX_LOAD (0.85), with pow2 denominator. size_t need_entries = (expected_size + 1) * 1204 / 1024; UPB_ASSERT(need_entries >= expected_size * 0.85); @@ -1945,14 +2012,7 @@ void upb_strtable_clear(upb_strtable *t) { memset((char*)t->t.entries, 0, bytes); } -void upb_strtable_uninit2(upb_strtable *t, upb_alloc *a) { - size_t i; - for (i = 0; i < upb_table_size(&t->t); i++) - upb_free(a, (void*)t->t.entries[i].key); - uninit(&t->t, a); -} - -bool upb_strtable_resize(upb_strtable *t, size_t size_lg2, upb_alloc *a) { +bool upb_strtable_resize(upb_strtable *t, size_t size_lg2, upb_arena *a) { upb_strtable new_table; upb_strtable_iter i; @@ -1961,17 +2021,15 @@ bool upb_strtable_resize(upb_strtable *t, size_t size_lg2, upb_alloc *a) { upb_strtable_begin(&i, t); for ( ; !upb_strtable_done(&i); upb_strtable_next(&i)) { upb_strview key = upb_strtable_iter_key(&i); - upb_strtable_insert3( - &new_table, key.data, key.size, - upb_strtable_iter_value(&i), a); + upb_strtable_insert(&new_table, key.data, key.size, + upb_strtable_iter_value(&i), a); } - upb_strtable_uninit2(t, a); *t = new_table; return true; } -bool upb_strtable_insert3(upb_strtable *t, const char *k, size_t len, - upb_value v, upb_alloc *a) { +bool upb_strtable_insert(upb_strtable *t, const char *k, size_t len, + upb_value v, upb_arena *a) { lookupkey_t key; upb_tabkey tabkey; uint32_t hash; @@ -1998,19 +2056,11 @@ bool upb_strtable_lookup2(const upb_strtable *t, const char *key, size_t len, return lookup(&t->t, strkey2(key, len), v, hash, &streql); } -bool upb_strtable_remove3(upb_strtable *t, const char *key, size_t len, - upb_value *val, upb_alloc *alloc) { +bool upb_strtable_remove(upb_strtable *t, const char *key, size_t len, + upb_value *val) { uint32_t hash = table_hash(key, len); upb_tabkey tabkey; - if (rm(&t->t, strkey2(key, len), val, &tabkey, hash, &streql)) { - if (alloc) { - /* Arena-based allocs don't need to free and won't pass this. */ - upb_free(alloc, (void*)tabkey); - } - return true; - } else { - return false; - } + return rm(&t->t, strkey2(key, len), val, &tabkey, hash, &streql); } /* Iteration */ @@ -2108,7 +2158,7 @@ static void check(upb_inttable *t) { } bool upb_inttable_sizedinit(upb_inttable *t, size_t asize, int hsize_lg2, - upb_alloc *a) { + upb_arena *a) { size_t array_bytes; if (!init(&t->t, hsize_lg2, a)) return false; @@ -2117,9 +2167,8 @@ bool upb_inttable_sizedinit(upb_inttable *t, size_t asize, int hsize_lg2, t->array_size = UPB_MAX(1, asize); t->array_count = 0; array_bytes = t->array_size * sizeof(upb_value); - t->array = upb_malloc(a, array_bytes); + t->array = upb_arena_malloc(a, array_bytes); if (!t->array) { - uninit(&t->t, a); return false; } memset(mutable_array(t), 0xff, array_bytes); @@ -2127,18 +2176,12 @@ bool upb_inttable_sizedinit(upb_inttable *t, size_t asize, int hsize_lg2, return true; } -bool upb_inttable_init2(upb_inttable *t, upb_ctype_t ctype, upb_alloc *a) { - UPB_UNUSED(ctype); /* TODO(haberman): rm */ +bool upb_inttable_init(upb_inttable *t, upb_arena *a) { return upb_inttable_sizedinit(t, 0, 4, a); } -void upb_inttable_uninit2(upb_inttable *t, upb_alloc *a) { - uninit(&t->t, a); - upb_free(a, mutable_array(t)); -} - -bool upb_inttable_insert2(upb_inttable *t, uintptr_t key, upb_value val, - upb_alloc *a) { +bool upb_inttable_insert(upb_inttable *t, uintptr_t key, upb_value val, + upb_arena *a) { upb_tabval tabval; tabval.val = val.val; UPB_ASSERT(upb_arrhas(tabval)); /* This will reject (uint64_t)-1. Fix this. */ @@ -2169,7 +2212,6 @@ bool upb_inttable_insert2(upb_inttable *t, uintptr_t key, upb_value val, UPB_ASSERT(t->t.count == new_table.count); - uninit(&t->t, a); t->t = new_table; } insert(&t->t, intkey(key), key, val, upb_inthash(key), &inthash, &inteql); @@ -2213,21 +2255,7 @@ bool upb_inttable_remove(upb_inttable *t, uintptr_t key, upb_value *val) { return success; } -bool upb_inttable_insertptr2(upb_inttable *t, const void *key, upb_value val, - upb_alloc *a) { - return upb_inttable_insert2(t, (uintptr_t)key, val, a); -} - -bool upb_inttable_lookupptr(const upb_inttable *t, const void *key, - upb_value *v) { - return upb_inttable_lookup(t, (uintptr_t)key, v); -} - -bool upb_inttable_removeptr(upb_inttable *t, const void *key, upb_value *val) { - return upb_inttable_remove(t, (uintptr_t)key, val); -} - -void upb_inttable_compact2(upb_inttable *t, upb_alloc *a) { +void upb_inttable_compact(upb_inttable *t, upb_arena *a) { /* A power-of-two histogram of the table keys. */ size_t counts[UPB_MAXARRSIZE + 1] = {0}; @@ -2275,12 +2303,11 @@ void upb_inttable_compact2(upb_inttable *t, upb_alloc *a) { upb_inttable_begin(&i, t); for (; !upb_inttable_done(&i); upb_inttable_next(&i)) { uintptr_t k = upb_inttable_iter_key(&i); - upb_inttable_insert2(&new_t, k, upb_inttable_iter_value(&i), a); + upb_inttable_insert(&new_t, k, upb_inttable_iter_value(&i), a); } UPB_ASSERT(new_t.array_size == arr_size); UPB_ASSERT(new_t.t.size_lg2 == hashsize_lg2); } - upb_inttable_uninit2(t, a); *t = new_t; } @@ -2354,6 +2381,7 @@ bool upb_inttable_iter_isequal(const upb_inttable_iter *i1, i1->array_part == i2->array_part; } +/** upb/upb.c ************************************************************/ #include #include @@ -2420,6 +2448,19 @@ static void *upb_global_allocfunc(upb_alloc *alloc, void *ptr, size_t oldsize, } } +static uint32_t *upb_cleanup_pointer(uintptr_t cleanup_metadata) { + return (uint32_t *)(cleanup_metadata & ~0x1); +} + +static bool upb_cleanup_has_initial_block(uintptr_t cleanup_metadata) { + return cleanup_metadata & 0x1; +} + +static uintptr_t upb_cleanup_metadata(uint32_t *cleanup, + bool has_initial_block) { + return (uintptr_t)cleanup | has_initial_block; +} + upb_alloc upb_alloc_global = {&upb_global_allocfunc}; /* upb_arena ******************************************************************/ @@ -2465,7 +2506,8 @@ static void upb_arena_addblock(upb_arena *a, upb_arena *root, void *ptr, a->head.ptr = UPB_PTR_AT(block, memblock_reserve, char); a->head.end = UPB_PTR_AT(block, size, char); - a->cleanups = &block->cleanups; + a->cleanup_metadata = upb_cleanup_metadata( + &block->cleanups, upb_cleanup_has_initial_block(a->cleanup_metadata)); UPB_POISON_MEMORY_REGION(a->head.ptr, a->head.end - a->head.ptr); } @@ -2513,6 +2555,7 @@ upb_arena *arena_initslow(void *mem, size_t n, upb_alloc *alloc) { a->refcount = 1; a->freelist = NULL; a->freelist_tail = NULL; + a->cleanup_metadata = upb_cleanup_metadata(NULL, false); upb_arena_addblock(a, a, mem, n); @@ -2540,7 +2583,7 @@ upb_arena *upb_arena_init(void *mem, size_t n, upb_alloc *alloc) { a->head.ptr = mem; a->head.end = UPB_PTR_AT(mem, n - sizeof(*a), char); a->freelist = NULL; - a->cleanups = NULL; + a->cleanup_metadata = upb_cleanup_metadata(NULL, true); return a; } @@ -2575,15 +2618,17 @@ void upb_arena_free(upb_arena *a) { bool upb_arena_addcleanup(upb_arena *a, void *ud, upb_cleanup_func *func) { cleanup_ent *ent; + uint32_t* cleanups = upb_cleanup_pointer(a->cleanup_metadata); - if (!a->cleanups || _upb_arenahas(a) < sizeof(cleanup_ent)) { + if (!cleanups || _upb_arenahas(a) < sizeof(cleanup_ent)) { if (!upb_arena_allocblock(a, 128)) return false; /* Out of memory. */ UPB_ASSERT(_upb_arenahas(a) >= sizeof(cleanup_ent)); + cleanups = upb_cleanup_pointer(a->cleanup_metadata); } a->head.end -= sizeof(cleanup_ent); ent = (cleanup_ent*)a->head.end; - (*a->cleanups)++; + (*cleanups)++; UPB_UNPOISON_MEMORY_REGION(ent, sizeof(cleanup_ent)); ent->cleanup = func; @@ -2592,11 +2637,18 @@ bool upb_arena_addcleanup(upb_arena *a, void *ud, upb_cleanup_func *func) { return true; } -void upb_arena_fuse(upb_arena *a1, upb_arena *a2) { +bool upb_arena_fuse(upb_arena *a1, upb_arena *a2) { upb_arena *r1 = arena_findroot(a1); upb_arena *r2 = arena_findroot(a2); - if (r1 == r2) return; /* Already fused. */ + if (r1 == r2) return true; /* Already fused. */ + + /* Do not fuse initial blocks since we cannot lifetime extend them. */ + if (upb_cleanup_has_initial_block(r1->cleanup_metadata)) return false; + if (upb_cleanup_has_initial_block(r2->cleanup_metadata)) return false; + + /* Only allow fuse with a common allocator */ + if (r1->block_alloc != r2->block_alloc) return false; /* We want to join the smaller tree to the larger tree. * So swap first if they are backwards. */ @@ -2614,12 +2666,15 @@ void upb_arena_fuse(upb_arena *a1, upb_arena *a2) { r1->freelist = r2->freelist; } r2->parent = r1; + return true; } -// Fast decoder: ~3x the speed of decode.c, but x86-64 specific. + +/** upb/decode_fast.c ************************************************************/ +// Fast decoder: ~3x the speed of decode.c, but requires x86-64/ARM64. // Also the table size grows by 2x. // -// Could potentially be ported to ARM64 or other 64-bit archs that pass at -// least six arguments in registers. +// Could potentially be ported to other 64-bit archs that pass at least six +// arguments in registers and have 8 unused high bits in pointers. // // The overall design is to create specialized functions for every possible // field type (eg. oneof boolean field with a 1 byte tag) and then dispatch @@ -2639,8 +2694,10 @@ void upb_arena_fuse(upb_arena *a1, upb_arena *a2) { #define UPB_PARSE_ARGS d, ptr, msg, table, hasbits, data -#define RETURN_GENERIC(m) \ - /* fprintf(stderr, m); */ \ +#define RETURN_GENERIC(m) \ + /* Uncomment either of these for debugging purposes. */ \ + /* fprintf(stderr, m); */ \ + /*__builtin_trap(); */ \ return fastdecode_generic(d, ptr, msg, table, hasbits, 0); typedef enum { @@ -2651,21 +2708,18 @@ typedef enum { } upb_card; UPB_NOINLINE -static const char *fastdecode_isdonefallback(upb_decstate *d, const char *ptr, - upb_msg *msg, intptr_t table, - uint64_t hasbits, int overrun) { +static const char *fastdecode_isdonefallback(UPB_PARSE_PARAMS) { + int overrun = data; ptr = decode_isdonefallback_inl(d, ptr, overrun); if (ptr == NULL) { return fastdecode_err(d); } - uint16_t tag = fastdecode_loadtag(ptr); - return fastdecode_tagdispatch(d, ptr, msg, table, hasbits, tag); + data = fastdecode_loadtag(ptr); + UPB_MUSTTAIL return fastdecode_tagdispatch(UPB_PARSE_ARGS); } UPB_FORCEINLINE -static const char *fastdecode_dispatch(upb_decstate *d, const char *ptr, - upb_msg *msg, intptr_t table, - uint64_t hasbits) { +static const char *fastdecode_dispatch(UPB_PARSE_PARAMS) { if (UPB_UNLIKELY(ptr >= d->limit_ptr)) { int overrun = ptr - d->end; if (UPB_LIKELY(overrun == d->limit)) { @@ -2673,21 +2727,22 @@ static const char *fastdecode_dispatch(upb_decstate *d, const char *ptr, *(uint32_t*)msg |= hasbits; // Sync hasbits. return ptr; } else { - return fastdecode_isdonefallback(d, ptr, msg, table, hasbits, overrun); + data = overrun; + UPB_MUSTTAIL return fastdecode_isdonefallback(UPB_PARSE_ARGS); } } // Read two bytes of tag data (for a one-byte tag, the high byte is junk). - uint16_t tag = fastdecode_loadtag(ptr); - return fastdecode_tagdispatch(d, ptr, msg, table, hasbits, tag); + data = fastdecode_loadtag(ptr); + UPB_MUSTTAIL return fastdecode_tagdispatch(UPB_PARSE_ARGS); } UPB_FORCEINLINE -static bool fastdecode_checktag(uint64_t data, int tagbytes) { +static bool fastdecode_checktag(uint16_t data, int tagbytes) { if (tagbytes == 1) { return (data & 0xff) == 0; } else { - return (data & 0xffff) == 0; + return data == 0; } } @@ -2911,6 +2966,14 @@ static bool fastdecode_flippacked(uint64_t *data, int tagbytes) { return fastdecode_checktag(*data, tagbytes); } +#define FASTDECODE_CHECKPACKED(tagbytes, card, func) \ + if (UPB_UNLIKELY(!fastdecode_checktag(data, tagbytes))) { \ + if (card == CARD_r && fastdecode_flippacked(&data, tagbytes)) { \ + UPB_MUSTTAIL return func(UPB_PARSE_ARGS); \ + } \ + RETURN_GENERIC("packed check tag mismatch\n"); \ + } + /* varint fields **************************************************************/ UPB_FORCEINLINE @@ -2953,57 +3016,50 @@ done: return ptr; } -UPB_FORCEINLINE -static const char *fastdecode_unpackedvarint(UPB_PARSE_PARAMS, int tagbytes, - int valbytes, upb_card card, - bool zigzag, - _upb_field_parser *packed) { - uint64_t val; - void *dst; - fastdecode_arr farr; - - if (UPB_UNLIKELY(!fastdecode_checktag(data, tagbytes))) { - if (card == CARD_r && fastdecode_flippacked(&data, tagbytes)) { - return packed(UPB_PARSE_ARGS); - } - RETURN_GENERIC("varint field tag mismatch\n"); - } - - dst = - fastdecode_getfield(d, ptr, msg, &data, &hasbits, &farr, valbytes, card); - if (card == CARD_r) { - if (UPB_UNLIKELY(!dst)) { - RETURN_GENERIC("need array resize\n"); - } - } - -again: - if (card == CARD_r) { - dst = fastdecode_resizearr(d, dst, &farr, valbytes); - } - - ptr += tagbytes; - ptr = fastdecode_varint64(ptr, &val); - if (ptr == NULL) return fastdecode_err(d); - val = fastdecode_munge(val, valbytes, zigzag); - memcpy(dst, &val, valbytes); - - if (card == CARD_r) { - fastdecode_nextret ret = - fastdecode_nextrepeated(d, dst, &ptr, &farr, data, tagbytes, valbytes); - switch (ret.next) { - case FD_NEXT_SAMEFIELD: - dst = ret.dst; - goto again; - case FD_NEXT_OTHERFIELD: - return fastdecode_tagdispatch(d, ptr, msg, table, hasbits, ret.tag); - case FD_NEXT_ATLIMIT: - return ptr; - } - } - - return fastdecode_dispatch(d, ptr, msg, table, hasbits); -} +#define FASTDECODE_UNPACKEDVARINT(d, ptr, msg, table, hasbits, data, tagbytes, \ + valbytes, card, zigzag, packed) \ + uint64_t val; \ + void *dst; \ + fastdecode_arr farr; \ + \ + FASTDECODE_CHECKPACKED(tagbytes, card, packed); \ + \ + dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, &farr, valbytes, \ + card); \ + if (card == CARD_r) { \ + if (UPB_UNLIKELY(!dst)) { \ + RETURN_GENERIC("need array resize\n"); \ + } \ + } \ + \ + again: \ + if (card == CARD_r) { \ + dst = fastdecode_resizearr(d, dst, &farr, valbytes); \ + } \ + \ + ptr += tagbytes; \ + ptr = fastdecode_varint64(ptr, &val); \ + if (ptr == NULL) \ + return fastdecode_err(d); \ + val = fastdecode_munge(val, valbytes, zigzag); \ + memcpy(dst, &val, valbytes); \ + \ + if (card == CARD_r) { \ + fastdecode_nextret ret = fastdecode_nextrepeated( \ + d, dst, &ptr, &farr, data, tagbytes, valbytes); \ + switch (ret.next) { \ + case FD_NEXT_SAMEFIELD: \ + dst = ret.dst; \ + goto again; \ + case FD_NEXT_OTHERFIELD: \ + data = ret.tag; \ + UPB_MUSTTAIL return fastdecode_tagdispatch(UPB_PARSE_ARGS); \ + case FD_NEXT_ATLIMIT: \ + return ptr; \ + } \ + } \ + \ + UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS); typedef struct { uint8_t valbytes; @@ -3032,50 +3088,37 @@ static const char *fastdecode_topackedvarint(upb_decstate *d, const char *ptr, return ptr; } -UPB_FORCEINLINE -static const char *fastdecode_packedvarint(UPB_PARSE_PARAMS, int tagbytes, - int valbytes, bool zigzag, - _upb_field_parser *unpacked) { - fastdecode_varintdata ctx = {valbytes, zigzag}; - - if (UPB_UNLIKELY(!fastdecode_checktag(data, tagbytes))) { - if (fastdecode_flippacked(&data, tagbytes)) { - return unpacked(UPB_PARSE_ARGS); - } else { - RETURN_GENERIC("varint field tag mismatch\n"); - } - } - - ctx.dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, &ctx.farr, - valbytes, CARD_r); - if (UPB_UNLIKELY(!ctx.dst)) { - RETURN_GENERIC("need array resize\n"); - } - - ptr += tagbytes; - ptr = fastdecode_delimited(d, ptr, &fastdecode_topackedvarint, &ctx); - - if (UPB_UNLIKELY(ptr == NULL)) { - return fastdecode_err(d); +#define FASTDECODE_PACKEDVARINT(d, ptr, msg, table, hasbits, data, tagbytes, \ + valbytes, zigzag, unpacked) \ + fastdecode_varintdata ctx = {valbytes, zigzag}; \ + \ + FASTDECODE_CHECKPACKED(tagbytes, CARD_r, unpacked); \ + \ + ctx.dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, &ctx.farr, \ + valbytes, CARD_r); \ + if (UPB_UNLIKELY(!ctx.dst)) { \ + RETURN_GENERIC("need array resize\n"); \ + } \ + \ + ptr += tagbytes; \ + ptr = fastdecode_delimited(d, ptr, &fastdecode_topackedvarint, &ctx); \ + \ + if (UPB_UNLIKELY(ptr == NULL)) { \ + return fastdecode_err(d); \ + } \ + \ + UPB_MUSTTAIL return fastdecode_dispatch(d, ptr, msg, table, hasbits, 0); + +#define FASTDECODE_VARINT(d, ptr, msg, table, hasbits, data, tagbytes, \ + valbytes, card, zigzag, unpacked, packed) \ + if (card == CARD_p) { \ + FASTDECODE_PACKEDVARINT(d, ptr, msg, table, hasbits, data, tagbytes, \ + valbytes, zigzag, unpacked); \ + } else { \ + FASTDECODE_UNPACKEDVARINT(d, ptr, msg, table, hasbits, data, tagbytes, \ + valbytes, card, zigzag, packed); \ } - return fastdecode_dispatch(d, ptr, msg, table, hasbits); -} - -UPB_FORCEINLINE -static const char *fastdecode_varint(UPB_PARSE_PARAMS, int tagbytes, - int valbytes, upb_card card, bool zigzag, - _upb_field_parser *unpacked, - _upb_field_parser *packed) { - if (card == CARD_p) { - return fastdecode_packedvarint(UPB_PARSE_ARGS, tagbytes, valbytes, zigzag, - unpacked); - } else { - return fastdecode_unpackedvarint(UPB_PARSE_ARGS, tagbytes, valbytes, card, - zigzag, packed); - } -} - #define z_ZZ true #define b_ZZ false #define v_ZZ false @@ -3086,10 +3129,10 @@ static const char *fastdecode_varint(UPB_PARSE_PARAMS, int tagbytes, #define F(card, type, valbytes, tagbytes) \ UPB_NOINLINE \ const char *upb_p##card##type##valbytes##_##tagbytes##bt(UPB_PARSE_PARAMS) { \ - return fastdecode_varint(UPB_PARSE_ARGS, tagbytes, valbytes, CARD_##card, \ - type##_ZZ, \ - &upb_pr##type##valbytes##_##tagbytes##bt, \ - &upb_pp##type##valbytes##_##tagbytes##bt); \ + FASTDECODE_VARINT(d, ptr, msg, table, hasbits, data, tagbytes, valbytes, \ + CARD_##card, type##_ZZ, \ + upb_pr##type##valbytes##_##tagbytes##bt, \ + upb_pp##type##valbytes##_##tagbytes##bt); \ } #define TYPES(card, tagbytes) \ @@ -3117,126 +3160,110 @@ TAGBYTES(p) #undef F #undef TYPES #undef TAGBYTES +#undef FASTDECODE_UNPACKEDVARINT +#undef FASTDECODE_PACKEDVARINT +#undef FASTDECODE_VARINT /* fixed fields ***************************************************************/ -UPB_FORCEINLINE -static const char *fastdecode_unpackedfixed(UPB_PARSE_PARAMS, int tagbytes, - int valbytes, upb_card card, - _upb_field_parser *packed) { - void *dst; - fastdecode_arr farr; - - if (UPB_UNLIKELY(!fastdecode_checktag(data, tagbytes))) { - if (card == CARD_r && fastdecode_flippacked(&data, tagbytes)) { - return packed(UPB_PARSE_ARGS); - } - RETURN_GENERIC("fixed field tag mismatch\n"); - } - - dst = - fastdecode_getfield(d, ptr, msg, &data, &hasbits, &farr, valbytes, card); - if (card == CARD_r) { - if (UPB_UNLIKELY(!dst)) { - RETURN_GENERIC("couldn't allocate array in arena\n"); - } - } - - -again: - if (card == CARD_r) { - dst = fastdecode_resizearr(d, dst, &farr, valbytes); - } - - ptr += tagbytes; - memcpy(dst, ptr, valbytes); - ptr += valbytes; - - if (card == CARD_r) { - fastdecode_nextret ret = - fastdecode_nextrepeated(d, dst, &ptr, &farr, data, tagbytes, valbytes); - switch (ret.next) { - case FD_NEXT_SAMEFIELD: - dst = ret.dst; - goto again; - case FD_NEXT_OTHERFIELD: - return fastdecode_tagdispatch(d, ptr, msg, table, hasbits, ret.tag); - case FD_NEXT_ATLIMIT: - return ptr; - } - } - - return fastdecode_dispatch(d, ptr, msg, table, hasbits); -} - -UPB_FORCEINLINE -static const char *fastdecode_packedfixed(UPB_PARSE_PARAMS, int tagbytes, - int valbytes, - _upb_field_parser *unpacked) { - if (UPB_UNLIKELY(!fastdecode_checktag(data, tagbytes))) { - if (fastdecode_flippacked(&data, tagbytes)) { - return unpacked(UPB_PARSE_ARGS); - } else { - RETURN_GENERIC("varint field tag mismatch\n"); - } - } - - ptr += tagbytes; - int size = (uint8_t)ptr[0]; - ptr++; - if (size & 0x80) { - ptr = fastdecode_longsize(ptr, &size); - } - - if (UPB_UNLIKELY(fastdecode_boundscheck(ptr, size, d->limit_ptr)) || - (size % valbytes) != 0) { - return fastdecode_err(d); +#define FASTDECODE_UNPACKEDFIXED(d, ptr, msg, table, hasbits, data, tagbytes, \ + valbytes, card, packed) \ + void *dst; \ + fastdecode_arr farr; \ + \ + FASTDECODE_CHECKPACKED(tagbytes, card, packed) \ + \ + dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, &farr, valbytes, \ + card); \ + if (card == CARD_r) { \ + if (UPB_UNLIKELY(!dst)) { \ + RETURN_GENERIC("couldn't allocate array in arena\n"); \ + } \ + } \ + \ + again: \ + if (card == CARD_r) { \ + dst = fastdecode_resizearr(d, dst, &farr, valbytes); \ + } \ + \ + ptr += tagbytes; \ + memcpy(dst, ptr, valbytes); \ + ptr += valbytes; \ + \ + if (card == CARD_r) { \ + fastdecode_nextret ret = fastdecode_nextrepeated( \ + d, dst, &ptr, &farr, data, tagbytes, valbytes); \ + switch (ret.next) { \ + case FD_NEXT_SAMEFIELD: \ + dst = ret.dst; \ + goto again; \ + case FD_NEXT_OTHERFIELD: \ + data = ret.tag; \ + UPB_MUSTTAIL return fastdecode_tagdispatch(UPB_PARSE_ARGS); \ + case FD_NEXT_ATLIMIT: \ + return ptr; \ + } \ + } \ + \ + UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS); + +#define FASTDECODE_PACKEDFIXED(d, ptr, msg, table, hasbits, data, tagbytes, \ + valbytes, unpacked) \ + FASTDECODE_CHECKPACKED(tagbytes, CARD_r, unpacked) \ + \ + ptr += tagbytes; \ + int size = (uint8_t)ptr[0]; \ + ptr++; \ + if (size & 0x80) { \ + ptr = fastdecode_longsize(ptr, &size); \ + } \ + \ + if (UPB_UNLIKELY(fastdecode_boundscheck(ptr, size, d->limit_ptr) || \ + (size % valbytes) != 0)) { \ + return fastdecode_err(d); \ + } \ + \ + upb_array **arr_p = fastdecode_fieldmem(msg, data); \ + upb_array *arr = *arr_p; \ + uint8_t elem_size_lg2 = __builtin_ctz(valbytes); \ + int elems = size / valbytes; \ + \ + if (UPB_LIKELY(!arr)) { \ + *arr_p = arr = _upb_array_new(&d->arena, elems, elem_size_lg2); \ + if (!arr) { \ + return fastdecode_err(d); \ + } \ + } else { \ + _upb_array_resize(arr, elems, &d->arena); \ + } \ + \ + char *dst = _upb_array_ptr(arr); \ + memcpy(dst, ptr, size); \ + arr->len = elems; \ + \ + ptr += size; \ + UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS); + +#define FASTDECODE_FIXED(d, ptr, msg, table, hasbits, data, tagbytes, \ + valbytes, card, unpacked, packed) \ + if (card == CARD_p) { \ + FASTDECODE_PACKEDFIXED(d, ptr, msg, table, hasbits, data, tagbytes, \ + valbytes, unpacked); \ + } else { \ + FASTDECODE_UNPACKEDFIXED(d, ptr, msg, table, hasbits, data, tagbytes, \ + valbytes, card, packed); \ } - upb_array **arr_p = fastdecode_fieldmem(msg, data); - upb_array *arr = *arr_p; - uint8_t elem_size_lg2 = __builtin_ctz(valbytes); - int elems = size / valbytes; - - if (UPB_LIKELY(!arr)) { - *arr_p = arr = _upb_array_new(&d->arena, elems, elem_size_lg2); - if (!arr) { - return fastdecode_err(d); - } - } else { - _upb_array_resize(arr, elems, &d->arena); - } - - char *dst = _upb_array_ptr(arr); - memcpy(dst, ptr, size); - arr->len = elems; - - return fastdecode_dispatch(d, ptr + size, msg, table, hasbits); -} - -UPB_FORCEINLINE -static const char *fastdecode_fixed(UPB_PARSE_PARAMS, int tagbytes, - int valbytes, upb_card card, - _upb_field_parser *unpacked, - _upb_field_parser *packed) { - if (card == CARD_p) { - return fastdecode_packedfixed(UPB_PARSE_ARGS, tagbytes, valbytes, unpacked); - } else { - return fastdecode_unpackedfixed(UPB_PARSE_ARGS, tagbytes, valbytes, card, - packed); - } -} - /* Generate all combinations: * {s,o,r,p} x {f4,f8} x {1bt,2bt} */ -#define F(card, valbytes, tagbytes) \ - UPB_NOINLINE \ - const char *upb_p##card##f##valbytes##_##tagbytes##bt(UPB_PARSE_PARAMS) { \ - return fastdecode_fixed(UPB_PARSE_ARGS, tagbytes, valbytes, CARD_##card, \ - &upb_ppf##valbytes##_##tagbytes##bt, \ - &upb_prf##valbytes##_##tagbytes##bt); \ +#define F(card, valbytes, tagbytes) \ + UPB_NOINLINE \ + const char *upb_p##card##f##valbytes##_##tagbytes##bt(UPB_PARSE_PARAMS) { \ + FASTDECODE_FIXED(d, ptr, msg, table, hasbits, data, tagbytes, valbytes, \ + CARD_##card, upb_ppf##valbytes##_##tagbytes##bt, \ + upb_prf##valbytes##_##tagbytes##bt); \ } #define TYPES(card, tagbytes) \ @@ -3255,6 +3282,8 @@ TAGBYTES(p) #undef F #undef TYPES #undef TAGBYTES +#undef FASTDECODE_UNPACKEDFIXED +#undef FASTDECODE_PACKEDFIXED /* string fields **************************************************************/ @@ -3266,56 +3295,54 @@ typedef const char *fastdecode_copystr_func(struct upb_decstate *d, UPB_NOINLINE static const char *fastdecode_verifyutf8(upb_decstate *d, const char *ptr, upb_msg *msg, intptr_t table, - uint64_t hasbits, upb_strview *dst) { + uint64_t hasbits, uint64_t data) { + upb_strview *dst = (upb_strview*)data; if (!decode_verifyutf8_inl(dst->data, dst->size)) { return fastdecode_err(d); } - return fastdecode_dispatch(d, ptr, msg, table, hasbits); -} - -UPB_FORCEINLINE -static const char *fastdecode_longstring(struct upb_decstate *d, - const char *ptr, upb_msg *msg, - intptr_t table, uint64_t hasbits, - upb_strview *dst, - bool validate_utf8) { - int size = (uint8_t)ptr[0]; // Could plumb through hasbits. - ptr++; - if (size & 0x80) { - ptr = fastdecode_longsize(ptr, &size); + UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS); +} + +#define FASTDECODE_LONGSTRING(d, ptr, msg, table, hasbits, dst, validate_utf8) \ + int size = (uint8_t)ptr[0]; /* Could plumb through hasbits. */ \ + ptr++; \ + if (size & 0x80) { \ + ptr = fastdecode_longsize(ptr, &size); \ + } \ + \ + if (UPB_UNLIKELY(fastdecode_boundscheck(ptr, size, d->limit_ptr))) { \ + dst->size = 0; \ + return fastdecode_err(d); \ + } \ + \ + if (d->alias) { \ + dst->data = ptr; \ + dst->size = size; \ + } else { \ + char *data = upb_arena_malloc(&d->arena, size); \ + if (!data) { \ + return fastdecode_err(d); \ + } \ + memcpy(data, ptr, size); \ + dst->data = data; \ + dst->size = size; \ + } \ + \ + ptr += size; \ + if (validate_utf8) { \ + data = (uint64_t)dst; \ + UPB_MUSTTAIL return fastdecode_verifyutf8(UPB_PARSE_ARGS); \ + } else { \ + UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS); \ } - if (UPB_UNLIKELY(fastdecode_boundscheck(ptr, size, d->limit_ptr))) { - dst->size = 0; - return fastdecode_err(d); - } - - if (d->alias) { - dst->data = ptr; - dst->size = size; - } else { - char *data = upb_arena_malloc(&d->arena, size); - if (!data) { - return fastdecode_err(d); - } - memcpy(data, ptr, size); - dst->data = data; - dst->size = size; - } - - if (validate_utf8) { - return fastdecode_verifyutf8(d, ptr + size, msg, table, hasbits, dst); - } else { - return fastdecode_dispatch(d, ptr + size, msg, table, hasbits); - } -} - UPB_NOINLINE static const char *fastdecode_longstring_utf8(struct upb_decstate *d, - const char *ptr, upb_msg *msg, - intptr_t table, uint64_t hasbits, - upb_strview *dst) { - return fastdecode_longstring(d, ptr, msg, table, hasbits, dst, true); + const char *ptr, upb_msg *msg, + intptr_t table, uint64_t hasbits, + uint64_t data) { + upb_strview *dst = (upb_strview*)data; + FASTDECODE_LONGSTRING(d, ptr, msg, table, hasbits, dst, true); } UPB_NOINLINE @@ -3323,8 +3350,9 @@ static const char *fastdecode_longstring_noutf8(struct upb_decstate *d, const char *ptr, upb_msg *msg, intptr_t table, uint64_t hasbits, - upb_strview *dst) { - return fastdecode_longstring(d, ptr, msg, table, hasbits, dst, false); + uint64_t data) { + upb_strview *dst = (upb_strview*)data; + FASTDECODE_LONGSTRING(d, ptr, msg, table, hasbits, dst, false); } UPB_FORCEINLINE @@ -3337,156 +3365,165 @@ static void fastdecode_docopy(upb_decstate *d, const char *ptr, uint32_t size, UPB_POISON_MEMORY_REGION(data + size, copy - size); } -UPB_FORCEINLINE -static const char *fastdecode_copystring(UPB_PARSE_PARAMS, int tagbytes, - upb_card card, bool validate_utf8) { - upb_strview *dst; - fastdecode_arr farr; - int64_t size; - size_t arena_has; - size_t common_has; - char *buf; - - UPB_ASSERT(!d->alias); - UPB_ASSERT(fastdecode_checktag(data, tagbytes)); - - dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, &farr, - sizeof(upb_strview), card); - -again: - if (card == CARD_r) { - dst = fastdecode_resizearr(d, dst, &farr, sizeof(upb_strview)); - } - - size = (uint8_t)ptr[tagbytes]; - ptr += tagbytes + 1; - dst->size = size; - - buf = d->arena.head.ptr; - arena_has = _upb_arenahas(&d->arena); - common_has = UPB_MIN(arena_has, (d->end - ptr) + 16); - - if (UPB_LIKELY(size <= 15 - tagbytes)) { - if (arena_has < 16) goto longstr; - d->arena.head.ptr += 16; - memcpy(buf, ptr - tagbytes - 1, 16); - dst->data = buf + tagbytes + 1; - } else if (UPB_LIKELY(size <= 32)) { - if (UPB_UNLIKELY(common_has < 32)) goto longstr; - fastdecode_docopy(d, ptr, size, 32, buf, dst); - } else if (UPB_LIKELY(size <= 64)) { - if (UPB_UNLIKELY(common_has < 64)) goto longstr; - fastdecode_docopy(d, ptr, size, 64, buf, dst); - } else if (UPB_LIKELY(size < 128)) { - if (UPB_UNLIKELY(common_has < 128)) goto longstr; - fastdecode_docopy(d, ptr, size, 128, buf, dst); - } else { - goto longstr; - } - - ptr += size; - - if (card == CARD_r) { - if (validate_utf8 && !decode_verifyutf8_inl(dst->data, dst->size)) { - return fastdecode_err(d); - } - fastdecode_nextret ret = fastdecode_nextrepeated( - d, dst, &ptr, &farr, data, tagbytes, sizeof(upb_strview)); - switch (ret.next) { - case FD_NEXT_SAMEFIELD: - dst = ret.dst; - goto again; - case FD_NEXT_OTHERFIELD: - return fastdecode_tagdispatch(d, ptr, msg, table, hasbits, ret.tag); - case FD_NEXT_ATLIMIT: - return ptr; - } - } - - if (card != CARD_r && validate_utf8) { - return fastdecode_verifyutf8(d, ptr, msg, table, hasbits, dst); - } - - return fastdecode_dispatch(d, ptr, msg, table, hasbits); - -longstr: - ptr--; - if (validate_utf8) { - return fastdecode_longstring_utf8(d, ptr, msg, table, hasbits, dst); - } else { - return fastdecode_longstring_noutf8(d, ptr, msg, table, hasbits, dst); - } -} - -UPB_FORCEINLINE -static const char *fastdecode_string(UPB_PARSE_PARAMS, int tagbytes, - upb_card card, _upb_field_parser *copyfunc, - bool validate_utf8) { - upb_strview *dst; - fastdecode_arr farr; - int64_t size; - - if (UPB_UNLIKELY(!fastdecode_checktag(data, tagbytes))) { - RETURN_GENERIC("string field tag mismatch\n"); - } - - if (UPB_UNLIKELY(!d->alias)) { - return copyfunc(UPB_PARSE_ARGS); - } - - dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, &farr, - sizeof(upb_strview), card); - -again: - if (card == CARD_r) { - dst = fastdecode_resizearr(d, dst, &farr, sizeof(upb_strview)); - } - - size = (int8_t)ptr[tagbytes]; - ptr += tagbytes + 1; - dst->data = ptr; - dst->size = size; - - if (UPB_UNLIKELY(fastdecode_boundscheck(ptr, size, d->end))) { - ptr--; - if (validate_utf8) { - return fastdecode_longstring_utf8(d, ptr, msg, table, hasbits, dst); - } else { - return fastdecode_longstring_noutf8(d, ptr, msg, table, hasbits, dst); - } - } - - ptr += size; - - if (card == CARD_r) { - if (validate_utf8 && !decode_verifyutf8_inl(dst->data, dst->size)) { - return fastdecode_err(d); - } - fastdecode_nextret ret = fastdecode_nextrepeated( - d, dst, &ptr, &farr, data, tagbytes, sizeof(upb_strview)); - switch (ret.next) { - case FD_NEXT_SAMEFIELD: - dst = ret.dst; - if (UPB_UNLIKELY(!d->alias)) { - // Buffer flipped and we can't alias any more. Bounce to copyfunc(), - // but via dispatch since we need to reload table data also. - fastdecode_commitarr(dst, &farr, sizeof(upb_strview)); - return fastdecode_tagdispatch(d, ptr, msg, table, hasbits, ret.tag); - } - goto again; - case FD_NEXT_OTHERFIELD: - return fastdecode_tagdispatch(d, ptr, msg, table, hasbits, ret.tag); - case FD_NEXT_ATLIMIT: - return ptr; - } - } - - if (card != CARD_r && validate_utf8) { - return fastdecode_verifyutf8(d, ptr, msg, table, hasbits, dst); - } - - return fastdecode_dispatch(d, ptr, msg, table, hasbits); -} +#define FASTDECODE_COPYSTRING(d, ptr, msg, table, hasbits, data, tagbytes, \ + card, validate_utf8) \ + upb_strview *dst; \ + fastdecode_arr farr; \ + int64_t size; \ + size_t arena_has; \ + size_t common_has; \ + char *buf; \ + \ + UPB_ASSERT(!d->alias); \ + UPB_ASSERT(fastdecode_checktag(data, tagbytes)); \ + \ + dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, &farr, \ + sizeof(upb_strview), card); \ + \ + again: \ + if (card == CARD_r) { \ + dst = fastdecode_resizearr(d, dst, &farr, sizeof(upb_strview)); \ + } \ + \ + size = (uint8_t)ptr[tagbytes]; \ + ptr += tagbytes + 1; \ + dst->size = size; \ + \ + buf = d->arena.head.ptr; \ + arena_has = _upb_arenahas(&d->arena); \ + common_has = UPB_MIN(arena_has, (d->end - ptr) + 16); \ + \ + if (UPB_LIKELY(size <= 15 - tagbytes)) { \ + if (arena_has < 16) \ + goto longstr; \ + d->arena.head.ptr += 16; \ + memcpy(buf, ptr - tagbytes - 1, 16); \ + dst->data = buf + tagbytes + 1; \ + } else if (UPB_LIKELY(size <= 32)) { \ + if (UPB_UNLIKELY(common_has < 32)) \ + goto longstr; \ + fastdecode_docopy(d, ptr, size, 32, buf, dst); \ + } else if (UPB_LIKELY(size <= 64)) { \ + if (UPB_UNLIKELY(common_has < 64)) \ + goto longstr; \ + fastdecode_docopy(d, ptr, size, 64, buf, dst); \ + } else if (UPB_LIKELY(size < 128)) { \ + if (UPB_UNLIKELY(common_has < 128)) \ + goto longstr; \ + fastdecode_docopy(d, ptr, size, 128, buf, dst); \ + } else { \ + goto longstr; \ + } \ + \ + ptr += size; \ + \ + if (card == CARD_r) { \ + if (validate_utf8 && !decode_verifyutf8_inl(dst->data, dst->size)) { \ + return fastdecode_err(d); \ + } \ + fastdecode_nextret ret = fastdecode_nextrepeated( \ + d, dst, &ptr, &farr, data, tagbytes, sizeof(upb_strview)); \ + switch (ret.next) { \ + case FD_NEXT_SAMEFIELD: \ + dst = ret.dst; \ + goto again; \ + case FD_NEXT_OTHERFIELD: \ + data = ret.tag; \ + UPB_MUSTTAIL return fastdecode_tagdispatch(UPB_PARSE_ARGS); \ + case FD_NEXT_ATLIMIT: \ + return ptr; \ + } \ + } \ + \ + if (card != CARD_r && validate_utf8) { \ + data = (uint64_t)dst; \ + UPB_MUSTTAIL return fastdecode_verifyutf8(UPB_PARSE_ARGS); \ + } \ + \ + UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS); \ + \ + longstr: \ + ptr--; \ + if (validate_utf8) { \ + UPB_MUSTTAIL return fastdecode_longstring_utf8(d, ptr, msg, table, \ + hasbits, (uint64_t)dst); \ + } else { \ + UPB_MUSTTAIL return fastdecode_longstring_noutf8(d, ptr, msg, table, \ + hasbits, (uint64_t)dst); \ + } + +#define FASTDECODE_STRING(d, ptr, msg, table, hasbits, data, tagbytes, card, \ + copyfunc, validate_utf8) \ + upb_strview *dst; \ + fastdecode_arr farr; \ + int64_t size; \ + \ + if (UPB_UNLIKELY(!fastdecode_checktag(data, tagbytes))) { \ + RETURN_GENERIC("string field tag mismatch\n"); \ + } \ + \ + if (UPB_UNLIKELY(!d->alias)) { \ + UPB_MUSTTAIL return copyfunc(UPB_PARSE_ARGS); \ + } \ + \ + dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, &farr, \ + sizeof(upb_strview), card); \ + \ + again: \ + if (card == CARD_r) { \ + dst = fastdecode_resizearr(d, dst, &farr, sizeof(upb_strview)); \ + } \ + \ + size = (int8_t)ptr[tagbytes]; \ + ptr += tagbytes + 1; \ + dst->data = ptr; \ + dst->size = size; \ + \ + if (UPB_UNLIKELY(fastdecode_boundscheck(ptr, size, d->end))) { \ + ptr--; \ + if (validate_utf8) { \ + return fastdecode_longstring_utf8(d, ptr, msg, table, hasbits, \ + (uint64_t)dst); \ + } else { \ + return fastdecode_longstring_noutf8(d, ptr, msg, table, hasbits, \ + (uint64_t)dst); \ + } \ + } \ + \ + ptr += size; \ + \ + if (card == CARD_r) { \ + if (validate_utf8 && !decode_verifyutf8_inl(dst->data, dst->size)) { \ + return fastdecode_err(d); \ + } \ + fastdecode_nextret ret = fastdecode_nextrepeated( \ + d, dst, &ptr, &farr, data, tagbytes, sizeof(upb_strview)); \ + switch (ret.next) { \ + case FD_NEXT_SAMEFIELD: \ + dst = ret.dst; \ + if (UPB_UNLIKELY(!d->alias)) { \ + /* Buffer flipped and we can't alias any more. Bounce to */ \ + /* copyfunc(), but via dispatch since we need to reload table */ \ + /* data also. */ \ + fastdecode_commitarr(dst, &farr, sizeof(upb_strview)); \ + data = ret.tag; \ + UPB_MUSTTAIL return fastdecode_tagdispatch(UPB_PARSE_ARGS); \ + } \ + goto again; \ + case FD_NEXT_OTHERFIELD: \ + data = ret.tag; \ + UPB_MUSTTAIL return fastdecode_tagdispatch(UPB_PARSE_ARGS); \ + case FD_NEXT_ATLIMIT: \ + return ptr; \ + } \ + } \ + \ + if (card != CARD_r && validate_utf8) { \ + data = (uint64_t)dst; \ + UPB_MUSTTAIL return fastdecode_verifyutf8(UPB_PARSE_ARGS); \ + } \ + \ + UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS); /* Generate all combinations: * {p,c} x {s,o,r} x {s, b} x {1bt,2bt} */ @@ -3494,16 +3531,16 @@ again: #define s_VALIDATE true #define b_VALIDATE false -#define F(card, tagbytes, type) \ - UPB_NOINLINE \ - const char *upb_c##card##type##_##tagbytes##bt(UPB_PARSE_PARAMS) { \ - return fastdecode_copystring(UPB_PARSE_ARGS, tagbytes, CARD_##card, \ - type##_VALIDATE); \ - } \ - const char *upb_p##card##type##_##tagbytes##bt(UPB_PARSE_PARAMS) { \ - return fastdecode_string(UPB_PARSE_ARGS, tagbytes, CARD_##card, \ - &upb_c##card##type##_##tagbytes##bt, \ - type##_VALIDATE); \ +#define F(card, tagbytes, type) \ + UPB_NOINLINE \ + const char *upb_c##card##type##_##tagbytes##bt(UPB_PARSE_PARAMS) { \ + FASTDECODE_COPYSTRING(d, ptr, msg, table, hasbits, data, tagbytes, \ + CARD_##card, type##_VALIDATE); \ + } \ + const char *upb_p##card##type##_##tagbytes##bt(UPB_PARSE_PARAMS) { \ + FASTDECODE_STRING(d, ptr, msg, table, hasbits, data, tagbytes, \ + CARD_##card, upb_c##card##type##_##tagbytes##bt, \ + type##_VALIDATE); \ } #define UTF8(card, tagbytes) \ @@ -3522,6 +3559,9 @@ TAGBYTES(r) #undef b_VALIDATE #undef F #undef TAGBYTES +#undef FASTDECODE_LONGSTRING +#undef FASTDECODE_COPYSTRING +#undef FASTDECODE_STRING /* message fields *************************************************************/ @@ -3554,82 +3594,82 @@ UPB_FORCEINLINE static const char *fastdecode_tosubmsg(upb_decstate *d, const char *ptr, void *ctx) { fastdecode_submsgdata *submsg = ctx; - ptr = fastdecode_dispatch(d, ptr, submsg->msg, submsg->table, 0); + ptr = fastdecode_dispatch(d, ptr, submsg->msg, submsg->table, 0, 0); UPB_ASSUME(ptr != NULL); return ptr; } -UPB_FORCEINLINE -static const char *fastdecode_submsg(UPB_PARSE_PARAMS, int tagbytes, - int msg_ceil_bytes, upb_card card) { - - if (UPB_UNLIKELY(!fastdecode_checktag(data, tagbytes))) { - RETURN_GENERIC("submessage field tag mismatch\n"); - } - - if (--d->depth == 0) return fastdecode_err(d); - - upb_msg **dst; - uint32_t submsg_idx = (data >> 16) & 0xff; - const upb_msglayout *tablep = decode_totablep(table); - const upb_msglayout *subtablep = tablep->submsgs[submsg_idx]; - fastdecode_submsgdata submsg = {decode_totable(subtablep)}; - fastdecode_arr farr; - - if (subtablep->table_mask == (uint8_t)-1) { - RETURN_GENERIC("submessage doesn't have fast tables."); - } - - dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, &farr, - sizeof(upb_msg *), card); - - if (card == CARD_s) { - *(uint32_t*)msg |= hasbits; - hasbits = 0; - } - -again: - if (card == CARD_r) { - dst = fastdecode_resizearr(d, dst, &farr, sizeof(upb_msg*)); - } - - submsg.msg = *dst; - - if (card == CARD_r || UPB_LIKELY(!submsg.msg)) { - *dst = submsg.msg = decode_newmsg_ceil(d, subtablep, msg_ceil_bytes); - } - - ptr += tagbytes; - ptr = fastdecode_delimited(d, ptr, fastdecode_tosubmsg, &submsg); - - if (UPB_UNLIKELY(ptr == NULL || d->end_group != DECODE_NOGROUP)) { - return fastdecode_err(d); - } - - if (card == CARD_r) { - fastdecode_nextret ret = fastdecode_nextrepeated( - d, dst, &ptr, &farr, data, tagbytes, sizeof(upb_msg *)); - switch (ret.next) { - case FD_NEXT_SAMEFIELD: - dst = ret.dst; - goto again; - case FD_NEXT_OTHERFIELD: - d->depth++; - return fastdecode_tagdispatch(d, ptr, msg, table, hasbits, ret.tag); - case FD_NEXT_ATLIMIT: - d->depth++; - return ptr; - } - } - - d->depth++; - return fastdecode_dispatch(d, ptr, msg, table, hasbits); -} - -#define F(card, tagbytes, size_ceil, ceil_arg) \ - const char *upb_p##card##m_##tagbytes##bt_max##size_ceil##b( \ - UPB_PARSE_PARAMS) { \ - return fastdecode_submsg(UPB_PARSE_ARGS, tagbytes, ceil_arg, CARD_##card); \ +#define FASTDECODE_SUBMSG(d, ptr, msg, table, hasbits, data, tagbytes, \ + msg_ceil_bytes, card) \ + \ + if (UPB_UNLIKELY(!fastdecode_checktag(data, tagbytes))) { \ + RETURN_GENERIC("submessage field tag mismatch\n"); \ + } \ + \ + if (--d->depth == 0) return fastdecode_err(d); \ + \ + upb_msg **dst; \ + uint32_t submsg_idx = (data >> 16) & 0xff; \ + const upb_msglayout *tablep = decode_totablep(table); \ + const upb_msglayout *subtablep = tablep->submsgs[submsg_idx]; \ + fastdecode_submsgdata submsg = {decode_totable(subtablep)}; \ + fastdecode_arr farr; \ + \ + if (subtablep->table_mask == (uint8_t)-1) { \ + RETURN_GENERIC("submessage doesn't have fast tables."); \ + } \ + \ + dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, &farr, \ + sizeof(upb_msg *), card); \ + \ + if (card == CARD_s) { \ + *(uint32_t *)msg |= hasbits; \ + hasbits = 0; \ + } \ + \ + again: \ + if (card == CARD_r) { \ + dst = fastdecode_resizearr(d, dst, &farr, sizeof(upb_msg *)); \ + } \ + \ + submsg.msg = *dst; \ + \ + if (card == CARD_r || UPB_LIKELY(!submsg.msg)) { \ + *dst = submsg.msg = decode_newmsg_ceil(d, subtablep, msg_ceil_bytes); \ + } \ + \ + ptr += tagbytes; \ + ptr = fastdecode_delimited(d, ptr, fastdecode_tosubmsg, &submsg); \ + \ + if (UPB_UNLIKELY(ptr == NULL || d->end_group != DECODE_NOGROUP)) { \ + return fastdecode_err(d); \ + } \ + \ + if (card == CARD_r) { \ + fastdecode_nextret ret = fastdecode_nextrepeated( \ + d, dst, &ptr, &farr, data, tagbytes, sizeof(upb_msg *)); \ + switch (ret.next) { \ + case FD_NEXT_SAMEFIELD: \ + dst = ret.dst; \ + goto again; \ + case FD_NEXT_OTHERFIELD: \ + d->depth++; \ + data = ret.tag; \ + UPB_MUSTTAIL return fastdecode_tagdispatch(UPB_PARSE_ARGS); \ + case FD_NEXT_ATLIMIT: \ + d->depth++; \ + return ptr; \ + } \ + } \ + \ + d->depth++; \ + UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS); + +#define F(card, tagbytes, size_ceil, ceil_arg) \ + const char *upb_p##card##m_##tagbytes##bt_max##size_ceil##b( \ + UPB_PARSE_PARAMS) { \ + FASTDECODE_SUBMSG(d, ptr, msg, table, hasbits, data, tagbytes, ceil_arg, \ + CARD_##card); \ } #define SIZES(card, tagbytes) \ @@ -3650,9 +3690,11 @@ TAGBYTES(r) #undef TAGBYTES #undef SIZES #undef F +#undef FASTDECODE_SUBMSG #endif /* UPB_FASTTABLE */ -/* This file was generated by upbc (the upb compiler) from the input + +/** bazel-out/k8-fastbuild/bin/external/com_google_protobuf/google/protobuf/descriptor.upb.c ************************************************************//* This file was generated by upbc (the upb compiler) from the input * file: * * google/protobuf/descriptor.proto @@ -4134,7 +4176,8 @@ const upb_msglayout google_protobuf_GeneratedCodeInfo_Annotation_msginit = { }; -/* This file was generated by upbc (the upb compiler) from the input + +/** bazel-out/k8-fastbuild/bin/external/com_google_protobuf/google/protobuf/descriptor.upbdefs.c ************************************************************//* This file was generated by upbc (the upb compiler) from the input * file: * * google/protobuf/descriptor.proto @@ -4519,6 +4562,7 @@ upb_def_init google_protobuf_descriptor_proto_upbdefinit = { UPB_STRVIEW_INIT(descriptor, 7601) }; +/** upb/def.c ************************************************************/ #include #include @@ -4556,7 +4600,6 @@ struct upb_fielddef { uint32_t number_; uint16_t index_; uint16_t layout_index; - uint32_t selector_base; /* Used to index into a upb::Handlers table. */ bool is_extension_; bool lazy_; bool packed_; @@ -4569,8 +4612,6 @@ struct upb_msgdef { const upb_msglayout *layout; const upb_filedef *file; const char *full_name; - uint32_t selector_count; - uint32_t submsg_field_count; /* Tables for looking up fields by number and name. */ upb_inttable itof; @@ -4700,30 +4741,6 @@ int cmp_fields(const void *p1, const void *p2) { return field_rank(f1) - field_rank(f2); } -/* A few implementation details of handlers. We put these here to avoid - * a def -> handlers dependency. */ - -#define UPB_STATIC_SELECTOR_COUNT 3 /* Warning: also in upb/handlers.h. */ - -static uint32_t upb_handlers_selectorbaseoffset(const upb_fielddef *f) { - return upb_fielddef_isseq(f) ? 2 : 0; -} - -static uint32_t upb_handlers_selectorcount(const upb_fielddef *f) { - uint32_t ret = 1; - if (upb_fielddef_isseq(f)) ret += 2; /* STARTSEQ/ENDSEQ */ - if (upb_fielddef_isstring(f)) ret += 2; /* [STRING]/STARTSTR/ENDSTR */ - if (upb_fielddef_issubmsg(f)) { - /* ENDSUBMSG (STARTSUBMSG is at table beginning) */ - ret += 0; - if (upb_fielddef_lazy(f)) { - /* STARTSTR/ENDSTR/STRING (for lazy) */ - ret += 3; - } - } - return ret; -} - static void upb_status_setoom(upb_status *status) { upb_status_seterrmsg(status, "out of memory"); } @@ -4815,8 +4832,7 @@ bool upb_enumdef_ntoi(const upb_enumdef *def, const char *name, const char *upb_enumdef_iton(const upb_enumdef *def, int32_t num) { upb_value v; - return upb_inttable_lookup32(&def->iton, num, &v) ? - upb_value_getcstr(v) : NULL; + return upb_inttable_lookup(&def->iton, num, &v) ? upb_value_getcstr(v) : NULL; } const char *upb_enum_iter_name(upb_enum_iter *iter) { @@ -4905,10 +4921,6 @@ const char *upb_fielddef_jsonname(const upb_fielddef *f) { return f->json_name; } -uint32_t upb_fielddef_selectorbase(const upb_fielddef *f) { - return f->selector_base; -} - const upb_filedef *upb_fielddef_file(const upb_fielddef *f) { return f->file; } @@ -5071,18 +5083,10 @@ upb_syntax_t upb_msgdef_syntax(const upb_msgdef *m) { return m->file->syntax; } -size_t upb_msgdef_selectorcount(const upb_msgdef *m) { - return m->selector_count; -} - -uint32_t upb_msgdef_submsgfieldcount(const upb_msgdef *m) { - return m->submsg_field_count; -} - const upb_fielddef *upb_msgdef_itof(const upb_msgdef *m, uint32_t i) { upb_value val; - return upb_inttable_lookup32(&m->itof, i, &val) ? - upb_value_getconstptr(val) : NULL; + return upb_inttable_lookup(&m->itof, i, &val) ? upb_value_getconstptr(val) + : NULL; } const upb_fielddef *upb_msgdef_ntof(const upb_msgdef *m, const char *name, @@ -5290,8 +5294,8 @@ const upb_fielddef *upb_oneofdef_ntof(const upb_oneofdef *o, const upb_fielddef *upb_oneofdef_itof(const upb_oneofdef *o, uint32_t num) { upb_value val; - return upb_inttable_lookup32(&o->itof, num, &val) ? - upb_value_getptr(val) : NULL; + return upb_inttable_lookup(&o->itof, num, &val) ? upb_value_getptr(val) + : NULL; } void upb_oneof_begin(upb_oneof_iter *iter, const upb_oneofdef *o) { @@ -5371,7 +5375,6 @@ void upb_symtab_free(upb_symtab *s) { upb_symtab *upb_symtab_new(void) { upb_symtab *s = upb_gmalloc(sizeof(*s)); - upb_alloc *alloc; if (!s) { return NULL; @@ -5379,10 +5382,9 @@ upb_symtab *upb_symtab_new(void) { s->arena = upb_arena_new(); s->bytes_loaded = 0; - alloc = upb_arena_alloc(s->arena); - if (!upb_strtable_init2(&s->syms, UPB_CTYPE_CONSTPTR, 32, alloc) || - !upb_strtable_init2(&s->files, UPB_CTYPE_CONSTPTR, 4, alloc)) { + if (!upb_strtable_init(&s->syms, 32, s->arena) || + !upb_strtable_init(&s->files, 4, s->arena)) { upb_arena_free(s->arena); upb_gfree(s); s = NULL; @@ -5438,8 +5440,7 @@ int upb_symtab_filecount(const upb_symtab *s) { typedef struct { upb_symtab *symtab; upb_filedef *file; /* File we are building. */ - upb_arena *file_arena; /* Allocate defs here. */ - upb_alloc *alloc; /* Alloc of file_arena, for tables. */ + upb_arena *arena; /* Allocate defs here. */ const upb_msglayout **layouts; /* NULL if we should build layouts. */ upb_status *status; /* Record errors here. */ jmp_buf err; /* longjmp() on error. */ @@ -5461,7 +5462,7 @@ static void symtab_oomerr(symtab_addctx *ctx) { } void *symtab_alloc(symtab_addctx *ctx, size_t bytes) { - void *ret = upb_arena_malloc(ctx->file_arena, bytes); + void *ret = upb_arena_malloc(ctx->arena, bytes); if (!ret) symtab_oomerr(ctx); return ret; } @@ -5568,13 +5569,21 @@ static void make_layout(symtab_addctx *ctx, const upb_msgdef *m) { upb_msg_field_iter it; upb_msg_oneof_iter oit; size_t hasbit; - size_t submsg_count = m->submsg_field_count; + size_t field_count = upb_msgdef_numfields(m); + size_t submsg_count = 0; const upb_msglayout **submsgs; upb_msglayout_field *fields; memset(l, 0, sizeof(*l) + sizeof(_upb_fasttable_entry)); - fields = symtab_alloc(ctx, upb_msgdef_numfields(m) * sizeof(*fields)); + /* Count sub-messages. */ + for (size_t i = 0; i < field_count; i++) { + if (upb_fielddef_issubmsg(&m->fields[i])) { + submsg_count++; + } + } + + fields = symtab_alloc(ctx, field_count * sizeof(*fields)); submsgs = symtab_alloc(ctx, submsg_count * sizeof(*submsgs)); l->field_count = upb_msgdef_numfields(m); @@ -5725,51 +5734,8 @@ static void make_layout(symtab_addctx *ctx, const upb_msgdef *m) { assign_layout_indices(m, fields); } -static void assign_msg_indices(symtab_addctx *ctx, upb_msgdef *m) { - /* Sort fields. upb internally relies on UPB_TYPE_MESSAGE fields having the - * lowest indexes, but we do not publicly guarantee this. */ - upb_msg_field_iter j; - int i; - uint32_t selector; - int n = upb_msgdef_numfields(m); - upb_fielddef **fields; - - if (n == 0) { - m->selector_count = UPB_STATIC_SELECTOR_COUNT; - m->submsg_field_count = 0; - return; - } - - fields = upb_gmalloc(n * sizeof(*fields)); - - m->submsg_field_count = 0; - for(i = 0, upb_msg_field_begin(&j, m); - !upb_msg_field_done(&j); - upb_msg_field_next(&j), i++) { - upb_fielddef *f = upb_msg_iter_field(&j); - UPB_ASSERT(f->msgdef == m); - if (upb_fielddef_issubmsg(f)) { - m->submsg_field_count++; - } - fields[i] = f; - } - - qsort(fields, n, sizeof(*fields), cmp_fields); - - selector = UPB_STATIC_SELECTOR_COUNT + m->submsg_field_count; - for (i = 0; i < n; i++) { - upb_fielddef *f = fields[i]; - f->index_ = i; - f->selector_base = selector + upb_handlers_selectorbaseoffset(f); - selector += upb_handlers_selectorcount(f); - } - m->selector_count = selector; - - upb_gfree(fields); -} - static char *strviewdup(symtab_addctx *ctx, upb_strview view) { - return upb_strdup2(view.data, view.size, ctx->alloc); + return upb_strdup2(view.data, view.size, ctx->arena); } static bool streql2(const char *a, size_t n, const char *b) { @@ -5880,9 +5846,9 @@ static void symtab_add(symtab_addctx *ctx, const char *name, upb_value v) { if (upb_strtable_lookup(&ctx->symtab->syms, name, NULL)) { symtab_errf(ctx, "duplicate symbol '%s'", name); } - upb_alloc *alloc = upb_arena_alloc(ctx->symtab->arena); size_t len = strlen(name); - CHK_OOM(upb_strtable_insert3(&ctx->symtab->syms, name, len, v, alloc)); + CHK_OOM(upb_strtable_insert(&ctx->symtab->syms, name, len, v, + ctx->symtab->arena)); } /* Given a symbol and the base symbol inside which it is defined, find the @@ -5915,7 +5881,8 @@ static const void *symtab_resolve(symtab_addctx *ctx, const upb_fielddef *f, } notfound: - symtab_errf(ctx, "couldn't resolve name '%s'", sym.data); + symtab_errf(ctx, "couldn't resolve name '" UPB_STRVIEW_FORMAT "'", + UPB_STRVIEW_ARGS(sym)); } static void create_oneofdef( @@ -5933,10 +5900,10 @@ static void create_oneofdef( v = pack_def(o, UPB_DEFTYPE_ONEOF); symtab_add(ctx, o->full_name, v); - CHK_OOM(upb_strtable_insert3(&m->ntof, name.data, name.size, v, ctx->alloc)); + CHK_OOM(upb_strtable_insert(&m->ntof, name.data, name.size, v, ctx->arena)); - CHK_OOM(upb_inttable_init2(&o->itof, UPB_CTYPE_CONSTPTR, ctx->alloc)); - CHK_OOM(upb_strtable_init2(&o->ntof, UPB_CTYPE_CONSTPTR, 4, ctx->alloc)); + CHK_OOM(upb_inttable_init(&o->itof, ctx->arena)); + CHK_OOM(upb_strtable_init(&o->ntof, 4, ctx->arena)); } static str_t *newstr(symtab_addctx *ctx, const char *data, size_t len) { @@ -5992,8 +5959,7 @@ static void parse_default(symtab_addctx *ctx, const char *str, size_t len, break; } case UPB_TYPE_INT64: { - /* XXX: Need to write our own strtoll, since it's not available in c89. */ - int64_t val = strtol(str, &end, 0); + long long val = strtoll(str, &end, 0); if (val > INT64_MAX || val < INT64_MIN || errno == ERANGE || *end) { goto invalid; } @@ -6009,8 +5975,7 @@ static void parse_default(symtab_addctx *ctx, const char *str, size_t len, break; } case UPB_TYPE_UINT64: { - /* XXX: Need to write our own strtoull, since it's not available in c89. */ - uint64_t val = strtoul(str, &end, 0); + unsigned long long val = strtoull(str, &end, 0); if (val > UINT64_MAX || errno == ERANGE || *end) { goto invalid; } @@ -6026,8 +5991,7 @@ static void parse_default(symtab_addctx *ctx, const char *str, size_t len, break; } case UPB_TYPE_FLOAT: { - /* XXX: Need to write our own strtof, since it's not available in c89. */ - float val = strtod(str, &end); + float val = strtof(str, &end); if (errno == ERANGE || *end) { goto invalid; } @@ -6093,7 +6057,6 @@ static void set_default_default(symtab_addctx *ctx, upb_fielddef *f) { static void create_fielddef( symtab_addctx *ctx, const char *prefix, upb_msgdef *m, const google_protobuf_FieldDescriptorProto *field_proto) { - upb_alloc *alloc = ctx->alloc; upb_fielddef *f; const google_protobuf_FieldOptions *options; upb_strview name; @@ -6129,7 +6092,8 @@ static void create_fielddef( upb_value v, field_v, json_v; size_t json_size; - f = (upb_fielddef*)&m->fields[m->field_count++]; + f = (upb_fielddef*)&m->fields[m->field_count]; + f->index_ = m->field_count++; f->msgdef = m; f->is_extension_ = false; @@ -6150,12 +6114,12 @@ static void create_fielddef( v = upb_value_constptr(f); json_size = strlen(json_name); - CHK_OOM( - upb_strtable_insert3(&m->ntof, name.data, name.size, field_v, alloc)); - CHK_OOM(upb_inttable_insert2(&m->itof, field_number, v, alloc)); + CHK_OOM(upb_strtable_insert(&m->ntof, name.data, name.size, field_v, + ctx->arena)); + CHK_OOM(upb_inttable_insert(&m->itof, field_number, v, ctx->arena)); if (strcmp(shortname, json_name) != 0) { - upb_strtable_insert3(&m->ntof, json_name, json_size, json_v, alloc); + upb_strtable_insert(&m->ntof, json_name, json_size, json_v, ctx->arena); } if (ctx->layouts) { @@ -6218,15 +6182,16 @@ static void create_fielddef( symtab_errf(ctx, "oneof_index out of range (%s)", f->full_name); } - oneof = (upb_oneofdef*)&m->oneofs[oneof_index]; + oneof = (upb_oneofdef *)&m->oneofs[oneof_index]; f->oneof = oneof; oneof->field_count++; if (f->proto3_optional_) { oneof->synthetic = true; } - CHK_OOM(upb_inttable_insert2(&oneof->itof, f->number_, v, alloc)); - CHK_OOM(upb_strtable_insert3(&oneof->ntof, name.data, name.size, v, alloc)); + CHK_OOM(upb_inttable_insert(&oneof->itof, f->number_, v, ctx->arena)); + CHK_OOM( + upb_strtable_insert(&oneof->ntof, name.data, name.size, v, ctx->arena)); } else { f->oneof = NULL; if (f->proto3_optional_) { @@ -6269,8 +6234,8 @@ static void create_enumdef( symtab_add(ctx, e->full_name, pack_def(e, UPB_DEFTYPE_ENUM)); values = google_protobuf_EnumDescriptorProto_value(enum_proto, &n); - CHK_OOM(upb_strtable_init2(&e->ntoi, UPB_CTYPE_INT32, n, ctx->alloc)); - CHK_OOM(upb_inttable_init2(&e->iton, UPB_CTYPE_CSTR, ctx->alloc)); + CHK_OOM(upb_strtable_init(&e->ntoi, n, ctx->arena)); + CHK_OOM(upb_inttable_init(&e->iton, ctx->arena)); e->file = ctx->file; e->defaultval = 0; @@ -6297,16 +6262,15 @@ static void create_enumdef( } CHK_OOM(name2) - CHK_OOM( - upb_strtable_insert3(&e->ntoi, name2, strlen(name2), v, ctx->alloc)); + CHK_OOM(upb_strtable_insert(&e->ntoi, name2, strlen(name2), v, ctx->arena)); if (!upb_inttable_lookup(&e->iton, num, NULL)) { upb_value v = upb_value_cstr(name2); - CHK_OOM(upb_inttable_insert2(&e->iton, num, v, ctx->alloc)); + CHK_OOM(upb_inttable_insert(&e->iton, num, v, ctx->arena)); } } - upb_inttable_compact2(&e->iton, ctx->alloc); + upb_inttable_compact(&e->iton, ctx->arena); } static void create_msgdef(symtab_addctx *ctx, const char *prefix, @@ -6330,9 +6294,8 @@ static void create_msgdef(symtab_addctx *ctx, const char *prefix, oneofs = google_protobuf_DescriptorProto_oneof_decl(msg_proto, &n_oneof); fields = google_protobuf_DescriptorProto_field(msg_proto, &n_field); - CHK_OOM(upb_inttable_init2(&m->itof, UPB_CTYPE_CONSTPTR, ctx->alloc)); - CHK_OOM(upb_strtable_init2(&m->ntof, UPB_CTYPE_CONSTPTR, n_oneof + n_field, - ctx->alloc)); + CHK_OOM(upb_inttable_init(&m->itof, ctx->arena)); + CHK_OOM(upb_strtable_init(&m->ntof, n_oneof + n_field, ctx->arena)); m->file = ctx->file; m->map_entry = false; @@ -6364,10 +6327,9 @@ static void create_msgdef(symtab_addctx *ctx, const char *prefix, create_fielddef(ctx, m->full_name, m, fields[i]); } - assign_msg_indices(ctx, m); finalize_oneofs(ctx, m); assign_msg_wellknowntype(m); - upb_inttable_compact2(&m->itof, ctx->alloc); + upb_inttable_compact(&m->itof, ctx->arena); /* This message is built. Now build nested messages and enums. */ @@ -6596,19 +6558,18 @@ static void build_filedef( } static void remove_filedef(upb_symtab *s, upb_filedef *file) { - upb_alloc *alloc = upb_arena_alloc(s->arena); int i; for (i = 0; i < file->msg_count; i++) { const char *name = file->msgs[i].full_name; - upb_strtable_remove3(&s->syms, name, strlen(name), NULL, alloc); + upb_strtable_remove(&s->syms, name, strlen(name), NULL); } for (i = 0; i < file->enum_count; i++) { const char *name = file->enums[i].full_name; - upb_strtable_remove3(&s->syms, name, strlen(name), NULL, alloc); + upb_strtable_remove(&s->syms, name, strlen(name), NULL); } for (i = 0; i < file->ext_count; i++) { const char *name = file->exts[i].full_name; - upb_strtable_remove3(&s->syms, name, strlen(name), NULL, alloc); + upb_strtable_remove(&s->syms, name, strlen(name), NULL); } } @@ -6626,8 +6587,7 @@ static const upb_filedef *_upb_symtab_addfile( ctx.file = file; ctx.symtab = s; - ctx.file_arena = file_arena; - ctx.alloc = upb_arena_alloc(file_arena); + ctx.arena = file_arena; ctx.layouts = layouts; ctx.status = status; @@ -6642,8 +6602,8 @@ static const upb_filedef *_upb_symtab_addfile( file = NULL; } else { build_filedef(&ctx, file, file_proto); - upb_strtable_insert3(&s->files, file->name, strlen(file->name), - upb_value_constptr(file), ctx.alloc); + upb_strtable_insert(&s->files, file->name, strlen(file->name), + upb_value_constptr(file), ctx.arena); UPB_ASSERT(upb_ok(status)); upb_arena_fuse(s->arena, file_arena); } @@ -6717,6 +6677,7 @@ upb_arena *_upb_symtab_arena(const upb_symtab *s) { #undef CHK_OOM +/** upb/reflection.c ************************************************************/ #include @@ -6827,40 +6788,7 @@ upb_msgval upb_msg_get(const upb_msg *msg, const upb_fielddef *f) { if (!upb_fielddef_haspresence(f) || upb_msg_has(msg, f)) { return _upb_msg_getraw(msg, f); } else { - /* TODO(haberman): change upb_fielddef to not require this switch(). */ - upb_msgval val = {0}; - switch (upb_fielddef_type(f)) { - case UPB_TYPE_INT32: - case UPB_TYPE_ENUM: - val.int32_val = upb_fielddef_defaultint32(f); - break; - case UPB_TYPE_INT64: - val.int64_val = upb_fielddef_defaultint64(f); - break; - case UPB_TYPE_UINT32: - val.uint32_val = upb_fielddef_defaultuint32(f); - break; - case UPB_TYPE_UINT64: - val.uint64_val = upb_fielddef_defaultuint64(f); - break; - case UPB_TYPE_FLOAT: - val.float_val = upb_fielddef_defaultfloat(f); - break; - case UPB_TYPE_DOUBLE: - val.double_val = upb_fielddef_defaultdouble(f); - break; - case UPB_TYPE_BOOL: - val.bool_val = upb_fielddef_defaultbool(f); - break; - case UPB_TYPE_STRING: - case UPB_TYPE_BYTES: - val.str_val.data = upb_fielddef_defaultstr(f, &val.str_val.size); - break; - case UPB_TYPE_MESSAGE: - val.msg_val = NULL; - break; - } - return val; + return upb_fielddef_default(f); } } @@ -7120,6 +7048,7 @@ upb_msgval upb_mapiter_value(const upb_map *map, size_t iter) { /* void upb_mapiter_setvalue(upb_map *map, size_t iter, upb_msgval value); */ +/** upb/json_decode.c ************************************************************/ #include #include @@ -8030,17 +7959,17 @@ static void jsondec_field(jsondec *d, upb_msg *msg, const upb_msgdef *m) { return; } - if (upb_fielddef_realcontainingoneof(f) && - upb_msg_whichoneof(msg, upb_fielddef_containingoneof(f))) { - jsondec_err(d, "More than one field for this oneof."); - } - if (jsondec_peek(d) == JD_NULL && !jsondec_isvalue(f)) { /* JSON "null" indicates a default value, so no need to set anything. */ jsondec_null(d); return; } + if (upb_fielddef_realcontainingoneof(f) && + upb_msg_whichoneof(msg, upb_fielddef_containingoneof(f))) { + jsondec_err(d, "More than one field for this oneof."); + } + preserved = d->debug_field; d->debug_field = f; @@ -8544,6 +8473,9 @@ bool upb_json_decode(const char *buf, size_t size, upb_msg *msg, const upb_msgdef *m, const upb_symtab *any_pool, int options, upb_arena *arena, upb_status *status) { jsondec d; + + if (size == 0) return true; + d.ptr = buf; d.end = buf + size; d.arena = arena; @@ -8562,6 +8494,7 @@ bool upb_json_decode(const char *buf, size_t size, upb_msg *msg, return true; } +/** upb/json_encode.c ************************************************************/ #include #include @@ -8591,7 +8524,7 @@ static void jsonenc_scalar(jsonenc *e, upb_msgval val, const upb_fielddef *f); static void jsonenc_msgfield(jsonenc *e, const upb_msg *msg, const upb_msgdef *m); static void jsonenc_msgfields(jsonenc *e, const upb_msg *msg, - const upb_msgdef *m); + const upb_msgdef *m, bool first); static void jsonenc_value(jsonenc *e, const upb_msg *msg, const upb_msgdef *m); UPB_NORETURN static void jsonenc_err(jsonenc *e, const char *msg) { @@ -8622,8 +8555,10 @@ static void jsonenc_putbytes(jsonenc *e, const void *data, size_t len) { memcpy(e->ptr, data, len); e->ptr += len; } else { - if (have) memcpy(e->ptr, data, have); - e->ptr += have; + if (have) { + memcpy(e->ptr, data, have); + e->ptr += have; + } e->overflow += (len - have); } } @@ -8645,7 +8580,7 @@ static void jsonenc_printf(jsonenc *e, const char *fmt, ...) { if (UPB_LIKELY(have > n)) { e->ptr += n; } else { - e->ptr += have; + e->ptr = UPB_PTRADD(e->ptr, have); e->overflow += (n - have); } } @@ -8749,7 +8684,7 @@ static void jsonenc_bytes(jsonenc *e, upb_strview str) { static const char base64[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; const unsigned char *ptr = (unsigned char*)str.data; - const unsigned char *end = ptr + str.size; + const unsigned char *end = UPB_PTRADD(ptr, str.size); char buf[4]; jsonenc_putstr(e, "\""); @@ -8785,7 +8720,7 @@ static void jsonenc_bytes(jsonenc *e, upb_strview str) { static void jsonenc_stringbody(jsonenc *e, upb_strview str) { const char *ptr = str.data; - const char *end = ptr + str.size; + const char *end = UPB_PTRADD(ptr, str.size); while (ptr < end) { switch (*ptr) { @@ -8901,14 +8836,13 @@ static void jsonenc_any(jsonenc *e, const upb_msg *msg, const upb_msgdef *m) { jsonenc_putstr(e, "{\"@type\":"); jsonenc_string(e, type_url); - jsonenc_putstr(e, ","); if (upb_msgdef_wellknowntype(any_m) == UPB_WELLKNOWN_UNSPECIFIED) { /* Regular messages: {"@type": "...","foo": 1, "bar": 2} */ - jsonenc_msgfields(e, any, any_m); + jsonenc_msgfields(e, any, any_m, false); } else { /* Well-known type: {"@type": "...","value": } */ - jsonenc_putstr(e, "\"value\":"); + jsonenc_putstr(e, ",\"value\":"); jsonenc_msgfield(e, any, any_m); } @@ -9211,10 +9145,9 @@ static void jsonenc_fieldval(jsonenc *e, const upb_fielddef *f, } static void jsonenc_msgfields(jsonenc *e, const upb_msg *msg, - const upb_msgdef *m) { + const upb_msgdef *m, bool first) { upb_msgval val; const upb_fielddef *f; - bool first = true; if (e->options & UPB_JSONENC_EMITDEFAULTS) { /* Iterate over all fields. */ @@ -9237,7 +9170,7 @@ static void jsonenc_msgfields(jsonenc *e, const upb_msg *msg, static void jsonenc_msg(jsonenc *e, const upb_msg *msg, const upb_msgdef *m) { jsonenc_putstr(e, "{"); - jsonenc_msgfields(e, msg, m); + jsonenc_msgfields(e, msg, m, true); jsonenc_putstr(e, "}"); } @@ -9259,7 +9192,7 @@ size_t upb_json_encode(const upb_msg *msg, const upb_msgdef *m, e.buf = buf; e.ptr = buf; - e.end = buf + size; + e.end = UPB_PTRADD(buf, size); e.overflow = 0; e.options = options; e.ext_pool = ext_pool; @@ -9272,27 +9205,39 @@ size_t upb_json_encode(const upb_msg *msg, const upb_msgdef *m, if (e.arena) upb_arena_free(e.arena); return jsonenc_nullz(&e, size); } + +/** upb/port_undef.inc ************************************************************/ /* See port_def.inc. This should #undef all macros #defined there. */ -#undef UPB_MAPTYPE_STRING #undef UPB_SIZE #undef UPB_PTR_AT #undef UPB_READ_ONEOF #undef UPB_WRITE_ONEOF +#undef UPB_MAPTYPE_STRING #undef UPB_INLINE #undef UPB_ALIGN_UP #undef UPB_ALIGN_DOWN #undef UPB_ALIGN_MALLOC #undef UPB_ALIGN_OF +#undef UPB_LIKELY +#undef UPB_UNLIKELY #undef UPB_FORCEINLINE #undef UPB_NOINLINE #undef UPB_NORETURN +#undef UPB_PRINTF #undef UPB_MAX #undef UPB_MIN #undef UPB_UNUSED #undef UPB_ASSUME #undef UPB_ASSERT #undef UPB_UNREACHABLE +#undef UPB_SETJMP +#undef UPB_LONGJMP +#undef UPB_PTRADD +#undef UPB_MUSTTAIL +#undef UPB_FASTTABLE_SUPPORTED +#undef UPB_FASTTABLE +#undef UPB_FASTTABLE_INIT #undef UPB_POISON_MEMORY_REGION #undef UPB_UNPOISON_MEMORY_REGION #undef UPB_ASAN diff --git a/php/ext/google/protobuf/php-upb.h b/php/ext/google/protobuf/php-upb.h index bd72cd9c08..77a87c7691 100644 --- a/php/ext/google/protobuf/php-upb.h +++ b/php/ext/google/protobuf/php-upb.h @@ -1,26 +1,53 @@ /* Amalgamated source file */ -#include /* -* This is where we define macros used across upb. -* -* All of these macros are undef'd in port_undef.inc to avoid leaking them to -* users. -* -* The correct usage is: -* -* #include "upb/foobar.h" -* #include "upb/baz.h" -* -* // MUST be last included header. -* #include "upb/port_def.inc" -* -* // Code for this file. -* // <...> -* -* // Can be omitted for .c files, required for .h. -* #include "upb/port_undef.inc" -* -* This file is private and must not be included by users! -*/ +/* + * Copyright (c) 2009-2021, Google LLC + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Google LLC nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * This is where we define macros used across upb. + * + * All of these macros are undef'd in port_undef.inc to avoid leaking them to + * users. + * + * The correct usage is: + * + * #include "upb/foobar.h" + * #include "upb/baz.h" + * + * // MUST be last included header. + * #include "upb/port_def.inc" + * + * // Code for this file. + * // <...> + * + * // Can be omitted for .c files, required for .h. + * #include "upb/port_undef.inc" + * + * This file is private and must not be included by users! + */ #if !((defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L) || \ (defined(__cplusplus) && __cplusplus >= 201103L) || \ @@ -136,9 +163,40 @@ #define UPB_LONGJMP(buf, val) longjmp(buf, val) #endif +/* UPB_PTRADD(ptr, ofs): add pointer while avoiding "NULL + 0" UB */ +#define UPB_PTRADD(ptr, ofs) ((ofs) ? (ptr) + (ofs) : (ptr)) + /* Configure whether fasttable is switched on or not. *************************/ -#if defined(__x86_64__) && defined(__GNUC__) +#if defined(__has_attribute) +#define UPB_HAS_ATTRIBUTE(x) __has_attribute(x) +#else +#define UPB_HAS_ATTRIBUTE(x) 0 +#endif + +#if UPB_HAS_ATTRIBUTE(musttail) +#define UPB_MUSTTAIL __attribute__((musttail)) +#else +#define UPB_MUSTTAIL +#endif + +#undef UPB_HAS_ATTRIBUTE + +/* This check is not fully robust: it does not require that we have "musttail" + * support available. We need tail calls to avoid consuming arbitrary amounts + * of stack space. + * + * GCC/Clang can mostly be trusted to generate tail calls as long as + * optimization is enabled, but, debug builds will not generate tail calls + * unless "musttail" is available. + * + * We should probably either: + * 1. require that the compiler supports musttail. + * 2. add some fallback code for when musttail isn't available (ie. return + * instead of tail calling). This is safe and portable, but this comes at + * a CPU cost. + */ +#if (defined(__x86_64__) || defined(__aarch64__)) && defined(__GNUC__) #define UPB_FASTTABLE_SUPPORTED 1 #else #define UPB_FASTTABLE_SUPPORTED 0 @@ -149,7 +207,7 @@ * for example for testing or benchmarking. */ #if defined(UPB_ENABLE_FASTTABLE) #if !UPB_FASTTABLE_SUPPORTED -#error fasttable is x86-64 + Clang/GCC only +#error fasttable is x86-64/ARM64 only and requires GCC or Clang. #endif #define UPB_FASTTABLE 1 /* Define UPB_TRY_ENABLE_FASTTABLE to use fasttable if possible. @@ -193,55 +251,36 @@ void __asan_unpoison_memory_region(void const volatile *addr, size_t size); ((void)(addr), (void)(size)) #define UPB_UNPOISON_MEMORY_REGION(addr, size) \ ((void)(addr), (void)(size)) -#endif +#endif + +/** upb/decode.h ************************************************************/ /* -** upb_decode: parsing into a upb_msg using a upb_msglayout. -*/ + * upb_decode: parsing into a upb_msg using a upb_msglayout. + */ #ifndef UPB_DECODE_H_ #define UPB_DECODE_H_ + +/** upb/msg.h ************************************************************/ /* -** Our memory representation for parsing tables and messages themselves. -** Functions in this file are used by generated code and possibly reflection. -** -** The definitions in this file are internal to upb. -**/ + * Public APIs for message operations that do not require descriptors. + * These functions can be used even in build that does not want to depend on + * reflection or descriptors. + * + * Descriptor-based reflection functionality lives in reflection.h. + */ #ifndef UPB_MSG_H_ #define UPB_MSG_H_ -#include -#include -#include - -/* -** upb_table -** -** This header is INTERNAL-ONLY! Its interfaces are not public or stable! -** This file defines very fast int->upb_value (inttable) and string->upb_value -** (strtable) hash tables. -** -** The table uses chained scatter with Brent's variation (inspired by the Lua -** implementation of hash tables). The hash function for strings is Austin -** Appleby's "MurmurHash." -** -** The inttable uses uintptr_t as its key, which guarantees it can be used to -** store pointers or integers of at least 32 bits (upb isn't really useful on -** systems where sizeof(void*) < 4). -** -** The table must be homogeneous (all values of the same type). In debug -** mode, we check this on insert and lookup. -*/ +#include -#ifndef UPB_TABLE_H_ -#define UPB_TABLE_H_ -#include -#include +/** upb/upb.h ************************************************************/ /* -** This file contains shared definitions that are widely used across upb. -*/ + * This file contains shared definitions that are widely used across upb. + */ #ifndef UPB_H_ #define UPB_H_ @@ -399,7 +438,7 @@ typedef struct { upb_arena *upb_arena_init(void *mem, size_t n, upb_alloc *alloc); void upb_arena_free(upb_arena *a); bool upb_arena_addcleanup(upb_arena *a, void *ud, upb_cleanup_func *func); -void upb_arena_fuse(upb_arena *a, upb_arena *b); +bool upb_arena_fuse(upb_arena *a, upb_arena *b); void *_upb_arena_slowmalloc(upb_arena *a, size_t size); UPB_INLINE upb_alloc *upb_arena_alloc(upb_arena *a) { return (upb_alloc*)a; } @@ -578,55 +617,134 @@ UPB_INLINE int _upb_lg2ceilsize(int x) { #endif /* UPB_H_ */ +#ifdef __cplusplus +extern "C" { +#endif + +typedef void upb_msg; + +/* For users these are opaque. They can be obtained from upb_msgdef_layout() + * but users cannot access any of the members. */ +struct upb_msglayout; +typedef struct upb_msglayout upb_msglayout; + +/* Adds unknown data (serialized protobuf data) to the given message. The data + * is copied into the message instance. */ +void upb_msg_addunknown(upb_msg *msg, const char *data, size_t len, + upb_arena *arena); + +/* Returns a reference to the message's unknown data. */ +const char *upb_msg_getunknown(const upb_msg *msg, size_t *len); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* UPB_MSG_INT_H_ */ + +/* Must be last. */ #ifdef __cplusplus extern "C" { #endif +enum { + /* If set, strings will alias the input buffer instead of copying into the + * arena. */ + UPB_DECODE_ALIAS = 1, +}; -/* upb_value ******************************************************************/ +#define UPB_DECODE_MAXDEPTH(depth) ((depth) << 16) -/* A tagged union (stored untagged inside the table) so that we can check that - * clients calling table accessors are correctly typed without having to have - * an explosion of accessors. */ -typedef enum { - UPB_CTYPE_INT32 = 1, - UPB_CTYPE_INT64 = 2, - UPB_CTYPE_UINT32 = 3, - UPB_CTYPE_UINT64 = 4, - UPB_CTYPE_BOOL = 5, - UPB_CTYPE_CSTR = 6, - UPB_CTYPE_PTR = 7, - UPB_CTYPE_CONSTPTR = 8, - UPB_CTYPE_FPTR = 9, - UPB_CTYPE_FLOAT = 10, - UPB_CTYPE_DOUBLE = 11 -} upb_ctype_t; +bool _upb_decode(const char *buf, size_t size, upb_msg *msg, + const upb_msglayout *l, upb_arena *arena, int options); + +UPB_INLINE +bool upb_decode(const char *buf, size_t size, upb_msg *msg, + const upb_msglayout *l, upb_arena *arena) { + return _upb_decode(buf, size, msg, l, arena, 0); +} + +#ifdef __cplusplus +} /* extern "C" */ +#endif + + +#endif /* UPB_DECODE_H_ */ + +/** upb/decode_internal.h ************************************************************/ +/* + * Internal implementation details of the decoder that are shared between + * decode.c and decode_fast.c. + */ + +#ifndef UPB_DECODE_INT_H_ +#define UPB_DECODE_INT_H_ + +#include + + +/** upb/msg_internal.h ************************************************************//* +** Our memory representation for parsing tables and messages themselves. +** Functions in this file are used by generated code and possibly reflection. +** +** The definitions in this file are internal to upb. +**/ + +#ifndef UPB_MSG_INT_H_ +#define UPB_MSG_INT_H_ + +#include +#include +#include + + +/** upb/table_internal.h ************************************************************/ +/* + * upb_table + * + * This header is INTERNAL-ONLY! Its interfaces are not public or stable! + * This file defines very fast int->upb_value (inttable) and string->upb_value + * (strtable) hash tables. + * + * The table uses chained scatter with Brent's variation (inspired by the Lua + * implementation of hash tables). The hash function for strings is Austin + * Appleby's "MurmurHash." + * + * The inttable uses uintptr_t as its key, which guarantees it can be used to + * store pointers or integers of at least 32 bits (upb isn't really useful on + * systems where sizeof(void*) < 4). + * + * The table must be homogeneous (all values of the same type). In debug + * mode, we check this on insert and lookup. + */ + +#ifndef UPB_TABLE_H_ +#define UPB_TABLE_H_ + +#include +#include + + +#ifdef __cplusplus +extern "C" { +#endif + + +/* upb_value ******************************************************************/ typedef struct { uint64_t val; } upb_value; -/* Like strdup(), which isn't always available since it's not ANSI C. */ -char *upb_strdup(const char *s, upb_alloc *a); /* Variant that works with a length-delimited rather than NULL-delimited string, * as supported by strtable. */ -char *upb_strdup2(const char *s, size_t len, upb_alloc *a); - -UPB_INLINE char *upb_gstrdup(const char *s) { - return upb_strdup(s, &upb_alloc_global); -} +char *upb_strdup2(const char *s, size_t len, upb_arena *a); UPB_INLINE void _upb_value_setval(upb_value *v, uint64_t val) { v->val = val; } -UPB_INLINE upb_value _upb_value_val(uint64_t val) { - upb_value ret; - _upb_value_setval(&ret, val); - return ret; -} - /* For each value ctype, define the following set of functions: * * // Get/set an int32 from a upb_value. @@ -734,14 +852,7 @@ typedef struct { uint32_t mask; /* Mask to turn hash value -> bucket. */ uint32_t max_count; /* Max count before we hit our load limit. */ uint8_t size_lg2; /* Size of the hashtable part is 2^size_lg2 entries. */ - - /* Hash table entries. - * Making this const isn't entirely accurate; what we really want is for it to - * have the same const-ness as the table it's inside. But there's no way to - * declare that in C. So we have to make it const so that we can statically - * initialize const hash tables. Then we cast away const when we have to. - */ - const upb_tabent *entries; + upb_tabent *entries; } upb_table; typedef struct { @@ -755,8 +866,6 @@ typedef struct { size_t array_count; /* Array part number of elements. */ } upb_inttable; -#define UPB_ARRAY_EMPTYENT -1 - UPB_INLINE size_t upb_table_size(const upb_table *t) { if (t->size_lg2 == 0) return 0; @@ -769,48 +878,10 @@ UPB_INLINE bool upb_tabent_isempty(const upb_tabent *e) { return e->key == 0; } -/* Used by some of the unit tests for generic hashing functionality. */ -uint32_t upb_murmur_hash2(const void * key, size_t len, uint32_t seed); - -UPB_INLINE uintptr_t upb_intkey(uintptr_t key) { - return key; -} - -UPB_INLINE uint32_t upb_inthash(uintptr_t key) { - return (uint32_t)key; -} - -static const upb_tabent *upb_getentry(const upb_table *t, uint32_t hash) { - return t->entries + (hash & t->mask); -} - -UPB_INLINE bool upb_arrhas(upb_tabval key) { - return key.val != (uint64_t)-1; -} - /* Initialize and uninitialize a table, respectively. If memory allocation * failed, false is returned that the table is uninitialized. */ -bool upb_inttable_init2(upb_inttable *table, upb_ctype_t ctype, upb_alloc *a); -bool upb_strtable_init2(upb_strtable *table, upb_ctype_t ctype, - size_t expected_size, upb_alloc *a); -void upb_inttable_uninit2(upb_inttable *table, upb_alloc *a); -void upb_strtable_uninit2(upb_strtable *table, upb_alloc *a); - -UPB_INLINE bool upb_inttable_init(upb_inttable *table, upb_ctype_t ctype) { - return upb_inttable_init2(table, ctype, &upb_alloc_global); -} - -UPB_INLINE bool upb_strtable_init(upb_strtable *table, upb_ctype_t ctype) { - return upb_strtable_init2(table, ctype, 4, &upb_alloc_global); -} - -UPB_INLINE void upb_inttable_uninit(upb_inttable *table) { - upb_inttable_uninit2(table, &upb_alloc_global); -} - -UPB_INLINE void upb_strtable_uninit(upb_strtable *table) { - upb_strtable_uninit2(table, &upb_alloc_global); -} +bool upb_inttable_init(upb_inttable *table, upb_arena *a); +bool upb_strtable_init(upb_strtable *table, size_t expected_size, upb_arena *a); /* Returns the number of values in the table. */ size_t upb_inttable_count(const upb_inttable *t); @@ -818,12 +889,6 @@ UPB_INLINE size_t upb_strtable_count(const upb_strtable *t) { return t->t.count; } -void upb_inttable_packedsize(const upb_inttable *t, size_t *size); -void upb_strtable_packedsize(const upb_strtable *t, size_t *size); -upb_inttable *upb_inttable_pack(const upb_inttable *t, void *p, size_t *ofs, - size_t size); -upb_strtable *upb_strtable_pack(const upb_strtable *t, void *p, size_t *ofs, - size_t size); void upb_strtable_clear(upb_strtable *t); /* Inserts the given key into the hashtable with the given value. The key must @@ -833,26 +898,10 @@ void upb_strtable_clear(upb_strtable *t); * * If a table resize was required but memory allocation failed, false is * returned and the table is unchanged. */ -bool upb_inttable_insert2(upb_inttable *t, uintptr_t key, upb_value val, - upb_alloc *a); -bool upb_strtable_insert3(upb_strtable *t, const char *key, size_t len, - upb_value val, upb_alloc *a); - -UPB_INLINE bool upb_inttable_insert(upb_inttable *t, uintptr_t key, - upb_value val) { - return upb_inttable_insert2(t, key, val, &upb_alloc_global); -} - -UPB_INLINE bool upb_strtable_insert2(upb_strtable *t, const char *key, - size_t len, upb_value val) { - return upb_strtable_insert3(t, key, len, val, &upb_alloc_global); -} - -/* For NULL-terminated strings. */ -UPB_INLINE bool upb_strtable_insert(upb_strtable *t, const char *key, - upb_value val) { - return upb_strtable_insert2(t, key, strlen(key), val); -} +bool upb_inttable_insert(upb_inttable *t, uintptr_t key, upb_value val, + upb_arena *a); +bool upb_strtable_insert(upb_strtable *t, const char *key, size_t len, + upb_value val, upb_arena *a); /* Looks up key in this table, returning "true" if the key was found. * If v is non-NULL, copies the value for this key into *v. */ @@ -869,74 +918,21 @@ UPB_INLINE bool upb_strtable_lookup(const upb_strtable *t, const char *key, /* Removes an item from the table. Returns true if the remove was successful, * and stores the removed item in *val if non-NULL. */ bool upb_inttable_remove(upb_inttable *t, uintptr_t key, upb_value *val); -bool upb_strtable_remove3(upb_strtable *t, const char *key, size_t len, - upb_value *val, upb_alloc *alloc); - -UPB_INLINE bool upb_strtable_remove2(upb_strtable *t, const char *key, - size_t len, upb_value *val) { - return upb_strtable_remove3(t, key, len, val, &upb_alloc_global); -} - -/* For NULL-terminated strings. */ -UPB_INLINE bool upb_strtable_remove(upb_strtable *t, const char *key, - upb_value *v) { - return upb_strtable_remove2(t, key, strlen(key), v); -} +bool upb_strtable_remove(upb_strtable *t, const char *key, size_t len, + upb_value *val); /* Updates an existing entry in an inttable. If the entry does not exist, * returns false and does nothing. Unlike insert/remove, this does not * invalidate iterators. */ bool upb_inttable_replace(upb_inttable *t, uintptr_t key, upb_value val); -/* Convenience routines for inttables with pointer keys. */ -bool upb_inttable_insertptr2(upb_inttable *t, const void *key, upb_value val, - upb_alloc *a); -bool upb_inttable_removeptr(upb_inttable *t, const void *key, upb_value *val); -bool upb_inttable_lookupptr( - const upb_inttable *t, const void *key, upb_value *val); - -UPB_INLINE bool upb_inttable_insertptr(upb_inttable *t, const void *key, - upb_value val) { - return upb_inttable_insertptr2(t, key, val, &upb_alloc_global); -} - /* Optimizes the table for the current set of entries, for both memory use and * lookup time. Client should call this after all entries have been inserted; * inserting more entries is legal, but will likely require a table resize. */ -void upb_inttable_compact2(upb_inttable *t, upb_alloc *a); - -UPB_INLINE void upb_inttable_compact(upb_inttable *t) { - upb_inttable_compact2(t, &upb_alloc_global); -} - -/* A special-case inlinable version of the lookup routine for 32-bit - * integers. */ -UPB_INLINE bool upb_inttable_lookup32(const upb_inttable *t, uint32_t key, - upb_value *v) { - *v = upb_value_int32(0); /* Silence compiler warnings. */ - if (key < t->array_size) { - upb_tabval arrval = t->array[key]; - if (upb_arrhas(arrval)) { - _upb_value_setval(v, arrval.val); - return true; - } else { - return false; - } - } else { - const upb_tabent *e; - if (t->t.entries == NULL) return false; - for (e = upb_getentry(&t->t, upb_inthash(key)); true; e = e->next) { - if ((uint32_t)e->key == key) { - _upb_value_setval(v, e->val.val); - return true; - } - if (e->next == NULL) return false; - } - } -} +void upb_inttable_compact(upb_inttable *t, upb_arena *a); /* Exposed for testing only. */ -bool upb_strtable_resize(upb_strtable *t, size_t size_lg2, upb_alloc *a); +bool upb_strtable_resize(upb_strtable *t, size_t size_lg2, upb_arena *a); /* Iterators ******************************************************************/ @@ -1032,10 +1028,6 @@ bool upb_inttable_iter_isequal(const upb_inttable_iter *i1, extern "C" { #endif -#define PTR_AT(msg, ofs, type) (type*)((const char*)msg + ofs) - -typedef void upb_msg; - /** upb_msglayout *************************************************************/ /* upb_msglayout represents the memory layout of a given upb_msgdef. The @@ -1070,7 +1062,7 @@ typedef struct { _upb_field_parser *field_parser; } _upb_fasttable_entry; -typedef struct upb_msglayout { +struct upb_msglayout { const struct upb_msglayout *const* submsgs; const upb_msglayout_field *fields; /* Must be aligned to sizeof(void*). Doesn't include internal members like @@ -1082,7 +1074,7 @@ typedef struct upb_msglayout { /* To constant-initialize the tables of variable length, we need a flexible * array member, and we need to compile in C99 mode. */ _upb_fasttable_entry fasttable[]; -} upb_msglayout; +}; /** upb_msg *******************************************************************/ @@ -1137,21 +1129,18 @@ void _upb_msg_discardunknown_shallow(upb_msg *msg); bool _upb_msg_addunknown(upb_msg *msg, const char *data, size_t len, upb_arena *arena); -/* Returns a reference to the message's unknown data. */ -const char *upb_msg_getunknown(const upb_msg *msg, size_t *len); - /** Hasbit access *************************************************************/ UPB_INLINE bool _upb_hasbit(const upb_msg *msg, size_t idx) { - return (*PTR_AT(msg, idx / 8, const char) & (1 << (idx % 8))) != 0; + return (*UPB_PTR_AT(msg, idx / 8, const char) & (1 << (idx % 8))) != 0; } UPB_INLINE void _upb_sethas(const upb_msg *msg, size_t idx) { - (*PTR_AT(msg, idx / 8, char)) |= (char)(1 << (idx % 8)); + (*UPB_PTR_AT(msg, idx / 8, char)) |= (char)(1 << (idx % 8)); } UPB_INLINE void _upb_clearhas(const upb_msg *msg, size_t idx) { - (*PTR_AT(msg, idx / 8, char)) &= (char)(~(1 << (idx % 8))); + (*UPB_PTR_AT(msg, idx / 8, char)) &= (char)(~(1 << (idx % 8))); } UPB_INLINE size_t _upb_msg_hasidx(const upb_msglayout_field *f) { @@ -1177,11 +1166,11 @@ UPB_INLINE void _upb_clearhas_field(const upb_msg *msg, /** Oneof case access *********************************************************/ UPB_INLINE uint32_t *_upb_oneofcase(upb_msg *msg, size_t case_ofs) { - return PTR_AT(msg, case_ofs, uint32_t); + return UPB_PTR_AT(msg, case_ofs, uint32_t); } UPB_INLINE uint32_t _upb_getoneofcase(const void *msg, size_t case_ofs) { - return *PTR_AT(msg, case_ofs, uint32_t); + return *UPB_PTR_AT(msg, case_ofs, uint32_t); } UPB_INLINE size_t _upb_oneofcase_ofs(const upb_msglayout_field *f) { @@ -1200,7 +1189,7 @@ UPB_INLINE uint32_t _upb_getoneofcase_field(const upb_msg *msg, } UPB_INLINE bool _upb_has_submsg_nohasbit(const upb_msg *msg, size_t ofs) { - return *PTR_AT(msg, ofs, const upb_msg*) != NULL; + return *UPB_PTR_AT(msg, ofs, const upb_msg*) != NULL; } UPB_INLINE bool _upb_isrepeated(const upb_msglayout_field *field) { @@ -1277,7 +1266,7 @@ UPB_INLINE bool _upb_array_resize(upb_array *arr, size_t size, UPB_INLINE const void *_upb_array_accessor(const void *msg, size_t ofs, size_t *size) { - const upb_array *arr = *PTR_AT(msg, ofs, const upb_array*); + const upb_array *arr = *UPB_PTR_AT(msg, ofs, const upb_array*); if (arr) { if (size) *size = arr->len; return _upb_array_constptr(arr); @@ -1289,7 +1278,7 @@ UPB_INLINE const void *_upb_array_accessor(const void *msg, size_t ofs, UPB_INLINE void *_upb_array_mutable_accessor(void *msg, size_t ofs, size_t *size) { - upb_array *arr = *PTR_AT(msg, ofs, upb_array*); + upb_array *arr = *UPB_PTR_AT(msg, ofs, upb_array*); if (arr) { if (size) *size = arr->len; return _upb_array_ptr(arr); @@ -1302,7 +1291,7 @@ UPB_INLINE void *_upb_array_mutable_accessor(void *msg, size_t ofs, UPB_INLINE void *_upb_array_resize_accessor2(void *msg, size_t ofs, size_t size, int elem_size_lg2, upb_arena *arena) { - upb_array **arr_ptr = PTR_AT(msg, ofs, upb_array *); + upb_array **arr_ptr = UPB_PTR_AT(msg, ofs, upb_array *); upb_array *arr = *arr_ptr; if (!arr || arr->size < size) { return _upb_array_resize_fallback(arr_ptr, size, elem_size_lg2, arena); @@ -1315,7 +1304,7 @@ UPB_INLINE bool _upb_array_append_accessor2(void *msg, size_t ofs, int elem_size_lg2, const void *value, upb_arena *arena) { - upb_array **arr_ptr = PTR_AT(msg, ofs, upb_array *); + upb_array **arr_ptr = UPB_PTR_AT(msg, ofs, upb_array *); size_t elem_size = 1 << elem_size_lg2; upb_array *arr = *arr_ptr; void *ptr; @@ -1323,7 +1312,7 @@ UPB_INLINE bool _upb_array_append_accessor2(void *msg, size_t ofs, return _upb_array_append_fallback(arr_ptr, value, elem_size_lg2, arena); } ptr = _upb_array_ptr(arr); - memcpy(PTR_AT(ptr, arr->len * elem_size, char), value, elem_size); + memcpy(UPB_PTR_AT(ptr, arr->len * elem_size, char), value, elem_size); arr->len++; return true; } @@ -1470,20 +1459,19 @@ UPB_INLINE void* _upb_map_next(const upb_map *map, size_t *iter) { } UPB_INLINE bool _upb_map_set(upb_map *map, const void *key, size_t key_size, - void *val, size_t val_size, upb_arena *arena) { + void *val, size_t val_size, upb_arena *a) { upb_strview strkey = _upb_map_tokey(key, key_size); upb_value tabval = {0}; - if (!_upb_map_tovalue(val, val_size, &tabval, arena)) return false; - upb_alloc *a = upb_arena_alloc(arena); + if (!_upb_map_tovalue(val, val_size, &tabval, a)) return false; /* TODO(haberman): add overwrite operation to minimize number of lookups. */ - upb_strtable_remove3(&map->table, strkey.data, strkey.size, NULL, a); - return upb_strtable_insert3(&map->table, strkey.data, strkey.size, tabval, a); + upb_strtable_remove(&map->table, strkey.data, strkey.size, NULL); + return upb_strtable_insert(&map->table, strkey.data, strkey.size, tabval, a); } UPB_INLINE bool _upb_map_delete(upb_map *map, const void *key, size_t key_size) { upb_strview k = _upb_map_tokey(key, key_size); - return upb_strtable_remove3(&map->table, k.data, k.size, NULL, NULL); + return upb_strtable_remove(&map->table, k.data, k.size, NULL); } UPB_INLINE void _upb_map_clear(upb_map *map) { @@ -1515,7 +1503,7 @@ UPB_INLINE void *_upb_msg_map_next(const upb_msg *msg, size_t ofs, UPB_INLINE bool _upb_msg_map_set(upb_msg *msg, size_t ofs, const void *key, size_t key_size, void *val, size_t val_size, upb_arena *arena) { - upb_map **map = PTR_AT(msg, ofs, upb_map *); + upb_map **map = UPB_PTR_AT(msg, ofs, upb_map *); if (!*map) { *map = _upb_map_new(arena, key_size, val_size); } @@ -1548,8 +1536,7 @@ UPB_INLINE void _upb_msg_map_key(const void* msg, void* key, size_t size) { UPB_INLINE void _upb_msg_map_value(const void* msg, void* val, size_t size) { const upb_tabent *ent = (const upb_tabent*)msg; - upb_value v; - _upb_value_setval(&v, ent->val.val); + upb_value v = {ent->val.val}; _upb_map_fromvalue(v, val, size); } @@ -1612,55 +1599,14 @@ UPB_INLINE bool _upb_sortedmap_next(_upb_mapsorter *s, const upb_map *map, return true; } -#undef PTR_AT - -#ifdef __cplusplus -} /* extern "C" */ -#endif - - -#endif /* UPB_MSG_H_ */ - -/* Must be last. */ - -#ifdef __cplusplus -extern "C" { -#endif - -enum { - /* If set, strings will alias the input buffer instead of copying into the - * arena. */ - UPB_DECODE_ALIAS = 1, -}; - -#define UPB_DECODE_MAXDEPTH(depth) ((depth) << 16) - -bool _upb_decode(const char *buf, size_t size, upb_msg *msg, - const upb_msglayout *l, upb_arena *arena, int options); - -UPB_INLINE -bool upb_decode(const char *buf, size_t size, upb_msg *msg, - const upb_msglayout *l, upb_arena *arena) { - return _upb_decode(buf, size, msg, l, arena, 0); -} - #ifdef __cplusplus } /* extern "C" */ #endif -#endif /* UPB_DECODE_H_ */ -/* -** Internal implementation details of the decoder that are shared between -** decode.c and decode_fast.c. -*/ - -#ifndef UPB_DECODE_INT_H_ -#define UPB_DECODE_INT_H_ - -#include - +#endif /* UPB_MSG_INT_H_ */ +/** upb/upb_internal.h ************************************************************/ #ifndef UPB_INT_H_ #define UPB_INT_H_ @@ -1670,7 +1616,10 @@ typedef struct mem_block mem_block; struct upb_arena { _upb_arena_head head; - uint32_t *cleanups; + /* Stores cleanup metadata for this arena. + * - a pointer to the current cleanup counter. + * - a boolean indicating if there is an unowned initial block. */ + uintptr_t cleanup_metadata; /* Allocator to allocate arena blocks. We are responsible for freeing these * when we are destroyed. */ @@ -1792,10 +1741,11 @@ bool decode_isdone(upb_decstate *d, const char **ptr) { } } +#if UPB_FASTTABLE UPB_INLINE const char *fastdecode_tagdispatch(upb_decstate *d, const char *ptr, upb_msg *msg, intptr_t table, - uint64_t hasbits, uint32_t tag) { + uint64_t hasbits, uint64_t tag) { const upb_msglayout *table_p = decode_totablep(table); uint8_t mask = table; uint64_t data; @@ -1803,8 +1753,10 @@ const char *fastdecode_tagdispatch(upb_decstate *d, const char *ptr, UPB_ASSUME((idx & 7) == 0); idx >>= 3; data = table_p->fasttable[idx].field_data ^ tag; - return table_p->fasttable[idx].field_parser(d, ptr, msg, table, hasbits, data); + UPB_MUSTTAIL return table_p->fasttable[idx].field_parser(d, ptr, msg, table, + hasbits, data); } +#endif UPB_INLINE uint32_t fastdecode_loadtag(const char* ptr) { uint16_t tag; @@ -1837,9 +1789,11 @@ UPB_INLINE void decode_poplimit(upb_decstate *d, const char *ptr, #endif /* UPB_DECODE_INT_H_ */ + +/** upb/encode.h ************************************************************/ /* -** upb_encode: parsing into a upb_msg using a upb_msglayout. -*/ + * upb_encode: parsing into a upb_msg using a upb_msglayout. + */ #ifndef UPB_ENCODE_H_ #define UPB_ENCODE_H_ @@ -1880,6 +1834,8 @@ UPB_INLINE char *upb_encode(const void *msg, const upb_msglayout *l, #endif #endif /* UPB_ENCODE_H_ */ + +/** upb/decode_fast.h ************************************************************/ // These are the specialized field parser functions for the fast parser. // Generated tables will refer to these by name. // @@ -2005,7 +1961,8 @@ TAGBYTES(r) #undef UPB_PARSE_PARAMS #endif /* UPB_DECODE_FAST_H_ */ -/* This file was generated by upbc (the upb compiler) from the input + +/** google/protobuf/descriptor.upb.h ************************************************************//* This file was generated by upbc (the upb compiler) from the input * file: * * google/protobuf/descriptor.proto @@ -3884,18 +3841,20 @@ UPB_INLINE void google_protobuf_GeneratedCodeInfo_Annotation_set_end(google_prot #endif /* GOOGLE_PROTOBUF_DESCRIPTOR_PROTO_UPB_H_ */ + +/** upb/def.h ************************************************************/ /* -** Defs are upb's internal representation of the constructs that can appear -** in a .proto file: -** -** - upb_msgdef: describes a "message" construct. -** - upb_fielddef: describes a message field. -** - upb_filedef: describes a .proto file and its defs. -** - upb_enumdef: describes an enum. -** - upb_oneofdef: describes a oneof. -** -** TODO: definitions of services. -*/ + * Defs are upb's internal representation of the constructs that can appear + * in a .proto file: + * + * - upb_msgdef: describes a "message" construct. + * - upb_fielddef: describes a message field. + * - upb_filedef: describes a .proto file and its defs. + * - upb_enumdef: describes an enum. + * - upb_oneofdef: describes a oneof. + * + * TODO: definitions of services. + */ #ifndef UPB_DEF_H_ #define UPB_DEF_H_ @@ -3991,9 +3950,6 @@ const upb_msgdef *upb_fielddef_msgsubdef(const upb_fielddef *f); const upb_enumdef *upb_fielddef_enumsubdef(const upb_fielddef *f); const upb_msglayout_field *upb_fielddef_layout(const upb_fielddef *f); -/* Internal only. */ -uint32_t upb_fielddef_selectorbase(const upb_fielddef *f); - /* upb_oneofdef ***************************************************************/ typedef upb_inttable_iter upb_oneof_iter; @@ -4078,10 +4034,6 @@ UPB_INLINE const upb_fielddef *upb_msgdef_ntofz(const upb_msgdef *m, return upb_msgdef_ntof(m, name, strlen(name)); } -/* Internal-only. */ -size_t upb_msgdef_selectorcount(const upb_msgdef *m); -uint32_t upb_msgdef_submsgfieldcount(const upb_msgdef *m); - /* Lookup of either field or oneof by name. Returns whether either was found. * If the return is true, then the found def will be set, and the non-found * one set to NULL. */ @@ -4196,7 +4148,8 @@ bool _upb_symtab_loaddefinit(upb_symtab *s, const upb_def_init *init); #endif /* __cplusplus */ #endif /* UPB_DEF_H_ */ -/* This file was generated by upbc (the upb compiler) from the input + +/** google/protobuf/descriptor.upbdefs.h ************************************************************//* This file was generated by upbc (the upb compiler) from the input * file: * * google/protobuf/descriptor.proto @@ -4357,6 +4310,7 @@ UPB_INLINE const upb_msgdef *google_protobuf_GeneratedCodeInfo_Annotation_getmsg #endif /* GOOGLE_PROTOBUF_DESCRIPTOR_PROTO_UPBDEFS_H_ */ +/** upb/reflection.h ************************************************************/ #ifndef UPB_REFLECTION_H_ #define UPB_REFLECTION_H_ @@ -4438,17 +4392,9 @@ bool upb_msg_next(const upb_msg *msg, const upb_msgdef *m, const upb_symtab *ext_pool, const upb_fielddef **f, upb_msgval *val, size_t *iter); -/* Adds unknown data (serialized protobuf data) to the given message. The data - * is copied into the message instance. */ -void upb_msg_addunknown(upb_msg *msg, const char *data, size_t len, - upb_arena *arena); - /* Clears all unknown field data from this message and all submessages. */ bool upb_msg_discardunknown(upb_msg *msg, const upb_msgdef *m, int maxdepth); -/* Returns a reference to the message's unknown data. */ -const char *upb_msg_getunknown(const upb_msg *msg, size_t *len); - /** upb_array *****************************************************************/ /* Creates a new array on the given arena that holds elements of this type. */ @@ -4530,6 +4476,7 @@ void upb_mapiter_setvalue(upb_map *map, size_t iter, upb_msgval value); #endif /* UPB_REFLECTION_H_ */ +/** upb/json_decode.h ************************************************************/ #ifndef UPB_JSONDECODE_H_ #define UPB_JSONDECODE_H_ @@ -4552,6 +4499,7 @@ bool upb_json_decode(const char *buf, size_t size, upb_msg *msg, #endif /* UPB_JSONDECODE_H_ */ +/** upb/json_encode.h ************************************************************/ #ifndef UPB_JSONENCODE_H_ #define UPB_JSONENCODE_H_ @@ -4586,27 +4534,39 @@ size_t upb_json_encode(const upb_msg *msg, const upb_msgdef *m, #endif #endif /* UPB_JSONENCODE_H_ */ + +/** upb/port_undef.inc ************************************************************/ /* See port_def.inc. This should #undef all macros #defined there. */ -#undef UPB_MAPTYPE_STRING #undef UPB_SIZE #undef UPB_PTR_AT #undef UPB_READ_ONEOF #undef UPB_WRITE_ONEOF +#undef UPB_MAPTYPE_STRING #undef UPB_INLINE #undef UPB_ALIGN_UP #undef UPB_ALIGN_DOWN #undef UPB_ALIGN_MALLOC #undef UPB_ALIGN_OF +#undef UPB_LIKELY +#undef UPB_UNLIKELY #undef UPB_FORCEINLINE #undef UPB_NOINLINE #undef UPB_NORETURN +#undef UPB_PRINTF #undef UPB_MAX #undef UPB_MIN #undef UPB_UNUSED #undef UPB_ASSUME #undef UPB_ASSERT #undef UPB_UNREACHABLE +#undef UPB_SETJMP +#undef UPB_LONGJMP +#undef UPB_PTRADD +#undef UPB_MUSTTAIL +#undef UPB_FASTTABLE_SUPPORTED +#undef UPB_FASTTABLE +#undef UPB_FASTTABLE_INIT #undef UPB_POISON_MEMORY_REGION #undef UPB_UNPOISON_MEMORY_REGION #undef UPB_ASAN diff --git a/php/src/Google/Protobuf/Internal/Message.php b/php/src/Google/Protobuf/Internal/Message.php index e74943c1ab..19b48f0b50 100644 --- a/php/src/Google/Protobuf/Internal/Message.php +++ b/php/src/Google/Protobuf/Internal/Message.php @@ -240,10 +240,14 @@ class Message $field = $this->desc->getFieldByNumber($number); $oneof = $this->desc->getOneofDecl()[$field->getOneofIndex()]; $oneof_name = $oneof->getName(); - $oneof_field = $this->$oneof_name; - $oneof_field->setValue($value); - $oneof_field->setFieldName($field->getName()); - $oneof_field->setNumber($number); + if ($value === null) { + $this->$oneof_name = new OneofField($oneof); + } else { + $oneof_field = $this->$oneof_name; + $oneof_field->setValue($value); + $oneof_field->setFieldName($field->getName()); + $oneof_field->setNumber($number); + } } protected function whichOneof($oneof_name) diff --git a/php/src/Google/Protobuf/Internal/RepeatedField.php b/php/src/Google/Protobuf/Internal/RepeatedField.php index 350bbb592e..c0331ff38e 100644 --- a/php/src/Google/Protobuf/Internal/RepeatedField.php +++ b/php/src/Google/Protobuf/Internal/RepeatedField.php @@ -177,8 +177,7 @@ class RepeatedField implements \ArrayAccess, \IteratorAggregate, \Countable break; case GPBType::MESSAGE: if (is_null($value)) { - trigger_error("RepeatedField element cannot be null.", - E_USER_ERROR); + throw new \TypeError("RepeatedField element cannot be null."); } GPBUtil::checkMessage($value, $this->klass); break; diff --git a/php/tests/ArrayTest.php b/php/tests/ArrayTest.php index b687085299..9e8fcb8bea 100644 --- a/php/tests/ArrayTest.php +++ b/php/tests/ArrayTest.php @@ -602,6 +602,17 @@ class ArrayTest extends TestBase $this->assertLessThan($start, $end); } + ######################################################### + # Test incorrect types + ######################################################### + + public function testAppendNull() + { + $this->expectException(TypeError::class); + $arr = new RepeatedField(GPBType::MESSAGE, TestMessage::class); + $arr[] = null; + } + ######################################################### # Test equality ######################################################### diff --git a/php/tests/EncodeDecodeTest.php b/php/tests/EncodeDecodeTest.php index 273010e2ed..ac01ca17a3 100644 --- a/php/tests/EncodeDecodeTest.php +++ b/php/tests/EncodeDecodeTest.php @@ -940,6 +940,14 @@ class EncodeDecodeTest extends TestBase $this->expectFields($to); } + public function testJsonEncodeNullSubMessage() + { + $from = new TestMessage(); + $from->setOptionalMessage(null); + $data = $from->serializeToJsonString(); + $this->assertEquals("{}", $data); + } + public function testDecodeDuration() { $m = new Google\Protobuf\Duration(); diff --git a/php/tests/GeneratedClassTest.php b/php/tests/GeneratedClassTest.php index 5c0f0c70d0..9e176e8341 100644 --- a/php/tests/GeneratedClassTest.php +++ b/php/tests/GeneratedClassTest.php @@ -476,10 +476,12 @@ class GeneratedClassTest extends TestBase $sub_m->setA(1); $m->setOptionalMessage($sub_m); $this->assertSame(1, $m->getOptionalMessage()->getA()); + $this->assertTrue($m->hasOptionalMessage()); $null = null; $m->setOptionalMessage($null); $this->assertNull($m->getOptionalMessage()); + $this->assertFalse($m->hasOptionalMessage()); } public function testLegacyMessageField() @@ -1748,6 +1750,13 @@ class GeneratedClassTest extends TestBase $m->clear(); $this->assertFalse($m->hasOneofInt32()); $this->assertFalse($m->hasOneofString()); + + $sub_m = new Sub(); + $sub_m->setA(1); + $m->setOneofMessage($sub_m); + $this->assertTrue($m->hasOneofMessage()); + $m->setOneofMessage(null); + $this->assertFalse($m->hasOneofMessage()); } ######################################################### diff --git a/ruby/Rakefile b/ruby/Rakefile index 11397b0eb9..221e9b507f 100644 --- a/ruby/Rakefile +++ b/ruby/Rakefile @@ -18,6 +18,18 @@ well_known_protos = %w[ google/protobuf/wrappers.proto ] +test_protos = %w[ + tests/basic_test.proto + tests/basic_test_proto2.proto + tests/generated_code.proto + tests/generated_code_proto2.proto + tests/multi_level_nesting_test.proto + tests/test_import.proto + tests/test_import_proto2.proto + tests/test_ruby_package.proto + tests/test_ruby_package_proto2.proto +] + # These are omitted for now because we don't support proto2. proto2_protos = %w[ google/protobuf/descriptor.proto @@ -43,6 +55,14 @@ unless ENV['IN_DOCKER'] == 'true' sh "#{protoc_command} -I../src --ruby_out=lib #{input_file}" end end + + test_protos.each do |proto_file| + output_file = proto_file.sub(/\.proto$/, "_pb.rb") + genproto_output << output_file + file output_file => proto_file do |file_task| + sh "#{protoc_command} -I../src -I. --ruby_out=. #{proto_file}" + end + end end if RUBY_PLATFORM == "java" @@ -100,59 +120,6 @@ else end end - -# Proto for tests. -genproto_output << "tests/generated_code.rb" -genproto_output << "tests/generated_code_proto2.rb" -genproto_output << "tests/test_import.rb" -genproto_output << "tests/test_import_proto2.rb" -genproto_output << "tests/test_ruby_package.rb" -genproto_output << "tests/test_ruby_package_proto2.rb" -genproto_output << "tests/basic_test.rb" -genproto_output << "tests/basic_test_proto2.rb" -genproto_output << "tests/multi_level_nesting_test.rb" -genproto_output << "tests/wrappers.rb" - -file "tests/generated_code.rb" => "tests/generated_code.proto" do |file_task| - sh "#{protoc_command} --ruby_out=. tests/generated_code.proto" -end - -file "tests/generated_code_proto2.rb" => "tests/generated_code_proto2.proto" do |file_task| - sh "#{protoc_command} --ruby_out=. tests/generated_code_proto2.proto" -end - -file "tests/test_import.rb" => "tests/test_import.proto" do |file_task| - sh "#{protoc_command} --ruby_out=. tests/test_import.proto" -end - -file "tests/test_import_proto2.rb" => "tests/test_import_proto2.proto" do |file_task| - sh "#{protoc_command} --ruby_out=. tests/test_import_proto2.proto" -end - -file "tests/test_ruby_package.rb" => "tests/test_ruby_package.proto" do |file_task| - sh "#{protoc_command} --ruby_out=. tests/test_ruby_package.proto" -end - -file "tests/test_ruby_package_proto2.rb" => "tests/test_ruby_package_proto2.proto" do |file_task| - sh "#{protoc_command} --ruby_out=. tests/test_ruby_package_proto2.proto" -end - -file "tests/basic_test.rb" => "tests/basic_test.proto" do |file_task| - sh "#{protoc_command} --experimental_allow_proto3_optional -I../src -I. --ruby_out=. tests/basic_test.proto" -end - -file "tests/basic_test_proto2.rb" => "tests/basic_test_proto2.proto" do |file_task| - sh "#{protoc_command} -I../src -I. --ruby_out=. tests/basic_test_proto2.proto" -end - -file "tests/multi_level_nesting_test.rb" => "tests/multi_level_nesting_test.proto" do |file_task| - sh "#{protoc_command} -I../src -I. --ruby_out=. tests/multi_level_nesting_test.proto" -end - -file "tests/wrappers.rb" => "../src/google/protobuf/wrappers.proto" do |file_task| - sh "#{protoc_command} -I../src -I. --ruby_out=tests ../src/google/protobuf/wrappers.proto" -end - task :genproto => genproto_output task :clean do @@ -162,7 +129,7 @@ end Gem::PackageTask.new(spec) do |pkg| end -Rake::TestTask.new(:test => :build) do |t| +Rake::TestTask.new(:test => [:build, :genproto]) do |t| t.test_files = FileList["tests/*.rb"].exclude("tests/gc_test.rb", "tests/common_tests.rb") end @@ -172,7 +139,7 @@ Rake::TestTask.new(:gc_test => :build) do |t| t.test_files = FileList["tests/gc_test.rb"] end -task :build => [:clean, :compile, :genproto] +task :build => [:clean, :genproto, :compile] task :default => [:build] # vim:sw=2:et diff --git a/ruby/ext/google/protobuf_c/message.c b/ruby/ext/google/protobuf_c/message.c index ffdae6a401..c1b9b86330 100644 --- a/ruby/ext/google/protobuf_c/message.c +++ b/ruby/ext/google/protobuf_c/message.c @@ -794,6 +794,14 @@ static VALUE Message_CreateHash(const upb_msg *msg, const upb_msgdef *m) { VALUE msg_value; VALUE msg_key; + if (!is_proto2 && upb_fielddef_issubmsg(field) && + !upb_fielddef_isseq(field) && !upb_msg_has(msg, field)) { + // TODO: Legacy behavior, remove when we fix the is_proto2 differences. + msg_key = ID2SYM(rb_intern(upb_fielddef_name(field))); + rb_hash_aset(hash, msg_key, Qnil); + continue; + } + // Do not include fields that are not present (oneof or optional fields). if (is_proto2 && upb_fielddef_haspresence(field) && !upb_msg_has(msg, field)) { diff --git a/ruby/ext/google/protobuf_c/ruby-upb.c b/ruby/ext/google/protobuf_c/ruby-upb.c index 61762fcd99..b1b701b4ee 100755 --- a/ruby/ext/google/protobuf_c/ruby-upb.c +++ b/ruby/ext/google/protobuf_c/ruby-upb.c @@ -1,27 +1,54 @@ /* Amalgamated source file */ #include "ruby-upb.h" /* -* This is where we define macros used across upb. -* -* All of these macros are undef'd in port_undef.inc to avoid leaking them to -* users. -* -* The correct usage is: -* -* #include "upb/foobar.h" -* #include "upb/baz.h" -* -* // MUST be last included header. -* #include "upb/port_def.inc" -* -* // Code for this file. -* // <...> -* -* // Can be omitted for .c files, required for .h. -* #include "upb/port_undef.inc" -* -* This file is private and must not be included by users! -*/ + * Copyright (c) 2009-2021, Google LLC + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Google LLC nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * This is where we define macros used across upb. + * + * All of these macros are undef'd in port_undef.inc to avoid leaking them to + * users. + * + * The correct usage is: + * + * #include "upb/foobar.h" + * #include "upb/baz.h" + * + * // MUST be last included header. + * #include "upb/port_def.inc" + * + * // Code for this file. + * // <...> + * + * // Can be omitted for .c files, required for .h. + * #include "upb/port_undef.inc" + * + * This file is private and must not be included by users! + */ #if !((defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L) || \ (defined(__cplusplus) && __cplusplus >= 201103L) || \ @@ -137,9 +164,40 @@ #define UPB_LONGJMP(buf, val) longjmp(buf, val) #endif +/* UPB_PTRADD(ptr, ofs): add pointer while avoiding "NULL + 0" UB */ +#define UPB_PTRADD(ptr, ofs) ((ofs) ? (ptr) + (ofs) : (ptr)) + /* Configure whether fasttable is switched on or not. *************************/ -#if defined(__x86_64__) && defined(__GNUC__) +#ifdef __has_attribute +#define UPB_HAS_ATTRIBUTE(x) __has_attribute(x) +#else +#define UPB_HAS_ATTRIBUTE(x) 0 +#endif + +#if UPB_HAS_ATTRIBUTE(musttail) +#define UPB_MUSTTAIL __attribute__((musttail)) +#else +#define UPB_MUSTTAIL +#endif + +#undef UPB_HAS_ATTRIBUTE + +/* This check is not fully robust: it does not require that we have "musttail" + * support available. We need tail calls to avoid consuming arbitrary amounts + * of stack space. + * + * GCC/Clang can mostly be trusted to generate tail calls as long as + * optimization is enabled, but, debug builds will not generate tail calls + * unless "musttail" is available. + * + * We should probably either: + * 1. require that the compiler supports musttail. + * 2. add some fallback code for when musttail isn't available (ie. return + * instead of tail calling). This is safe and portable, but this comes at + * a CPU cost. + */ +#if (defined(__x86_64__) || defined(__aarch64__)) && defined(__GNUC__) #define UPB_FASTTABLE_SUPPORTED 1 #else #define UPB_FASTTABLE_SUPPORTED 0 @@ -150,7 +208,7 @@ * for example for testing or benchmarking. */ #if defined(UPB_ENABLE_FASTTABLE) #if !UPB_FASTTABLE_SUPPORTED -#error fasttable is x86-64 + Clang/GCC only +#error fasttable is x86-64/ARM64 only and requires GCC or Clang. #endif #define UPB_FASTTABLE 1 /* Define UPB_TRY_ENABLE_FASTTABLE to use fasttable if possible. @@ -194,8 +252,9 @@ void __asan_unpoison_memory_region(void const volatile *addr, size_t size); ((void)(addr), (void)(size)) #define UPB_UNPOISON_MEMORY_REGION(addr, size) \ ((void)(addr), (void)(size)) -#endif +#endif +/** upb/decode.c ************************************************************/ #include #include @@ -891,7 +950,7 @@ bool _upb_decode(const char *buf, size_t size, void *msg, state.end_group = DECODE_NOGROUP; state.arena.head = arena->head; state.arena.last_size = arena->last_size; - state.arena.cleanups = arena->cleanups; + state.arena.cleanup_metadata = arena->cleanup_metadata; state.arena.parent = arena; if (UPB_UNLIKELY(UPB_SETJMP(state.err))) { @@ -902,7 +961,7 @@ bool _upb_decode(const char *buf, size_t size, void *msg, arena->head.ptr = state.arena.head.ptr; arena->head.end = state.arena.head.end; - arena->cleanups = state.arena.cleanups; + arena->cleanup_metadata = state.arena.cleanup_metadata; return ok; } @@ -911,6 +970,8 @@ bool _upb_decode(const char *buf, size_t size, void *msg, #undef OP_VARPCK_LG2 #undef OP_STRING #undef OP_SUBMSG + +/** upb/encode.c ************************************************************/ /* We encode backwards, to avoid pre-computing lengths (one-pass encode). */ @@ -1386,7 +1447,7 @@ char *upb_encode_ex(const void *msg, const upb_msglayout *l, int options, return ret; } - +/** upb/msg.c ************************************************************/ /** upb_msg *******************************************************************/ @@ -1517,7 +1578,7 @@ upb_map *_upb_map_new(upb_arena *a, size_t key_size, size_t value_size) { return NULL; } - upb_strtable_init2(&map->table, UPB_CTYPE_INT32, 4, upb_arena_alloc(a)); + upb_strtable_init(&map->table, 4, a); map->key_size = key_size; map->val_size = value_size; @@ -1638,11 +1699,13 @@ bool _upb_mapsorter_pushmap(_upb_mapsorter *s, upb_descriptortype_t key_type, qsort(&s->entries[sorted->start], map_size, sizeof(*s->entries), compar); return true; } + +/** upb/table.c ************************************************************/ /* -** upb_table Implementation -** -** Implementation is heavily inspired by Lua's ltable.c. -*/ + * upb_table Implementation + * + * Implementation is heavily inspired by Lua's ltable.c. + */ #include @@ -1663,9 +1726,15 @@ static const double MAX_LOAD = 0.85; * cache effects). The lower this is, the more memory we'll use. */ static const double MIN_DENSITY = 0.1; -bool is_pow2(uint64_t v) { return v == 0 || (v & (v - 1)) == 0; } +static bool is_pow2(uint64_t v) { return v == 0 || (v & (v - 1)) == 0; } -int log2ceil(uint64_t v) { +static upb_value _upb_value_val(uint64_t val) { + upb_value ret; + _upb_value_setval(&ret, val); + return ret; +} + +static int log2ceil(uint64_t v) { int ret = 0; bool pow2 = is_pow2(v); while (v >>= 1) ret++; @@ -1673,11 +1742,7 @@ int log2ceil(uint64_t v) { return UPB_MIN(UPB_MAXARRSIZE, ret); } -char *upb_strdup(const char *s, upb_alloc *a) { - return upb_strdup2(s, strlen(s), a); -} - -char *upb_strdup2(const char *s, size_t len, upb_alloc *a) { +char *upb_strdup2(const char *s, size_t len, upb_arena *a) { size_t n; char *p; @@ -1686,7 +1751,7 @@ char *upb_strdup2(const char *s, size_t len, upb_alloc *a) { /* Always null-terminate, even if binary data; but don't rely on the input to * have a null-terminating byte since it may be a raw binary buffer. */ n = len + 1; - p = upb_malloc(a, n); + p = upb_arena_malloc(a, n); if (p) { memcpy(p, s, len); p[len] = 0; @@ -1721,16 +1786,24 @@ typedef bool eqlfunc_t(upb_tabkey k1, lookupkey_t k2); /* Base table (shared code) ***************************************************/ -/* For when we need to cast away const. */ -static upb_tabent *mutable_entries(upb_table *t) { - return (upb_tabent*)t->entries; +static uint32_t upb_inthash(uintptr_t key) { + return (uint32_t)key; +} + +static const upb_tabent *upb_getentry(const upb_table *t, uint32_t hash) { + return t->entries + (hash & t->mask); +} + +static bool upb_arrhas(upb_tabval key) { + return key.val != (uint64_t)-1; } + static bool isfull(upb_table *t) { return t->count == t->max_count; } -static bool init(upb_table *t, uint8_t size_lg2, upb_alloc *a) { +static bool init(upb_table *t, uint8_t size_lg2, upb_arena *a) { size_t bytes; t->count = 0; @@ -1739,21 +1812,17 @@ static bool init(upb_table *t, uint8_t size_lg2, upb_alloc *a) { t->max_count = upb_table_size(t) * MAX_LOAD; bytes = upb_table_size(t) * sizeof(upb_tabent); if (bytes > 0) { - t->entries = upb_malloc(a, bytes); + t->entries = upb_arena_malloc(a, bytes); if (!t->entries) return false; - memset(mutable_entries(t), 0, bytes); + memset(t->entries, 0, bytes); } else { t->entries = NULL; } return true; } -static void uninit(upb_table *t, upb_alloc *a) { - upb_free(a, mutable_entries(t)); -} - static upb_tabent *emptyent(upb_table *t, upb_tabent *e) { - upb_tabent *begin = mutable_entries(t); + upb_tabent *begin = t->entries; upb_tabent *end = begin + upb_table_size(t); for (e = e + 1; e < end; e++) { if (upb_tabent_isempty(e)) return e; @@ -1903,9 +1972,9 @@ static size_t begin(const upb_table *t) { /* A simple "subclass" of upb_table that only adds a hash function for strings. */ -static upb_tabkey strcopy(lookupkey_t k2, upb_alloc *a) { +static upb_tabkey strcopy(lookupkey_t k2, upb_arena *a) { uint32_t len = (uint32_t) k2.str.len; - char *str = upb_malloc(a, k2.str.len + sizeof(uint32_t) + 1); + char *str = upb_arena_malloc(a, k2.str.len + sizeof(uint32_t) + 1); if (str == NULL) return 0; memcpy(str, &len, sizeof(uint32_t)); if (k2.str.len) memcpy(str + sizeof(uint32_t), k2.str.str, k2.str.len); @@ -1929,9 +1998,7 @@ static bool streql(upb_tabkey k1, lookupkey_t k2) { return len == k2.str.len && (len == 0 || memcmp(str, k2.str.str, len) == 0); } -bool upb_strtable_init2(upb_strtable *t, upb_ctype_t ctype, - size_t expected_size, upb_alloc *a) { - UPB_UNUSED(ctype); /* TODO(haberman): rm */ +bool upb_strtable_init(upb_strtable *t, size_t expected_size, upb_arena *a) { // Multiply by approximate reciprocal of MAX_LOAD (0.85), with pow2 denominator. size_t need_entries = (expected_size + 1) * 1204 / 1024; UPB_ASSERT(need_entries >= expected_size * 0.85); @@ -1945,14 +2012,7 @@ void upb_strtable_clear(upb_strtable *t) { memset((char*)t->t.entries, 0, bytes); } -void upb_strtable_uninit2(upb_strtable *t, upb_alloc *a) { - size_t i; - for (i = 0; i < upb_table_size(&t->t); i++) - upb_free(a, (void*)t->t.entries[i].key); - uninit(&t->t, a); -} - -bool upb_strtable_resize(upb_strtable *t, size_t size_lg2, upb_alloc *a) { +bool upb_strtable_resize(upb_strtable *t, size_t size_lg2, upb_arena *a) { upb_strtable new_table; upb_strtable_iter i; @@ -1961,17 +2021,15 @@ bool upb_strtable_resize(upb_strtable *t, size_t size_lg2, upb_alloc *a) { upb_strtable_begin(&i, t); for ( ; !upb_strtable_done(&i); upb_strtable_next(&i)) { upb_strview key = upb_strtable_iter_key(&i); - upb_strtable_insert3( - &new_table, key.data, key.size, - upb_strtable_iter_value(&i), a); + upb_strtable_insert(&new_table, key.data, key.size, + upb_strtable_iter_value(&i), a); } - upb_strtable_uninit2(t, a); *t = new_table; return true; } -bool upb_strtable_insert3(upb_strtable *t, const char *k, size_t len, - upb_value v, upb_alloc *a) { +bool upb_strtable_insert(upb_strtable *t, const char *k, size_t len, + upb_value v, upb_arena *a) { lookupkey_t key; upb_tabkey tabkey; uint32_t hash; @@ -1998,19 +2056,11 @@ bool upb_strtable_lookup2(const upb_strtable *t, const char *key, size_t len, return lookup(&t->t, strkey2(key, len), v, hash, &streql); } -bool upb_strtable_remove3(upb_strtable *t, const char *key, size_t len, - upb_value *val, upb_alloc *alloc) { +bool upb_strtable_remove(upb_strtable *t, const char *key, size_t len, + upb_value *val) { uint32_t hash = table_hash(key, len); upb_tabkey tabkey; - if (rm(&t->t, strkey2(key, len), val, &tabkey, hash, &streql)) { - if (alloc) { - /* Arena-based allocs don't need to free and won't pass this. */ - upb_free(alloc, (void*)tabkey); - } - return true; - } else { - return false; - } + return rm(&t->t, strkey2(key, len), val, &tabkey, hash, &streql); } /* Iteration */ @@ -2108,7 +2158,7 @@ static void check(upb_inttable *t) { } bool upb_inttable_sizedinit(upb_inttable *t, size_t asize, int hsize_lg2, - upb_alloc *a) { + upb_arena *a) { size_t array_bytes; if (!init(&t->t, hsize_lg2, a)) return false; @@ -2117,9 +2167,8 @@ bool upb_inttable_sizedinit(upb_inttable *t, size_t asize, int hsize_lg2, t->array_size = UPB_MAX(1, asize); t->array_count = 0; array_bytes = t->array_size * sizeof(upb_value); - t->array = upb_malloc(a, array_bytes); + t->array = upb_arena_malloc(a, array_bytes); if (!t->array) { - uninit(&t->t, a); return false; } memset(mutable_array(t), 0xff, array_bytes); @@ -2127,18 +2176,12 @@ bool upb_inttable_sizedinit(upb_inttable *t, size_t asize, int hsize_lg2, return true; } -bool upb_inttable_init2(upb_inttable *t, upb_ctype_t ctype, upb_alloc *a) { - UPB_UNUSED(ctype); /* TODO(haberman): rm */ +bool upb_inttable_init(upb_inttable *t, upb_arena *a) { return upb_inttable_sizedinit(t, 0, 4, a); } -void upb_inttable_uninit2(upb_inttable *t, upb_alloc *a) { - uninit(&t->t, a); - upb_free(a, mutable_array(t)); -} - -bool upb_inttable_insert2(upb_inttable *t, uintptr_t key, upb_value val, - upb_alloc *a) { +bool upb_inttable_insert(upb_inttable *t, uintptr_t key, upb_value val, + upb_arena *a) { upb_tabval tabval; tabval.val = val.val; UPB_ASSERT(upb_arrhas(tabval)); /* This will reject (uint64_t)-1. Fix this. */ @@ -2169,7 +2212,6 @@ bool upb_inttable_insert2(upb_inttable *t, uintptr_t key, upb_value val, UPB_ASSERT(t->t.count == new_table.count); - uninit(&t->t, a); t->t = new_table; } insert(&t->t, intkey(key), key, val, upb_inthash(key), &inthash, &inteql); @@ -2213,21 +2255,7 @@ bool upb_inttable_remove(upb_inttable *t, uintptr_t key, upb_value *val) { return success; } -bool upb_inttable_insertptr2(upb_inttable *t, const void *key, upb_value val, - upb_alloc *a) { - return upb_inttable_insert2(t, (uintptr_t)key, val, a); -} - -bool upb_inttable_lookupptr(const upb_inttable *t, const void *key, - upb_value *v) { - return upb_inttable_lookup(t, (uintptr_t)key, v); -} - -bool upb_inttable_removeptr(upb_inttable *t, const void *key, upb_value *val) { - return upb_inttable_remove(t, (uintptr_t)key, val); -} - -void upb_inttable_compact2(upb_inttable *t, upb_alloc *a) { +void upb_inttable_compact(upb_inttable *t, upb_arena *a) { /* A power-of-two histogram of the table keys. */ size_t counts[UPB_MAXARRSIZE + 1] = {0}; @@ -2275,12 +2303,11 @@ void upb_inttable_compact2(upb_inttable *t, upb_alloc *a) { upb_inttable_begin(&i, t); for (; !upb_inttable_done(&i); upb_inttable_next(&i)) { uintptr_t k = upb_inttable_iter_key(&i); - upb_inttable_insert2(&new_t, k, upb_inttable_iter_value(&i), a); + upb_inttable_insert(&new_t, k, upb_inttable_iter_value(&i), a); } UPB_ASSERT(new_t.array_size == arr_size); UPB_ASSERT(new_t.t.size_lg2 == hashsize_lg2); } - upb_inttable_uninit2(t, a); *t = new_t; } @@ -2354,6 +2381,7 @@ bool upb_inttable_iter_isequal(const upb_inttable_iter *i1, i1->array_part == i2->array_part; } +/** upb/upb.c ************************************************************/ #include #include @@ -2420,6 +2448,19 @@ static void *upb_global_allocfunc(upb_alloc *alloc, void *ptr, size_t oldsize, } } +static uint32_t *upb_cleanup_pointer(uintptr_t cleanup_metadata) { + return (uint32_t *)(cleanup_metadata & ~0x1); +} + +static bool upb_cleanup_has_initial_block(uintptr_t cleanup_metadata) { + return cleanup_metadata & 0x1; +} + +static uintptr_t upb_cleanup_metadata(uint32_t *cleanup, + bool has_initial_block) { + return (uintptr_t)cleanup | has_initial_block; +} + upb_alloc upb_alloc_global = {&upb_global_allocfunc}; /* upb_arena ******************************************************************/ @@ -2465,7 +2506,8 @@ static void upb_arena_addblock(upb_arena *a, upb_arena *root, void *ptr, a->head.ptr = UPB_PTR_AT(block, memblock_reserve, char); a->head.end = UPB_PTR_AT(block, size, char); - a->cleanups = &block->cleanups; + a->cleanup_metadata = upb_cleanup_metadata( + &block->cleanups, upb_cleanup_has_initial_block(a->cleanup_metadata)); UPB_POISON_MEMORY_REGION(a->head.ptr, a->head.end - a->head.ptr); } @@ -2513,6 +2555,7 @@ upb_arena *arena_initslow(void *mem, size_t n, upb_alloc *alloc) { a->refcount = 1; a->freelist = NULL; a->freelist_tail = NULL; + a->cleanup_metadata = upb_cleanup_metadata(NULL, false); upb_arena_addblock(a, a, mem, n); @@ -2540,7 +2583,7 @@ upb_arena *upb_arena_init(void *mem, size_t n, upb_alloc *alloc) { a->head.ptr = mem; a->head.end = UPB_PTR_AT(mem, n - sizeof(*a), char); a->freelist = NULL; - a->cleanups = NULL; + a->cleanup_metadata = upb_cleanup_metadata(NULL, true); return a; } @@ -2575,15 +2618,17 @@ void upb_arena_free(upb_arena *a) { bool upb_arena_addcleanup(upb_arena *a, void *ud, upb_cleanup_func *func) { cleanup_ent *ent; + uint32_t* cleanups = upb_cleanup_pointer(a->cleanup_metadata); - if (!a->cleanups || _upb_arenahas(a) < sizeof(cleanup_ent)) { + if (!cleanups || _upb_arenahas(a) < sizeof(cleanup_ent)) { if (!upb_arena_allocblock(a, 128)) return false; /* Out of memory. */ UPB_ASSERT(_upb_arenahas(a) >= sizeof(cleanup_ent)); + cleanups = upb_cleanup_pointer(a->cleanup_metadata); } a->head.end -= sizeof(cleanup_ent); ent = (cleanup_ent*)a->head.end; - (*a->cleanups)++; + (*cleanups)++; UPB_UNPOISON_MEMORY_REGION(ent, sizeof(cleanup_ent)); ent->cleanup = func; @@ -2592,11 +2637,18 @@ bool upb_arena_addcleanup(upb_arena *a, void *ud, upb_cleanup_func *func) { return true; } -void upb_arena_fuse(upb_arena *a1, upb_arena *a2) { +bool upb_arena_fuse(upb_arena *a1, upb_arena *a2) { upb_arena *r1 = arena_findroot(a1); upb_arena *r2 = arena_findroot(a2); - if (r1 == r2) return; /* Already fused. */ + if (r1 == r2) return true; /* Already fused. */ + + /* Do not fuse initial blocks since we cannot lifetime extend them. */ + if (upb_cleanup_has_initial_block(r1->cleanup_metadata)) return false; + if (upb_cleanup_has_initial_block(r2->cleanup_metadata)) return false; + + /* Only allow fuse with a common allocator */ + if (r1->block_alloc != r2->block_alloc) return false; /* We want to join the smaller tree to the larger tree. * So swap first if they are backwards. */ @@ -2614,12 +2666,15 @@ void upb_arena_fuse(upb_arena *a1, upb_arena *a2) { r1->freelist = r2->freelist; } r2->parent = r1; + return true; } -// Fast decoder: ~3x the speed of decode.c, but x86-64 specific. + +/** upb/decode_fast.c ************************************************************/ +// Fast decoder: ~3x the speed of decode.c, but requires x86-64/ARM64. // Also the table size grows by 2x. // -// Could potentially be ported to ARM64 or other 64-bit archs that pass at -// least six arguments in registers. +// Could potentially be ported to other 64-bit archs that pass at least six +// arguments in registers and have 8 unused high bits in pointers. // // The overall design is to create specialized functions for every possible // field type (eg. oneof boolean field with a 1 byte tag) and then dispatch @@ -2639,8 +2694,10 @@ void upb_arena_fuse(upb_arena *a1, upb_arena *a2) { #define UPB_PARSE_ARGS d, ptr, msg, table, hasbits, data -#define RETURN_GENERIC(m) \ - /* fprintf(stderr, m); */ \ +#define RETURN_GENERIC(m) \ + /* Uncomment either of these for debugging purposes. */ \ + /* fprintf(stderr, m); */ \ + /*__builtin_trap(); */ \ return fastdecode_generic(d, ptr, msg, table, hasbits, 0); typedef enum { @@ -2651,21 +2708,18 @@ typedef enum { } upb_card; UPB_NOINLINE -static const char *fastdecode_isdonefallback(upb_decstate *d, const char *ptr, - upb_msg *msg, intptr_t table, - uint64_t hasbits, int overrun) { +static const char *fastdecode_isdonefallback(UPB_PARSE_PARAMS) { + int overrun = data; ptr = decode_isdonefallback_inl(d, ptr, overrun); if (ptr == NULL) { return fastdecode_err(d); } - uint16_t tag = fastdecode_loadtag(ptr); - return fastdecode_tagdispatch(d, ptr, msg, table, hasbits, tag); + data = fastdecode_loadtag(ptr); + UPB_MUSTTAIL return fastdecode_tagdispatch(UPB_PARSE_ARGS); } UPB_FORCEINLINE -static const char *fastdecode_dispatch(upb_decstate *d, const char *ptr, - upb_msg *msg, intptr_t table, - uint64_t hasbits) { +static const char *fastdecode_dispatch(UPB_PARSE_PARAMS) { if (UPB_UNLIKELY(ptr >= d->limit_ptr)) { int overrun = ptr - d->end; if (UPB_LIKELY(overrun == d->limit)) { @@ -2673,21 +2727,22 @@ static const char *fastdecode_dispatch(upb_decstate *d, const char *ptr, *(uint32_t*)msg |= hasbits; // Sync hasbits. return ptr; } else { - return fastdecode_isdonefallback(d, ptr, msg, table, hasbits, overrun); + data = overrun; + UPB_MUSTTAIL return fastdecode_isdonefallback(UPB_PARSE_ARGS); } } // Read two bytes of tag data (for a one-byte tag, the high byte is junk). - uint16_t tag = fastdecode_loadtag(ptr); - return fastdecode_tagdispatch(d, ptr, msg, table, hasbits, tag); + data = fastdecode_loadtag(ptr); + UPB_MUSTTAIL return fastdecode_tagdispatch(UPB_PARSE_ARGS); } UPB_FORCEINLINE -static bool fastdecode_checktag(uint64_t data, int tagbytes) { +static bool fastdecode_checktag(uint16_t data, int tagbytes) { if (tagbytes == 1) { return (data & 0xff) == 0; } else { - return (data & 0xffff) == 0; + return data == 0; } } @@ -2911,6 +2966,14 @@ static bool fastdecode_flippacked(uint64_t *data, int tagbytes) { return fastdecode_checktag(*data, tagbytes); } +#define FASTDECODE_CHECKPACKED(tagbytes, card, func) \ + if (UPB_UNLIKELY(!fastdecode_checktag(data, tagbytes))) { \ + if (card == CARD_r && fastdecode_flippacked(&data, tagbytes)) { \ + UPB_MUSTTAIL return func(UPB_PARSE_ARGS); \ + } \ + RETURN_GENERIC("packed check tag mismatch\n"); \ + } + /* varint fields **************************************************************/ UPB_FORCEINLINE @@ -2953,57 +3016,50 @@ done: return ptr; } -UPB_FORCEINLINE -static const char *fastdecode_unpackedvarint(UPB_PARSE_PARAMS, int tagbytes, - int valbytes, upb_card card, - bool zigzag, - _upb_field_parser *packed) { - uint64_t val; - void *dst; - fastdecode_arr farr; - - if (UPB_UNLIKELY(!fastdecode_checktag(data, tagbytes))) { - if (card == CARD_r && fastdecode_flippacked(&data, tagbytes)) { - return packed(UPB_PARSE_ARGS); - } - RETURN_GENERIC("varint field tag mismatch\n"); - } - - dst = - fastdecode_getfield(d, ptr, msg, &data, &hasbits, &farr, valbytes, card); - if (card == CARD_r) { - if (UPB_UNLIKELY(!dst)) { - RETURN_GENERIC("need array resize\n"); - } - } - -again: - if (card == CARD_r) { - dst = fastdecode_resizearr(d, dst, &farr, valbytes); - } - - ptr += tagbytes; - ptr = fastdecode_varint64(ptr, &val); - if (ptr == NULL) return fastdecode_err(d); - val = fastdecode_munge(val, valbytes, zigzag); - memcpy(dst, &val, valbytes); - - if (card == CARD_r) { - fastdecode_nextret ret = - fastdecode_nextrepeated(d, dst, &ptr, &farr, data, tagbytes, valbytes); - switch (ret.next) { - case FD_NEXT_SAMEFIELD: - dst = ret.dst; - goto again; - case FD_NEXT_OTHERFIELD: - return fastdecode_tagdispatch(d, ptr, msg, table, hasbits, ret.tag); - case FD_NEXT_ATLIMIT: - return ptr; - } - } - - return fastdecode_dispatch(d, ptr, msg, table, hasbits); -} +#define FASTDECODE_UNPACKEDVARINT(d, ptr, msg, table, hasbits, data, tagbytes, \ + valbytes, card, zigzag, packed) \ + uint64_t val; \ + void *dst; \ + fastdecode_arr farr; \ + \ + FASTDECODE_CHECKPACKED(tagbytes, card, packed); \ + \ + dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, &farr, valbytes, \ + card); \ + if (card == CARD_r) { \ + if (UPB_UNLIKELY(!dst)) { \ + RETURN_GENERIC("need array resize\n"); \ + } \ + } \ + \ + again: \ + if (card == CARD_r) { \ + dst = fastdecode_resizearr(d, dst, &farr, valbytes); \ + } \ + \ + ptr += tagbytes; \ + ptr = fastdecode_varint64(ptr, &val); \ + if (ptr == NULL) \ + return fastdecode_err(d); \ + val = fastdecode_munge(val, valbytes, zigzag); \ + memcpy(dst, &val, valbytes); \ + \ + if (card == CARD_r) { \ + fastdecode_nextret ret = fastdecode_nextrepeated( \ + d, dst, &ptr, &farr, data, tagbytes, valbytes); \ + switch (ret.next) { \ + case FD_NEXT_SAMEFIELD: \ + dst = ret.dst; \ + goto again; \ + case FD_NEXT_OTHERFIELD: \ + data = ret.tag; \ + UPB_MUSTTAIL return fastdecode_tagdispatch(UPB_PARSE_ARGS); \ + case FD_NEXT_ATLIMIT: \ + return ptr; \ + } \ + } \ + \ + UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS); typedef struct { uint8_t valbytes; @@ -3032,50 +3088,37 @@ static const char *fastdecode_topackedvarint(upb_decstate *d, const char *ptr, return ptr; } -UPB_FORCEINLINE -static const char *fastdecode_packedvarint(UPB_PARSE_PARAMS, int tagbytes, - int valbytes, bool zigzag, - _upb_field_parser *unpacked) { - fastdecode_varintdata ctx = {valbytes, zigzag}; - - if (UPB_UNLIKELY(!fastdecode_checktag(data, tagbytes))) { - if (fastdecode_flippacked(&data, tagbytes)) { - return unpacked(UPB_PARSE_ARGS); - } else { - RETURN_GENERIC("varint field tag mismatch\n"); - } - } - - ctx.dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, &ctx.farr, - valbytes, CARD_r); - if (UPB_UNLIKELY(!ctx.dst)) { - RETURN_GENERIC("need array resize\n"); - } - - ptr += tagbytes; - ptr = fastdecode_delimited(d, ptr, &fastdecode_topackedvarint, &ctx); - - if (UPB_UNLIKELY(ptr == NULL)) { - return fastdecode_err(d); +#define FASTDECODE_PACKEDVARINT(d, ptr, msg, table, hasbits, data, tagbytes, \ + valbytes, zigzag, unpacked) \ + fastdecode_varintdata ctx = {valbytes, zigzag}; \ + \ + FASTDECODE_CHECKPACKED(tagbytes, CARD_r, unpacked); \ + \ + ctx.dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, &ctx.farr, \ + valbytes, CARD_r); \ + if (UPB_UNLIKELY(!ctx.dst)) { \ + RETURN_GENERIC("need array resize\n"); \ + } \ + \ + ptr += tagbytes; \ + ptr = fastdecode_delimited(d, ptr, &fastdecode_topackedvarint, &ctx); \ + \ + if (UPB_UNLIKELY(ptr == NULL)) { \ + return fastdecode_err(d); \ + } \ + \ + UPB_MUSTTAIL return fastdecode_dispatch(d, ptr, msg, table, hasbits, 0); + +#define FASTDECODE_VARINT(d, ptr, msg, table, hasbits, data, tagbytes, \ + valbytes, card, zigzag, unpacked, packed) \ + if (card == CARD_p) { \ + FASTDECODE_PACKEDVARINT(d, ptr, msg, table, hasbits, data, tagbytes, \ + valbytes, zigzag, unpacked); \ + } else { \ + FASTDECODE_UNPACKEDVARINT(d, ptr, msg, table, hasbits, data, tagbytes, \ + valbytes, card, zigzag, packed); \ } - return fastdecode_dispatch(d, ptr, msg, table, hasbits); -} - -UPB_FORCEINLINE -static const char *fastdecode_varint(UPB_PARSE_PARAMS, int tagbytes, - int valbytes, upb_card card, bool zigzag, - _upb_field_parser *unpacked, - _upb_field_parser *packed) { - if (card == CARD_p) { - return fastdecode_packedvarint(UPB_PARSE_ARGS, tagbytes, valbytes, zigzag, - unpacked); - } else { - return fastdecode_unpackedvarint(UPB_PARSE_ARGS, tagbytes, valbytes, card, - zigzag, packed); - } -} - #define z_ZZ true #define b_ZZ false #define v_ZZ false @@ -3086,10 +3129,10 @@ static const char *fastdecode_varint(UPB_PARSE_PARAMS, int tagbytes, #define F(card, type, valbytes, tagbytes) \ UPB_NOINLINE \ const char *upb_p##card##type##valbytes##_##tagbytes##bt(UPB_PARSE_PARAMS) { \ - return fastdecode_varint(UPB_PARSE_ARGS, tagbytes, valbytes, CARD_##card, \ - type##_ZZ, \ - &upb_pr##type##valbytes##_##tagbytes##bt, \ - &upb_pp##type##valbytes##_##tagbytes##bt); \ + FASTDECODE_VARINT(d, ptr, msg, table, hasbits, data, tagbytes, valbytes, \ + CARD_##card, type##_ZZ, \ + upb_pr##type##valbytes##_##tagbytes##bt, \ + upb_pp##type##valbytes##_##tagbytes##bt); \ } #define TYPES(card, tagbytes) \ @@ -3117,126 +3160,110 @@ TAGBYTES(p) #undef F #undef TYPES #undef TAGBYTES +#undef FASTDECODE_UNPACKEDVARINT +#undef FASTDECODE_PACKEDVARINT +#undef FASTDECODE_VARINT /* fixed fields ***************************************************************/ -UPB_FORCEINLINE -static const char *fastdecode_unpackedfixed(UPB_PARSE_PARAMS, int tagbytes, - int valbytes, upb_card card, - _upb_field_parser *packed) { - void *dst; - fastdecode_arr farr; - - if (UPB_UNLIKELY(!fastdecode_checktag(data, tagbytes))) { - if (card == CARD_r && fastdecode_flippacked(&data, tagbytes)) { - return packed(UPB_PARSE_ARGS); - } - RETURN_GENERIC("fixed field tag mismatch\n"); - } - - dst = - fastdecode_getfield(d, ptr, msg, &data, &hasbits, &farr, valbytes, card); - if (card == CARD_r) { - if (UPB_UNLIKELY(!dst)) { - RETURN_GENERIC("couldn't allocate array in arena\n"); - } +#define FASTDECODE_UNPACKEDFIXED(d, ptr, msg, table, hasbits, data, tagbytes, \ + valbytes, card, packed) \ + void *dst; \ + fastdecode_arr farr; \ + \ + FASTDECODE_CHECKPACKED(tagbytes, card, packed) \ + \ + dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, &farr, valbytes, \ + card); \ + if (card == CARD_r) { \ + if (UPB_UNLIKELY(!dst)) { \ + RETURN_GENERIC("couldn't allocate array in arena\n"); \ + } \ + } \ + \ + again: \ + if (card == CARD_r) { \ + dst = fastdecode_resizearr(d, dst, &farr, valbytes); \ + } \ + \ + ptr += tagbytes; \ + memcpy(dst, ptr, valbytes); \ + ptr += valbytes; \ + \ + if (card == CARD_r) { \ + fastdecode_nextret ret = fastdecode_nextrepeated( \ + d, dst, &ptr, &farr, data, tagbytes, valbytes); \ + switch (ret.next) { \ + case FD_NEXT_SAMEFIELD: \ + dst = ret.dst; \ + goto again; \ + case FD_NEXT_OTHERFIELD: \ + data = ret.tag; \ + UPB_MUSTTAIL return fastdecode_tagdispatch(UPB_PARSE_ARGS); \ + case FD_NEXT_ATLIMIT: \ + return ptr; \ + } \ + } \ + \ + UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS); + +#define FASTDECODE_PACKEDFIXED(d, ptr, msg, table, hasbits, data, tagbytes, \ + valbytes, unpacked) \ + FASTDECODE_CHECKPACKED(tagbytes, CARD_r, unpacked) \ + \ + ptr += tagbytes; \ + int size = (uint8_t)ptr[0]; \ + ptr++; \ + if (size & 0x80) { \ + ptr = fastdecode_longsize(ptr, &size); \ + } \ + \ + if (UPB_UNLIKELY(fastdecode_boundscheck(ptr, size, d->limit_ptr) || \ + (size % valbytes) != 0)) { \ + return fastdecode_err(d); \ + } \ + \ + upb_array **arr_p = fastdecode_fieldmem(msg, data); \ + upb_array *arr = *arr_p; \ + uint8_t elem_size_lg2 = __builtin_ctz(valbytes); \ + int elems = size / valbytes; \ + \ + if (UPB_LIKELY(!arr)) { \ + *arr_p = arr = _upb_array_new(&d->arena, elems, elem_size_lg2); \ + if (!arr) { \ + return fastdecode_err(d); \ + } \ + } else { \ + _upb_array_resize(arr, elems, &d->arena); \ + } \ + \ + char *dst = _upb_array_ptr(arr); \ + memcpy(dst, ptr, size); \ + arr->len = elems; \ + \ + ptr += size; \ + UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS); + +#define FASTDECODE_FIXED(d, ptr, msg, table, hasbits, data, tagbytes, \ + valbytes, card, unpacked, packed) \ + if (card == CARD_p) { \ + FASTDECODE_PACKEDFIXED(d, ptr, msg, table, hasbits, data, tagbytes, \ + valbytes, unpacked); \ + } else { \ + FASTDECODE_UNPACKEDFIXED(d, ptr, msg, table, hasbits, data, tagbytes, \ + valbytes, card, packed); \ } - -again: - if (card == CARD_r) { - dst = fastdecode_resizearr(d, dst, &farr, valbytes); - } - - ptr += tagbytes; - memcpy(dst, ptr, valbytes); - ptr += valbytes; - - if (card == CARD_r) { - fastdecode_nextret ret = - fastdecode_nextrepeated(d, dst, &ptr, &farr, data, tagbytes, valbytes); - switch (ret.next) { - case FD_NEXT_SAMEFIELD: - dst = ret.dst; - goto again; - case FD_NEXT_OTHERFIELD: - return fastdecode_tagdispatch(d, ptr, msg, table, hasbits, ret.tag); - case FD_NEXT_ATLIMIT: - return ptr; - } - } - - return fastdecode_dispatch(d, ptr, msg, table, hasbits); -} - -UPB_FORCEINLINE -static const char *fastdecode_packedfixed(UPB_PARSE_PARAMS, int tagbytes, - int valbytes, - _upb_field_parser *unpacked) { - if (UPB_UNLIKELY(!fastdecode_checktag(data, tagbytes))) { - if (fastdecode_flippacked(&data, tagbytes)) { - return unpacked(UPB_PARSE_ARGS); - } else { - RETURN_GENERIC("varint field tag mismatch\n"); - } - } - - ptr += tagbytes; - int size = (uint8_t)ptr[0]; - ptr++; - if (size & 0x80) { - ptr = fastdecode_longsize(ptr, &size); - } - - if (UPB_UNLIKELY(fastdecode_boundscheck(ptr, size, d->limit_ptr)) || - (size % valbytes) != 0) { - return fastdecode_err(d); - } - - upb_array **arr_p = fastdecode_fieldmem(msg, data); - upb_array *arr = *arr_p; - uint8_t elem_size_lg2 = __builtin_ctz(valbytes); - int elems = size / valbytes; - - if (UPB_LIKELY(!arr)) { - *arr_p = arr = _upb_array_new(&d->arena, elems, elem_size_lg2); - if (!arr) { - return fastdecode_err(d); - } - } else { - _upb_array_resize(arr, elems, &d->arena); - } - - char *dst = _upb_array_ptr(arr); - memcpy(dst, ptr, size); - arr->len = elems; - - return fastdecode_dispatch(d, ptr + size, msg, table, hasbits); -} - -UPB_FORCEINLINE -static const char *fastdecode_fixed(UPB_PARSE_PARAMS, int tagbytes, - int valbytes, upb_card card, - _upb_field_parser *unpacked, - _upb_field_parser *packed) { - if (card == CARD_p) { - return fastdecode_packedfixed(UPB_PARSE_ARGS, tagbytes, valbytes, unpacked); - } else { - return fastdecode_unpackedfixed(UPB_PARSE_ARGS, tagbytes, valbytes, card, - packed); - } -} - /* Generate all combinations: * {s,o,r,p} x {f4,f8} x {1bt,2bt} */ -#define F(card, valbytes, tagbytes) \ - UPB_NOINLINE \ - const char *upb_p##card##f##valbytes##_##tagbytes##bt(UPB_PARSE_PARAMS) { \ - return fastdecode_fixed(UPB_PARSE_ARGS, tagbytes, valbytes, CARD_##card, \ - &upb_ppf##valbytes##_##tagbytes##bt, \ - &upb_prf##valbytes##_##tagbytes##bt); \ +#define F(card, valbytes, tagbytes) \ + UPB_NOINLINE \ + const char *upb_p##card##f##valbytes##_##tagbytes##bt(UPB_PARSE_PARAMS) { \ + FASTDECODE_FIXED(d, ptr, msg, table, hasbits, data, tagbytes, valbytes, \ + CARD_##card, upb_ppf##valbytes##_##tagbytes##bt, \ + upb_prf##valbytes##_##tagbytes##bt); \ } #define TYPES(card, tagbytes) \ @@ -3255,6 +3282,8 @@ TAGBYTES(p) #undef F #undef TYPES #undef TAGBYTES +#undef FASTDECODE_UNPACKEDFIXED +#undef FASTDECODE_PACKEDFIXED /* string fields **************************************************************/ @@ -3266,56 +3295,54 @@ typedef const char *fastdecode_copystr_func(struct upb_decstate *d, UPB_NOINLINE static const char *fastdecode_verifyutf8(upb_decstate *d, const char *ptr, upb_msg *msg, intptr_t table, - uint64_t hasbits, upb_strview *dst) { + uint64_t hasbits, uint64_t data) { + upb_strview *dst = (upb_strview*)data; if (!decode_verifyutf8_inl(dst->data, dst->size)) { return fastdecode_err(d); } - return fastdecode_dispatch(d, ptr, msg, table, hasbits); -} - -UPB_FORCEINLINE -static const char *fastdecode_longstring(struct upb_decstate *d, - const char *ptr, upb_msg *msg, - intptr_t table, uint64_t hasbits, - upb_strview *dst, - bool validate_utf8) { - int size = (uint8_t)ptr[0]; // Could plumb through hasbits. - ptr++; - if (size & 0x80) { - ptr = fastdecode_longsize(ptr, &size); - } - - if (UPB_UNLIKELY(fastdecode_boundscheck(ptr, size, d->limit_ptr))) { - dst->size = 0; - return fastdecode_err(d); - } - - if (d->alias) { - dst->data = ptr; - dst->size = size; - } else { - char *data = upb_arena_malloc(&d->arena, size); - if (!data) { - return fastdecode_err(d); - } - memcpy(data, ptr, size); - dst->data = data; - dst->size = size; + UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS); +} + +#define FASTDECODE_LONGSTRING(d, ptr, msg, table, hasbits, dst, validate_utf8) \ + int size = (uint8_t)ptr[0]; /* Could plumb through hasbits. */ \ + ptr++; \ + if (size & 0x80) { \ + ptr = fastdecode_longsize(ptr, &size); \ + } \ + \ + if (UPB_UNLIKELY(fastdecode_boundscheck(ptr, size, d->limit_ptr))) { \ + dst->size = 0; \ + return fastdecode_err(d); \ + } \ + \ + if (d->alias) { \ + dst->data = ptr; \ + dst->size = size; \ + } else { \ + char *data = upb_arena_malloc(&d->arena, size); \ + if (!data) { \ + return fastdecode_err(d); \ + } \ + memcpy(data, ptr, size); \ + dst->data = data; \ + dst->size = size; \ + } \ + \ + ptr += size; \ + if (validate_utf8) { \ + data = (uint64_t)dst; \ + UPB_MUSTTAIL return fastdecode_verifyutf8(UPB_PARSE_ARGS); \ + } else { \ + UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS); \ } - if (validate_utf8) { - return fastdecode_verifyutf8(d, ptr + size, msg, table, hasbits, dst); - } else { - return fastdecode_dispatch(d, ptr + size, msg, table, hasbits); - } -} - UPB_NOINLINE static const char *fastdecode_longstring_utf8(struct upb_decstate *d, - const char *ptr, upb_msg *msg, - intptr_t table, uint64_t hasbits, - upb_strview *dst) { - return fastdecode_longstring(d, ptr, msg, table, hasbits, dst, true); + const char *ptr, upb_msg *msg, + intptr_t table, uint64_t hasbits, + uint64_t data) { + upb_strview *dst = (upb_strview*)data; + FASTDECODE_LONGSTRING(d, ptr, msg, table, hasbits, dst, true); } UPB_NOINLINE @@ -3323,8 +3350,9 @@ static const char *fastdecode_longstring_noutf8(struct upb_decstate *d, const char *ptr, upb_msg *msg, intptr_t table, uint64_t hasbits, - upb_strview *dst) { - return fastdecode_longstring(d, ptr, msg, table, hasbits, dst, false); + uint64_t data) { + upb_strview *dst = (upb_strview*)data; + FASTDECODE_LONGSTRING(d, ptr, msg, table, hasbits, dst, false); } UPB_FORCEINLINE @@ -3337,156 +3365,165 @@ static void fastdecode_docopy(upb_decstate *d, const char *ptr, uint32_t size, UPB_POISON_MEMORY_REGION(data + size, copy - size); } -UPB_FORCEINLINE -static const char *fastdecode_copystring(UPB_PARSE_PARAMS, int tagbytes, - upb_card card, bool validate_utf8) { - upb_strview *dst; - fastdecode_arr farr; - int64_t size; - size_t arena_has; - size_t common_has; - char *buf; - - UPB_ASSERT(!d->alias); - UPB_ASSERT(fastdecode_checktag(data, tagbytes)); - - dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, &farr, - sizeof(upb_strview), card); - -again: - if (card == CARD_r) { - dst = fastdecode_resizearr(d, dst, &farr, sizeof(upb_strview)); - } - - size = (uint8_t)ptr[tagbytes]; - ptr += tagbytes + 1; - dst->size = size; - - buf = d->arena.head.ptr; - arena_has = _upb_arenahas(&d->arena); - common_has = UPB_MIN(arena_has, (d->end - ptr) + 16); - - if (UPB_LIKELY(size <= 15 - tagbytes)) { - if (arena_has < 16) goto longstr; - d->arena.head.ptr += 16; - memcpy(buf, ptr - tagbytes - 1, 16); - dst->data = buf + tagbytes + 1; - } else if (UPB_LIKELY(size <= 32)) { - if (UPB_UNLIKELY(common_has < 32)) goto longstr; - fastdecode_docopy(d, ptr, size, 32, buf, dst); - } else if (UPB_LIKELY(size <= 64)) { - if (UPB_UNLIKELY(common_has < 64)) goto longstr; - fastdecode_docopy(d, ptr, size, 64, buf, dst); - } else if (UPB_LIKELY(size < 128)) { - if (UPB_UNLIKELY(common_has < 128)) goto longstr; - fastdecode_docopy(d, ptr, size, 128, buf, dst); - } else { - goto longstr; - } - - ptr += size; - - if (card == CARD_r) { - if (validate_utf8 && !decode_verifyutf8_inl(dst->data, dst->size)) { - return fastdecode_err(d); - } - fastdecode_nextret ret = fastdecode_nextrepeated( - d, dst, &ptr, &farr, data, tagbytes, sizeof(upb_strview)); - switch (ret.next) { - case FD_NEXT_SAMEFIELD: - dst = ret.dst; - goto again; - case FD_NEXT_OTHERFIELD: - return fastdecode_tagdispatch(d, ptr, msg, table, hasbits, ret.tag); - case FD_NEXT_ATLIMIT: - return ptr; - } - } - - if (card != CARD_r && validate_utf8) { - return fastdecode_verifyutf8(d, ptr, msg, table, hasbits, dst); - } - - return fastdecode_dispatch(d, ptr, msg, table, hasbits); - -longstr: - ptr--; - if (validate_utf8) { - return fastdecode_longstring_utf8(d, ptr, msg, table, hasbits, dst); - } else { - return fastdecode_longstring_noutf8(d, ptr, msg, table, hasbits, dst); - } -} - -UPB_FORCEINLINE -static const char *fastdecode_string(UPB_PARSE_PARAMS, int tagbytes, - upb_card card, _upb_field_parser *copyfunc, - bool validate_utf8) { - upb_strview *dst; - fastdecode_arr farr; - int64_t size; - - if (UPB_UNLIKELY(!fastdecode_checktag(data, tagbytes))) { - RETURN_GENERIC("string field tag mismatch\n"); - } - - if (UPB_UNLIKELY(!d->alias)) { - return copyfunc(UPB_PARSE_ARGS); - } - - dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, &farr, - sizeof(upb_strview), card); - -again: - if (card == CARD_r) { - dst = fastdecode_resizearr(d, dst, &farr, sizeof(upb_strview)); - } - - size = (int8_t)ptr[tagbytes]; - ptr += tagbytes + 1; - dst->data = ptr; - dst->size = size; - - if (UPB_UNLIKELY(fastdecode_boundscheck(ptr, size, d->end))) { - ptr--; - if (validate_utf8) { - return fastdecode_longstring_utf8(d, ptr, msg, table, hasbits, dst); - } else { - return fastdecode_longstring_noutf8(d, ptr, msg, table, hasbits, dst); - } - } - - ptr += size; - - if (card == CARD_r) { - if (validate_utf8 && !decode_verifyutf8_inl(dst->data, dst->size)) { - return fastdecode_err(d); - } - fastdecode_nextret ret = fastdecode_nextrepeated( - d, dst, &ptr, &farr, data, tagbytes, sizeof(upb_strview)); - switch (ret.next) { - case FD_NEXT_SAMEFIELD: - dst = ret.dst; - if (UPB_UNLIKELY(!d->alias)) { - // Buffer flipped and we can't alias any more. Bounce to copyfunc(), - // but via dispatch since we need to reload table data also. - fastdecode_commitarr(dst, &farr, sizeof(upb_strview)); - return fastdecode_tagdispatch(d, ptr, msg, table, hasbits, ret.tag); - } - goto again; - case FD_NEXT_OTHERFIELD: - return fastdecode_tagdispatch(d, ptr, msg, table, hasbits, ret.tag); - case FD_NEXT_ATLIMIT: - return ptr; - } - } - - if (card != CARD_r && validate_utf8) { - return fastdecode_verifyutf8(d, ptr, msg, table, hasbits, dst); - } - - return fastdecode_dispatch(d, ptr, msg, table, hasbits); -} +#define FASTDECODE_COPYSTRING(d, ptr, msg, table, hasbits, data, tagbytes, \ + card, validate_utf8) \ + upb_strview *dst; \ + fastdecode_arr farr; \ + int64_t size; \ + size_t arena_has; \ + size_t common_has; \ + char *buf; \ + \ + UPB_ASSERT(!d->alias); \ + UPB_ASSERT(fastdecode_checktag(data, tagbytes)); \ + \ + dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, &farr, \ + sizeof(upb_strview), card); \ + \ + again: \ + if (card == CARD_r) { \ + dst = fastdecode_resizearr(d, dst, &farr, sizeof(upb_strview)); \ + } \ + \ + size = (uint8_t)ptr[tagbytes]; \ + ptr += tagbytes + 1; \ + dst->size = size; \ + \ + buf = d->arena.head.ptr; \ + arena_has = _upb_arenahas(&d->arena); \ + common_has = UPB_MIN(arena_has, (d->end - ptr) + 16); \ + \ + if (UPB_LIKELY(size <= 15 - tagbytes)) { \ + if (arena_has < 16) \ + goto longstr; \ + d->arena.head.ptr += 16; \ + memcpy(buf, ptr - tagbytes - 1, 16); \ + dst->data = buf + tagbytes + 1; \ + } else if (UPB_LIKELY(size <= 32)) { \ + if (UPB_UNLIKELY(common_has < 32)) \ + goto longstr; \ + fastdecode_docopy(d, ptr, size, 32, buf, dst); \ + } else if (UPB_LIKELY(size <= 64)) { \ + if (UPB_UNLIKELY(common_has < 64)) \ + goto longstr; \ + fastdecode_docopy(d, ptr, size, 64, buf, dst); \ + } else if (UPB_LIKELY(size < 128)) { \ + if (UPB_UNLIKELY(common_has < 128)) \ + goto longstr; \ + fastdecode_docopy(d, ptr, size, 128, buf, dst); \ + } else { \ + goto longstr; \ + } \ + \ + ptr += size; \ + \ + if (card == CARD_r) { \ + if (validate_utf8 && !decode_verifyutf8_inl(dst->data, dst->size)) { \ + return fastdecode_err(d); \ + } \ + fastdecode_nextret ret = fastdecode_nextrepeated( \ + d, dst, &ptr, &farr, data, tagbytes, sizeof(upb_strview)); \ + switch (ret.next) { \ + case FD_NEXT_SAMEFIELD: \ + dst = ret.dst; \ + goto again; \ + case FD_NEXT_OTHERFIELD: \ + data = ret.tag; \ + UPB_MUSTTAIL return fastdecode_tagdispatch(UPB_PARSE_ARGS); \ + case FD_NEXT_ATLIMIT: \ + return ptr; \ + } \ + } \ + \ + if (card != CARD_r && validate_utf8) { \ + data = (uint64_t)dst; \ + UPB_MUSTTAIL return fastdecode_verifyutf8(UPB_PARSE_ARGS); \ + } \ + \ + UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS); \ + \ + longstr: \ + ptr--; \ + if (validate_utf8) { \ + UPB_MUSTTAIL return fastdecode_longstring_utf8(d, ptr, msg, table, \ + hasbits, (uint64_t)dst); \ + } else { \ + UPB_MUSTTAIL return fastdecode_longstring_noutf8(d, ptr, msg, table, \ + hasbits, (uint64_t)dst); \ + } + +#define FASTDECODE_STRING(d, ptr, msg, table, hasbits, data, tagbytes, card, \ + copyfunc, validate_utf8) \ + upb_strview *dst; \ + fastdecode_arr farr; \ + int64_t size; \ + \ + if (UPB_UNLIKELY(!fastdecode_checktag(data, tagbytes))) { \ + RETURN_GENERIC("string field tag mismatch\n"); \ + } \ + \ + if (UPB_UNLIKELY(!d->alias)) { \ + UPB_MUSTTAIL return copyfunc(UPB_PARSE_ARGS); \ + } \ + \ + dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, &farr, \ + sizeof(upb_strview), card); \ + \ + again: \ + if (card == CARD_r) { \ + dst = fastdecode_resizearr(d, dst, &farr, sizeof(upb_strview)); \ + } \ + \ + size = (int8_t)ptr[tagbytes]; \ + ptr += tagbytes + 1; \ + dst->data = ptr; \ + dst->size = size; \ + \ + if (UPB_UNLIKELY(fastdecode_boundscheck(ptr, size, d->end))) { \ + ptr--; \ + if (validate_utf8) { \ + return fastdecode_longstring_utf8(d, ptr, msg, table, hasbits, \ + (uint64_t)dst); \ + } else { \ + return fastdecode_longstring_noutf8(d, ptr, msg, table, hasbits, \ + (uint64_t)dst); \ + } \ + } \ + \ + ptr += size; \ + \ + if (card == CARD_r) { \ + if (validate_utf8 && !decode_verifyutf8_inl(dst->data, dst->size)) { \ + return fastdecode_err(d); \ + } \ + fastdecode_nextret ret = fastdecode_nextrepeated( \ + d, dst, &ptr, &farr, data, tagbytes, sizeof(upb_strview)); \ + switch (ret.next) { \ + case FD_NEXT_SAMEFIELD: \ + dst = ret.dst; \ + if (UPB_UNLIKELY(!d->alias)) { \ + /* Buffer flipped and we can't alias any more. Bounce to */ \ + /* copyfunc(), but via dispatch since we need to reload table */ \ + /* data also. */ \ + fastdecode_commitarr(dst, &farr, sizeof(upb_strview)); \ + data = ret.tag; \ + UPB_MUSTTAIL return fastdecode_tagdispatch(UPB_PARSE_ARGS); \ + } \ + goto again; \ + case FD_NEXT_OTHERFIELD: \ + data = ret.tag; \ + UPB_MUSTTAIL return fastdecode_tagdispatch(UPB_PARSE_ARGS); \ + case FD_NEXT_ATLIMIT: \ + return ptr; \ + } \ + } \ + \ + if (card != CARD_r && validate_utf8) { \ + data = (uint64_t)dst; \ + UPB_MUSTTAIL return fastdecode_verifyutf8(UPB_PARSE_ARGS); \ + } \ + \ + UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS); /* Generate all combinations: * {p,c} x {s,o,r} x {s, b} x {1bt,2bt} */ @@ -3494,16 +3531,16 @@ again: #define s_VALIDATE true #define b_VALIDATE false -#define F(card, tagbytes, type) \ - UPB_NOINLINE \ - const char *upb_c##card##type##_##tagbytes##bt(UPB_PARSE_PARAMS) { \ - return fastdecode_copystring(UPB_PARSE_ARGS, tagbytes, CARD_##card, \ - type##_VALIDATE); \ - } \ - const char *upb_p##card##type##_##tagbytes##bt(UPB_PARSE_PARAMS) { \ - return fastdecode_string(UPB_PARSE_ARGS, tagbytes, CARD_##card, \ - &upb_c##card##type##_##tagbytes##bt, \ - type##_VALIDATE); \ +#define F(card, tagbytes, type) \ + UPB_NOINLINE \ + const char *upb_c##card##type##_##tagbytes##bt(UPB_PARSE_PARAMS) { \ + FASTDECODE_COPYSTRING(d, ptr, msg, table, hasbits, data, tagbytes, \ + CARD_##card, type##_VALIDATE); \ + } \ + const char *upb_p##card##type##_##tagbytes##bt(UPB_PARSE_PARAMS) { \ + FASTDECODE_STRING(d, ptr, msg, table, hasbits, data, tagbytes, \ + CARD_##card, upb_c##card##type##_##tagbytes##bt, \ + type##_VALIDATE); \ } #define UTF8(card, tagbytes) \ @@ -3522,6 +3559,9 @@ TAGBYTES(r) #undef b_VALIDATE #undef F #undef TAGBYTES +#undef FASTDECODE_LONGSTRING +#undef FASTDECODE_COPYSTRING +#undef FASTDECODE_STRING /* message fields *************************************************************/ @@ -3554,82 +3594,82 @@ UPB_FORCEINLINE static const char *fastdecode_tosubmsg(upb_decstate *d, const char *ptr, void *ctx) { fastdecode_submsgdata *submsg = ctx; - ptr = fastdecode_dispatch(d, ptr, submsg->msg, submsg->table, 0); + ptr = fastdecode_dispatch(d, ptr, submsg->msg, submsg->table, 0, 0); UPB_ASSUME(ptr != NULL); return ptr; } -UPB_FORCEINLINE -static const char *fastdecode_submsg(UPB_PARSE_PARAMS, int tagbytes, - int msg_ceil_bytes, upb_card card) { - - if (UPB_UNLIKELY(!fastdecode_checktag(data, tagbytes))) { - RETURN_GENERIC("submessage field tag mismatch\n"); - } - - if (--d->depth == 0) return fastdecode_err(d); - - upb_msg **dst; - uint32_t submsg_idx = (data >> 16) & 0xff; - const upb_msglayout *tablep = decode_totablep(table); - const upb_msglayout *subtablep = tablep->submsgs[submsg_idx]; - fastdecode_submsgdata submsg = {decode_totable(subtablep)}; - fastdecode_arr farr; - - if (subtablep->table_mask == (uint8_t)-1) { - RETURN_GENERIC("submessage doesn't have fast tables."); - } - - dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, &farr, - sizeof(upb_msg *), card); - - if (card == CARD_s) { - *(uint32_t*)msg |= hasbits; - hasbits = 0; - } - -again: - if (card == CARD_r) { - dst = fastdecode_resizearr(d, dst, &farr, sizeof(upb_msg*)); - } - - submsg.msg = *dst; - - if (card == CARD_r || UPB_LIKELY(!submsg.msg)) { - *dst = submsg.msg = decode_newmsg_ceil(d, subtablep, msg_ceil_bytes); - } - - ptr += tagbytes; - ptr = fastdecode_delimited(d, ptr, fastdecode_tosubmsg, &submsg); - - if (UPB_UNLIKELY(ptr == NULL || d->end_group != DECODE_NOGROUP)) { - return fastdecode_err(d); - } - - if (card == CARD_r) { - fastdecode_nextret ret = fastdecode_nextrepeated( - d, dst, &ptr, &farr, data, tagbytes, sizeof(upb_msg *)); - switch (ret.next) { - case FD_NEXT_SAMEFIELD: - dst = ret.dst; - goto again; - case FD_NEXT_OTHERFIELD: - d->depth++; - return fastdecode_tagdispatch(d, ptr, msg, table, hasbits, ret.tag); - case FD_NEXT_ATLIMIT: - d->depth++; - return ptr; - } - } - - d->depth++; - return fastdecode_dispatch(d, ptr, msg, table, hasbits); -} - -#define F(card, tagbytes, size_ceil, ceil_arg) \ - const char *upb_p##card##m_##tagbytes##bt_max##size_ceil##b( \ - UPB_PARSE_PARAMS) { \ - return fastdecode_submsg(UPB_PARSE_ARGS, tagbytes, ceil_arg, CARD_##card); \ +#define FASTDECODE_SUBMSG(d, ptr, msg, table, hasbits, data, tagbytes, \ + msg_ceil_bytes, card) \ + \ + if (UPB_UNLIKELY(!fastdecode_checktag(data, tagbytes))) { \ + RETURN_GENERIC("submessage field tag mismatch\n"); \ + } \ + \ + if (--d->depth == 0) return fastdecode_err(d); \ + \ + upb_msg **dst; \ + uint32_t submsg_idx = (data >> 16) & 0xff; \ + const upb_msglayout *tablep = decode_totablep(table); \ + const upb_msglayout *subtablep = tablep->submsgs[submsg_idx]; \ + fastdecode_submsgdata submsg = {decode_totable(subtablep)}; \ + fastdecode_arr farr; \ + \ + if (subtablep->table_mask == (uint8_t)-1) { \ + RETURN_GENERIC("submessage doesn't have fast tables."); \ + } \ + \ + dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, &farr, \ + sizeof(upb_msg *), card); \ + \ + if (card == CARD_s) { \ + *(uint32_t *)msg |= hasbits; \ + hasbits = 0; \ + } \ + \ + again: \ + if (card == CARD_r) { \ + dst = fastdecode_resizearr(d, dst, &farr, sizeof(upb_msg *)); \ + } \ + \ + submsg.msg = *dst; \ + \ + if (card == CARD_r || UPB_LIKELY(!submsg.msg)) { \ + *dst = submsg.msg = decode_newmsg_ceil(d, subtablep, msg_ceil_bytes); \ + } \ + \ + ptr += tagbytes; \ + ptr = fastdecode_delimited(d, ptr, fastdecode_tosubmsg, &submsg); \ + \ + if (UPB_UNLIKELY(ptr == NULL || d->end_group != DECODE_NOGROUP)) { \ + return fastdecode_err(d); \ + } \ + \ + if (card == CARD_r) { \ + fastdecode_nextret ret = fastdecode_nextrepeated( \ + d, dst, &ptr, &farr, data, tagbytes, sizeof(upb_msg *)); \ + switch (ret.next) { \ + case FD_NEXT_SAMEFIELD: \ + dst = ret.dst; \ + goto again; \ + case FD_NEXT_OTHERFIELD: \ + d->depth++; \ + data = ret.tag; \ + UPB_MUSTTAIL return fastdecode_tagdispatch(UPB_PARSE_ARGS); \ + case FD_NEXT_ATLIMIT: \ + d->depth++; \ + return ptr; \ + } \ + } \ + \ + d->depth++; \ + UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS); + +#define F(card, tagbytes, size_ceil, ceil_arg) \ + const char *upb_p##card##m_##tagbytes##bt_max##size_ceil##b( \ + UPB_PARSE_PARAMS) { \ + FASTDECODE_SUBMSG(d, ptr, msg, table, hasbits, data, tagbytes, ceil_arg, \ + CARD_##card); \ } #define SIZES(card, tagbytes) \ @@ -3650,9 +3690,11 @@ TAGBYTES(r) #undef TAGBYTES #undef SIZES #undef F +#undef FASTDECODE_SUBMSG #endif /* UPB_FASTTABLE */ -/* This file was generated by upbc (the upb compiler) from the input + +/** bazel-out/k8-fastbuild/bin/external/com_google_protobuf/google/protobuf/descriptor.upb.c ************************************************************//* This file was generated by upbc (the upb compiler) from the input * file: * * google/protobuf/descriptor.proto @@ -4135,6 +4177,7 @@ const upb_msglayout google_protobuf_GeneratedCodeInfo_Annotation_msginit = { +/** upb/def.c ************************************************************/ #include #include @@ -4172,7 +4215,6 @@ struct upb_fielddef { uint32_t number_; uint16_t index_; uint16_t layout_index; - uint32_t selector_base; /* Used to index into a upb::Handlers table. */ bool is_extension_; bool lazy_; bool packed_; @@ -4185,8 +4227,6 @@ struct upb_msgdef { const upb_msglayout *layout; const upb_filedef *file; const char *full_name; - uint32_t selector_count; - uint32_t submsg_field_count; /* Tables for looking up fields by number and name. */ upb_inttable itof; @@ -4316,30 +4356,6 @@ int cmp_fields(const void *p1, const void *p2) { return field_rank(f1) - field_rank(f2); } -/* A few implementation details of handlers. We put these here to avoid - * a def -> handlers dependency. */ - -#define UPB_STATIC_SELECTOR_COUNT 3 /* Warning: also in upb/handlers.h. */ - -static uint32_t upb_handlers_selectorbaseoffset(const upb_fielddef *f) { - return upb_fielddef_isseq(f) ? 2 : 0; -} - -static uint32_t upb_handlers_selectorcount(const upb_fielddef *f) { - uint32_t ret = 1; - if (upb_fielddef_isseq(f)) ret += 2; /* STARTSEQ/ENDSEQ */ - if (upb_fielddef_isstring(f)) ret += 2; /* [STRING]/STARTSTR/ENDSTR */ - if (upb_fielddef_issubmsg(f)) { - /* ENDSUBMSG (STARTSUBMSG is at table beginning) */ - ret += 0; - if (upb_fielddef_lazy(f)) { - /* STARTSTR/ENDSTR/STRING (for lazy) */ - ret += 3; - } - } - return ret; -} - static void upb_status_setoom(upb_status *status) { upb_status_seterrmsg(status, "out of memory"); } @@ -4431,8 +4447,7 @@ bool upb_enumdef_ntoi(const upb_enumdef *def, const char *name, const char *upb_enumdef_iton(const upb_enumdef *def, int32_t num) { upb_value v; - return upb_inttable_lookup32(&def->iton, num, &v) ? - upb_value_getcstr(v) : NULL; + return upb_inttable_lookup(&def->iton, num, &v) ? upb_value_getcstr(v) : NULL; } const char *upb_enum_iter_name(upb_enum_iter *iter) { @@ -4521,10 +4536,6 @@ const char *upb_fielddef_jsonname(const upb_fielddef *f) { return f->json_name; } -uint32_t upb_fielddef_selectorbase(const upb_fielddef *f) { - return f->selector_base; -} - const upb_filedef *upb_fielddef_file(const upb_fielddef *f) { return f->file; } @@ -4687,18 +4698,10 @@ upb_syntax_t upb_msgdef_syntax(const upb_msgdef *m) { return m->file->syntax; } -size_t upb_msgdef_selectorcount(const upb_msgdef *m) { - return m->selector_count; -} - -uint32_t upb_msgdef_submsgfieldcount(const upb_msgdef *m) { - return m->submsg_field_count; -} - const upb_fielddef *upb_msgdef_itof(const upb_msgdef *m, uint32_t i) { upb_value val; - return upb_inttable_lookup32(&m->itof, i, &val) ? - upb_value_getconstptr(val) : NULL; + return upb_inttable_lookup(&m->itof, i, &val) ? upb_value_getconstptr(val) + : NULL; } const upb_fielddef *upb_msgdef_ntof(const upb_msgdef *m, const char *name, @@ -4906,8 +4909,8 @@ const upb_fielddef *upb_oneofdef_ntof(const upb_oneofdef *o, const upb_fielddef *upb_oneofdef_itof(const upb_oneofdef *o, uint32_t num) { upb_value val; - return upb_inttable_lookup32(&o->itof, num, &val) ? - upb_value_getptr(val) : NULL; + return upb_inttable_lookup(&o->itof, num, &val) ? upb_value_getptr(val) + : NULL; } void upb_oneof_begin(upb_oneof_iter *iter, const upb_oneofdef *o) { @@ -4987,7 +4990,6 @@ void upb_symtab_free(upb_symtab *s) { upb_symtab *upb_symtab_new(void) { upb_symtab *s = upb_gmalloc(sizeof(*s)); - upb_alloc *alloc; if (!s) { return NULL; @@ -4995,10 +4997,9 @@ upb_symtab *upb_symtab_new(void) { s->arena = upb_arena_new(); s->bytes_loaded = 0; - alloc = upb_arena_alloc(s->arena); - if (!upb_strtable_init2(&s->syms, UPB_CTYPE_CONSTPTR, 32, alloc) || - !upb_strtable_init2(&s->files, UPB_CTYPE_CONSTPTR, 4, alloc)) { + if (!upb_strtable_init(&s->syms, 32, s->arena) || + !upb_strtable_init(&s->files, 4, s->arena)) { upb_arena_free(s->arena); upb_gfree(s); s = NULL; @@ -5054,8 +5055,7 @@ int upb_symtab_filecount(const upb_symtab *s) { typedef struct { upb_symtab *symtab; upb_filedef *file; /* File we are building. */ - upb_arena *file_arena; /* Allocate defs here. */ - upb_alloc *alloc; /* Alloc of file_arena, for tables. */ + upb_arena *arena; /* Allocate defs here. */ const upb_msglayout **layouts; /* NULL if we should build layouts. */ upb_status *status; /* Record errors here. */ jmp_buf err; /* longjmp() on error. */ @@ -5077,7 +5077,7 @@ static void symtab_oomerr(symtab_addctx *ctx) { } void *symtab_alloc(symtab_addctx *ctx, size_t bytes) { - void *ret = upb_arena_malloc(ctx->file_arena, bytes); + void *ret = upb_arena_malloc(ctx->arena, bytes); if (!ret) symtab_oomerr(ctx); return ret; } @@ -5184,13 +5184,21 @@ static void make_layout(symtab_addctx *ctx, const upb_msgdef *m) { upb_msg_field_iter it; upb_msg_oneof_iter oit; size_t hasbit; - size_t submsg_count = m->submsg_field_count; + size_t field_count = upb_msgdef_numfields(m); + size_t submsg_count = 0; const upb_msglayout **submsgs; upb_msglayout_field *fields; memset(l, 0, sizeof(*l) + sizeof(_upb_fasttable_entry)); - fields = symtab_alloc(ctx, upb_msgdef_numfields(m) * sizeof(*fields)); + /* Count sub-messages. */ + for (size_t i = 0; i < field_count; i++) { + if (upb_fielddef_issubmsg(&m->fields[i])) { + submsg_count++; + } + } + + fields = symtab_alloc(ctx, field_count * sizeof(*fields)); submsgs = symtab_alloc(ctx, submsg_count * sizeof(*submsgs)); l->field_count = upb_msgdef_numfields(m); @@ -5341,51 +5349,8 @@ static void make_layout(symtab_addctx *ctx, const upb_msgdef *m) { assign_layout_indices(m, fields); } -static void assign_msg_indices(symtab_addctx *ctx, upb_msgdef *m) { - /* Sort fields. upb internally relies on UPB_TYPE_MESSAGE fields having the - * lowest indexes, but we do not publicly guarantee this. */ - upb_msg_field_iter j; - int i; - uint32_t selector; - int n = upb_msgdef_numfields(m); - upb_fielddef **fields; - - if (n == 0) { - m->selector_count = UPB_STATIC_SELECTOR_COUNT; - m->submsg_field_count = 0; - return; - } - - fields = upb_gmalloc(n * sizeof(*fields)); - - m->submsg_field_count = 0; - for(i = 0, upb_msg_field_begin(&j, m); - !upb_msg_field_done(&j); - upb_msg_field_next(&j), i++) { - upb_fielddef *f = upb_msg_iter_field(&j); - UPB_ASSERT(f->msgdef == m); - if (upb_fielddef_issubmsg(f)) { - m->submsg_field_count++; - } - fields[i] = f; - } - - qsort(fields, n, sizeof(*fields), cmp_fields); - - selector = UPB_STATIC_SELECTOR_COUNT + m->submsg_field_count; - for (i = 0; i < n; i++) { - upb_fielddef *f = fields[i]; - f->index_ = i; - f->selector_base = selector + upb_handlers_selectorbaseoffset(f); - selector += upb_handlers_selectorcount(f); - } - m->selector_count = selector; - - upb_gfree(fields); -} - static char *strviewdup(symtab_addctx *ctx, upb_strview view) { - return upb_strdup2(view.data, view.size, ctx->alloc); + return upb_strdup2(view.data, view.size, ctx->arena); } static bool streql2(const char *a, size_t n, const char *b) { @@ -5496,9 +5461,9 @@ static void symtab_add(symtab_addctx *ctx, const char *name, upb_value v) { if (upb_strtable_lookup(&ctx->symtab->syms, name, NULL)) { symtab_errf(ctx, "duplicate symbol '%s'", name); } - upb_alloc *alloc = upb_arena_alloc(ctx->symtab->arena); size_t len = strlen(name); - CHK_OOM(upb_strtable_insert3(&ctx->symtab->syms, name, len, v, alloc)); + CHK_OOM(upb_strtable_insert(&ctx->symtab->syms, name, len, v, + ctx->symtab->arena)); } /* Given a symbol and the base symbol inside which it is defined, find the @@ -5531,7 +5496,8 @@ static const void *symtab_resolve(symtab_addctx *ctx, const upb_fielddef *f, } notfound: - symtab_errf(ctx, "couldn't resolve name '%s'", sym.data); + symtab_errf(ctx, "couldn't resolve name '" UPB_STRVIEW_FORMAT "'", + UPB_STRVIEW_ARGS(sym)); } static void create_oneofdef( @@ -5549,10 +5515,10 @@ static void create_oneofdef( v = pack_def(o, UPB_DEFTYPE_ONEOF); symtab_add(ctx, o->full_name, v); - CHK_OOM(upb_strtable_insert3(&m->ntof, name.data, name.size, v, ctx->alloc)); + CHK_OOM(upb_strtable_insert(&m->ntof, name.data, name.size, v, ctx->arena)); - CHK_OOM(upb_inttable_init2(&o->itof, UPB_CTYPE_CONSTPTR, ctx->alloc)); - CHK_OOM(upb_strtable_init2(&o->ntof, UPB_CTYPE_CONSTPTR, 4, ctx->alloc)); + CHK_OOM(upb_inttable_init(&o->itof, ctx->arena)); + CHK_OOM(upb_strtable_init(&o->ntof, 4, ctx->arena)); } static str_t *newstr(symtab_addctx *ctx, const char *data, size_t len) { @@ -5608,8 +5574,7 @@ static void parse_default(symtab_addctx *ctx, const char *str, size_t len, break; } case UPB_TYPE_INT64: { - /* XXX: Need to write our own strtoll, since it's not available in c89. */ - int64_t val = strtol(str, &end, 0); + long long val = strtoll(str, &end, 0); if (val > INT64_MAX || val < INT64_MIN || errno == ERANGE || *end) { goto invalid; } @@ -5625,8 +5590,7 @@ static void parse_default(symtab_addctx *ctx, const char *str, size_t len, break; } case UPB_TYPE_UINT64: { - /* XXX: Need to write our own strtoull, since it's not available in c89. */ - uint64_t val = strtoul(str, &end, 0); + unsigned long long val = strtoull(str, &end, 0); if (val > UINT64_MAX || errno == ERANGE || *end) { goto invalid; } @@ -5642,8 +5606,7 @@ static void parse_default(symtab_addctx *ctx, const char *str, size_t len, break; } case UPB_TYPE_FLOAT: { - /* XXX: Need to write our own strtof, since it's not available in c89. */ - float val = strtod(str, &end); + float val = strtof(str, &end); if (errno == ERANGE || *end) { goto invalid; } @@ -5709,7 +5672,6 @@ static void set_default_default(symtab_addctx *ctx, upb_fielddef *f) { static void create_fielddef( symtab_addctx *ctx, const char *prefix, upb_msgdef *m, const google_protobuf_FieldDescriptorProto *field_proto) { - upb_alloc *alloc = ctx->alloc; upb_fielddef *f; const google_protobuf_FieldOptions *options; upb_strview name; @@ -5745,7 +5707,8 @@ static void create_fielddef( upb_value v, field_v, json_v; size_t json_size; - f = (upb_fielddef*)&m->fields[m->field_count++]; + f = (upb_fielddef*)&m->fields[m->field_count]; + f->index_ = m->field_count++; f->msgdef = m; f->is_extension_ = false; @@ -5766,12 +5729,12 @@ static void create_fielddef( v = upb_value_constptr(f); json_size = strlen(json_name); - CHK_OOM( - upb_strtable_insert3(&m->ntof, name.data, name.size, field_v, alloc)); - CHK_OOM(upb_inttable_insert2(&m->itof, field_number, v, alloc)); + CHK_OOM(upb_strtable_insert(&m->ntof, name.data, name.size, field_v, + ctx->arena)); + CHK_OOM(upb_inttable_insert(&m->itof, field_number, v, ctx->arena)); if (strcmp(shortname, json_name) != 0) { - upb_strtable_insert3(&m->ntof, json_name, json_size, json_v, alloc); + upb_strtable_insert(&m->ntof, json_name, json_size, json_v, ctx->arena); } if (ctx->layouts) { @@ -5834,15 +5797,16 @@ static void create_fielddef( symtab_errf(ctx, "oneof_index out of range (%s)", f->full_name); } - oneof = (upb_oneofdef*)&m->oneofs[oneof_index]; + oneof = (upb_oneofdef *)&m->oneofs[oneof_index]; f->oneof = oneof; oneof->field_count++; if (f->proto3_optional_) { oneof->synthetic = true; } - CHK_OOM(upb_inttable_insert2(&oneof->itof, f->number_, v, alloc)); - CHK_OOM(upb_strtable_insert3(&oneof->ntof, name.data, name.size, v, alloc)); + CHK_OOM(upb_inttable_insert(&oneof->itof, f->number_, v, ctx->arena)); + CHK_OOM( + upb_strtable_insert(&oneof->ntof, name.data, name.size, v, ctx->arena)); } else { f->oneof = NULL; if (f->proto3_optional_) { @@ -5885,8 +5849,8 @@ static void create_enumdef( symtab_add(ctx, e->full_name, pack_def(e, UPB_DEFTYPE_ENUM)); values = google_protobuf_EnumDescriptorProto_value(enum_proto, &n); - CHK_OOM(upb_strtable_init2(&e->ntoi, UPB_CTYPE_INT32, n, ctx->alloc)); - CHK_OOM(upb_inttable_init2(&e->iton, UPB_CTYPE_CSTR, ctx->alloc)); + CHK_OOM(upb_strtable_init(&e->ntoi, n, ctx->arena)); + CHK_OOM(upb_inttable_init(&e->iton, ctx->arena)); e->file = ctx->file; e->defaultval = 0; @@ -5913,16 +5877,15 @@ static void create_enumdef( } CHK_OOM(name2) - CHK_OOM( - upb_strtable_insert3(&e->ntoi, name2, strlen(name2), v, ctx->alloc)); + CHK_OOM(upb_strtable_insert(&e->ntoi, name2, strlen(name2), v, ctx->arena)); if (!upb_inttable_lookup(&e->iton, num, NULL)) { upb_value v = upb_value_cstr(name2); - CHK_OOM(upb_inttable_insert2(&e->iton, num, v, ctx->alloc)); + CHK_OOM(upb_inttable_insert(&e->iton, num, v, ctx->arena)); } } - upb_inttable_compact2(&e->iton, ctx->alloc); + upb_inttable_compact(&e->iton, ctx->arena); } static void create_msgdef(symtab_addctx *ctx, const char *prefix, @@ -5946,9 +5909,8 @@ static void create_msgdef(symtab_addctx *ctx, const char *prefix, oneofs = google_protobuf_DescriptorProto_oneof_decl(msg_proto, &n_oneof); fields = google_protobuf_DescriptorProto_field(msg_proto, &n_field); - CHK_OOM(upb_inttable_init2(&m->itof, UPB_CTYPE_CONSTPTR, ctx->alloc)); - CHK_OOM(upb_strtable_init2(&m->ntof, UPB_CTYPE_CONSTPTR, n_oneof + n_field, - ctx->alloc)); + CHK_OOM(upb_inttable_init(&m->itof, ctx->arena)); + CHK_OOM(upb_strtable_init(&m->ntof, n_oneof + n_field, ctx->arena)); m->file = ctx->file; m->map_entry = false; @@ -5980,10 +5942,9 @@ static void create_msgdef(symtab_addctx *ctx, const char *prefix, create_fielddef(ctx, m->full_name, m, fields[i]); } - assign_msg_indices(ctx, m); finalize_oneofs(ctx, m); assign_msg_wellknowntype(m); - upb_inttable_compact2(&m->itof, ctx->alloc); + upb_inttable_compact(&m->itof, ctx->arena); /* This message is built. Now build nested messages and enums. */ @@ -6212,19 +6173,18 @@ static void build_filedef( } static void remove_filedef(upb_symtab *s, upb_filedef *file) { - upb_alloc *alloc = upb_arena_alloc(s->arena); int i; for (i = 0; i < file->msg_count; i++) { const char *name = file->msgs[i].full_name; - upb_strtable_remove3(&s->syms, name, strlen(name), NULL, alloc); + upb_strtable_remove(&s->syms, name, strlen(name), NULL); } for (i = 0; i < file->enum_count; i++) { const char *name = file->enums[i].full_name; - upb_strtable_remove3(&s->syms, name, strlen(name), NULL, alloc); + upb_strtable_remove(&s->syms, name, strlen(name), NULL); } for (i = 0; i < file->ext_count; i++) { const char *name = file->exts[i].full_name; - upb_strtable_remove3(&s->syms, name, strlen(name), NULL, alloc); + upb_strtable_remove(&s->syms, name, strlen(name), NULL); } } @@ -6242,8 +6202,7 @@ static const upb_filedef *_upb_symtab_addfile( ctx.file = file; ctx.symtab = s; - ctx.file_arena = file_arena; - ctx.alloc = upb_arena_alloc(file_arena); + ctx.arena = file_arena; ctx.layouts = layouts; ctx.status = status; @@ -6258,8 +6217,8 @@ static const upb_filedef *_upb_symtab_addfile( file = NULL; } else { build_filedef(&ctx, file, file_proto); - upb_strtable_insert3(&s->files, file->name, strlen(file->name), - upb_value_constptr(file), ctx.alloc); + upb_strtable_insert(&s->files, file->name, strlen(file->name), + upb_value_constptr(file), ctx.arena); UPB_ASSERT(upb_ok(status)); upb_arena_fuse(s->arena, file_arena); } @@ -6333,6 +6292,7 @@ upb_arena *_upb_symtab_arena(const upb_symtab *s) { #undef CHK_OOM +/** upb/reflection.c ************************************************************/ #include @@ -6443,40 +6403,7 @@ upb_msgval upb_msg_get(const upb_msg *msg, const upb_fielddef *f) { if (!upb_fielddef_haspresence(f) || upb_msg_has(msg, f)) { return _upb_msg_getraw(msg, f); } else { - /* TODO(haberman): change upb_fielddef to not require this switch(). */ - upb_msgval val = {0}; - switch (upb_fielddef_type(f)) { - case UPB_TYPE_INT32: - case UPB_TYPE_ENUM: - val.int32_val = upb_fielddef_defaultint32(f); - break; - case UPB_TYPE_INT64: - val.int64_val = upb_fielddef_defaultint64(f); - break; - case UPB_TYPE_UINT32: - val.uint32_val = upb_fielddef_defaultuint32(f); - break; - case UPB_TYPE_UINT64: - val.uint64_val = upb_fielddef_defaultuint64(f); - break; - case UPB_TYPE_FLOAT: - val.float_val = upb_fielddef_defaultfloat(f); - break; - case UPB_TYPE_DOUBLE: - val.double_val = upb_fielddef_defaultdouble(f); - break; - case UPB_TYPE_BOOL: - val.bool_val = upb_fielddef_defaultbool(f); - break; - case UPB_TYPE_STRING: - case UPB_TYPE_BYTES: - val.str_val.data = upb_fielddef_defaultstr(f, &val.str_val.size); - break; - case UPB_TYPE_MESSAGE: - val.msg_val = NULL; - break; - } - return val; + return upb_fielddef_default(f); } } @@ -6736,6 +6663,7 @@ upb_msgval upb_mapiter_value(const upb_map *map, size_t iter) { /* void upb_mapiter_setvalue(upb_map *map, size_t iter, upb_msgval value); */ +/** upb/json_decode.c ************************************************************/ #include #include @@ -7646,17 +7574,17 @@ static void jsondec_field(jsondec *d, upb_msg *msg, const upb_msgdef *m) { return; } - if (upb_fielddef_realcontainingoneof(f) && - upb_msg_whichoneof(msg, upb_fielddef_containingoneof(f))) { - jsondec_err(d, "More than one field for this oneof."); - } - if (jsondec_peek(d) == JD_NULL && !jsondec_isvalue(f)) { /* JSON "null" indicates a default value, so no need to set anything. */ jsondec_null(d); return; } + if (upb_fielddef_realcontainingoneof(f) && + upb_msg_whichoneof(msg, upb_fielddef_containingoneof(f))) { + jsondec_err(d, "More than one field for this oneof."); + } + preserved = d->debug_field; d->debug_field = f; @@ -8160,6 +8088,9 @@ bool upb_json_decode(const char *buf, size_t size, upb_msg *msg, const upb_msgdef *m, const upb_symtab *any_pool, int options, upb_arena *arena, upb_status *status) { jsondec d; + + if (size == 0) return true; + d.ptr = buf; d.end = buf + size; d.arena = arena; @@ -8178,6 +8109,7 @@ bool upb_json_decode(const char *buf, size_t size, upb_msg *msg, return true; } +/** upb/json_encode.c ************************************************************/ #include #include @@ -8207,7 +8139,7 @@ static void jsonenc_scalar(jsonenc *e, upb_msgval val, const upb_fielddef *f); static void jsonenc_msgfield(jsonenc *e, const upb_msg *msg, const upb_msgdef *m); static void jsonenc_msgfields(jsonenc *e, const upb_msg *msg, - const upb_msgdef *m); + const upb_msgdef *m, bool first); static void jsonenc_value(jsonenc *e, const upb_msg *msg, const upb_msgdef *m); UPB_NORETURN static void jsonenc_err(jsonenc *e, const char *msg) { @@ -8238,8 +8170,10 @@ static void jsonenc_putbytes(jsonenc *e, const void *data, size_t len) { memcpy(e->ptr, data, len); e->ptr += len; } else { - if (have) memcpy(e->ptr, data, have); - e->ptr += have; + if (have) { + memcpy(e->ptr, data, have); + e->ptr += have; + } e->overflow += (len - have); } } @@ -8261,7 +8195,7 @@ static void jsonenc_printf(jsonenc *e, const char *fmt, ...) { if (UPB_LIKELY(have > n)) { e->ptr += n; } else { - e->ptr += have; + e->ptr = UPB_PTRADD(e->ptr, have); e->overflow += (n - have); } } @@ -8365,7 +8299,7 @@ static void jsonenc_bytes(jsonenc *e, upb_strview str) { static const char base64[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; const unsigned char *ptr = (unsigned char*)str.data; - const unsigned char *end = ptr + str.size; + const unsigned char *end = UPB_PTRADD(ptr, str.size); char buf[4]; jsonenc_putstr(e, "\""); @@ -8401,7 +8335,7 @@ static void jsonenc_bytes(jsonenc *e, upb_strview str) { static void jsonenc_stringbody(jsonenc *e, upb_strview str) { const char *ptr = str.data; - const char *end = ptr + str.size; + const char *end = UPB_PTRADD(ptr, str.size); while (ptr < end) { switch (*ptr) { @@ -8517,14 +8451,13 @@ static void jsonenc_any(jsonenc *e, const upb_msg *msg, const upb_msgdef *m) { jsonenc_putstr(e, "{\"@type\":"); jsonenc_string(e, type_url); - jsonenc_putstr(e, ","); if (upb_msgdef_wellknowntype(any_m) == UPB_WELLKNOWN_UNSPECIFIED) { /* Regular messages: {"@type": "...","foo": 1, "bar": 2} */ - jsonenc_msgfields(e, any, any_m); + jsonenc_msgfields(e, any, any_m, false); } else { /* Well-known type: {"@type": "...","value": } */ - jsonenc_putstr(e, "\"value\":"); + jsonenc_putstr(e, ",\"value\":"); jsonenc_msgfield(e, any, any_m); } @@ -8827,10 +8760,9 @@ static void jsonenc_fieldval(jsonenc *e, const upb_fielddef *f, } static void jsonenc_msgfields(jsonenc *e, const upb_msg *msg, - const upb_msgdef *m) { + const upb_msgdef *m, bool first) { upb_msgval val; const upb_fielddef *f; - bool first = true; if (e->options & UPB_JSONENC_EMITDEFAULTS) { /* Iterate over all fields. */ @@ -8853,7 +8785,7 @@ static void jsonenc_msgfields(jsonenc *e, const upb_msg *msg, static void jsonenc_msg(jsonenc *e, const upb_msg *msg, const upb_msgdef *m) { jsonenc_putstr(e, "{"); - jsonenc_msgfields(e, msg, m); + jsonenc_msgfields(e, msg, m, true); jsonenc_putstr(e, "}"); } @@ -8875,7 +8807,7 @@ size_t upb_json_encode(const upb_msg *msg, const upb_msgdef *m, e.buf = buf; e.ptr = buf; - e.end = buf + size; + e.end = UPB_PTRADD(buf, size); e.overflow = 0; e.options = options; e.ext_pool = ext_pool; @@ -8888,27 +8820,39 @@ size_t upb_json_encode(const upb_msg *msg, const upb_msgdef *m, if (e.arena) upb_arena_free(e.arena); return jsonenc_nullz(&e, size); } + +/** upb/port_undef.inc ************************************************************/ /* See port_def.inc. This should #undef all macros #defined there. */ -#undef UPB_MAPTYPE_STRING #undef UPB_SIZE #undef UPB_PTR_AT #undef UPB_READ_ONEOF #undef UPB_WRITE_ONEOF +#undef UPB_MAPTYPE_STRING #undef UPB_INLINE #undef UPB_ALIGN_UP #undef UPB_ALIGN_DOWN #undef UPB_ALIGN_MALLOC #undef UPB_ALIGN_OF +#undef UPB_LIKELY +#undef UPB_UNLIKELY #undef UPB_FORCEINLINE #undef UPB_NOINLINE #undef UPB_NORETURN +#undef UPB_PRINTF #undef UPB_MAX #undef UPB_MIN #undef UPB_UNUSED #undef UPB_ASSUME #undef UPB_ASSERT #undef UPB_UNREACHABLE +#undef UPB_SETJMP +#undef UPB_LONGJMP +#undef UPB_PTRADD +#undef UPB_MUSTTAIL +#undef UPB_FASTTABLE_SUPPORTED +#undef UPB_FASTTABLE +#undef UPB_FASTTABLE_INIT #undef UPB_POISON_MEMORY_REGION #undef UPB_UNPOISON_MEMORY_REGION #undef UPB_ASAN diff --git a/ruby/ext/google/protobuf_c/ruby-upb.h b/ruby/ext/google/protobuf_c/ruby-upb.h index fa04393558..68d6345fc5 100755 --- a/ruby/ext/google/protobuf_c/ruby-upb.h +++ b/ruby/ext/google/protobuf_c/ruby-upb.h @@ -1,26 +1,53 @@ /* Amalgamated source file */ -#include /* -* This is where we define macros used across upb. -* -* All of these macros are undef'd in port_undef.inc to avoid leaking them to -* users. -* -* The correct usage is: -* -* #include "upb/foobar.h" -* #include "upb/baz.h" -* -* // MUST be last included header. -* #include "upb/port_def.inc" -* -* // Code for this file. -* // <...> -* -* // Can be omitted for .c files, required for .h. -* #include "upb/port_undef.inc" -* -* This file is private and must not be included by users! -*/ +/* + * Copyright (c) 2009-2021, Google LLC + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Google LLC nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * This is where we define macros used across upb. + * + * All of these macros are undef'd in port_undef.inc to avoid leaking them to + * users. + * + * The correct usage is: + * + * #include "upb/foobar.h" + * #include "upb/baz.h" + * + * // MUST be last included header. + * #include "upb/port_def.inc" + * + * // Code for this file. + * // <...> + * + * // Can be omitted for .c files, required for .h. + * #include "upb/port_undef.inc" + * + * This file is private and must not be included by users! + */ #if !((defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L) || \ (defined(__cplusplus) && __cplusplus >= 201103L) || \ @@ -136,9 +163,40 @@ #define UPB_LONGJMP(buf, val) longjmp(buf, val) #endif +/* UPB_PTRADD(ptr, ofs): add pointer while avoiding "NULL + 0" UB */ +#define UPB_PTRADD(ptr, ofs) ((ofs) ? (ptr) + (ofs) : (ptr)) + /* Configure whether fasttable is switched on or not. *************************/ -#if defined(__x86_64__) && defined(__GNUC__) +#ifdef __has_attribute +#define UPB_HAS_ATTRIBUTE(x) __has_attribute(x) +#else +#define UPB_HAS_ATTRIBUTE(x) 0 +#endif + +#if UPB_HAS_ATTRIBUTE(musttail) +#define UPB_MUSTTAIL __attribute__((musttail)) +#else +#define UPB_MUSTTAIL +#endif + +#undef UPB_HAS_ATTRIBUTE + +/* This check is not fully robust: it does not require that we have "musttail" + * support available. We need tail calls to avoid consuming arbitrary amounts + * of stack space. + * + * GCC/Clang can mostly be trusted to generate tail calls as long as + * optimization is enabled, but, debug builds will not generate tail calls + * unless "musttail" is available. + * + * We should probably either: + * 1. require that the compiler supports musttail. + * 2. add some fallback code for when musttail isn't available (ie. return + * instead of tail calling). This is safe and portable, but this comes at + * a CPU cost. + */ +#if (defined(__x86_64__) || defined(__aarch64__)) && defined(__GNUC__) #define UPB_FASTTABLE_SUPPORTED 1 #else #define UPB_FASTTABLE_SUPPORTED 0 @@ -149,7 +207,7 @@ * for example for testing or benchmarking. */ #if defined(UPB_ENABLE_FASTTABLE) #if !UPB_FASTTABLE_SUPPORTED -#error fasttable is x86-64 + Clang/GCC only +#error fasttable is x86-64/ARM64 only and requires GCC or Clang. #endif #define UPB_FASTTABLE 1 /* Define UPB_TRY_ENABLE_FASTTABLE to use fasttable if possible. @@ -193,55 +251,36 @@ void __asan_unpoison_memory_region(void const volatile *addr, size_t size); ((void)(addr), (void)(size)) #define UPB_UNPOISON_MEMORY_REGION(addr, size) \ ((void)(addr), (void)(size)) -#endif +#endif + +/** upb/decode.h ************************************************************/ /* -** upb_decode: parsing into a upb_msg using a upb_msglayout. -*/ + * upb_decode: parsing into a upb_msg using a upb_msglayout. + */ #ifndef UPB_DECODE_H_ #define UPB_DECODE_H_ + +/** upb/msg.h ************************************************************/ /* -** Our memory representation for parsing tables and messages themselves. -** Functions in this file are used by generated code and possibly reflection. -** -** The definitions in this file are internal to upb. -**/ + * Public APIs for message operations that do not require descriptors. + * These functions can be used even in build that does not want to depend on + * reflection or descriptors. + * + * Descriptor-based reflection functionality lives in reflection.h. + */ #ifndef UPB_MSG_H_ #define UPB_MSG_H_ -#include -#include -#include - -/* -** upb_table -** -** This header is INTERNAL-ONLY! Its interfaces are not public or stable! -** This file defines very fast int->upb_value (inttable) and string->upb_value -** (strtable) hash tables. -** -** The table uses chained scatter with Brent's variation (inspired by the Lua -** implementation of hash tables). The hash function for strings is Austin -** Appleby's "MurmurHash." -** -** The inttable uses uintptr_t as its key, which guarantees it can be used to -** store pointers or integers of at least 32 bits (upb isn't really useful on -** systems where sizeof(void*) < 4). -** -** The table must be homogeneous (all values of the same type). In debug -** mode, we check this on insert and lookup. -*/ +#include -#ifndef UPB_TABLE_H_ -#define UPB_TABLE_H_ -#include -#include +/** upb/upb.h ************************************************************/ /* -** This file contains shared definitions that are widely used across upb. -*/ + * This file contains shared definitions that are widely used across upb. + */ #ifndef UPB_H_ #define UPB_H_ @@ -399,7 +438,7 @@ typedef struct { upb_arena *upb_arena_init(void *mem, size_t n, upb_alloc *alloc); void upb_arena_free(upb_arena *a); bool upb_arena_addcleanup(upb_arena *a, void *ud, upb_cleanup_func *func); -void upb_arena_fuse(upb_arena *a, upb_arena *b); +bool upb_arena_fuse(upb_arena *a, upb_arena *b); void *_upb_arena_slowmalloc(upb_arena *a, size_t size); UPB_INLINE upb_alloc *upb_arena_alloc(upb_arena *a) { return (upb_alloc*)a; } @@ -578,55 +617,134 @@ UPB_INLINE int _upb_lg2ceilsize(int x) { #endif /* UPB_H_ */ +#ifdef __cplusplus +extern "C" { +#endif + +typedef void upb_msg; + +/* For users these are opaque. They can be obtained from upb_msgdef_layout() + * but users cannot access any of the members. */ +struct upb_msglayout; +typedef struct upb_msglayout upb_msglayout; + +/* Adds unknown data (serialized protobuf data) to the given message. The data + * is copied into the message instance. */ +void upb_msg_addunknown(upb_msg *msg, const char *data, size_t len, + upb_arena *arena); + +/* Returns a reference to the message's unknown data. */ +const char *upb_msg_getunknown(const upb_msg *msg, size_t *len); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* UPB_MSG_INT_H_ */ + +/* Must be last. */ #ifdef __cplusplus extern "C" { #endif +enum { + /* If set, strings will alias the input buffer instead of copying into the + * arena. */ + UPB_DECODE_ALIAS = 1, +}; -/* upb_value ******************************************************************/ +#define UPB_DECODE_MAXDEPTH(depth) ((depth) << 16) -/* A tagged union (stored untagged inside the table) so that we can check that - * clients calling table accessors are correctly typed without having to have - * an explosion of accessors. */ -typedef enum { - UPB_CTYPE_INT32 = 1, - UPB_CTYPE_INT64 = 2, - UPB_CTYPE_UINT32 = 3, - UPB_CTYPE_UINT64 = 4, - UPB_CTYPE_BOOL = 5, - UPB_CTYPE_CSTR = 6, - UPB_CTYPE_PTR = 7, - UPB_CTYPE_CONSTPTR = 8, - UPB_CTYPE_FPTR = 9, - UPB_CTYPE_FLOAT = 10, - UPB_CTYPE_DOUBLE = 11 -} upb_ctype_t; +bool _upb_decode(const char *buf, size_t size, upb_msg *msg, + const upb_msglayout *l, upb_arena *arena, int options); + +UPB_INLINE +bool upb_decode(const char *buf, size_t size, upb_msg *msg, + const upb_msglayout *l, upb_arena *arena) { + return _upb_decode(buf, size, msg, l, arena, 0); +} + +#ifdef __cplusplus +} /* extern "C" */ +#endif + + +#endif /* UPB_DECODE_H_ */ + +/** upb/decode_internal.h ************************************************************/ +/* + * Internal implementation details of the decoder that are shared between + * decode.c and decode_fast.c. + */ + +#ifndef UPB_DECODE_INT_H_ +#define UPB_DECODE_INT_H_ + +#include + + +/** upb/msg_internal.h ************************************************************//* +** Our memory representation for parsing tables and messages themselves. +** Functions in this file are used by generated code and possibly reflection. +** +** The definitions in this file are internal to upb. +**/ + +#ifndef UPB_MSG_INT_H_ +#define UPB_MSG_INT_H_ + +#include +#include +#include + + +/** upb/table_internal.h ************************************************************/ +/* + * upb_table + * + * This header is INTERNAL-ONLY! Its interfaces are not public or stable! + * This file defines very fast int->upb_value (inttable) and string->upb_value + * (strtable) hash tables. + * + * The table uses chained scatter with Brent's variation (inspired by the Lua + * implementation of hash tables). The hash function for strings is Austin + * Appleby's "MurmurHash." + * + * The inttable uses uintptr_t as its key, which guarantees it can be used to + * store pointers or integers of at least 32 bits (upb isn't really useful on + * systems where sizeof(void*) < 4). + * + * The table must be homogeneous (all values of the same type). In debug + * mode, we check this on insert and lookup. + */ + +#ifndef UPB_TABLE_H_ +#define UPB_TABLE_H_ + +#include +#include + + +#ifdef __cplusplus +extern "C" { +#endif + + +/* upb_value ******************************************************************/ typedef struct { uint64_t val; } upb_value; -/* Like strdup(), which isn't always available since it's not ANSI C. */ -char *upb_strdup(const char *s, upb_alloc *a); /* Variant that works with a length-delimited rather than NULL-delimited string, * as supported by strtable. */ -char *upb_strdup2(const char *s, size_t len, upb_alloc *a); - -UPB_INLINE char *upb_gstrdup(const char *s) { - return upb_strdup(s, &upb_alloc_global); -} +char *upb_strdup2(const char *s, size_t len, upb_arena *a); UPB_INLINE void _upb_value_setval(upb_value *v, uint64_t val) { v->val = val; } -UPB_INLINE upb_value _upb_value_val(uint64_t val) { - upb_value ret; - _upb_value_setval(&ret, val); - return ret; -} - /* For each value ctype, define the following set of functions: * * // Get/set an int32 from a upb_value. @@ -734,14 +852,7 @@ typedef struct { uint32_t mask; /* Mask to turn hash value -> bucket. */ uint32_t max_count; /* Max count before we hit our load limit. */ uint8_t size_lg2; /* Size of the hashtable part is 2^size_lg2 entries. */ - - /* Hash table entries. - * Making this const isn't entirely accurate; what we really want is for it to - * have the same const-ness as the table it's inside. But there's no way to - * declare that in C. So we have to make it const so that we can statically - * initialize const hash tables. Then we cast away const when we have to. - */ - const upb_tabent *entries; + upb_tabent *entries; } upb_table; typedef struct { @@ -755,8 +866,6 @@ typedef struct { size_t array_count; /* Array part number of elements. */ } upb_inttable; -#define UPB_ARRAY_EMPTYENT -1 - UPB_INLINE size_t upb_table_size(const upb_table *t) { if (t->size_lg2 == 0) return 0; @@ -769,48 +878,10 @@ UPB_INLINE bool upb_tabent_isempty(const upb_tabent *e) { return e->key == 0; } -/* Used by some of the unit tests for generic hashing functionality. */ -uint32_t upb_murmur_hash2(const void * key, size_t len, uint32_t seed); - -UPB_INLINE uintptr_t upb_intkey(uintptr_t key) { - return key; -} - -UPB_INLINE uint32_t upb_inthash(uintptr_t key) { - return (uint32_t)key; -} - -static const upb_tabent *upb_getentry(const upb_table *t, uint32_t hash) { - return t->entries + (hash & t->mask); -} - -UPB_INLINE bool upb_arrhas(upb_tabval key) { - return key.val != (uint64_t)-1; -} - /* Initialize and uninitialize a table, respectively. If memory allocation * failed, false is returned that the table is uninitialized. */ -bool upb_inttable_init2(upb_inttable *table, upb_ctype_t ctype, upb_alloc *a); -bool upb_strtable_init2(upb_strtable *table, upb_ctype_t ctype, - size_t expected_size, upb_alloc *a); -void upb_inttable_uninit2(upb_inttable *table, upb_alloc *a); -void upb_strtable_uninit2(upb_strtable *table, upb_alloc *a); - -UPB_INLINE bool upb_inttable_init(upb_inttable *table, upb_ctype_t ctype) { - return upb_inttable_init2(table, ctype, &upb_alloc_global); -} - -UPB_INLINE bool upb_strtable_init(upb_strtable *table, upb_ctype_t ctype) { - return upb_strtable_init2(table, ctype, 4, &upb_alloc_global); -} - -UPB_INLINE void upb_inttable_uninit(upb_inttable *table) { - upb_inttable_uninit2(table, &upb_alloc_global); -} - -UPB_INLINE void upb_strtable_uninit(upb_strtable *table) { - upb_strtable_uninit2(table, &upb_alloc_global); -} +bool upb_inttable_init(upb_inttable *table, upb_arena *a); +bool upb_strtable_init(upb_strtable *table, size_t expected_size, upb_arena *a); /* Returns the number of values in the table. */ size_t upb_inttable_count(const upb_inttable *t); @@ -818,12 +889,6 @@ UPB_INLINE size_t upb_strtable_count(const upb_strtable *t) { return t->t.count; } -void upb_inttable_packedsize(const upb_inttable *t, size_t *size); -void upb_strtable_packedsize(const upb_strtable *t, size_t *size); -upb_inttable *upb_inttable_pack(const upb_inttable *t, void *p, size_t *ofs, - size_t size); -upb_strtable *upb_strtable_pack(const upb_strtable *t, void *p, size_t *ofs, - size_t size); void upb_strtable_clear(upb_strtable *t); /* Inserts the given key into the hashtable with the given value. The key must @@ -833,26 +898,10 @@ void upb_strtable_clear(upb_strtable *t); * * If a table resize was required but memory allocation failed, false is * returned and the table is unchanged. */ -bool upb_inttable_insert2(upb_inttable *t, uintptr_t key, upb_value val, - upb_alloc *a); -bool upb_strtable_insert3(upb_strtable *t, const char *key, size_t len, - upb_value val, upb_alloc *a); - -UPB_INLINE bool upb_inttable_insert(upb_inttable *t, uintptr_t key, - upb_value val) { - return upb_inttable_insert2(t, key, val, &upb_alloc_global); -} - -UPB_INLINE bool upb_strtable_insert2(upb_strtable *t, const char *key, - size_t len, upb_value val) { - return upb_strtable_insert3(t, key, len, val, &upb_alloc_global); -} - -/* For NULL-terminated strings. */ -UPB_INLINE bool upb_strtable_insert(upb_strtable *t, const char *key, - upb_value val) { - return upb_strtable_insert2(t, key, strlen(key), val); -} +bool upb_inttable_insert(upb_inttable *t, uintptr_t key, upb_value val, + upb_arena *a); +bool upb_strtable_insert(upb_strtable *t, const char *key, size_t len, + upb_value val, upb_arena *a); /* Looks up key in this table, returning "true" if the key was found. * If v is non-NULL, copies the value for this key into *v. */ @@ -869,74 +918,21 @@ UPB_INLINE bool upb_strtable_lookup(const upb_strtable *t, const char *key, /* Removes an item from the table. Returns true if the remove was successful, * and stores the removed item in *val if non-NULL. */ bool upb_inttable_remove(upb_inttable *t, uintptr_t key, upb_value *val); -bool upb_strtable_remove3(upb_strtable *t, const char *key, size_t len, - upb_value *val, upb_alloc *alloc); - -UPB_INLINE bool upb_strtable_remove2(upb_strtable *t, const char *key, - size_t len, upb_value *val) { - return upb_strtable_remove3(t, key, len, val, &upb_alloc_global); -} - -/* For NULL-terminated strings. */ -UPB_INLINE bool upb_strtable_remove(upb_strtable *t, const char *key, - upb_value *v) { - return upb_strtable_remove2(t, key, strlen(key), v); -} +bool upb_strtable_remove(upb_strtable *t, const char *key, size_t len, + upb_value *val); /* Updates an existing entry in an inttable. If the entry does not exist, * returns false and does nothing. Unlike insert/remove, this does not * invalidate iterators. */ bool upb_inttable_replace(upb_inttable *t, uintptr_t key, upb_value val); -/* Convenience routines for inttables with pointer keys. */ -bool upb_inttable_insertptr2(upb_inttable *t, const void *key, upb_value val, - upb_alloc *a); -bool upb_inttable_removeptr(upb_inttable *t, const void *key, upb_value *val); -bool upb_inttable_lookupptr( - const upb_inttable *t, const void *key, upb_value *val); - -UPB_INLINE bool upb_inttable_insertptr(upb_inttable *t, const void *key, - upb_value val) { - return upb_inttable_insertptr2(t, key, val, &upb_alloc_global); -} - /* Optimizes the table for the current set of entries, for both memory use and * lookup time. Client should call this after all entries have been inserted; * inserting more entries is legal, but will likely require a table resize. */ -void upb_inttable_compact2(upb_inttable *t, upb_alloc *a); - -UPB_INLINE void upb_inttable_compact(upb_inttable *t) { - upb_inttable_compact2(t, &upb_alloc_global); -} - -/* A special-case inlinable version of the lookup routine for 32-bit - * integers. */ -UPB_INLINE bool upb_inttable_lookup32(const upb_inttable *t, uint32_t key, - upb_value *v) { - *v = upb_value_int32(0); /* Silence compiler warnings. */ - if (key < t->array_size) { - upb_tabval arrval = t->array[key]; - if (upb_arrhas(arrval)) { - _upb_value_setval(v, arrval.val); - return true; - } else { - return false; - } - } else { - const upb_tabent *e; - if (t->t.entries == NULL) return false; - for (e = upb_getentry(&t->t, upb_inthash(key)); true; e = e->next) { - if ((uint32_t)e->key == key) { - _upb_value_setval(v, e->val.val); - return true; - } - if (e->next == NULL) return false; - } - } -} +void upb_inttable_compact(upb_inttable *t, upb_arena *a); /* Exposed for testing only. */ -bool upb_strtable_resize(upb_strtable *t, size_t size_lg2, upb_alloc *a); +bool upb_strtable_resize(upb_strtable *t, size_t size_lg2, upb_arena *a); /* Iterators ******************************************************************/ @@ -1032,10 +1028,6 @@ bool upb_inttable_iter_isequal(const upb_inttable_iter *i1, extern "C" { #endif -#define PTR_AT(msg, ofs, type) (type*)((const char*)msg + ofs) - -typedef void upb_msg; - /** upb_msglayout *************************************************************/ /* upb_msglayout represents the memory layout of a given upb_msgdef. The @@ -1070,7 +1062,7 @@ typedef struct { _upb_field_parser *field_parser; } _upb_fasttable_entry; -typedef struct upb_msglayout { +struct upb_msglayout { const struct upb_msglayout *const* submsgs; const upb_msglayout_field *fields; /* Must be aligned to sizeof(void*). Doesn't include internal members like @@ -1082,7 +1074,7 @@ typedef struct upb_msglayout { /* To constant-initialize the tables of variable length, we need a flexible * array member, and we need to compile in C99 mode. */ _upb_fasttable_entry fasttable[]; -} upb_msglayout; +}; /** upb_msg *******************************************************************/ @@ -1137,21 +1129,18 @@ void _upb_msg_discardunknown_shallow(upb_msg *msg); bool _upb_msg_addunknown(upb_msg *msg, const char *data, size_t len, upb_arena *arena); -/* Returns a reference to the message's unknown data. */ -const char *upb_msg_getunknown(const upb_msg *msg, size_t *len); - /** Hasbit access *************************************************************/ UPB_INLINE bool _upb_hasbit(const upb_msg *msg, size_t idx) { - return (*PTR_AT(msg, idx / 8, const char) & (1 << (idx % 8))) != 0; + return (*UPB_PTR_AT(msg, idx / 8, const char) & (1 << (idx % 8))) != 0; } UPB_INLINE void _upb_sethas(const upb_msg *msg, size_t idx) { - (*PTR_AT(msg, idx / 8, char)) |= (char)(1 << (idx % 8)); + (*UPB_PTR_AT(msg, idx / 8, char)) |= (char)(1 << (idx % 8)); } UPB_INLINE void _upb_clearhas(const upb_msg *msg, size_t idx) { - (*PTR_AT(msg, idx / 8, char)) &= (char)(~(1 << (idx % 8))); + (*UPB_PTR_AT(msg, idx / 8, char)) &= (char)(~(1 << (idx % 8))); } UPB_INLINE size_t _upb_msg_hasidx(const upb_msglayout_field *f) { @@ -1177,11 +1166,11 @@ UPB_INLINE void _upb_clearhas_field(const upb_msg *msg, /** Oneof case access *********************************************************/ UPB_INLINE uint32_t *_upb_oneofcase(upb_msg *msg, size_t case_ofs) { - return PTR_AT(msg, case_ofs, uint32_t); + return UPB_PTR_AT(msg, case_ofs, uint32_t); } UPB_INLINE uint32_t _upb_getoneofcase(const void *msg, size_t case_ofs) { - return *PTR_AT(msg, case_ofs, uint32_t); + return *UPB_PTR_AT(msg, case_ofs, uint32_t); } UPB_INLINE size_t _upb_oneofcase_ofs(const upb_msglayout_field *f) { @@ -1200,7 +1189,7 @@ UPB_INLINE uint32_t _upb_getoneofcase_field(const upb_msg *msg, } UPB_INLINE bool _upb_has_submsg_nohasbit(const upb_msg *msg, size_t ofs) { - return *PTR_AT(msg, ofs, const upb_msg*) != NULL; + return *UPB_PTR_AT(msg, ofs, const upb_msg*) != NULL; } UPB_INLINE bool _upb_isrepeated(const upb_msglayout_field *field) { @@ -1277,7 +1266,7 @@ UPB_INLINE bool _upb_array_resize(upb_array *arr, size_t size, UPB_INLINE const void *_upb_array_accessor(const void *msg, size_t ofs, size_t *size) { - const upb_array *arr = *PTR_AT(msg, ofs, const upb_array*); + const upb_array *arr = *UPB_PTR_AT(msg, ofs, const upb_array*); if (arr) { if (size) *size = arr->len; return _upb_array_constptr(arr); @@ -1289,7 +1278,7 @@ UPB_INLINE const void *_upb_array_accessor(const void *msg, size_t ofs, UPB_INLINE void *_upb_array_mutable_accessor(void *msg, size_t ofs, size_t *size) { - upb_array *arr = *PTR_AT(msg, ofs, upb_array*); + upb_array *arr = *UPB_PTR_AT(msg, ofs, upb_array*); if (arr) { if (size) *size = arr->len; return _upb_array_ptr(arr); @@ -1302,7 +1291,7 @@ UPB_INLINE void *_upb_array_mutable_accessor(void *msg, size_t ofs, UPB_INLINE void *_upb_array_resize_accessor2(void *msg, size_t ofs, size_t size, int elem_size_lg2, upb_arena *arena) { - upb_array **arr_ptr = PTR_AT(msg, ofs, upb_array *); + upb_array **arr_ptr = UPB_PTR_AT(msg, ofs, upb_array *); upb_array *arr = *arr_ptr; if (!arr || arr->size < size) { return _upb_array_resize_fallback(arr_ptr, size, elem_size_lg2, arena); @@ -1315,7 +1304,7 @@ UPB_INLINE bool _upb_array_append_accessor2(void *msg, size_t ofs, int elem_size_lg2, const void *value, upb_arena *arena) { - upb_array **arr_ptr = PTR_AT(msg, ofs, upb_array *); + upb_array **arr_ptr = UPB_PTR_AT(msg, ofs, upb_array *); size_t elem_size = 1 << elem_size_lg2; upb_array *arr = *arr_ptr; void *ptr; @@ -1323,7 +1312,7 @@ UPB_INLINE bool _upb_array_append_accessor2(void *msg, size_t ofs, return _upb_array_append_fallback(arr_ptr, value, elem_size_lg2, arena); } ptr = _upb_array_ptr(arr); - memcpy(PTR_AT(ptr, arr->len * elem_size, char), value, elem_size); + memcpy(UPB_PTR_AT(ptr, arr->len * elem_size, char), value, elem_size); arr->len++; return true; } @@ -1470,20 +1459,19 @@ UPB_INLINE void* _upb_map_next(const upb_map *map, size_t *iter) { } UPB_INLINE bool _upb_map_set(upb_map *map, const void *key, size_t key_size, - void *val, size_t val_size, upb_arena *arena) { + void *val, size_t val_size, upb_arena *a) { upb_strview strkey = _upb_map_tokey(key, key_size); upb_value tabval = {0}; - if (!_upb_map_tovalue(val, val_size, &tabval, arena)) return false; - upb_alloc *a = upb_arena_alloc(arena); + if (!_upb_map_tovalue(val, val_size, &tabval, a)) return false; /* TODO(haberman): add overwrite operation to minimize number of lookups. */ - upb_strtable_remove3(&map->table, strkey.data, strkey.size, NULL, a); - return upb_strtable_insert3(&map->table, strkey.data, strkey.size, tabval, a); + upb_strtable_remove(&map->table, strkey.data, strkey.size, NULL); + return upb_strtable_insert(&map->table, strkey.data, strkey.size, tabval, a); } UPB_INLINE bool _upb_map_delete(upb_map *map, const void *key, size_t key_size) { upb_strview k = _upb_map_tokey(key, key_size); - return upb_strtable_remove3(&map->table, k.data, k.size, NULL, NULL); + return upb_strtable_remove(&map->table, k.data, k.size, NULL); } UPB_INLINE void _upb_map_clear(upb_map *map) { @@ -1515,7 +1503,7 @@ UPB_INLINE void *_upb_msg_map_next(const upb_msg *msg, size_t ofs, UPB_INLINE bool _upb_msg_map_set(upb_msg *msg, size_t ofs, const void *key, size_t key_size, void *val, size_t val_size, upb_arena *arena) { - upb_map **map = PTR_AT(msg, ofs, upb_map *); + upb_map **map = UPB_PTR_AT(msg, ofs, upb_map *); if (!*map) { *map = _upb_map_new(arena, key_size, val_size); } @@ -1548,8 +1536,7 @@ UPB_INLINE void _upb_msg_map_key(const void* msg, void* key, size_t size) { UPB_INLINE void _upb_msg_map_value(const void* msg, void* val, size_t size) { const upb_tabent *ent = (const upb_tabent*)msg; - upb_value v; - _upb_value_setval(&v, ent->val.val); + upb_value v = {ent->val.val}; _upb_map_fromvalue(v, val, size); } @@ -1612,55 +1599,14 @@ UPB_INLINE bool _upb_sortedmap_next(_upb_mapsorter *s, const upb_map *map, return true; } -#undef PTR_AT - -#ifdef __cplusplus -} /* extern "C" */ -#endif - - -#endif /* UPB_MSG_H_ */ - -/* Must be last. */ - -#ifdef __cplusplus -extern "C" { -#endif - -enum { - /* If set, strings will alias the input buffer instead of copying into the - * arena. */ - UPB_DECODE_ALIAS = 1, -}; - -#define UPB_DECODE_MAXDEPTH(depth) ((depth) << 16) - -bool _upb_decode(const char *buf, size_t size, upb_msg *msg, - const upb_msglayout *l, upb_arena *arena, int options); - -UPB_INLINE -bool upb_decode(const char *buf, size_t size, upb_msg *msg, - const upb_msglayout *l, upb_arena *arena) { - return _upb_decode(buf, size, msg, l, arena, 0); -} - #ifdef __cplusplus } /* extern "C" */ #endif -#endif /* UPB_DECODE_H_ */ -/* -** Internal implementation details of the decoder that are shared between -** decode.c and decode_fast.c. -*/ - -#ifndef UPB_DECODE_INT_H_ -#define UPB_DECODE_INT_H_ - -#include - +#endif /* UPB_MSG_INT_H_ */ +/** upb/upb_internal.h ************************************************************/ #ifndef UPB_INT_H_ #define UPB_INT_H_ @@ -1670,7 +1616,10 @@ typedef struct mem_block mem_block; struct upb_arena { _upb_arena_head head; - uint32_t *cleanups; + /* Stores cleanup metadata for this arena. + * - a pointer to the current cleanup counter. + * - a boolean indicating if there is an unowned initial block. */ + uintptr_t cleanup_metadata; /* Allocator to allocate arena blocks. We are responsible for freeing these * when we are destroyed. */ @@ -1792,10 +1741,11 @@ bool decode_isdone(upb_decstate *d, const char **ptr) { } } +#if UPB_FASTTABLE UPB_INLINE const char *fastdecode_tagdispatch(upb_decstate *d, const char *ptr, upb_msg *msg, intptr_t table, - uint64_t hasbits, uint32_t tag) { + uint64_t hasbits, uint64_t tag) { const upb_msglayout *table_p = decode_totablep(table); uint8_t mask = table; uint64_t data; @@ -1803,8 +1753,10 @@ const char *fastdecode_tagdispatch(upb_decstate *d, const char *ptr, UPB_ASSUME((idx & 7) == 0); idx >>= 3; data = table_p->fasttable[idx].field_data ^ tag; - return table_p->fasttable[idx].field_parser(d, ptr, msg, table, hasbits, data); + UPB_MUSTTAIL return table_p->fasttable[idx].field_parser(d, ptr, msg, table, + hasbits, data); } +#endif UPB_INLINE uint32_t fastdecode_loadtag(const char* ptr) { uint16_t tag; @@ -1837,9 +1789,11 @@ UPB_INLINE void decode_poplimit(upb_decstate *d, const char *ptr, #endif /* UPB_DECODE_INT_H_ */ + +/** upb/encode.h ************************************************************/ /* -** upb_encode: parsing into a upb_msg using a upb_msglayout. -*/ + * upb_encode: parsing into a upb_msg using a upb_msglayout. + */ #ifndef UPB_ENCODE_H_ #define UPB_ENCODE_H_ @@ -1880,6 +1834,8 @@ UPB_INLINE char *upb_encode(const void *msg, const upb_msglayout *l, #endif #endif /* UPB_ENCODE_H_ */ + +/** upb/decode_fast.h ************************************************************/ // These are the specialized field parser functions for the fast parser. // Generated tables will refer to these by name. // @@ -2005,7 +1961,8 @@ TAGBYTES(r) #undef UPB_PARSE_PARAMS #endif /* UPB_DECODE_FAST_H_ */ -/* This file was generated by upbc (the upb compiler) from the input + +/** google/protobuf/descriptor.upb.h ************************************************************//* This file was generated by upbc (the upb compiler) from the input * file: * * google/protobuf/descriptor.proto @@ -3884,18 +3841,20 @@ UPB_INLINE void google_protobuf_GeneratedCodeInfo_Annotation_set_end(google_prot #endif /* GOOGLE_PROTOBUF_DESCRIPTOR_PROTO_UPB_H_ */ + +/** upb/def.h ************************************************************/ /* -** Defs are upb's internal representation of the constructs that can appear -** in a .proto file: -** -** - upb_msgdef: describes a "message" construct. -** - upb_fielddef: describes a message field. -** - upb_filedef: describes a .proto file and its defs. -** - upb_enumdef: describes an enum. -** - upb_oneofdef: describes a oneof. -** -** TODO: definitions of services. -*/ + * Defs are upb's internal representation of the constructs that can appear + * in a .proto file: + * + * - upb_msgdef: describes a "message" construct. + * - upb_fielddef: describes a message field. + * - upb_filedef: describes a .proto file and its defs. + * - upb_enumdef: describes an enum. + * - upb_oneofdef: describes a oneof. + * + * TODO: definitions of services. + */ #ifndef UPB_DEF_H_ #define UPB_DEF_H_ @@ -3991,9 +3950,6 @@ const upb_msgdef *upb_fielddef_msgsubdef(const upb_fielddef *f); const upb_enumdef *upb_fielddef_enumsubdef(const upb_fielddef *f); const upb_msglayout_field *upb_fielddef_layout(const upb_fielddef *f); -/* Internal only. */ -uint32_t upb_fielddef_selectorbase(const upb_fielddef *f); - /* upb_oneofdef ***************************************************************/ typedef upb_inttable_iter upb_oneof_iter; @@ -4078,10 +4034,6 @@ UPB_INLINE const upb_fielddef *upb_msgdef_ntofz(const upb_msgdef *m, return upb_msgdef_ntof(m, name, strlen(name)); } -/* Internal-only. */ -size_t upb_msgdef_selectorcount(const upb_msgdef *m); -uint32_t upb_msgdef_submsgfieldcount(const upb_msgdef *m); - /* Lookup of either field or oneof by name. Returns whether either was found. * If the return is true, then the found def will be set, and the non-found * one set to NULL. */ @@ -4197,6 +4149,7 @@ bool _upb_symtab_loaddefinit(upb_symtab *s, const upb_def_init *init); #endif /* UPB_DEF_H_ */ +/** upb/reflection.h ************************************************************/ #ifndef UPB_REFLECTION_H_ #define UPB_REFLECTION_H_ @@ -4278,17 +4231,9 @@ bool upb_msg_next(const upb_msg *msg, const upb_msgdef *m, const upb_symtab *ext_pool, const upb_fielddef **f, upb_msgval *val, size_t *iter); -/* Adds unknown data (serialized protobuf data) to the given message. The data - * is copied into the message instance. */ -void upb_msg_addunknown(upb_msg *msg, const char *data, size_t len, - upb_arena *arena); - /* Clears all unknown field data from this message and all submessages. */ bool upb_msg_discardunknown(upb_msg *msg, const upb_msgdef *m, int maxdepth); -/* Returns a reference to the message's unknown data. */ -const char *upb_msg_getunknown(const upb_msg *msg, size_t *len); - /** upb_array *****************************************************************/ /* Creates a new array on the given arena that holds elements of this type. */ @@ -4370,6 +4315,7 @@ void upb_mapiter_setvalue(upb_map *map, size_t iter, upb_msgval value); #endif /* UPB_REFLECTION_H_ */ +/** upb/json_decode.h ************************************************************/ #ifndef UPB_JSONDECODE_H_ #define UPB_JSONDECODE_H_ @@ -4392,6 +4338,7 @@ bool upb_json_decode(const char *buf, size_t size, upb_msg *msg, #endif /* UPB_JSONDECODE_H_ */ +/** upb/json_encode.h ************************************************************/ #ifndef UPB_JSONENCODE_H_ #define UPB_JSONENCODE_H_ @@ -4426,27 +4373,39 @@ size_t upb_json_encode(const upb_msg *msg, const upb_msgdef *m, #endif #endif /* UPB_JSONENCODE_H_ */ + +/** upb/port_undef.inc ************************************************************/ /* See port_def.inc. This should #undef all macros #defined there. */ -#undef UPB_MAPTYPE_STRING #undef UPB_SIZE #undef UPB_PTR_AT #undef UPB_READ_ONEOF #undef UPB_WRITE_ONEOF +#undef UPB_MAPTYPE_STRING #undef UPB_INLINE #undef UPB_ALIGN_UP #undef UPB_ALIGN_DOWN #undef UPB_ALIGN_MALLOC #undef UPB_ALIGN_OF +#undef UPB_LIKELY +#undef UPB_UNLIKELY #undef UPB_FORCEINLINE #undef UPB_NOINLINE #undef UPB_NORETURN +#undef UPB_PRINTF #undef UPB_MAX #undef UPB_MIN #undef UPB_UNUSED #undef UPB_ASSUME #undef UPB_ASSERT #undef UPB_UNREACHABLE +#undef UPB_SETJMP +#undef UPB_LONGJMP +#undef UPB_PTRADD +#undef UPB_MUSTTAIL +#undef UPB_FASTTABLE_SUPPORTED +#undef UPB_FASTTABLE +#undef UPB_FASTTABLE_INIT #undef UPB_POISON_MEMORY_REGION #undef UPB_UNPOISON_MEMORY_REGION #undef UPB_ASAN diff --git a/src/README.md b/src/README.md index 78d6bb5ec7..ec4901d894 100644 --- a/src/README.md +++ b/src/README.md @@ -229,4 +229,4 @@ Usage The complete documentation for Protocol Buffers is available via the web at: - https://developers.google.com/protocol-buffers/ +https://developers.google.com/protocol-buffers/