From 537b6f42c20c0dd797703ecb89c61bb130b33365 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Fri, 9 Oct 2020 17:00:08 -0700 Subject: [PATCH] A few updates to the benchamrk and minor implementation changes. --- tests/benchmark.cc | 59 +++++++++++++++++++++++++++++++++------------- upb/decode.h | 9 ++++--- upb/decode_fast.c | 29 +++++++++++++---------- 3 files changed, 65 insertions(+), 32 deletions(-) diff --git a/tests/benchmark.cc b/tests/benchmark.cc index c4b1abfeaa..78dbeb65cc 100644 --- a/tests/benchmark.cc +++ b/tests/benchmark.cc @@ -1,9 +1,12 @@ -#include #include + +#include + +#include "google/protobuf/descriptor.pb.h" +#include "google/protobuf/arena.h" #include "google/protobuf/descriptor.upb.h" #include "google/protobuf/descriptor.upbdefs.h" -#include "google/protobuf/descriptor.pb.h" upb_strview descriptor = google_protobuf_descriptor_proto_upbdefinit.descriptor; @@ -28,10 +31,10 @@ static void BM_ArenaInitialBlockOneAlloc(benchmark::State& state) { } BENCHMARK(BM_ArenaInitialBlockOneAlloc); -static void BM_ParseDescriptorNoHeap(benchmark::State& state) { +static void BM_ParseDescriptor_Upb(benchmark::State& state) { size_t bytes = 0; for (auto _ : state) { - upb_arena* arena = upb_arena_init(buf, sizeof(buf), NULL); + upb_arena* arena = upb_arena_new(); google_protobuf_FileDescriptorProto* set = google_protobuf_FileDescriptorProto_parse(descriptor.data, descriptor.size, arena); @@ -41,16 +44,15 @@ static void BM_ParseDescriptorNoHeap(benchmark::State& state) { } bytes += descriptor.size; upb_arena_free(arena); - //fprintf(stderr, "+++ finished parse: %zu\n", descriptor.size); } state.SetBytesProcessed(state.iterations() * descriptor.size); } -BENCHMARK(BM_ParseDescriptorNoHeap); +BENCHMARK(BM_ParseDescriptor_Upb); -static void BM_ParseDescriptor(benchmark::State& state) { +static void BM_ParseDescriptor_Upb_LargeInitialBlock(benchmark::State& state) { size_t bytes = 0; for (auto _ : state) { - upb_arena* arena = upb_arena_new(); + upb_arena* arena = upb_arena_init(buf, sizeof(buf), NULL); google_protobuf_FileDescriptorProto* set = google_protobuf_FileDescriptorProto_parse(descriptor.data, descriptor.size, arena); @@ -60,16 +62,17 @@ static void BM_ParseDescriptor(benchmark::State& state) { } bytes += descriptor.size; upb_arena_free(arena); + //fprintf(stderr, "+++ finished parse: %zu\n", descriptor.size); } state.SetBytesProcessed(state.iterations() * descriptor.size); } -BENCHMARK(BM_ParseDescriptor); +BENCHMARK(BM_ParseDescriptor_Upb_LargeInitialBlock); -static void BM_ParseDescriptorProto2NoArena(benchmark::State& state) { +static void BM_ParseDescriptor_Proto2_NoArena(benchmark::State& state) { size_t bytes = 0; for (auto _ : state) { google::protobuf::FileDescriptorProto proto; - bool ok = proto.ParseFromArray(descriptor.data, descriptor.size); + bool ok = proto.ParsePartialFromArray(descriptor.data, descriptor.size); if (!ok) { printf("Failed to parse.\n"); @@ -79,15 +82,37 @@ static void BM_ParseDescriptorProto2NoArena(benchmark::State& state) { } state.SetBytesProcessed(state.iterations() * descriptor.size); } -BENCHMARK(BM_ParseDescriptorProto2NoArena); +BENCHMARK(BM_ParseDescriptor_Proto2_NoArena); -static void BM_ParseDescriptorProto2WithArena(benchmark::State& state) { +static void BM_ParseDescriptor_Proto2_Arena(benchmark::State& state) { size_t bytes = 0; for (auto _ : state) { google::protobuf::Arena arena; - auto proto = google::protobuf::Arena::CreateMessage< - google::protobuf::FileDescriptorProto>(&arena); - bool ok = proto->ParseFromArray(descriptor.data, descriptor.size); + arena.Reset(); + auto proto = google::protobuf::Arena::CreateMessage(&arena); + bool ok = proto->ParsePartialFromArray(descriptor.data, descriptor.size); + + if (!ok) { + printf("Failed to parse.\n"); + exit(1); + } + bytes += descriptor.size; + } + state.SetBytesProcessed(state.iterations() * descriptor.size); +} +BENCHMARK(BM_ParseDescriptor_Proto2_Arena); + +static void BM_ParseDescriptor_Proto2_Arena_LargeInitialBlock(benchmark::State& state) { + size_t bytes = 0; + //fprintf(stderr, "size: %d\n", (int)descriptor.size); + google::protobuf::ArenaOptions options; + options.initial_block = buf; + options.initial_block_size = sizeof(buf); + for (auto _ : state) { + google::protobuf::Arena arena(options); + arena.Reset(); + auto proto = google::protobuf::Arena::CreateMessage(&arena); + bool ok = proto->ParsePartialFromArray(descriptor.data, descriptor.size); if (!ok) { printf("Failed to parse.\n"); @@ -97,7 +122,7 @@ static void BM_ParseDescriptorProto2WithArena(benchmark::State& state) { } state.SetBytesProcessed(state.iterations() * descriptor.size); } -BENCHMARK(BM_ParseDescriptorProto2WithArena); +BENCHMARK(BM_ParseDescriptor_Proto2_Arena_LargeInitialBlock); static void BM_SerializeDescriptorProto2(benchmark::State& state) { size_t bytes = 0; diff --git a/upb/decode.h b/upb/decode.h index e79f3341ab..372e44b39d 100644 --- a/upb/decode.h +++ b/upb/decode.h @@ -41,11 +41,15 @@ const char *fastdecode_err(upb_decstate *d); void *decode_mallocfallback(upb_decstate *d, size_t size); +UPB_FORCEINLINE bool decode_arenahas(upb_decstate *d, size_t bytes) { + return (size_t)(d->arena_end - d->arena_ptr) >= bytes; +} + UPB_FORCEINLINE static void *decode_malloc(upb_decstate *d, size_t size) { UPB_ASSERT((size & 7) == 0); char *ptr = d->arena_ptr; - if (UPB_UNLIKELY((size_t)(d->arena_end - d->arena_ptr) < size)) { + if (UPB_UNLIKELY(!decode_arenahas(d, size))) { return decode_mallocfallback(d, size); } d->arena_ptr += size; @@ -57,8 +61,7 @@ upb_msg *decode_newmsg_ceil(upb_decstate *d, const upb_msglayout *l, int msg_ceil_bytes) { size_t size = l->size + sizeof(upb_msg_internal); char *msg_data; - if (msg_ceil_bytes > 0 && - (size_t)(d->arena_end - d->arena_ptr) >= (size_t)msg_ceil_bytes) { + if (msg_ceil_bytes > 0 && decode_arenahas(d, msg_ceil_bytes)) { UPB_ASSERT(size <= (size_t)msg_ceil_bytes); msg_data = d->arena_ptr; memset(msg_data, 0, msg_ceil_bytes); diff --git a/upb/decode_fast.c b/upb/decode_fast.c index bae1c223f7..207a930351 100644 --- a/upb/decode_fast.c +++ b/upb/decode_fast.c @@ -14,10 +14,9 @@ return fastdecode_generic(UPB_PARSE_ARGS); typedef enum { - CARD_s = 0, - CARD_o = 1, - CARD_r = 2, - CARD_p = 3 + CARD_s = 0, /* Singular (optional, non-repeated) */ + CARD_o = 1, /* Oneof */ + CARD_r = 2, /* Repeated */ } upb_card; UPB_FORCEINLINE @@ -191,7 +190,7 @@ UPB_FORCEINLINE static const char *fastdecode_varint(UPB_PARSE_PARAMS, int tagbytes, int valbytes, upb_card card, bool zigzag, _upb_field_parser **funcs) { - uint64_t val = 0; + uint64_t val; void *dst; if (UPB_UNLIKELY(!fastdecode_checktag(data, tagbytes))) { RETURN_GENERIC("varint field tag mismatch\n"); @@ -296,7 +295,7 @@ TAGBYTES(o) /* string fields **************************************************************/ UPB_FORCEINLINE -bool fastdecode_boundscheck(const char *ptr, unsigned len, const char *end) { +bool fastdecode_boundscheck(const char *ptr, size_t len, const char *end) { uintptr_t uptr = (uintptr_t)ptr; uintptr_t uend = (uintptr_t)end; uintptr_t res = uptr + len; @@ -314,15 +313,13 @@ static const char *fastdecode_string(UPB_PARSE_PARAMS, int tagbytes, dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, tagbytes, sizeof(upb_strview), card); - len = ptr[tagbytes]; - if (UPB_UNLIKELY(len < 0)) { - RETURN_GENERIC("string field len >1 byte\n"); - } + len = (int8_t)ptr[tagbytes]; ptr += tagbytes + 1; dst->data = ptr; dst->size = len; if (UPB_UNLIKELY(fastdecode_boundscheck(ptr, len, d->limit))) { - return fastdecode_err(d); + dst->size = 0; + RETURN_GENERIC("string field len >1 byte\n"); } ptr += len; return fastdecode_dispatch(d, ptr, msg, table, hasbits); @@ -346,6 +343,14 @@ const char *upb_pos_2bt(UPB_PARSE_PARAMS) { /* message fields *************************************************************/ +UPB_FORCEINLINE +bool fastdecode_boundscheck2(const char *ptr, unsigned len, const char *end) { + uintptr_t uptr = (uintptr_t)ptr; + uintptr_t uend = (uintptr_t)end; + uintptr_t res = uptr + len; + return res < uptr || res > uend; +} + UPB_FORCEINLINE static const char *fastdecode_submsg(UPB_PARSE_PARAMS, int tagbytes, int msg_ceil_bytes, upb_card card) { @@ -415,7 +420,7 @@ again: ptr++; } ptr += tagbytes + 1; - if (UPB_UNLIKELY(fastdecode_boundscheck(ptr, len, saved_limit))) { + if (UPB_UNLIKELY(fastdecode_boundscheck2(ptr, len, saved_limit))) { return fastdecode_err(d); } d->limit = ptr + len;