mirror of https://github.com/grpc/grpc.git
Assert that pthread_join succeeds. (#32584)
Long story here:
CallbackAlternativeCQ operates a thread pool which processes a
completion queue and then directly invokes the completion function in
the same thread. This thread pool is initialized on first Ref() and
deallocated on last Unref().
When running an in-process synchronous server (as we do for tests, using
this
https://github.com/google/tensorstore/blob/master/tensorstore/internal/grpc/grpc_mock.h),
called by an async() interface caller, if the async() callback happens
to drop the last reference to the grpc Channel, then the channel
shutdown will attempt to run in one of the CallbackAlternativeCQ
threads.
This will cause a deadlock/race condition, as `CallbackAlternativeCQ` is
not designed to shut itself down. When this deadlock happens,
`pthread_join(pthread_id_)` will return `EDEADLK` and the thread will
keep running. However `EDEADLK` is silently ignored by Join() so
`CallbackAlternativeCQ::Unref` will continue to delete the underlying
grpc_completion_queue, leading to a `SIGSEGV` later in the process.
97ba987132/src/cpp/common/completion_queue_cc.cc (L115)
This adds an assert that pthread_join succeeded, which is useful as it
avoids a later SIGSEGV. Alternatively, the thread implementation could
gpr_log the error code before asserting.
Example backtrace of crash:
frame #0: 0x0000000194f1e868 libsystem_kernel.dylib`__pthread_kill + 8
frame #1: 0x0000000194f55cec libsystem_pthread.dylib`pthread_kill + 288
frame #2: 0x0000000194e8e2c8 libsystem_c.dylib`abort + 180
frame #3: 0x0000000194e8d620 libsystem_c.dylib`__assert_rtn + 272
frame #4: 0x0000000100a64f50 grpc_kvstore_test`grpc_core::(anonymous
namespace)::ThreadInternalsPosix::Join() + 188
frame #5: 0x00000001009c5dd0 grpc_kvstore_test`grpc_core::Thread::Join()
+ 56
frame #6: 0x0000000100154474 grpc_kvstore_test`grpc::(anonymous
namespace)::CallbackAlternativeCQ::Unref() + 216
frame #7: 0x0000000100154390
grpc_kvstore_test`grpc::CompletionQueue::ReleaseCallbackAlternativeCQ(grpc::CompletionQueue*)
+ 120
frame #8: 0x000000010014130c grpc_kvstore_test`grpc::Channel::~Channel()
+ 220
frame #9: 0x00000001001413c8 grpc_kvstore_test`grpc::Channel::~Channel()
+ 28
frame #10: 0x000000010014d678
grpc_kvstore_test`std::__1::default_delete<grpc::Channel>::operator()(grpc::Channel*)
const + 44
frame #11: 0x000000010014d358
grpc_kvstore_test`std::__1::__shared_ptr_pointer<grpc::Channel*,
std::__1::shared_ptr<grpc::Channel>::__shared_ptr_default_delete<grpc::Channel,
grpc::Channel>, std::__1::allocator<grpc::Channel> >::__on_zero_shared()
+ 72
frame #12: 0x000000010002ab5c
grpc_kvstore_test`std::__1::__shared_count::__release_shared() + 60
frame #13: 0x000000010002ab00
grpc_kvstore_test`std::__1::__shared_weak_count::__release_shared() + 28
frame #14: 0x000000010002aad0
grpc_kvstore_test`std::__1::shared_ptr<grpc::ServerCredentials>::~shared_ptr()
+ 56
frame #15: 0x00000001000053ec
grpc_kvstore_test`std::__1::shared_ptr<tensorstore_grpc::kvstore::grpc_gen::KvStoreService::Stub>::~shared_ptr()
+ 28
frame #16: 0x000000010014653c
grpc_kvstore_test`grpc::ClientContext::~ClientContext() + 356
frame #17: 0x0000000100146570
grpc_kvstore_test`grpc::ClientContext::~ClientContext() + 28
frame #18: 0x00000001000ab000 grpc_kvstore_test`tensorstore::(anonymous
namespace)::ReadTask::~ReadTask() + 68
frame #19: 0x00000001000aae90 grpc_kvstore_test`tensorstore::(anonymous
namespace)::ReadTask::~ReadTask() + 28
frame #20: 0x00000001000aae18
grpc_kvstore_test`tensorstore::internal::intrusive_ptr_decrement(tensorstore::internal::AtomicReferenceCount<tensorstore::(anonymous
namespace)::ReadTask> const*) + 68
frame #21: 0x00000001000aadc8 grpc_kvstore_test`void
tensorstore::internal::DefaultIntrusivePtrTraits::decrement<tensorstore::(anonymous
namespace)::ReadTask*>(tensorstore::(anonymous namespace)::ReadTask*) +
24
frame #22: 0x00000001000aad9c
grpc_kvstore_test`tensorstore::internal::IntrusivePtr<tensorstore::(anonymous
namespace)::ReadTask,
tensorstore::internal::DefaultIntrusivePtrTraits>::~IntrusivePtr() + 52
frame #23: 0x00000001000a5994
grpc_kvstore_test`tensorstore::internal::IntrusivePtr<tensorstore::(anonymous
namespace)::ReadTask,
tensorstore::internal::DefaultIntrusivePtrTraits>::~IntrusivePtr() + 28
frame #24: 0x00000001000aac24 grpc_kvstore_test`tensorstore::(anonymous
namespace)::ReadTask::Start(tensorstore_grpc::kvstore::grpc_gen::KvStoreService::StubInterface*,
absl::Time)::'lambda'(grpc::Status)::~() + 40
frame #25: 0x00000001000a6280 grpc_kvstore_test`tensorstore::(anonymous
namespace)::ReadTask::Start(tensorstore_grpc::kvstore::grpc_gen::KvStoreService::StubInterface*,
absl::Time)::'lambda'(grpc::Status)::~() + 28
frame #26: 0x00000001000a84ac
grpc_kvstore_test`std::__1::__compressed_pair_elem<tensorstore::(anonymous
namespace)::ReadTask::Start(tensorstore_grpc::kvstore::grpc_gen::KvStoreService::StubInterface*,
absl::Time)::'lambda'(grpc::Status), 0,
false>::~__compressed_pair_elem() + 28
frame #27: 0x00000001000a86c0
grpc_kvstore_test`std::__1::__compressed_pair<tensorstore::(anonymous
namespace)::ReadTask::Start(tensorstore_grpc::kvstore::grpc_gen::KvStoreService::StubInterface*,
absl::Time)::'lambda'(grpc::Status),
std::__1::allocator<tensorstore::(anonymous
namespace)::ReadTask::Start(tensorstore_grpc::kvstore::grpc_gen::KvStoreService::StubInterface*,
absl::Time)::'lambda'(grpc::Status)> >::~__compressed_pair() + 28
frame #28: 0x00000001000a8694
grpc_kvstore_test`std::__1::__compressed_pair<tensorstore::(anonymous
namespace)::ReadTask::Start(tensorstore_grpc::kvstore::grpc_gen::KvStoreService::StubInterface*,
absl::Time)::'lambda'(grpc::Status),
std::__1::allocator<tensorstore::(anonymous
namespace)::ReadTask::Start(tensorstore_grpc::kvstore::grpc_gen::KvStoreService::StubInterface*,
absl::Time)::'lambda'(grpc::Status)> >::~__compressed_pair() + 28
frame #29: 0x00000001000a990c
grpc_kvstore_test`std::__1::__function::__alloc_func<tensorstore::(anonymous
namespace)::ReadTask::Start(tensorstore_grpc::kvstore::grpc_gen::KvStoreService::StubInterface*,
absl::Time)::'lambda'(grpc::Status),
std::__1::allocator<tensorstore::(anonymous
namespace)::ReadTask::Start(tensorstore_grpc::kvstore::grpc_gen::KvStoreService::StubInterface*,
absl::Time)::'lambda'(grpc::Status)>, void (grpc::Status)>::destroy() +
24
frame #30: 0x00000001000a7ea0
grpc_kvstore_test`std::__1::__function::__func<tensorstore::(anonymous
namespace)::ReadTask::Start(tensorstore_grpc::kvstore::grpc_gen::KvStoreService::StubInterface*,
absl::Time)::'lambda'(grpc::Status),
std::__1::allocator<tensorstore::(anonymous
namespace)::ReadTask::Start(tensorstore_grpc::kvstore::grpc_gen::KvStoreService::StubInterface*,
absl::Time)::'lambda'(grpc::Status)>, void (grpc::Status)>::destroy() +
28
frame #31: 0x00000001000aabbc
grpc_kvstore_test`std::__1::__function::__value_func<void
(grpc::Status)>::~__value_func() + 68
frame #32: 0x00000001000aab68
grpc_kvstore_test`std::__1::__function::__value_func<void
(grpc::Status)>::~__value_func() + 28
frame #33: 0x00000001000aab3c grpc_kvstore_test`std::__1::function<void
(grpc::Status)>::~function() + 28
frame #34: 0x00000001000a6254 grpc_kvstore_test`std::__1::function<void
(grpc::Status)>::~function() + 28
frame #35: 0x0000000100108ae0
grpc_kvstore_test`grpc::internal::CallbackWithStatusTag::Run(bool) + 368
frame #36: 0x0000000100108964
grpc_kvstore_test`grpc::internal::CallbackWithStatusTag::StaticRun(grpc_completion_queue_functor*,
int) + 44
frame #37: 0x0000000100154cb0 grpc_kvstore_test`grpc::(anonymous
namespace)::CallbackAlternativeCQ::ThreadLoop(void*) + 356
frame #38: 0x0000000100a650b8 grpc_kvstore_test`grpc_core::(anonymous
namespace)::ThreadInternalsPosix::ThreadInternalsPosix(char const*, void
(*)(void*), void*, bool*, grpc_core::Thread::Options
const&)::'lambda'(void*)::operator()(void*) const + 240
frame #39: 0x0000000100a64fbc grpc_kvstore_test`grpc_core::(anonymous
namespace)::ThreadInternalsPosix::ThreadInternalsPosix(char const*, void
(*)(void*), void*, bool*, grpc_core::Thread::Options
const&)::'lambda'(void*)::__invoke(void*) + 28
frame #40: 0x0000000194f5606c libsystem_pthread.dylib`_pthread_start +
148
pull/32675/head
parent
85f7aa3468
commit
3598c9f6f0
1 changed files with 7 additions and 1 deletions
Loading…
Reference in new issue