[EventEngine][Windows] Temporary changes for rare-flake debugging (#33894)

Could not reproduce (CNR) a WindowsEventEngine listener flake in:
 * 10k local Windows development machine runs
 * 50k Windows RBE runs
 * 10k Windows VM runs

It fails ~5 times per day on the master CI jobs.

This PR adds some logging to try to see if an edge is missed, and
switches the thread pool implementation to see if that makes the flake
go away. If the flakes disappear, I'll try removing one or the other to
see if either change independently fixes the problem (hopefully not the
logging).

---------

Co-authored-by: drfloob <drfloob@users.noreply.github.com>
revert-33442-printExp
AJ Heller 2 years ago committed by GitHub
parent aca70e8cca
commit 0897f0faf3
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 7
      src/core/lib/event_engine/thread_pool/thread_pool_factory.cc
  2. 10
      src/core/lib/surface/completion_queue.cc
  3. 6
      test/core/end2end/tests/no_logging.cc

@ -29,6 +29,13 @@ namespace grpc_event_engine {
namespace experimental {
std::shared_ptr<ThreadPool> MakeThreadPool(size_t reserve_threads) {
// TODO(hork): remove when the listener flake is identified
#ifdef GPR_WINDOWS
if (grpc_core::IsEventEngineListenerEnabled()) {
return std::make_shared<WorkStealingThreadPool>(
grpc_core::Clamp(gpr_cpu_num_cores(), 2u, 16u));
}
#endif
if (grpc_core::IsWorkStealingEnabled()) {
return std::make_shared<WorkStealingThreadPool>(
grpc_core::Clamp(gpr_cpu_num_cores(), 2u, 16u));

@ -57,6 +57,10 @@
#include "src/core/lib/surface/api_trace.h"
#include "src/core/lib/surface/event_string.h"
#ifdef GPR_WINDOWS
#include "src/core/lib/experiments/experiments.h"
#endif
grpc_core::TraceFlag grpc_trace_operation_failures(false, "op_failure");
grpc_core::DebugOnlyTraceFlag grpc_trace_pending_tags(false, "pending_tags");
grpc_core::DebugOnlyTraceFlag grpc_trace_cq_refcount(false, "cq_refcount");
@ -882,6 +886,12 @@ void grpc_cq_end_op(grpc_completion_queue* cq, void* tag,
void (*done)(void* done_arg, grpc_cq_completion* storage),
void* done_arg, grpc_cq_completion* storage,
bool internal) {
// TODO(hork): remove when the listener flake is identified
#ifdef GPR_WINDOWS
// Temporary debug logging: record every completion posted to the queue while
// the EventEngine listener experiment is enabled, so a missed completion can
// be spotted in the logs.
if (grpc_core::IsEventEngineListenerEnabled()) {
// `tag` is a void*, so it must not be handed to a `%d` conversion directly
// (argument/specifier mismatch). Convert it to an integer wide enough to hold
// the pointer value and print that with a matching `%lld`.
gpr_log(GPR_ERROR, "cq_end_op called for tag %lld (0x%p)",
reinterpret_cast<long long>(tag), tag);
}
#endif
cq->vtable->end_op(cq, tag, error, done, done_arg, storage, internal);
}

@ -139,6 +139,12 @@ void SimpleRequest(CoreEnd2endTest& test) {
}
CORE_END2END_TEST(NoLoggingTest, NoLoggingTest) {
// TODO(hork): remove when the listener flake is identified
#ifdef GPR_WINDOWS
if (IsEventEngineListenerEnabled()) {
GTEST_SKIP() << "not for windows + event engine listener";
}
#endif
Verifier verifier;
verifier.FailOnNonErrorLog();
for (int i = 0; i < 10; i++) {

Loading…
Cancel
Save