Merge pull request #23164 from apolcyn/add_overall_deadline_to_soak_test

Add an overall deadline option to interop soak tests to prevent test runner timeouts
pull/23185/head
apolcyn 5 years ago committed by GitHub
commit 84d4fca6d2
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 12
      test/cpp/interop/client.cc
  2. 44
      test/cpp/interop/interop_client.cc
  3. 9
      test/cpp/interop/interop_client.h

@ -96,7 +96,11 @@ DEFINE_int32(soak_max_failures, 0,
"per-iteration max acceptable latency).");
DEFINE_int32(soak_per_iteration_max_acceptable_latency_ms, 0,
"The number of milliseconds a single iteration in the two soak "
"tests (rpc_soak and channel_soak) is allowed to take.");
"tests (rpc_soak and channel_soak) should take.");
// Overall time budget, in seconds, for one soak test run (rpc_soak or
// channel_soak); the value is forwarded to InteropClient::PerformSoakTest.
// NOTE(review): PerformSoakTest computes its deadline as "now + this value"
// and only starts an iteration while the current time is still before that
// deadline, so the default of 0 looks like it would run zero iterations and
// then fail the "ran fewer iterations than requested" check. Confirm that
// every caller sets this flag explicitly, or pick a non-zero default.
DEFINE_int32(soak_overall_timeout_seconds, 0,
"The overall number of seconds after which a soak test should "
"stop and fail, if the desired number of iterations have not yet "
"completed.");
DEFINE_int32(iteration_interval, 10,
"The interval in seconds between rpcs. This is used by "
"long_connection test");
@ -265,11 +269,13 @@ int main(int argc, char** argv) {
actions["channel_soak"] =
std::bind(&grpc::testing::InteropClient::DoChannelSoakTest, &client,
FLAGS_soak_iterations, FLAGS_soak_max_failures,
FLAGS_soak_per_iteration_max_acceptable_latency_ms);
FLAGS_soak_per_iteration_max_acceptable_latency_ms,
FLAGS_soak_overall_timeout_seconds);
actions["rpc_soak"] =
std::bind(&grpc::testing::InteropClient::DoRpcSoakTest, &client,
FLAGS_soak_iterations, FLAGS_soak_max_failures,
FLAGS_soak_per_iteration_max_acceptable_latency_ms);
FLAGS_soak_per_iteration_max_acceptable_latency_ms,
FLAGS_soak_overall_timeout_seconds);
actions["long_lived_channel"] =
std::bind(&grpc::testing::InteropClient::DoLongLivedChannelTest, &client,
FLAGS_soak_iterations, FLAGS_iteration_interval);

@ -1106,16 +1106,25 @@ InteropClient::PerformOneSoakTestIteration(
void InteropClient::PerformSoakTest(
const bool reset_channel_per_iteration, const int32_t soak_iterations,
const int32_t max_failures,
const int32_t max_acceptable_per_iteration_latency_ms) {
const int32_t max_acceptable_per_iteration_latency_ms,
const int32_t overall_timeout_seconds) {
std::vector<std::tuple<bool, int32_t, std::string>> results;
grpc_histogram* latencies_ms_histogram = grpc_histogram_create(
1 /* resolution */,
500 * 1e3 /* largest bucket; 500 seconds is unlikely */);
for (int i = 0; i < soak_iterations; ++i) {
gpr_timespec overall_deadline = gpr_time_add(
gpr_now(GPR_CLOCK_MONOTONIC),
gpr_time_from_seconds(overall_timeout_seconds, GPR_TIMESPAN));
int32_t iterations_ran = 0;
for (int i = 0;
i < soak_iterations &&
gpr_time_cmp(gpr_now(GPR_CLOCK_MONOTONIC), overall_deadline) < 0;
++i) {
auto result = PerformOneSoakTestIteration(
reset_channel_per_iteration, max_acceptable_per_iteration_latency_ms);
results.push_back(result);
grpc_histogram_add(latencies_ms_histogram, std::get<1>(result));
iterations_ran++;
}
int total_failures = 0;
for (size_t i = 0; i < results.size(); i++) {
@ -1137,7 +1146,24 @@ void InteropClient::PerformSoakTest(
grpc_histogram_percentile(latencies_ms_histogram, 90);
double latency_ms_worst = grpc_histogram_maximum(latencies_ms_histogram);
grpc_histogram_destroy(latencies_ms_histogram);
if (total_failures > max_failures) {
if (iterations_ran < soak_iterations) {
gpr_log(
GPR_ERROR,
"soak test consumed all %d seconds of time and quit early, only "
"having ran %d out of desired %d iterations. "
"total_failures: %d. "
"max_failures_threshold: %d. "
"median_soak_iteration_latency: %lf ms. "
"90th_soak_iteration_latency: %lf ms. "
"worst_soak_iteration_latency: %lf ms. "
"Some or all of the iterations that did run were unexpectedly slow. "
"See breakdown above for which iterations succeeded, failed, and "
"why for more info.",
overall_timeout_seconds, iterations_ran, soak_iterations,
total_failures, max_failures, latency_ms_median, latency_ms_90th,
latency_ms_worst);
GPR_ASSERT(0);
} else if (total_failures > max_failures) {
gpr_log(GPR_ERROR,
"soak test ran: %d iterations. total_failures: %d exceeds "
"max_failures_threshold: %d. "
@ -1165,23 +1191,27 @@ void InteropClient::PerformSoakTest(
// Entry point for the rpc_soak interop test.
//
// Logs the requested iteration count, asserts that soak_iterations is
// positive, and delegates the actual work to PerformSoakTest with
// reset_channel_per_iteration = false. Returns true once PerformSoakTest has
// returned; the timeout path visible in PerformSoakTest aborts via
// GPR_ASSERT(0) rather than returning a status.
//
// NOTE(review): this span comes from a rendered diff, so the closing line of
// the parameter list and the PerformSoakTest argument list each appear twice:
// first the previous version, then its replacement that adds
// overall_timeout_seconds.
// NOTE(review): max_acceptable_per_iteration_latency_ms is int64_t here but
// PerformSoakTest declares it as int32_t, so the value is narrowed at the
// call — confirm this is intentional.
bool InteropClient::DoRpcSoakTest(
int32_t soak_iterations, int32_t max_failures,
int64_t max_acceptable_per_iteration_latency_ms) {
int64_t max_acceptable_per_iteration_latency_ms,
int32_t overall_timeout_seconds) {
gpr_log(GPR_DEBUG, "Sending %d RPCs...", soak_iterations);
GPR_ASSERT(soak_iterations > 0);
PerformSoakTest(false /* reset channel per iteration */, soak_iterations,
max_failures, max_acceptable_per_iteration_latency_ms);
max_failures, max_acceptable_per_iteration_latency_ms,
overall_timeout_seconds);
gpr_log(GPR_DEBUG, "rpc_soak test done.");
return true;
}
// Entry point for the channel_soak interop test.
//
// Logs the requested iteration count, asserts that soak_iterations is
// positive, and delegates the actual work to PerformSoakTest with
// reset_channel_per_iteration = true, i.e. the channel is torn down for each
// iteration. Returns true once PerformSoakTest has returned; the timeout path
// visible in PerformSoakTest aborts via GPR_ASSERT(0) rather than returning a
// status.
//
// NOTE(review): this span comes from a rendered diff, so the closing line of
// the parameter list and the PerformSoakTest argument list each appear twice:
// first the previous version, then its replacement that adds
// overall_timeout_seconds.
// NOTE(review): max_acceptable_per_iteration_latency_ms is int64_t here but
// PerformSoakTest declares it as int32_t, so the value is narrowed at the
// call — confirm this is intentional.
bool InteropClient::DoChannelSoakTest(
int32_t soak_iterations, int32_t max_failures,
int64_t max_acceptable_per_iteration_latency_ms) {
int64_t max_acceptable_per_iteration_latency_ms,
int32_t overall_timeout_seconds) {
gpr_log(GPR_DEBUG, "Sending %d RPCs, tearing down the channel each time...",
soak_iterations);
GPR_ASSERT(soak_iterations > 0);
PerformSoakTest(true /* reset channel per iteration */, soak_iterations,
max_failures, max_acceptable_per_iteration_latency_ms);
max_failures, max_acceptable_per_iteration_latency_ms,
overall_timeout_seconds);
gpr_log(GPR_DEBUG, "channel_soak test done.");
return true;
}

@ -77,9 +77,11 @@ class InteropClient {
// but at some point in the future, might be codified and implemented in all
// languages
bool DoChannelSoakTest(int32_t soak_iterations, int32_t max_failures,
int64_t max_acceptable_per_iteration_latency_ms);
int64_t max_acceptable_per_iteration_latency_ms,
int32_t overall_timeout_seconds);
bool DoRpcSoakTest(int32_t soak_iterations, int32_t max_failures,
int64_t max_acceptable_per_iteration_latency_ms);
int64_t max_acceptable_per_iteration_latency_ms,
int32_t overall_timeout_seconds);
bool DoLongLivedChannelTest(int32_t soak_iterations,
int32_t iteration_interval);
@ -137,7 +139,8 @@ class InteropClient {
void PerformSoakTest(const bool reset_channel_per_iteration,
const int32_t soak_iterations,
const int32_t max_failures,
const int32_t max_acceptable_per_iteration_latency_ms);
const int32_t max_acceptable_per_iteration_latency_ms,
const int32_t overall_timeout_seconds);
ServiceStub serviceStub_;
/// If true, abort() is not called for transient failures

Loading…
Cancel
Save