Added watchdog support for a Multi-Kill threshold. (#12108)

The watchdog will now kill the Envoy process if max(2, ceil(registered_threads * multikill_threshold)) threads have been nonresponsive for at least the multikill_timeout.

Signed-off-by: Kevin Baichoo <kbaichoo@google.com>

Mirrored from https://github.com/envoyproxy/envoy @ 7f78581116ecdc9dcca319ebe68d4c8ac1d817ba
master-ci-test
data-plane-api(CircleCI) 5 years ago
parent 63d84df73f
commit 1d167872c4
  1. 1
      envoy/config/bootstrap/v3/BUILD
  2. 14
      envoy/config/bootstrap/v3/bootstrap.proto
  3. 1
      envoy/config/bootstrap/v4alpha/BUILD
  4. 14
      envoy/config/bootstrap/v4alpha/bootstrap.proto

@ -15,6 +15,7 @@ api_proto_package(
"//envoy/config/overload/v3:pkg",
"//envoy/config/trace/v3:pkg",
"//envoy/extensions/transport_sockets/tls/v3:pkg",
"//envoy/type/v3:pkg",
"@com_github_cncf_udpa//udpa/annotations:pkg",
"@com_github_cncf_udpa//udpa/core/v1:pkg",
],

@ -14,6 +14,7 @@ import "envoy/config/metrics/v3/stats.proto";
import "envoy/config/overload/v3/overload.proto";
import "envoy/config/trace/v3/http_tracer.proto";
import "envoy/extensions/transport_sockets/tls/v3/secret.proto";
import "envoy/type/v3/percent.proto";
import "google/protobuf/duration.proto";
import "google/protobuf/struct.proto";
@ -297,6 +298,7 @@ message ClusterManager {
// Envoy process watchdog configuration. When configured, this monitors for
// nonresponsive threads and kills the process after the configured thresholds.
// See the :ref:`watchdog documentation <operations_performance_watchdog>` for more information.
// [#next-free-field: 6]
message Watchdog {
option (udpa.annotations.versioning).previous_message_type = "envoy.config.bootstrap.v2.Watchdog";
@ -314,10 +316,16 @@ message Watchdog {
// kill behavior. If not specified the default is 0 (disabled).
google.protobuf.Duration kill_timeout = 3;
// If at least two watched threads have been nonresponsive for at least this
// duration assume a true deadlock and kill the entire Envoy process. Set to 0
// to disable this behavior. If not specified the default is 0 (disabled).
// If max(2, ceil(registered_threads * Fraction(*multikill_threshold*)))
// threads have been nonresponsive for at least this duration kill the entire
// Envoy process. Set to 0 to disable this behavior. If not specified the
// default is 0 (disabled).
google.protobuf.Duration multikill_timeout = 4;
// Sets the threshold for *multikill_timeout* as the percentage of registered
// threads that must be nonresponsive for at least *multikill_timeout* before
// the Envoy process is killed. If not specified the default is 0.
type.v3.Percent multikill_threshold = 5;
}
// Runtime :ref:`configuration overview <config_runtime>` (deprecated).

@ -14,6 +14,7 @@ api_proto_package(
"//envoy/config/metrics/v4alpha:pkg",
"//envoy/config/overload/v3:pkg",
"//envoy/extensions/transport_sockets/tls/v4alpha:pkg",
"//envoy/type/v3:pkg",
"@com_github_cncf_udpa//udpa/annotations:pkg",
"@com_github_cncf_udpa//udpa/core/v1:pkg",
],

@ -13,6 +13,7 @@ import "envoy/config/listener/v4alpha/listener.proto";
import "envoy/config/metrics/v4alpha/stats.proto";
import "envoy/config/overload/v3/overload.proto";
import "envoy/extensions/transport_sockets/tls/v4alpha/secret.proto";
import "envoy/type/v3/percent.proto";
import "google/protobuf/duration.proto";
import "google/protobuf/struct.proto";
@ -288,6 +289,7 @@ message ClusterManager {
// Envoy process watchdog configuration. When configured, this monitors for
// nonresponsive threads and kills the process after the configured thresholds.
// See the :ref:`watchdog documentation <operations_performance_watchdog>` for more information.
// [#next-free-field: 6]
message Watchdog {
option (udpa.annotations.versioning).previous_message_type = "envoy.config.bootstrap.v3.Watchdog";
@ -305,10 +307,16 @@ message Watchdog {
// kill behavior. If not specified the default is 0 (disabled).
google.protobuf.Duration kill_timeout = 3;
// If at least two watched threads have been nonresponsive for at least this
// duration assume a true deadlock and kill the entire Envoy process. Set to 0
// to disable this behavior. If not specified the default is 0 (disabled).
// If max(2, ceil(registered_threads * Fraction(*multikill_threshold*)))
// threads have been nonresponsive for at least this duration kill the entire
// Envoy process. Set to 0 to disable this behavior. If not specified the
// default is 0 (disabled).
google.protobuf.Duration multikill_timeout = 4;
// Sets the threshold for *multikill_timeout* as the percentage of registered
// threads that must be nonresponsive for at least *multikill_timeout* before
// the Envoy process is killed. If not specified the default is 0.
type.v3.Percent multikill_threshold = 5;
}
// Runtime :ref:`configuration overview <config_runtime>` (deprecated).

Loading…
Cancel
Save