syntax = "proto3"; package envoy.service.discovery.v2; option java_generic_services = true; import "envoy/api/v2/core/base.proto"; import "envoy/api/v2/core/health_check.proto"; import "envoy/api/v2/endpoint/endpoint.proto"; import "google/api/annotations.proto"; import "google/protobuf/duration.proto"; // [#proto-status: experimental] // HDS is Health Discovery Service. It compliments Envoy’s health checking // service by designating this Envoy to be a healthchecker for a subset of hosts // in the cluster. The status of these health checks will be reported to the // management server, where it can be aggregated etc and redistributed back to // Envoy through EDS. service HealthDiscoveryService { // 1. Envoy starts up and if its can_healthcheck option in the static // bootstrap config is enabled, sends HealthCheckRequest to the management // server. It supplies its capabilities (which protocol it can health check // with, what zone it resides in, etc.). // 2. In response to (1), the management server designates this Envoy as a // healthchecker to health check a subset of all upstream hosts for a given // cluster (for example upstream Host 1 and Host 2). It streams // HealthCheckSpecifier messages with cluster related configuration for all // clusters this Envoy is designated to health check. Subsequent // HealthCheckSpecifier message will be sent on changes to: // a. Endpoints to health checks // b. Per cluster configuration change // 3. Envoy creates a health probe based on the HealthCheck config and sends // it to endpoint(ip:port) of Host 1 and 2. Based on the HealthCheck // configuration Envoy waits upon the arrival of the probe response and // looks at the content of the response to decide whether the endpoint is // healthy or not. If a response hasn’t been received within the timeout // interval, the endpoint health status is considered TIMEOUT. // 4. Envoy reports results back in an EndpointHealthResponse message. // Envoy streams responses as often as the interval configured by the // management server in HealthCheckSpecifier. // 5. The management Server collects health statuses for all endpoints in the // cluster (for all clusters) and uses this information to construct // EndpointDiscoveryResponse messages. // 6. Once Envoy has a list of upstream endpoints to send traffic to, it load // balances traffic to them without additional health checking. It may // use inline healthcheck (i.e. consider endpoint UNHEALTHY if connection // failed to a particular endpoint to account for health status propagation // delay between HDS and EDS). // By default, can_healthcheck is true. If can_healthcheck is false, Cluster // configuration may not contain HealthCheck message. // TODO(htuch): How is can_healthcheck communicated to CDS to ensure the above // invariant? // TODO(htuch): Add @amb67's diagram. rpc StreamHealthCheck(stream HealthCheckRequestOrEndpointHealthResponse) returns (stream HealthCheckSpecifier) { } // TODO(htuch): Unlike the gRPC version, there is no stream-based binding of // request/response. Should we add an identifier to the HealthCheckSpecifier // to bind with the response? rpc FetchHealthCheck(HealthCheckRequestOrEndpointHealthResponse) returns (HealthCheckSpecifier) { option (google.api.http) = { post: "/v2/discovery:health_check" body: "*" }; } } // Defines supported protocols etc, so the management server can assign proper // endpoints to healthcheck. message Capability { // Different Envoy instances may have different capabilities (e.g. Redis) // and/or have ports enabled for different protocols. enum Protocol { HTTP = 0; TCP = 1; REDIS = 2; } repeated Protocol health_check_protocol = 1; } message HealthCheckRequest { envoy.api.v2.core.Node node = 1; Capability capability = 2; } message EndpointHealth { envoy.api.v2.endpoint.Endpoint endpoint = 1; envoy.api.v2.core.HealthStatus health_status = 2; } message EndpointHealthResponse { repeated EndpointHealth endpoints_health = 1; } message HealthCheckRequestOrEndpointHealthResponse { oneof request_type { HealthCheckRequest health_check_request = 1; EndpointHealthResponse endpoint_health_response = 2; } } message LocalityEndpoints { envoy.api.v2.core.Locality locality = 1; repeated envoy.api.v2.endpoint.Endpoint endpoints = 2; } // The cluster name and locality is provided to Envoy for the endpoints that it // health checks to support statistics reporting, logging and debugging by the // Envoy instance (outside of HDS). For maximum usefulness, it should match the // same cluster structure as that provided by EDS. message ClusterHealthCheck { string cluster_name = 1; repeated envoy.api.v2.core.HealthCheck health_checks = 2; repeated LocalityEndpoints endpoints = 3; } message HealthCheckSpecifier { repeated ClusterHealthCheck health_check = 1; // The default is 1 second. google.protobuf.Duration interval = 2; }