// data-plane-api/envoy/service/discovery/v2/hds.proto

syntax = "proto3";

package envoy.service.discovery.v2;

option java_package = "io.envoyproxy.envoy.service.discovery.v2";
option java_outer_classname = "HdsProto";
option java_multiple_files = true;
option java_generic_services = true;

import "envoy/api/v2/core/base.proto";
import "envoy/api/v2/core/health_check.proto";
import "envoy/api/v2/endpoint/endpoint.proto";

import "google/api/annotations.proto";
import "google/protobuf/duration.proto";

// Health Discovery Service (HDS). HDS complements Envoy's health checking
// service by designating this Envoy as a health checker for a subset of the
// hosts in the cluster. The status of these health checks is reported to the
// management server, where it can be aggregated etc. and redistributed back
// to Envoy through EDS.
service HealthDiscoveryService {
  // Bidirectional streaming variant of the health check exchange.
  //
  // Protocol overview:
  // 1. On startup, if the can_healthcheck option in its static bootstrap
  //    config is enabled, Envoy sends a HealthCheckRequest to the management
  //    server, advertising its capabilities (the protocols it can health check
  //    with, the zone it resides in, etc.).
  // 2. In response to (1), the management server designates this Envoy as a
  //    health checker for a subset of the upstream hosts of a given cluster
  //    (for example, upstream Host 1 and Host 2). It streams
  //    HealthCheckSpecifier messages carrying the cluster-related
  //    configuration for every cluster this Envoy is designated to health
  //    check. Further HealthCheckSpecifier messages are sent when either of
  //    the following changes:
  //    a. The endpoints to health check.
  //    b. The per-cluster configuration.
  // 3. Envoy builds a health probe from the HealthCheck config and sends it to
  //    the endpoint (ip:port) of Host 1 and Host 2. As dictated by the
  //    HealthCheck configuration, Envoy waits for the probe response and
  //    inspects its content to decide whether the endpoint is healthy. If no
  //    response arrives within the timeout interval, the endpoint's health
  //    status is considered TIMEOUT.
  // 4. Envoy reports the results back in an EndpointHealthResponse message,
  //    streaming responses as often as the interval configured by the
  //    management server in HealthCheckSpecifier.
  // 5. The management server collects the health statuses of all endpoints in
  //    the cluster (for all clusters) and uses them to construct
  //    EndpointDiscoveryResponse messages.
  // 6. Once Envoy has a list of upstream endpoints to send traffic to, it
  //    load balances traffic across them without additional health checking.
  //    It may still use inline health checking (i.e. consider an endpoint
  //    UNHEALTHY if a connection to it failed) to account for the health
  //    status propagation delay between HDS and EDS.
  //
  // By default, can_healthcheck is true. If can_healthcheck is false, the
  // Cluster configuration may not contain a HealthCheck message.
  // TODO(htuch): How is can_healthcheck communicated to CDS to ensure the above
  // invariant?
  // TODO(htuch): Add @amb67's diagram.
  rpc StreamHealthCheck(stream HealthCheckRequestOrEndpointHealthResponse)
      returns (stream HealthCheckSpecifier) {}

  // Unary variant of the exchange, bound to HTTP POST on
  // /v2/discovery:health_check with the entire request message as the body.
  //
  // TODO(htuch): Unlike the gRPC version, there is no stream-based binding of
  // request/response. Should we add an identifier to the HealthCheckSpecifier
  // to bind with the response?
  rpc FetchHealthCheck(HealthCheckRequestOrEndpointHealthResponse)
      returns (HealthCheckSpecifier) {
    option (google.api.http) = {
      post: "/v2/discovery:health_check"
      body: "*"
    };
  }
}

// Defines supported protocols etc, so the management server can assign proper
// endpoints to healthcheck.
message Capability {
  // Different Envoy instances may have different capabilities (e.g. Redis)
  // and/or have ports enabled for different protocols.
  //
  // NOTE(review): HTTP is the zero value, i.e. the proto3 default for any
  // singular field of this type. The value numbers are part of the wire
  // format and must not be renumbered.
  enum Protocol {
    HTTP = 0;
    TCP = 1;
    REDIS = 2;
  }

  // The protocols this Envoy is able to health check with.
  repeated Protocol health_check_protocols = 1;
}

// Initial message sent by Envoy to the management server (step 1 of the
// HealthDiscoveryService flow) to announce that it can act as a health checker
// and to describe what it is able to check.
message HealthCheckRequest {
  // The Envoy node issuing the request.
  // NOTE(review): presumably this is where the zone/locality of the Envoy is
  // conveyed; confirm against envoy/api/v2/core/base.proto.
  api.v2.core.Node node = 1;

  // The health checking capabilities of this Envoy.
  Capability capability = 2;
}

// The health check result for a single endpoint, as determined by the Envoy
// that probed it.
message EndpointHealth {
  // The endpoint that was health checked.
  api.v2.endpoint.Endpoint endpoint = 1;

  // The health status Envoy determined for the endpoint (e.g. TIMEOUT when no
  // probe response arrived within the timeout interval).
  api.v2.core.HealthStatus health_status = 2;
}

// Report of health check results sent by Envoy back to the management server
// (step 4 of the HealthDiscoveryService flow).
message EndpointHealthResponse {
  // Health results, one EndpointHealth entry per reported endpoint.
  repeated EndpointHealth endpoints_health = 1;
}

// Request message type of both StreamHealthCheck and FetchHealthCheck. Each
// message carries exactly one of the two payloads Envoy sends to the
// management server.
message HealthCheckRequestOrEndpointHealthResponse {
  // Which kind of message Envoy is sending; at most one member is set.
  oneof request_type {
    // Announces this Envoy and its capabilities to the management server.
    HealthCheckRequest health_check_request = 1;

    // Reports health check results for the endpoints this Envoy probed.
    EndpointHealthResponse endpoint_health_response = 2;
  }
}

// A group of endpoints together with the locality they are associated with.
// Used by ClusterHealthCheck to list the endpoints to health check.
message LocalityEndpoints {
  // The locality of the endpoints listed below.
  api.v2.core.Locality locality = 1;

  // The endpoints in this locality.
  repeated api.v2.endpoint.Endpoint endpoints = 2;
}

// The cluster name and locality are provided to Envoy for the endpoints that
// it health checks to support statistics reporting, logging and debugging by
// the Envoy instance (outside of HDS). For maximum usefulness, it should match
// the same cluster structure as that provided by EDS.
message ClusterHealthCheck {
  // Name of the cluster the endpoints below belong to.
  string cluster_name = 1;

  // The HealthCheck configurations from which Envoy creates its health probes
  // for this cluster's endpoints.
  repeated api.v2.core.HealthCheck health_checks = 2;

  // The endpoints to health check, grouped by locality.
  repeated LocalityEndpoints locality_endpoints = 3;
}

// Sent by the management server to tell this Envoy which endpoints to health
// check and how. A new HealthCheckSpecifier is sent when the endpoints to
// health check or the per-cluster configuration change.
message HealthCheckSpecifier {
  // Health check configuration for every cluster this Envoy is designated to
  // health check.
  repeated ClusterHealthCheck cluster_health_checks = 1;

  // How often Envoy streams EndpointHealthResponse messages back to the
  // management server.
  // The default is 1 second.
  google.protobuf.Duration interval = 2;
}