syntax = "proto3";

package envoy.api.v2;

import "api/base.proto";
import "api/discovery.proto";
import "api/health_check.proto";

import "google/api/annotations.proto";
import "google/protobuf/duration.proto";
import "google/protobuf/wrappers.proto";

// [#protodoc-title: Endpoints and EDS]

service EndpointDiscoveryService {
  // Bidirectional streaming endpoint discovery. The resource_names field of
  // each DiscoveryRequest lists the clusters whose updates are subscribed to.
  rpc StreamEndpoints(stream DiscoveryRequest)
      returns (stream DiscoveryResponse);

  // Single request/response form of endpoint discovery, also mapped to an
  // HTTP POST on /v2/discovery:endpoints.
  rpc FetchEndpoints(DiscoveryRequest)
      returns (DiscoveryResponse) {
    option (google.api.http) = {
      post: "/v2/discovery:endpoints"
      body: "*"
    };
  }

  // Advanced API that lets a remote server drive multi-dimensional load
  // balancing. To receive LB assignments:
  // 1. The management server is configured with capacity per cluster, zone
  //    and load metric. How that capacity configuration is defined is outside
  //    the scope of this document.
  // 2. Envoy issues a standard {Stream,Fetch}Endpoints request for the
  //    clusters to balance.
  //
  // Independently of the above, Envoy opens a StreamLoadStats bidirectional
  // stream to a management server:
  // 1. Once the connection is established, the management server publishes a
  //    LoadStatsResponse naming every cluster it wants load stats for.
  // 2. For each cluster, Envoy load balances incoming traffic across upstream
  //    hosts using per-zone weights and/or per-instance weights (if
  //    specified), according to the intra-zone LbPolicy. This information
  //    comes from the {Stream,Fetch}Endpoints exchange above.
  // 3. Upstream hosts may optionally attach a header (name still to be
  //    defined) to their replies, carrying an ASCII representation of
  //    EndpointLoadMetricStats.
  // 4. Envoy aggregates load reports over the period given by
  //    LoadStatsResponse.load_reporting_interval. The aggregate covers both
  //    the stats Envoy maintains itself (total_requests, rpc_errors etc.) and
  //    the load metrics returned by upstream hosts.
  // 5. When the load_reporting_interval timer expires, Envoy sends a new
  //    LoadStatsRequest filled with load reports for each cluster.
  // 6. The management server combines the load reports from all reporting
  //    Envoys around the world, computes a global assignment and prepares the
  //    traffic assignment destined for each zone Envoys are located in.
  //    Go to 2.
  rpc StreamLoadStats(stream LoadStatsRequest)
      returns (stream LoadStatsResponse);
}

// A single upstream endpoint together with the attributes the load balancer
// uses for it: health status, metadata and an optional weight.
message LbEndpoint {
  // The upstream host this entry describes.
  Endpoint endpoint = 1;

  // [#not-implemented-hide:] Optional health status when known and supplied by EDS server.
  HealthStatus health_status = 2;

  // The endpoint metadata specifies values that may be used by the load
  // balancer to select endpoints in a cluster for a given request. The filter
  // name should be specified as "envoy.lb". An example boolean key-value pair
  // is "canary", providing the optional canary status of the upstream host.
  // This may be matched against in a route's ForwardAction metadata_match field
  // to subset the endpoints considered in cluster load balancing.
  // TODO(htuch): [V2-API-DIFF] Need to plumb this through Envoy and have
  // everywhere that canary is used be capable of working on metadata.
  Metadata metadata = 3;

  // The optional load balancing weight of the upstream host, in the range 1 -
  // 128. Envoy uses the load balancing weight in some of the built in load
  // balancers. The load balancing weight for an endpoint is divided by the sum
  // of the weights of all endpoints in the endpoint's locality to produce a
  // percentage of traffic for the endpoint. This percentage is then further
  // weighted by the endpoint's locality's load balancing weight from
  // LocalityLbEndpoints. If unspecified, each host is presumed to have equal
  // weight in a locality.
  //
  // The limit of 128 is somewhat arbitrary, but is applied due to performance
  // concerns with the current implementation and can be removed when
  // https://github.com/envoyproxy/envoy/issues/1285 is fixed.
  google.protobuf.UInt32Value load_balancing_weight = 4;
}

// A group of endpoints belonging to a Locality.
// One can have multiple LocalityLbEndpoints for a locality, but this is
// generally only done if the different groups need to have different load
// balancing weights or different priorities.
message LocalityLbEndpoints {
  // The locality that the endpoints in this group belong to.
  Locality locality = 1;
  // The endpoints that make up this group.
  repeated LbEndpoint lb_endpoints = 2;

  // Optional: Per priority/region/zone/sub_zone weight - range 1-128. The
  // load balancing weight for a locality is divided by the sum of the weights
  // of all localities at the same priority level to produce the effective
  // percentage of traffic for the locality.
  //
  // Weights must be specified for either all localities in a given priority
  // level or none.
  //
  // If unspecified, each locality is presumed to have equal weight in a
  // cluster.
  //
  // The limit of 128 is somewhat arbitrary, but is applied due to performance
  // concerns with the current implementation and can be removed when
  // https://github.com/envoyproxy/envoy/issues/1285 is fixed.
  google.protobuf.UInt32Value load_balancing_weight = 3;

  // NOTE(review): field number 4 is not used in this message. Confirm whether
  // it belonged to a removed field and should be marked reserved.

  // Optional: the priority for this LocalityLbEndpoints. If unspecified this
  // will default to the highest priority (0).
  //
  // Under usual circumstances, Envoy will only select endpoints for the highest
  // priority (0). In the event all endpoints for a particular priority are
  // unavailable/unhealthy, Envoy will fail over to selecting endpoints for the
  // next highest priority group.
  //
  // Priorities should range from 0 (highest) to N (lowest) without skipping.
  google.protobuf.UInt32Value priority = 5;
}

// Aggregated values of one named load metric, as reported by upstream hosts.
//
// Example load report from a single request:
//
// [metric name, metric value]
// * cpu_seconds, 0.7
// * flash_utilization, 75
//
// When aggregating, Envoy needs to count how many requests' load reports
// included each metric type, so Envoy can account for requests that don't
// include that metric type. e.g.:
//
// [name, count, sum of values]
// * cpu_seconds, 10, 17.5
// * flash_utilization, 5, 375
message EndpointLoadMetricStats {
  // Name of the metric; may be empty.
  string metric_name = 1;
  // Number of calls that finished and included this metric.
  uint64 num_requests_finished_with_metric = 2;
  // Sum of metric values across all calls that finished with this metric for
  // load_reporting_interval.
  double total_metric_value = 3;
}

// These are stats Envoy reports to GLB every so often. Report frequency is
// defined by LoadStatsResponse.load_reporting_interval.
// Stats are per upstream region/zone and optionally per subzone.
message UpstreamLocalityStats {
  // Name of zone, region and optionally endpoint group these metrics were
  // collected from. Zone and region names could be empty if unknown.
  Locality locality = 1;

  // General note on request counting: the total number of requests sent by
  // this Envoy since the last report counts a single HTTP or gRPC request or
  // stream as one request. A TCP connection is also treated as one request.
  // There is no explicit total_requests field below for a locality, but it
  // may be inferred from:
  //
  // .. code-block:: none
  //
  //   total_requests = total_successful_requests + total_requests_in_progress +
  //     total_error_requests
  //
  // The total number of requests successfully completed by the endpoints in the
  // locality. These include non-5xx responses for HTTP, where errors
  // originate at the client and the endpoint responded successfully. For gRPC,
  // the grpc-status values are those not covered by total_error_requests below.
  uint64 total_successful_requests = 2;
  // The total number of unfinished requests.
  uint64 total_requests_in_progress = 3;
  // The total number of requests that failed due to errors at the endpoint.
  // For HTTP these are responses with 5xx status codes and for gRPC the
  // grpc-status values {DeadlineExceeded, Unimplemented, Internal,
  // Unavailable, Unknown, DataLoss}.
  uint64 total_error_requests = 4;

  // Stats for multi-dimensional load balancing.
  repeated EndpointLoadMetricStats load_metric_stats = 5;
}

// Per cluster stats: the per-locality stats of one cluster plus counters that
// only exist at the cluster level.
message ClusterStats {
  // Name of the cluster these stats were collected for.
  string cluster_name = 1;
  // Per-locality stats for the cluster. Need at least one.
  repeated UpstreamLocalityStats upstream_locality_stats = 2;

  // Cluster-level stats such as total_successful_requests may be computed by
  // summing upstream_locality_stats. In addition, below there are additional
  // cluster-wide stats. The following total_requests equality holds at the
  // cluster-level:
  //
  // .. code-block:: none
  //
  //   total_requests = sum_locality(total_successful_requests) +
  //     sum_locality(total_requests_in_progress) +
  //     sum_locality(total_error_requests) + total_dropped_requests
  //
  // The total number of dropped requests. This covers requests
  // deliberately dropped by the drop_overload policy and circuit breaking.
  uint64 total_dropped_requests = 3;
}

// Load report that Envoy sends to the management server on the
// StreamLoadStats stream.
message LoadStatsRequest {
  // Identifies the reporting Envoy, including the zone/region where it runs.
  Node node = 1;
  // Load reports, one entry per reported cluster.
  repeated ClusterStats cluster_stats = 2;
}

// Endpoint assignment for one cluster.
//
// Every route from RDS maps either to a single cluster or to a traffic split
// across clusters, using the weights expressed in the RDS WeightedCluster.
//
// With EDS, each cluster is treated independently from a LB perspective. Load
// balancing happens between the Localities within a cluster and, at a finer
// granularity, between the hosts within a locality. For a given cluster, the
// effective weight of a host is its load_balancing_weight multiplied by the
// load_balancing_weight of its Locality.
message ClusterLoadAssignment {
  // Cluster-wide settings that accompany the endpoint list.
  message Policy {
    // Percentage of traffic (0-100) that should be dropped. This action
    // protects upstream hosts should they be unable to recover from an outage,
    // or should they be unable to autoscale, so that the overall incoming
    // traffic volume needs to be trimmed to protect them.
    // [V2-API-DIFF] This is known as maintenance mode in v1.
    double drop_overload = 1;
  }

  // Name of the cluster this load assignment is for.
  string cluster_name = 1;

  // The cluster's endpoints, grouped by locality.
  repeated LocalityLbEndpoints endpoints = 2;

  // NOTE(review): field number 3 is not used in this message. Confirm whether
  // it belonged to a removed field and should be marked reserved.

  // Policy settings applied to this assignment (see Policy).
  Policy policy = 4;
}

// Published by the management server on the StreamLoadStats stream to tell
// Envoy which clusters to report load stats for and how often.
message LoadStatsResponse {
  // Clusters to report stats for.
  repeated string clusters = 1;
  // Period over which Envoy aggregates load reports before sending the next
  // LoadStatsRequest. The default is 10 seconds.
  google.protobuf.Duration load_reporting_interval = 2;
}