syntax = "proto3";

package envoy.api.v2;

import "api/base.proto";
import "api/discovery.proto";
import "api/health_check.proto";
import "google/api/annotations.proto";
import "google/protobuf/duration.proto";
import "google/protobuf/wrappers.proto";

// Endpoint discovery (EDS) plus the load-stats reporting stream used for
// multi-dimensional load balancing.
service EndpointDiscoveryService {
  // The resource_names field in DiscoveryRequest specifies a list of clusters
  // to subscribe to updates for.
  rpc StreamEndpoints(stream DiscoveryRequest)
      returns (stream DiscoveryResponse) {
  }

  // Unary variant of StreamEndpoints. Also exposed over HTTP as a POST to
  // /v2/discovery:endpoints with the whole request as the body.
  rpc FetchEndpoints(DiscoveryRequest) returns (DiscoveryResponse) {
    option (google.api.http) = {
      post: "/v2/discovery:endpoints"
      body: "*"
    };
  }

  // Advanced API to allow for multi-dimensional load balancing by remote
  // server. For receiving LB assignments, the steps are:
  // 1. The management server is configured with per cluster/zone/load metric
  //    capacity configuration. The capacity configuration definition is
  //    outside of the scope of this document.
  // 2. Envoy issues a standard {Stream,Fetch}Endpoints request for the
  //    clusters to balance.
  //
  // Independently, Envoy will initiate a StreamLoadStats bidi stream with a
  // management server:
  // 1. Once a connection establishes, the management server publishes a
  //    LoadStatsResponse for all clusters it is interested in learning load
  //    stats about.
  // 2. For each cluster, Envoy load balances incoming traffic to upstream
  //    hosts based on per-zone weights and/or per-instance weights (if
  //    specified) based on intra-zone LbPolicy. This information comes from
  //    the above {Stream,Fetch}Endpoints.
  // 3. When upstream hosts reply, they optionally add a header with an ASCII
  //    representation of EndpointLoadMetricStats.
  // 4. Envoy aggregates load reports over the period of time given to it in
  //    LoadStatsResponse.load_reporting_interval. This includes aggregation
  //    stats Envoy maintains by itself (total_requests, rpc_errors etc.) as
  //    well as load metrics from upstream hosts.
  // 5. When the timer of load_reporting_interval expires, Envoy sends new
  //    LoadStatsRequest filled with load reports for each cluster.
  // 6. The management server uses the load reports from all reported Envoys
  //    from around the world, computes global assignment and prepares traffic
  //    assignment destined for each zone Envoys are located in. Goto 2.
  rpc StreamLoadStats(stream LoadStatsRequest)
      returns (stream LoadStatsResponse) {
  }
}

// A single upstream endpoint together with the attributes the load balancer
// uses when selecting it.
message LbEndpoint {
  // The upstream endpoint this entry describes.
  Endpoint endpoint = 1;

  // Optional health status when known and supplied by EDS server.
  HealthStatus health_status = 2;

  // The endpoint metadata specifies values that may be used by the load
  // balancer to select endpoints in a cluster for a given request. The filter
  // name should be specified as "envoy.lb". An example boolean key-value pair
  // is "canary", providing the optional canary status of the upstream host.
  // This may be matched against in a route's ForwardAction metadata_match
  // field to subset the endpoints considered in cluster load balancing.
  // TODO(htuch): [V2-API-DIFF] Need to plumb this through Envoy and have
  // everywhere that canary is used be capable of working on metadata.
  Metadata metadata = 3;

  // The optional load balancing weight of the upstream host, in the range 1 -
  // 100. Envoy uses the load balancing weight in some of the built in load
  // balancers. The load balancing weight for an endpoint is divided by the sum
  // of the weights of all endpoints in the endpoint's locality to produce a
  // percentage of traffic for the endpoint. This percentage is then further
  // weighted by the endpoint's locality's load balancing weight from
  // LocalityLbEndpoints. If unspecified, each host is presumed to have equal
  // weight in a locality.
  google.protobuf.UInt32Value load_balancing_weight = 4;
}

// All endpoints belonging to a Locality.
message LocalityLbEndpoints {
  // The locality shared by every endpoint in lb_endpoints.
  Locality locality = 1;

  // The endpoints that belong to this locality.
  repeated LbEndpoint lb_endpoints = 2;

  // Optional: Per region/zone/sub_zone weight - range 1-100. The load
  // balancing weight for a locality is divided by the sum of the weights of
  // all localities to produce the effective percentage of traffic for the
  // locality. If unspecified, each locality is presumed to have equal weight
  // in a cluster.
  google.protobuf.UInt32Value load_balancing_weight = 3;
}

// Example load report from a single request:
//
//     [metric name, metric value]
//     * cpu_seconds, 0.7
//     * flash_utilization, 75
//
// When aggregating Envoy needs to count how many request's load reports
// included each metric type, so Envoy can account for requests that don't
// include that metric type. e.g.:
//
//     [name, count, sum of values]
//     * cpu_seconds, 10, 17.5
//     * flash_utilization, 5, 375
message EndpointLoadMetricStats {
  // Name of the metric; may be empty.
  string metric_name = 1;

  // Number of calls that finished and included this metric.
  uint64 num_requests_finished_with_metric = 2;

  // Sum of metric values across all calls that finished with this metric for
  // load_reporting_interval.
  double total_metric_value = 3;
}

// These are stats Envoy reports to GLB every so often. Report frequency is
// defined by LoadStatsResponse.load_reporting_interval.
// Stats per upstream region/zone and optionally per subzone.
message UpstreamLocalityStats {
  // Name of zone, region and optionally endpoint group these metrics were
  // collected from. Zone and region names could be empty if unknown.
  Locality locality = 1;

  // The total number of requests sent by this Envoy since the last report. A
  // single HTTP or gRPC request or stream is counted as one request. A TCP
  // connection is also treated as one request. There is no explicit
  // total_requests field below, but it may be inferred from:
  //   total_requests = total_successful_requests + total_requests_in_progress
  //                    + total_error_requests + total_dropped_requests

  // The total number of requests successfully completed by the endpoints in
  // the locality. These include non-5xx responses for HTTP, where errors
  // originate at the client and the endpoint responded successfully. For gRPC,
  // the grpc-status values are those not covered by total_error_requests
  // below.
  uint64 total_successful_requests = 2;

  // The total number of unfinished requests.
  uint64 total_requests_in_progress = 3;

  // The total number of requests that failed due to errors at the endpoint.
  // For HTTP these are responses with 5xx status codes and for gRPC the
  // grpc-status values {DeadlineExceeded, Unimplemented, Internal,
  // Unavailable, Unknown, DataLoss}.
  uint64 total_error_requests = 4;

  // The total number of dropped requests. This covers requests
  // deliberately dropped by the drop_overload policy and circuit breaking.
  uint64 total_dropped_requests = 5;

  // Stats for multi-dimensional load balancing.
  repeated EndpointLoadMetricStats load_metric_stats = 6;
}

// Per cluster stats.
message ClusterStats {
  // Name of the cluster these stats belong to.
  string cluster_name = 1;

  // Need at least one.
  repeated UpstreamLocalityStats upstream_locality_stats = 2;
}

// Load report sent by Envoy on the StreamLoadStats stream.
message LoadStatsRequest {
  Node node = 1;  // zone/region where this Envoy runs

  // One entry of load stats per reported cluster.
  repeated ClusterStats cluster_stats = 2;
}

// Each route from RDS will map to a single cluster or traffic split across
// clusters using weights expressed in the RDS WeightedCluster.
//
// With EDS, each cluster is treated independently from a LB perspective, with
// LB taking place between the Localities within a cluster and at a finer
// granularity between the hosts within a locality. For a given cluster, the
// effective weight of a host is its load_balancing_weight multiplied by the
// load_balancing_weight of its Locality.
message ClusterLoadAssignment {
  // Name of the cluster this assignment applies to.
  string cluster_name = 1;

  // Endpoints of the cluster, grouped by locality.
  repeated LocalityLbEndpoints endpoints = 2;

  // In the case where all endpoints for a particular zone/subzone are
  // unavailable/unhealthy, additional endpoints are given out for use in case
  // of catastrophic failure. They also have weights.
  repeated LocalityLbEndpoints failover_endpoints = 3;

  // Load balancing policy settings for the assignment.
  message Policy {
    // Percentage of traffic (0-100) that should be dropped. This
    // action allows protection of upstream hosts should they be unable to
    // recover from an outage or should they be unable to autoscale and hence
    // overall incoming traffic volume needs to be trimmed to protect them.
    // [V2-API-DIFF] This is known as maintenance mode in v1.
    double drop_overload = 1;
  }

  // Policy settings for this assignment; see Policy.
  Policy policy = 4;
}

// Sent by the management server on the StreamLoadStats stream to tell Envoy
// which clusters to report on and how often.
message LoadStatsResponse {
  // Clusters to report stats for.
  repeated string clusters = 1;

  // Interval over which Envoy aggregates load reports before sending the next
  // LoadStatsRequest. The default is 10 seconds.
  google.protobuf.Duration load_reporting_interval = 2;
}