syntax = "proto3"; package envoy.service.ratelimit.v3; import "envoy/config/core/v3/base.proto"; import "envoy/extensions/common/ratelimit/v3/ratelimit.proto"; import "google/protobuf/duration.proto"; import "google/protobuf/struct.proto"; import "google/protobuf/timestamp.proto"; import "udpa/annotations/status.proto"; import "udpa/annotations/versioning.proto"; import "validate/validate.proto"; option java_package = "io.envoyproxy.envoy.service.ratelimit.v3"; option java_outer_classname = "RlsProto"; option java_multiple_files = true; option java_generic_services = true; option (udpa.annotations.file_status).package_version_status = ACTIVE; // [#protodoc-title: Rate Limit Service (RLS)] service RateLimitService { // Determine whether rate limiting should take place. rpc ShouldRateLimit(RateLimitRequest) returns (RateLimitResponse) { } } // Main message for a rate limit request. The rate limit service is designed to be fully generic // in the sense that it can operate on arbitrary hierarchical key/value pairs. The loaded // configuration will parse the request and find the most specific limit to apply. In addition, // a RateLimitRequest can contain multiple "descriptors" to limit on. When multiple descriptors // are provided, the server will limit on *ALL* of them and return an OVER_LIMIT response if any // of them are over limit. This enables more complex application level rate limiting scenarios // if desired. message RateLimitRequest { option (udpa.annotations.versioning).previous_message_type = "envoy.service.ratelimit.v2.RateLimitRequest"; // All rate limit requests must specify a domain. This enables the configuration to be per // application without fear of overlap. E.g., "envoy". string domain = 1; // All rate limit requests must specify at least one RateLimitDescriptor. Each descriptor is // processed by the service (see below). If any of the descriptors are over limit, the entire // request is considered to be over limit. repeated envoy.extensions.common.ratelimit.v3.RateLimitDescriptor descriptors = 2; // Rate limit requests can optionally specify the number of hits a request adds to the matched // limit. If the value is not set in the message, a request increases the matched limit by 1. uint32 hits_addend = 3; } // A response from a ShouldRateLimit call. // [#next-free-field: 7] message RateLimitResponse { option (udpa.annotations.versioning).previous_message_type = "envoy.service.ratelimit.v2.RateLimitResponse"; enum Code { // The response code is not known. UNKNOWN = 0; // The response code to notify that the number of requests are under limit. OK = 1; // The response code to notify that the number of requests are over limit. OVER_LIMIT = 2; } // Defines an actual rate limit in terms of requests per unit of time and the unit itself. message RateLimit { option (udpa.annotations.versioning).previous_message_type = "envoy.service.ratelimit.v2.RateLimitResponse.RateLimit"; // Identifies the unit of of time for rate limit. // [#comment: replace by envoy/type/v3/ratelimit_unit.proto in v4] enum Unit { // The time unit is not known. UNKNOWN = 0; // The time unit representing a second. SECOND = 1; // The time unit representing a minute. MINUTE = 2; // The time unit representing an hour. HOUR = 3; // The time unit representing a day. DAY = 4; } // A name or description of this limit. string name = 3; // The number of requests per unit of time. uint32 requests_per_unit = 1; // The unit of time. Unit unit = 2; } // Cacheable quota for responses, see documentation for the :ref:`quota // ` field. // [#not-implemented-hide:] message Quota { // Number of matching requests granted in quota. Must be 1 or more. uint32 requests = 1 [(validate.rules).uint32 = {gt: 0}]; oneof expiration_specifier { // Point in time at which the quota expires. google.protobuf.Timestamp valid_until = 2; } } // [#next-free-field: 6] message DescriptorStatus { option (udpa.annotations.versioning).previous_message_type = "envoy.service.ratelimit.v2.RateLimitResponse.DescriptorStatus"; // The response code for an individual descriptor. Code code = 1; // The current limit as configured by the server. Useful for debugging, etc. RateLimit current_limit = 2; // The limit remaining in the current time unit. uint32 limit_remaining = 3; // Duration until reset of the current limit window. google.protobuf.Duration duration_until_reset = 4; // Quota granted for the descriptor. This is a certain number of requests over a period of time. // The client may cache this result and apply the effective RateLimitResponse to future matching // requests containing a matching descriptor without querying rate limit service. // // Quota is available for a request if its descriptor set has cached quota available for all // descriptors. // // If quota is available, a RLS request will not be made and the quota will be reduced by 1 for // all matching descriptors. // // If there is not sufficient quota, there are three cases: // 1. A cached entry exists for a RLS descriptor that is out-of-quota, but not expired. // In this case, the request will be treated as OVER_LIMIT. // 2. Some RLS descriptors have a cached entry that has valid quota but some RLS descriptors // have no cached entry. This will trigger a new RLS request. // When the result is returned, a single unit will be consumed from the quota for all // matching descriptors. // If the server did not provide a quota, such as the quota message is empty for some of // the descriptors, then the request admission is determined by the // :ref:`overall_code `. // 3. All RLS descriptors lack a cached entry, this will trigger a new RLS request, // When the result is returned, a single unit will be consumed from the quota for all // matching descriptors. // If the server did not provide a quota, such as the quota message is empty for some of // the descriptors, then the request admission is determined by the // :ref:`overall_code `. // // When quota expires due to timeout, a new RLS request will also be made. // The implementation may choose to preemptively query the rate limit server for more quota on or // before expiration or before the available quota runs out. // [#not-implemented-hide:] Quota quota = 5; } // The overall response code which takes into account all of the descriptors that were passed // in the RateLimitRequest message. Code overall_code = 1; // A list of DescriptorStatus messages which matches the length of the descriptor list passed // in the RateLimitRequest. This can be used by the caller to determine which individual // descriptors failed and/or what the currently configured limits are for all of them. repeated DescriptorStatus statuses = 2; // A list of headers to add to the response repeated config.core.v3.HeaderValue response_headers_to_add = 3; // A list of headers to add to the request when forwarded repeated config.core.v3.HeaderValue request_headers_to_add = 4; // A response body to send to the downstream client when the response code is not OK. bytes raw_body = 5; // Optional response metadata that will be emitted as dynamic metadata to be consumed by the next // filter. This metadata lives in a namespace specified by the canonical name of extension filter // that requires it: // // - :ref:`envoy.filters.http.ratelimit ` for HTTP filter. // - :ref:`envoy.filters.network.ratelimit ` for network filter. // - :ref:`envoy.filters.thrift.rate_limit ` for Thrift filter. google.protobuf.Struct dynamic_metadata = 6; }