[PSM Interop] Delete PSM interop source per new repo migration (#35466)

New source of truth: https://github.com/grpc/psm-interop.
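To follow the tests at their new home, the new repository can be cloned directly:

```shell
git clone https://github.com/grpc/psm-interop.git
```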

This PR removes PSM Interop framework source code from `tools/run_tests/xds_k8s_test_driver`, and all references to it.

Closes #35466

PiperOrigin-RevId: 597636949
Branch: pull/35529/head
Author: Sergii Tkachenko (11 months ago), committed by Copybara-Service
Parent: f8b23fd29a
Commit: 22682a78f6
Changed files (lines changed in parentheses):

1. black.toml (1)
2. tools/distrib/pylint_code.sh (3)
3. tools/internal_ci/linux/grpc_xds_k8s_install_test_driver.sh (2)
4. tools/internal_ci/linux/grpc_xds_k8s_lb.sh (2)
5. tools/internal_ci/linux/grpc_xds_k8s_lb_python.sh (2)
6. tools/internal_ci/linux/grpc_xds_k8s_run_xtest.sh (2)
7. tools/internal_ci/linux/grpc_xds_url_map.sh (2)
8. tools/internal_ci/linux/grpc_xds_url_map_python.sh (2)
9. tools/internal_ci/linux/psm-csm.sh (2)
10. tools/internal_ci/linux/psm-security-python.sh (2)
11. tools/internal_ci/linux/psm-security.sh (2)
12. tools/run_tests/xds_k8s_test_driver/.gitignore (5)
13. tools/run_tests/xds_k8s_test_driver/README.md (457)
14. tools/run_tests/xds_k8s_test_driver/bin/__init__.py (13)
15. tools/run_tests/xds_k8s_test_driver/bin/black.sh (60)
16. tools/run_tests/xds_k8s_test_driver/bin/cleanup.sh (59)
17. tools/run_tests/xds_k8s_test_driver/bin/cleanup/README.md (2)
18. tools/run_tests/xds_k8s_test_driver/bin/cleanup/cleanup.py (714)
19. tools/run_tests/xds_k8s_test_driver/bin/cleanup/keep_xds_interop_resources.json (8)
20. tools/run_tests/xds_k8s_test_driver/bin/cleanup_cluster.sh (95)
21. tools/run_tests/xds_k8s_test_driver/bin/ensure_venv.sh (29)
22. tools/run_tests/xds_k8s_test_driver/bin/freeze.sh (28)
23. tools/run_tests/xds_k8s_test_driver/bin/isort.sh (60)
24. tools/run_tests/xds_k8s_test_driver/bin/lib/__init__.py (13)
25. tools/run_tests/xds_k8s_test_driver/bin/lib/common.py (191)
26. tools/run_tests/xds_k8s_test_driver/bin/run_channelz.py (271)
27. tools/run_tests/xds_k8s_test_driver/bin/run_ping_pong.py (169)
28. tools/run_tests/xds_k8s_test_driver/bin/run_td_setup.py (310)
29. tools/run_tests/xds_k8s_test_driver/bin/run_test_client.py (162)
30. tools/run_tests/xds_k8s_test_driver/bin/run_test_server.py (123)
31. tools/run_tests/xds_k8s_test_driver/config/common-csm.cfg (3)
32. tools/run_tests/xds_k8s_test_driver/config/common.cfg (11)
33. tools/run_tests/xds_k8s_test_driver/config/gamma.cfg (4)
34. tools/run_tests/xds_k8s_test_driver/config/grpc-testing.cfg (9)
35. tools/run_tests/xds_k8s_test_driver/config/local-dev.cfg.example (62)
36. tools/run_tests/xds_k8s_test_driver/config/url-map.cfg (15)
37. tools/run_tests/xds_k8s_test_driver/framework/__init__.py (13)
38. tools/run_tests/xds_k8s_test_driver/framework/bootstrap_generator_testcase.py (182)
39. tools/run_tests/xds_k8s_test_driver/framework/errors.py (58)
40. tools/run_tests/xds_k8s_test_driver/framework/helpers/__init__.py (13)
41. tools/run_tests/xds_k8s_test_driver/framework/helpers/datetime.py (79)
42. tools/run_tests/xds_k8s_test_driver/framework/helpers/grpc.py (204)
43. tools/run_tests/xds_k8s_test_driver/framework/helpers/highlighter.py (106)
44. tools/run_tests/xds_k8s_test_driver/framework/helpers/logs.py (48)
45. tools/run_tests/xds_k8s_test_driver/framework/helpers/rand.py (49)
46. tools/run_tests/xds_k8s_test_driver/framework/helpers/retryers.py (273)
47. tools/run_tests/xds_k8s_test_driver/framework/helpers/skips.py (103)
48. tools/run_tests/xds_k8s_test_driver/framework/infrastructure/__init__.py (13)
49. tools/run_tests/xds_k8s_test_driver/framework/infrastructure/gcp/__init__.py (18)
50. tools/run_tests/xds_k8s_test_driver/framework/infrastructure/gcp/api.py (542)
51. tools/run_tests/xds_k8s_test_driver/framework/infrastructure/gcp/compute.py (637)
52. tools/run_tests/xds_k8s_test_driver/framework/infrastructure/gcp/iam.py (361)
53. tools/run_tests/xds_k8s_test_driver/framework/infrastructure/gcp/network_security.py (221)
54. tools/run_tests/xds_k8s_test_driver/framework/infrastructure/gcp/network_services.py (461)
55. tools/run_tests/xds_k8s_test_driver/framework/infrastructure/k8s.py (1152)
56. tools/run_tests/xds_k8s_test_driver/framework/infrastructure/k8s_internal/__init__.py (13)
57. tools/run_tests/xds_k8s_test_driver/framework/infrastructure/k8s_internal/k8s_log_collector.py (142)
58. tools/run_tests/xds_k8s_test_driver/framework/infrastructure/k8s_internal/k8s_port_forwarder.py (133)
59. tools/run_tests/xds_k8s_test_driver/framework/infrastructure/traffic_director.py (1118)
60. tools/run_tests/xds_k8s_test_driver/framework/infrastructure/traffic_director_gamma.py (22)
61. tools/run_tests/xds_k8s_test_driver/framework/rpc/__init__.py (14)
62. tools/run_tests/xds_k8s_test_driver/framework/rpc/grpc.py (117)
63. tools/run_tests/xds_k8s_test_driver/framework/rpc/grpc_channelz.py (273)
64. tools/run_tests/xds_k8s_test_driver/framework/rpc/grpc_csds.py (66)
65. tools/run_tests/xds_k8s_test_driver/framework/rpc/grpc_testing.py (176)
66. tools/run_tests/xds_k8s_test_driver/framework/rpc/xds_protos_imports.py (849)
67. tools/run_tests/xds_k8s_test_driver/framework/test_app/__init__.py (13)
68. tools/run_tests/xds_k8s_test_driver/framework/test_app/client_app.py (535)
69. tools/run_tests/xds_k8s_test_driver/framework/test_app/runners/__init__.py (13)
70. tools/run_tests/xds_k8s_test_driver/framework/test_app/runners/base_runner.py (105)
71. tools/run_tests/xds_k8s_test_driver/framework/test_app/runners/k8s/__init__.py (13)
72. tools/run_tests/xds_k8s_test_driver/framework/test_app/runners/k8s/gamma_server_runner.py (303)
73. tools/run_tests/xds_k8s_test_driver/framework/test_app/runners/k8s/k8s_base_runner.py (953)
74. tools/run_tests/xds_k8s_test_driver/framework/test_app/runners/k8s/k8s_xds_client_runner.py (238)
75. tools/run_tests/xds_k8s_test_driver/framework/test_app/runners/k8s/k8s_xds_server_runner.py (324)
76. tools/run_tests/xds_k8s_test_driver/framework/test_app/server_app.py (180)
77. tools/run_tests/xds_k8s_test_driver/framework/test_cases/__init__.py (13)
78. tools/run_tests/xds_k8s_test_driver/framework/test_cases/base_testcase.py (65)
79. tools/run_tests/xds_k8s_test_driver/framework/test_cases/session_affinity_util.py (90)
80. tools/run_tests/xds_k8s_test_driver/framework/xds_flags.py (212)
81. tools/run_tests/xds_k8s_test_driver/framework/xds_gamma_testcase.py (137)
82. tools/run_tests/xds_k8s_test_driver/framework/xds_k8s_flags.py (94)
83. tools/run_tests/xds_k8s_test_driver/framework/xds_k8s_testcase.py (1212)
84. tools/run_tests/xds_k8s_test_driver/framework/xds_url_map_test_resources.py (378)
85. tools/run_tests/xds_k8s_test_driver/framework/xds_url_map_testcase.py (607)
86. tools/run_tests/xds_k8s_test_driver/kubernetes-manifests/client-secure.deployment.yaml (94)
87. tools/run_tests/xds_k8s_test_driver/kubernetes-manifests/client.deployment.yaml (104)
88. tools/run_tests/xds_k8s_test_driver/kubernetes-manifests/csm/pod-monitoring.yaml (15)
89. tools/run_tests/xds_k8s_test_driver/kubernetes-manifests/gamma/backend_policy.yaml (17)
90. tools/run_tests/xds_k8s_test_driver/kubernetes-manifests/gamma/frontend_service.yaml (10)
91. tools/run_tests/xds_k8s_test_driver/kubernetes-manifests/gamma/route_grpc.yaml (22)
92. tools/run_tests/xds_k8s_test_driver/kubernetes-manifests/gamma/route_http.yaml (23)
93. tools/run_tests/xds_k8s_test_driver/kubernetes-manifests/gamma/route_http_ssafilter.yaml (29)
94. tools/run_tests/xds_k8s_test_driver/kubernetes-manifests/gamma/service.yaml (17)
95. tools/run_tests/xds_k8s_test_driver/kubernetes-manifests/gamma/session_affinity_filter.yaml (10)
96. tools/run_tests/xds_k8s_test_driver/kubernetes-manifests/gamma/session_affinity_policy_route.yaml (15)
97. tools/run_tests/xds_k8s_test_driver/kubernetes-manifests/gamma/session_affinity_policy_service.yaml (15)
98. tools/run_tests/xds_k8s_test_driver/kubernetes-manifests/namespace.yaml (9)
99. tools/run_tests/xds_k8s_test_driver/kubernetes-manifests/server-secure.deployment.yaml (95)
100. tools/run_tests/xds_k8s_test_driver/kubernetes-manifests/server.deployment.yaml (101)

Some files were not shown because too many files have changed in this diff.

@@ -30,7 +30,6 @@ line_length = 80
 src_paths = [
 "examples/python/data_transmission",
 "examples/python/async_streaming",
-"tools/run_tests/xds_k8s_test_driver",
 "src/python/grpcio_tests",
 "tools/run_tests",
 ]

@@ -29,14 +29,11 @@ DIRS=(
 'src/python/grpcio_testing/grpc_testing'
 'src/python/grpcio_status/grpc_status'
 'src/python/grpcio_observability/grpc_observability'
-'tools/run_tests/xds_k8s_test_driver/bin'
-'tools/run_tests/xds_k8s_test_driver/framework'
 )
 TEST_DIRS=(
 'src/python/grpcio_tests/tests'
 'src/python/grpcio_tests/tests_gevent'
-'tools/run_tests/xds_k8s_test_driver/tests'
 )
 VIRTUALENV=python_pylint_venv

@@ -284,7 +284,7 @@ test_driver_compile_protos() {
 #######################################
 # Installs the test driver and its requirements.
-# https://github.com/grpc/grpc/tree/master/tools/run_tests/xds_k8s_test_driver#installation
+# https://github.com/grpc/psm-interop#installation
 # Globals:
 #   TEST_DRIVER_REPO_DIR: Populated with the path to the repo containing
 #     the test driver

@@ -99,7 +99,7 @@ build_docker_images_if_needed() {
 #######################################
 run_test() {
   # Test driver usage:
-  # https://github.com/grpc/grpc/tree/master/tools/run_tests/xds_k8s_test_driver#basic-usage
+  # https://github.com/grpc/psm-interop#basic-usage
   local test_name="${1:?Usage: run_test test_name}"
   local out_dir="${TEST_XML_OUTPUT_DIR}/${test_name}"
   mkdir -pv "${out_dir}"

@@ -112,7 +112,7 @@ build_docker_images_if_needed() {
 #######################################
 run_test() {
   # Test driver usage:
-  # https://github.com/grpc/grpc/tree/master/tools/run_tests/xds_k8s_test_driver#basic-usage
+  # https://github.com/grpc/psm-interop#basic-usage
   local test_name="${1:?Usage: run_test test_name}"
   local out_dir="${TEST_XML_OUTPUT_DIR}/${test_name}"
   mkdir -pv "${out_dir}"

@@ -69,7 +69,7 @@ run_test() {
     exit 1
   fi
   # Test driver usage:
-  # https://github.com/grpc/grpc/tree/master/tools/run_tests/xds_k8s_test_driver#basic-usage
+  # https://github.com/grpc/psm-interop#basic-usage
   local client_lang="$1"
   local client_branch="$2"
   local server_lang="$3"

@@ -87,7 +87,7 @@ build_docker_images_if_needed() {
 #######################################
 run_test() {
   # Test driver usage:
-  # https://github.com/grpc/grpc/tree/master/tools/run_tests/xds_k8s_test_driver#basic-usage
+  # https://github.com/grpc/psm-interop#basic-usage
   local test_name="${1:?Usage: run_test test_name}"
   local out_dir="${TEST_XML_OUTPUT_DIR}/${test_name}"
   mkdir -pv "${out_dir}"

@@ -97,7 +97,7 @@ build_docker_images_if_needed() {
 #######################################
 run_test() {
   # Test driver usage:
-  # https://github.com/grpc/grpc/tree/master/tools/run_tests/xds_k8s_test_driver#basic-usage
+  # https://github.com/grpc/psm-interop#basic-usage
   local test_name="${1:?Usage: run_test test_name}"
   local out_dir="${TEST_XML_OUTPUT_DIR}/${test_name}"
   mkdir -pv "${out_dir}"

@@ -99,7 +99,7 @@ build_docker_images_if_needed() {
 #######################################
 run_test() {
   # Test driver usage:
-  # https://github.com/grpc/grpc/tree/master/tools/run_tests/xds_k8s_test_driver#basic-usage
+  # https://github.com/grpc/psm-interop#basic-usage
   local test_name="${1:?Usage: run_test test_name}"
   local out_dir="${TEST_XML_OUTPUT_DIR}/${test_name}"
   mkdir -pv "${out_dir}"

@@ -114,7 +114,7 @@ build_docker_images_if_needed() {
 #######################################
 run_test() {
   # Test driver usage:
-  # https://github.com/grpc/grpc/tree/master/tools/run_tests/xds_k8s_test_driver#basic-usage
+  # https://github.com/grpc/psm-interop#basic-usage
   local test_name="${1:?Usage: run_test test_name}"
   local out_dir="${TEST_XML_OUTPUT_DIR}/${test_name}"
   mkdir -pv "${out_dir}"

@@ -99,7 +99,7 @@ build_docker_images_if_needed() {
 #######################################
 run_test() {
   # Test driver usage:
-  # https://github.com/grpc/grpc/tree/master/tools/run_tests/xds_k8s_test_driver#basic-usage
+  # https://github.com/grpc/psm-interop#basic-usage
   local test_name="${1:?Usage: run_test test_name}"
   local out_dir="${TEST_XML_OUTPUT_DIR}/${test_name}"
   mkdir -pv "${out_dir}"

@@ -1,5 +0,0 @@
config/local-*.cfg
src/proto
venv/
venv-*/
out/

@@ -1,458 +1,3 @@
# xDS Kubernetes Interop Tests
Proxyless Security Mesh Interop Tests executed on Kubernetes.
### Experimental
Work in progress. Internal APIs may and will change. Please refrain from making
changes to this codebase at the moment.
### Stabilization roadmap
- [x] Replace retrying with tenacity
- [x] Generate namespace for each test to prevent resource name conflicts and
allow running tests in parallel
- [x] Security: run server and client in separate namespaces
- [ ] Make framework.infrastructure.gcp resources [first-class
citizen](https://en.wikipedia.org/wiki/First-class_citizen), support
simpler CRUD
- [x] Security: manage `roles/iam.workloadIdentityUser` role grant lifecycle for
dynamically-named namespaces
- [x] Restructure `framework.test_app` and `framework.xds_k8s*` into a module
containing xDS-interop-specific logic
- [ ] Address inline TODOs in code
- [x] Improve README.md documentation, explain helpers in bin/ folder
## Installation
#### Requirements
1. Python v3.9+
2. [Google Cloud SDK](https://cloud.google.com/sdk/docs/install)
3. `kubectl`
`kubectl` can be installed via `gcloud components install kubectl`, or via a system package manager: https://kubernetes.io/docs/tasks/tools/#kubectl
The Python3 venv tool may need to be installed from APT on some Ubuntu systems:
```shell
sudo apt-get install python3-venv
```
##### Getting Started
1. If you haven't, [initialize](https://cloud.google.com/sdk/docs/install-sdk) gcloud SDK
2. Activate gcloud [configuration](https://cloud.google.com/sdk/docs/configurations) with your project
3. Enable gcloud services:
```shell
gcloud services enable \
compute.googleapis.com \
container.googleapis.com \
logging.googleapis.com \
monitoring.googleapis.com \
networksecurity.googleapis.com \
networkservices.googleapis.com \
secretmanager.googleapis.com \
trafficdirector.googleapis.com
```
#### Configure GKE cluster
This is an example outlining minimal requirements to run the [baseline tests](#xds-baseline-tests).
Update the gcloud SDK:
```shell
gcloud -q components update
```
Pre-populate environment variables for convenience. To find your project ID, refer to
[Identifying projects](https://cloud.google.com/resource-manager/docs/creating-managing-projects#identifying_projects).
```shell
export PROJECT_ID="your-project-id"
export PROJECT_NUMBER=$(gcloud projects describe "${PROJECT_ID}" --format="value(projectNumber)")
# Compute Engine default service account
export GCE_SA="${PROJECT_NUMBER}-compute@developer.gserviceaccount.com"
# The prefix to name GCP resources used by the framework
export RESOURCE_PREFIX="xds-k8s-interop-tests"
# The name of your cluster, e.g. xds-k8s-test-cluster
export CLUSTER_NAME="${RESOURCE_PREFIX}-cluster"
# The zone of your cluster, e.g. us-central1-a
export ZONE="us-central1-a"
# Dedicated GCP Service Account to use with workload identity.
export WORKLOAD_SA_NAME="${RESOURCE_PREFIX}"
export WORKLOAD_SA_EMAIL="${WORKLOAD_SA_NAME}@${PROJECT_ID}.iam.gserviceaccount.com"
```
##### Create the cluster
Minimal requirements: [VPC-native](https://cloud.google.com/traffic-director/docs/security-proxyless-setup)
cluster with [Workload Identity](https://cloud.google.com/kubernetes-engine/docs/how-to/workload-identity) enabled.
```shell
gcloud container clusters create "${CLUSTER_NAME}" \
--scopes=cloud-platform \
--zone="${ZONE}" \
--enable-ip-alias \
--workload-pool="${PROJECT_ID}.svc.id.goog" \
--workload-metadata=GKE_METADATA \
--tags=allow-health-checks
```
For security tests you also need to create CAs and configure the cluster to use those CAs
as described
[here](https://cloud.google.com/traffic-director/docs/security-proxyless-setup#configure-cas).
##### Create the firewall rule
Allow [health checking mechanisms](https://cloud.google.com/traffic-director/docs/set-up-proxyless-gke#creating_the_health_check_firewall_rule_and_backend_service)
to query the workloads' health.
This step can be skipped if the driver is executed with `--ensure_firewall`.
```shell
gcloud compute firewall-rules create "${RESOURCE_PREFIX}-allow-health-checks" \
--network=default --action=allow --direction=INGRESS \
--source-ranges="35.191.0.0/16,130.211.0.0/22" \
--target-tags=allow-health-checks \
--rules=tcp:8080-8100
```
##### Setup GCP Service Account
Create a dedicated GCP service account to use
with [workload identity](https://cloud.google.com/kubernetes-engine/docs/how-to/workload-identity).
```shell
gcloud iam service-accounts create "${WORKLOAD_SA_NAME}" \
--display-name="xDS K8S Interop Tests Workload Identity Service Account"
```
Enable the service account to [access the Traffic Director API](https://cloud.google.com/traffic-director/docs/prepare-for-envoy-setup#enable-service-account).
```shell
gcloud projects add-iam-policy-binding "${PROJECT_ID}" \
--member="serviceAccount:${WORKLOAD_SA_EMAIL}" \
--role="roles/trafficdirector.client"
```
##### Allow access to images
The test framework needs read access to the client and server images and to the bootstrap
generator image. You may host these images in your own project, but if you want to use the
ones from the `grpc-testing` project, you will have to grant access to them as described in
https://cloud.google.com/container-registry/docs/access-control#grant, or with a
gsutil command. For example, to grant access to images stored in the `grpc-testing` project GCR, run:
```sh
gsutil iam ch "serviceAccount:${GCE_SA}:objectViewer" gs://artifacts.grpc-testing.appspot.com/
```
##### Allow test driver to configure workload identity automatically
The test driver will automatically grant `roles/iam.workloadIdentityUser` to
allow the Kubernetes service account to impersonate the dedicated GCP workload
service account (this corresponds to step 5
of [Authenticating to Google Cloud](https://cloud.google.com/kubernetes-engine/docs/how-to/workload-identity#authenticating_to)).
This action requires the test framework to have the `iam.serviceAccounts.create`
permission on the project.
If you're running the test framework locally and you have `roles/owner` on your
project, **you can skip this step**.
If you're configuring the test framework to run on CI: use a `roles/owner`
account once to allow the test framework to grant `roles/iam.workloadIdentityUser`.
```shell
# Assuming CI is using Compute Engine default service account.
gcloud projects add-iam-policy-binding "${PROJECT_ID}" \
--member="serviceAccount:${GCE_SA}" \
--role="roles/iam.serviceAccountAdmin" \
--condition-from-file=<(cat <<-END
---
title: allow_workload_identity_only
description: Restrict serviceAccountAdmin to granting role iam.workloadIdentityUser
expression: |-
api.getAttribute('iam.googleapis.com/modifiedGrantsByRole', [])
.hasOnly(['roles/iam.workloadIdentityUser'])
END
)
```
##### Configure GKE cluster access
```shell
# Unless you're using GCP VM with preconfigured Application Default Credentials, acquire them for your user
gcloud auth application-default login
# Install authentication plugin for kubectl.
# Details: https://cloud.google.com/blog/products/containers-kubernetes/kubectl-auth-changes-in-gke
gcloud components install gke-gcloud-auth-plugin
# Configuring GKE cluster access for kubectl
gcloud container clusters get-credentials "${CLUSTER_NAME}" --zone "${ZONE}"
# Save generated kube context name
export KUBE_CONTEXT="$(kubectl config current-context)"
```
#### Install python dependencies
```shell
# Create python virtual environment
python3 -m venv venv
# Activate virtual environment
. ./venv/bin/activate
# Install requirements
pip install -r requirements.lock
# Generate protos
python -m grpc_tools.protoc --proto_path=../../../ \
--python_out=. --grpc_python_out=. \
src/proto/grpc/testing/empty.proto \
src/proto/grpc/testing/messages.proto \
src/proto/grpc/testing/test.proto
```
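To sanity-check that the protos were generated where the driver expects them, a quick hypothetical check (assuming generation succeeded and you are still in the driver root with the venv active):

```shell
# The generated modules should be importable as a namespace package.
python -c "from src.proto.grpc.testing import test_pb2_grpc; print('generated protos importable')"
```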
# Basic usage
## Local development
This test driver allows running tests locally against remote GKE clusters, right
from your dev environment. You need to:
1. Follow the [installation](#installation) instructions
2. Have an authenticated `gcloud`
3. Have a `kubectl` context (see [Configure GKE cluster access](#configure-gke-cluster-access))
4. Run tests with the `--debug_use_port_forwarding` argument (experimental). The test driver
   will automatically start and stop port forwarding using `kubectl`
   subprocesses. See the example below.
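A minimal local run, assuming `config/local-dev.cfg` exists (see
[Setup test configuration](#setup-test-configuration)):

```shell
# Run the baseline test from a dev machine; port forwarding to the cluster
# is started and stopped automatically via kubectl subprocesses.
python -m tests.baseline_test --flagfile="config/local-dev.cfg" --debug_use_port_forwarding
```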
### Making changes to the driver
1. Install additional dev packages: `pip install -r requirements-dev.txt`
2. Use `./bin/black.sh` and `./bin/isort.sh` helpers to auto-format code.
### Updating Python Dependencies
We track our Python-level dependencies using three different files:
- `requirements.txt`
- `requirements-dev.txt`
- `requirements.lock`
`requirements.txt` lists modules without specific versions supplied, though
version ranges may be specified. `requirements.lock` is generated from
`requirements.txt` and _does_ specify versions for every dependency in the
transitive dependency tree.
When updating `requirements.txt`, you must also update `requirements.lock`. To
do this, navigate to this directory and run `./bin/freeze.sh`.
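A sketch of the typical update flow:

```shell
# 1. Edit requirements.txt (e.g. bump a version range).
# 2. Regenerate the lock file with pinned transitive dependencies.
./bin/freeze.sh
# 3. Commit both files together.
git add requirements.txt requirements.lock
```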
### Setup test configuration
There are many arguments to be passed into the test run. You can save the
arguments to a config file ("flagfile") for your development environment.
Use [`config/local-dev.cfg.example`](https://github.com/grpc/grpc/blob/master/tools/run_tests/xds_k8s_test_driver/config/local-dev.cfg.example)
as a starting point:
```shell
cp config/local-dev.cfg.example config/local-dev.cfg
```
If you exported environment variables in the above sections, you can
template them into the local config (note this recreates the config):
```shell
envsubst < config/local-dev.cfg.example > config/local-dev.cfg
```
Learn more about flagfiles in [abseil documentation](https://abseil.io/docs/python/guides/flags#a-note-about---flagfile).
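For illustration, a flagfile holds one flag per line. The values below are placeholders; the authoritative flag list is defined in `framework/xds_flags.py` and `framework/xds_k8s_flags.py`:

```
--project=your-project-id
--network=default
--kube_context=your-kube-context-name
--gcp_service_account=your-workload-sa@your-project-id.iam.gserviceaccount.com
--resource_prefix=xds-k8s-interop-tests
```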
## Test suites
See the full list of available test suites in the [`tests/`](https://github.com/grpc/grpc/tree/master/tools/run_tests/xds_k8s_test_driver/tests) folder.
### xDS Baseline Tests
Test suite meant to confirm that basic xDS features work as expected. Executing
it before other test suites will help to identify whether a test failure is
related to the specific features under test, or is caused by unrelated
infrastructure disturbances.
```shell
# Help
python -m tests.baseline_test --help
python -m tests.baseline_test --helpfull
# Run the baseline test with local-dev.cfg settings
python -m tests.baseline_test --flagfile="config/local-dev.cfg"
# Same as above, but using the helper script
./run.sh tests/baseline_test.py
```
### xDS Security Tests
Test suite meant to verify mTLS/TLS features. Note that this requires
additional environment configuration. For more details, and for the
setup for the security tests, see
["Setting up Traffic Director service security with proxyless gRPC"](https://cloud.google.com/traffic-director/docs/security-proxyless-setup) user guide.
```shell
# Run the security test with local-dev.cfg settings
python -m tests.security_test --flagfile="config/local-dev.cfg"
# Same as above, but using the helper script
./run.sh tests/security_test.py
```
## Helper scripts
You can use interop xds-k8s [`bin/`](https://github.com/grpc/grpc/tree/master/tools/run_tests/xds_k8s_test_driver/bin)
scripts to configure TD, start k8s instances step-by-step, and keep them alive
for as long as you need.
* To run helper scripts using local config:
* `python -m bin.script_name --flagfile=config/local-dev.cfg`
* `./run.sh bin/script_name.py` automatically appends the flagfile
* Use `--help` to see script-specific arguments
* Use `--helpfull` to see all available arguments
#### Overview
```shell
# Helper tool to configure Traffic Director with different security options
python -m bin.run_td_setup --help
# Helper tools to run the test server, client (with or without security)
python -m bin.run_test_server --help
python -m bin.run_test_client --help
# Helper tool to verify different security configurations via channelz
python -m bin.run_channelz --help
```
#### `./run.sh` helper
Use `./run.sh` to execute helper scripts and tests with `config/local-dev.cfg`.
```sh
USAGE: ./run.sh script_path [arguments]
script_path: path to python script to execute, relative to driver root folder
arguments ...: arguments passed to program in sys.argv
ENVIRONMENT:
XDS_K8S_CONFIG: file path to the config flagfile, relative to
driver root folder. Default: config/local-dev.cfg
Will be appended as --flagfile="config_absolute_path" argument
XDS_K8S_DRIVER_VENV_DIR: the path to python virtual environment directory
Default: $XDS_K8S_DRIVER_DIR/venv
DESCRIPTION:
This tool performs the following:
1) Ensures python virtual env installed and activated
2) Exports test driver root in PYTHONPATH
3) Automatically appends --flagfile="\$XDS_K8S_CONFIG" argument
EXAMPLES:
./run.sh bin/run_td_setup.py --help
./run.sh bin/run_td_setup.py --helpfull
XDS_K8S_CONFIG=./path-to-flagfile.cfg ./run.sh bin/run_td_setup.py --resource_suffix=override-suffix
./run.sh tests/baseline_test.py
./run.sh tests/security_test.py --verbosity=1 --logger_levels=__main__:DEBUG,framework:DEBUG
./run.sh tests/security_test.py SecurityTest.test_mtls --nocheck_local_certs
```
## Partial setups
### Regular workflow
```shell
# Setup Traffic Director
./run.sh bin/run_td_setup.py
# Start test server
./run.sh bin/run_test_server.py
# Add test server to the backend service
./run.sh bin/run_td_setup.py --cmd=backends-add
# Start test client
./run.sh bin/run_test_client.py
```
### Secure workflow
```shell
# Setup Traffic Director in mTLS mode. See --help for all options
./run.sh bin/run_td_setup.py --security=mtls
# Start test server in a secure mode
./run.sh bin/run_test_server.py --mode=secure
# Add test server to the backend service
./run.sh bin/run_td_setup.py --cmd=backends-add
# Start test client in secure mode
./run.sh bin/run_test_client.py --mode=secure
```
### Sending RPCs
#### Start port forwarding
```shell
# Client: all services always on port 8079
kubectl port-forward deployment.apps/psm-grpc-client 8079
# Server regular mode: all grpc services on port 8080
kubectl port-forward deployment.apps/psm-grpc-server 8080
# OR
# Server secure mode: TestServiceImpl is on 8080,
kubectl port-forward deployment.apps/psm-grpc-server 8080
# everything else (channelz, healthcheck, CSDS) on 8081
kubectl port-forward deployment.apps/psm-grpc-server 8081
```
#### Send RPCs with grpcurl
```shell
# 8081 if security enabled
export SERVER_ADMIN_PORT=8080
# List server services using reflection
grpcurl --plaintext 127.0.0.1:$SERVER_ADMIN_PORT list
# List client services using reflection
grpcurl --plaintext 127.0.0.1:8079 list
# List channels via channelz
grpcurl --plaintext 127.0.0.1:$SERVER_ADMIN_PORT grpc.channelz.v1.Channelz.GetTopChannels
grpcurl --plaintext 127.0.0.1:8079 grpc.channelz.v1.Channelz.GetTopChannels
# Send GetClientStats to the client
grpcurl --plaintext -d '{"num_rpcs": 10, "timeout_sec": 30}' 127.0.0.1:8079 \
grpc.testing.LoadBalancerStatsService.GetClientStats
```
### Cleanup
* First, make sure to stop port forwarding, if any
* Run `./bin/cleanup.sh` (see the example below)
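For example:

```shell
# Full cleanup, skipping the resources specific to PSM Security
./bin/cleanup.sh --nosecure
```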
##### Partial cleanup
You can run the commands below to stop/start and create/delete resources however you want.
Generally, it's better to remove resources in the opposite order of their creation.
Cleanup regular resources:
```shell
# Cleanup TD resources
./run.sh bin/run_td_setup.py --cmd=cleanup
# Stop test client
./run.sh bin/run_test_client.py --cmd=cleanup
# Stop test server, and remove the namespace
./run.sh bin/run_test_server.py --cmd=cleanup --cleanup_namespace
```
Cleanup regular and security-specific resources:
```shell
# Cleanup TD resources, with security
./run.sh bin/run_td_setup.py --cmd=cleanup --security=mtls
# Stop test client (secure)
./run.sh bin/run_test_client.py --cmd=cleanup --mode=secure
# Stop test server (secure), and remove the namespace
./run.sh bin/run_test_server.py --cmd=cleanup --cleanup_namespace --mode=secure
```
In addition, here's some other helpful partial cleanup commands:
```shell
# Remove all backends from the backend services
./run.sh bin/run_td_setup.py --cmd=backends-cleanup
# Stop the server, but keep the namespace
./run.sh bin/run_test_server.py --cmd=cleanup --nocleanup_namespace
```
### Known errors
#### Error forwarding port
If you stopped a test with `ctrl+c` while using `--debug_use_port_forwarding`,
you might see an error like this:
> `framework.infrastructure.k8s.PortForwardingError: Error forwarding port, unexpected output Unable to listen on port 8081: Listeners failed to create with the following errors: [unable to create listener: Error listen tcp4 127.0.0.1:8081: bind: address already in use]`
Unless you're running `kubectl port-forward` manually, it's likely that `ctrl+c`
interrupted python before it could clean up subprocesses.
You can do `ps aux | grep port-forward` and then kill the processes by ID,
or use `killall kubectl`.
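For example:

```shell
# Find leftover kubectl port-forward subprocesses
ps aux | grep port-forward
# Then kill them by PID, or simply:
killall kubectl
```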
The source is migrated to https://github.com/grpc/psm-interop.

@@ -1,13 +0,0 @@
# Copyright 2020 gRPC authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

@@ -1,60 +0,0 @@
#!/usr/bin/env bash
# Copyright 2021 gRPC authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
set -eo pipefail
display_usage() {
cat <<EOF >/dev/stderr
A helper to run the black formatter.
USAGE: $0 [--diff|--check]
--diff: Do not apply changes, only show the diff
--check: Do not apply changes, only print which files would be changed
ENVIRONMENT:
XDS_K8S_DRIVER_VENV_DIR: the path to python virtual environment directory
Default: $XDS_K8S_DRIVER_DIR/venv
EXAMPLES:
$0
$0 --diff
$0 --check
EOF
exit 1
}
if [[ "$1" == "-h" || "$1" == "--help" ]]; then
display_usage
fi
SCRIPT_DIR="$( cd -- "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )"
readonly SCRIPT_DIR
readonly XDS_K8S_DRIVER_DIR="${SCRIPT_DIR}/.."
cd "${XDS_K8S_DRIVER_DIR}"
# Relative paths not yet supported by shellcheck.
# shellcheck source=/dev/null
source "${XDS_K8S_DRIVER_DIR}/bin/ensure_venv.sh"
if [[ "$1" == "--diff" ]]; then
readonly MODE="--diff"
elif [[ "$1" == "--check" ]]; then
readonly MODE="--check"
else
readonly MODE=""
fi
# shellcheck disable=SC2086
exec python -m black --config=../../../black.toml ${MODE} .

@@ -1,59 +0,0 @@
#!/usr/bin/env bash
# Copyright 2021 gRPC authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
set -eo pipefail
display_usage() {
cat <<EOF >/dev/stderr
Performs full TD and K8S resource cleanup
USAGE: $0 [--nosecure] [arguments]
--nosecure: Skip cleanup for the resources specific for PSM Security
arguments ...: additional arguments passed to ./run.sh
ENVIRONMENT:
XDS_K8S_CONFIG: file path to the config flagfile, relative to
driver root folder. Default: config/local-dev.cfg
Will be appended as --flagfile="config_absolute_path" argument
XDS_K8S_DRIVER_VENV_DIR: the path to python virtual environment directory
Default: $XDS_K8S_DRIVER_DIR/venv
EXAMPLES:
$0
$0 --nosecure
XDS_K8S_CONFIG=./path-to-flagfile.cfg $0 --resource_suffix=override-suffix
EOF
exit 1
}
if [[ "$1" == "-h" || "$1" == "--help" ]]; then
display_usage
fi
SCRIPT_DIR="$( cd -- "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )"
readonly SCRIPT_DIR
readonly XDS_K8S_DRIVER_DIR="${SCRIPT_DIR}/.."
cd "${XDS_K8S_DRIVER_DIR}"
if [[ "$1" == "--nosecure" ]]; then
shift
./run.sh bin/run_td_setup.py --cmd=cleanup "$@" && \
./run.sh bin/run_test_client.py --cmd=cleanup --cleanup_namespace "$@" && \
./run.sh bin/run_test_server.py --cmd=cleanup --cleanup_namespace "$@"
else
./run.sh bin/run_td_setup.py --cmd=cleanup --security=mtls "$@" && \
./run.sh bin/run_test_client.py --cmd=cleanup --cleanup_namespace --mode=secure "$@" && \
./run.sh bin/run_test_server.py --cmd=cleanup --cleanup_namespace --mode=secure "$@"
fi

@@ -1,2 +0,0 @@
# This folder contains scripts to delete leaked resources from test runs

@@ -1,714 +0,0 @@
# Copyright 2021 gRPC authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Clean up resources created by the tests.
This is intended as a tool to delete leaked resources from old tests.
Typical usage examples:
python3 -m bin.cleanup.cleanup \
--project=grpc-testing \
--network=default-vpc \
--kube_context=gke_grpc-testing_us-central1-a_psm-interop-security
"""
import dataclasses
import datetime
import functools
import json
import logging
import os
import re
import subprocess
import sys
from typing import Any, Callable, List, Optional
from absl import app
from absl import flags
import dateutil
from framework import xds_flags
from framework import xds_k8s_flags
from framework.helpers import retryers
from framework.infrastructure import gcp
from framework.infrastructure import k8s
from framework.infrastructure import traffic_director
from framework.test_app.runners.k8s import k8s_xds_client_runner
from framework.test_app.runners.k8s import k8s_xds_server_runner
logger = logging.getLogger(__name__)
Json = Any
_KubernetesClientRunner = k8s_xds_client_runner.KubernetesClientRunner
_KubernetesServerRunner = k8s_xds_server_runner.KubernetesServerRunner
GCLOUD = os.environ.get("GCLOUD", "gcloud")
GCLOUD_CMD_TIMEOUT_S = datetime.timedelta(seconds=5).total_seconds()
# Skip known k8s system namespaces.
K8S_PROTECTED_NAMESPACES = {
"default",
"gke-managed-system",
"kube-node-lease",
"kube-public",
"kube-system",
}
# TODO(sergiitk): these should be flags.
LEGACY_DRIVER_ZONE = "us-central1-a"
LEGACY_DRIVER_SECONDARY_ZONE = "us-west1-b"
PSM_INTEROP_PREFIX = "psm-interop" # Prefix for gke resources to delete.
URL_MAP_TEST_PREFIX = (
"interop-psm-url-map" # Prefix for url-map test resources to delete.
)
KEEP_PERIOD_HOURS = flags.DEFINE_integer(
"keep_hours",
default=48,
help=(
"number of hours for a resource to keep. Resources older than this will"
" be deleted. Default is 48 hours (2 days)"
),
)
DRY_RUN = flags.DEFINE_bool(
"dry_run",
default=False,
help="dry run, print resources but do not perform deletion",
)
TD_RESOURCE_PREFIXES = flags.DEFINE_list(
"td_resource_prefixes",
default=[PSM_INTEROP_PREFIX],
help=(
"a comma-separated list of prefixes for which the leaked TD resources"
" will be deleted"
),
)
SERVER_PREFIXES = flags.DEFINE_list(
"server_prefixes",
default=[PSM_INTEROP_PREFIX],
help=(
"a comma-separated list of prefixes for which the leaked servers will"
" be deleted"
),
)
CLIENT_PREFIXES = flags.DEFINE_list(
"client_prefixes",
default=[PSM_INTEROP_PREFIX, URL_MAP_TEST_PREFIX],
help=(
"a comma-separated list of prefixes for which the leaked clients will"
" be deleted"
),
)
MODE = flags.DEFINE_enum(
"mode",
default="td",
enum_values=["k8s", "td", "td_no_legacy"],
help="Mode: Kubernetes or Traffic Director",
)
SECONDARY = flags.DEFINE_bool(
"secondary",
default=False,
help="Cleanup secondary (alternative) resources",
)
# The cleanup script performs some API calls directly, so some flags normally
# required to configure the framework properly are not needed here.
flags.FLAGS.set_default("resource_prefix", "ignored-by-cleanup")
flags.FLAGS.set_default("td_bootstrap_image", "ignored-by-cleanup")
flags.FLAGS.set_default("server_image", "ignored-by-cleanup")
flags.FLAGS.set_default("client_image", "ignored-by-cleanup")
@dataclasses.dataclass(eq=False)
class CleanupResult:
error_count: int = 0
error_messages: List[str] = dataclasses.field(default_factory=list)
def add_error(self, msg: str):
self.error_count += 1
self.error_messages.append(f" {self.error_count}. {msg}")
def format_messages(self):
return "\n".join(self.error_messages)
@dataclasses.dataclass(frozen=True)
class K8sResourceRule:
# regex to match
expression: str
# function to delete the resource
cleanup_ns_fn: Callable
# Global state, holding the result of the whole operation.
_CLEANUP_RESULT = CleanupResult()
def load_keep_config() -> None:
global KEEP_CONFIG
json_path = os.path.realpath(
os.path.join(
os.path.dirname(os.path.abspath(__file__)),
"keep_xds_interop_resources.json",
)
)
with open(json_path, "r") as f:
KEEP_CONFIG = json.load(f)
logging.debug(
"Resource keep config loaded: %s", json.dumps(KEEP_CONFIG, indent=2)
)
def is_marked_as_keep_gce(suffix: str) -> bool:
return suffix in KEEP_CONFIG["gce_framework"]["suffix"]
def is_marked_as_keep_gke(suffix: str) -> bool:
return suffix in KEEP_CONFIG["gke_framework"]["suffix"]
@functools.lru_cache()
def get_expire_timestamp() -> datetime.datetime:
return datetime.datetime.now(datetime.timezone.utc) - datetime.timedelta(
hours=KEEP_PERIOD_HOURS.value
)
def exec_gcloud(project: str, *cmds: str) -> Json:
cmds = [GCLOUD, "--project", project, "--quiet"] + list(cmds)
if "list" in cmds:
# Add arguments to shape the list output
cmds.extend(
[
"--format",
"json",
"--filter",
f"creationTimestamp <= {get_expire_timestamp().isoformat()}",
]
)
# Executing the gcloud command
logging.debug("Executing: %s", " ".join(cmds))
proc = subprocess.Popen(
cmds, stdout=subprocess.PIPE, stderr=subprocess.PIPE
)
# NOTE(lidiz) the gcloud subprocess won't return unless its output is read
stdout = proc.stdout.read()
stderr = proc.stderr.read()
try:
returncode = proc.wait(timeout=GCLOUD_CMD_TIMEOUT_S)
except subprocess.TimeoutExpired:
logging.error("> Timeout executing cmd [%s]", " ".join(cmds))
return None
if returncode:
logging.error(
"> Failed to execute cmd [%s], returned %d, stderr: %s",
" ".join(cmds),
returncode,
stderr,
)
return None
if stdout:
return json.loads(stdout)
return None
def cleanup_legacy_driver_resources(*, project: str, suffix: str, **kwargs):
"""Removing GCP resources created by run_xds_tests.py."""
# Unused, but kept for compatibility with cleanup_td_for_gke.
del kwargs
logging.info(
"----- Removing run_xds_tests.py resources with suffix [%s]", suffix
)
exec_gcloud(
project,
"compute",
"forwarding-rules",
"delete",
f"test-forwarding-rule{suffix}",
"--global",
)
exec_gcloud(
project,
"compute",
"target-http-proxies",
"delete",
f"test-target-proxy{suffix}",
)
exec_gcloud(
project,
"alpha",
"compute",
"target-grpc-proxies",
"delete",
f"test-target-proxy{suffix}",
)
exec_gcloud(project, "compute", "url-maps", "delete", f"test-map{suffix}")
exec_gcloud(
project,
"compute",
"backend-services",
"delete",
f"test-backend-service{suffix}",
"--global",
)
exec_gcloud(
project,
"compute",
"backend-services",
"delete",
f"test-backend-service-alternate{suffix}",
"--global",
)
exec_gcloud(
project,
"compute",
"backend-services",
"delete",
f"test-backend-service-extra{suffix}",
"--global",
)
exec_gcloud(
project,
"compute",
"backend-services",
"delete",
f"test-backend-service-more-extra{suffix}",
"--global",
)
exec_gcloud(
project, "compute", "firewall-rules", "delete", f"test-fw-rule{suffix}"
)
exec_gcloud(
project, "compute", "health-checks", "delete", f"test-hc{suffix}"
)
exec_gcloud(
project,
"compute",
"instance-groups",
"managed",
"delete",
f"test-ig{suffix}",
"--zone",
LEGACY_DRIVER_ZONE,
)
exec_gcloud(
project,
"compute",
"instance-groups",
"managed",
"delete",
f"test-ig-same-zone{suffix}",
"--zone",
LEGACY_DRIVER_ZONE,
)
exec_gcloud(
project,
"compute",
"instance-groups",
"managed",
"delete",
f"test-ig-secondary-zone{suffix}",
"--zone",
LEGACY_DRIVER_SECONDARY_ZONE,
)
exec_gcloud(
project,
"compute",
"instance-templates",
"delete",
f"test-template{suffix}",
)
# cleanup_td creates TrafficDirectorManager (and its variants for security and
# AppNet), and then calls the cleanup() methods.
#
# Note that the variants are all based on the basic TrafficDirectorManager, so
# their `cleanup()` might do duplicate work. But deleting a non-existent resource
# returns 404, and is OK.
def cleanup_td_for_gke(*, project, prefix, suffix, network):
gcp_api_manager = gcp.api.GcpApiManager()
plain_td = traffic_director.TrafficDirectorManager(
gcp_api_manager,
project=project,
network=network,
resource_prefix=prefix,
resource_suffix=suffix,
)
security_td = traffic_director.TrafficDirectorSecureManager(
gcp_api_manager,
project=project,
network=network,
resource_prefix=prefix,
resource_suffix=suffix,
)
# TODO: cleanup appnet resources.
# appnet_td = traffic_director.TrafficDirectorAppNetManager(
# gcp_api_manager,
# project=project,
# network=network,
# resource_prefix=resource_prefix,
# resource_suffix=resource_suffix)
logger.info(
"----- Removing traffic director for gke, prefix %s, suffix %s",
prefix,
suffix,
)
security_td.cleanup(force=True)
# appnet_td.cleanup(force=True)
plain_td.cleanup(force=True)
# cleanup_client creates a client runner, and calls its cleanup() method.
def cleanup_client(
project,
network,
k8s_api_manager,
client_namespace,
gcp_api_manager,
gcp_service_account,
*,
suffix: Optional[str] = "",
):
deployment_name = xds_flags.CLIENT_NAME.value
if suffix:
deployment_name = f"{deployment_name}-{suffix}"
ns = k8s.KubernetesNamespace(k8s_api_manager, client_namespace)
# Shorten the timeout to avoid waiting for stuck namespaces.
# Normal ns deletion during the cleanup takes less than two minutes.
ns.wait_for_namespace_deleted_timeout_sec = 5 * 60
client_runner = _KubernetesClientRunner(
k8s_namespace=ns,
deployment_name=deployment_name,
gcp_project=project,
network=network,
gcp_service_account=gcp_service_account,
gcp_api_manager=gcp_api_manager,
image_name="",
td_bootstrap_image="",
)
logger.info("Cleanup client")
try:
client_runner.cleanup(force=True, force_namespace=True)
except retryers.RetryError as err:
logger.error(
"Timeout waiting for namespace %s deletion. "
"Failed resource status:\n\n%s",
ns.name,
ns.pretty_format_status(err.result()),
)
raise
# cleanup_server creates a server runner, and calls its cleanup() method.
def cleanup_server(
project,
network,
k8s_api_manager,
server_namespace,
gcp_api_manager,
gcp_service_account,
*,
suffix: Optional[str] = "",
):
deployment_name = xds_flags.SERVER_NAME.value
if suffix:
deployment_name = f"{deployment_name}-{suffix}"
ns = k8s.KubernetesNamespace(k8s_api_manager, server_namespace)
# Shorten the timeout to avoid waiting for stuck namespaces.
# Normal ns deletion during the cleanup takes less than two minutes.
ns.wait_for_namespace_deleted_timeout_sec = 5 * 60
server_runner = _KubernetesServerRunner(
k8s_namespace=ns,
deployment_name=deployment_name,
gcp_project=project,
network=network,
gcp_service_account=gcp_service_account,
gcp_api_manager=gcp_api_manager,
image_name="",
td_bootstrap_image="",
)
logger.info("Cleanup server")
try:
server_runner.cleanup(force=True, force_namespace=True)
except retryers.RetryError as err:
logger.error(
"Timeout waiting for namespace %s deletion. "
"Failed resource status:\n\n%s",
ns.name,
ns.pretty_format_status(err.result()),
)
raise
def delete_leaked_td_resources(
dry_run, td_resource_rules, project, network, resources
):
for resource in resources:
logger.info("-----")
logger.info("----- Cleaning up resource %s", resource["name"])
if dry_run:
# Skip deletion for dry-runs
logging.info("----- Skipped [Dry Run]: %s", resource["name"])
continue
matched = False
for regex, resource_prefix, keep, remove_fn in td_resource_rules:
result = re.search(regex, resource["name"])
if result is not None:
matched = True
if keep(result.group(1)):
logging.info("Skipped [keep]:")
break # break inner loop, continue outer loop
remove_fn(
project=project,
prefix=resource_prefix,
suffix=result.group(1),
network=network,
)
break
if not matched:
logging.info(
"----- Skipped [does not matching resource name templates]"
)
def delete_k8s_resources(
dry_run,
k8s_resource_rules,
project,
network,
k8s_api_manager,
gcp_service_account,
namespaces,
):
gcp_api_manager = gcp.api.GcpApiManager()
for ns in namespaces:
namespace_name: str = ns.metadata.name
if namespace_name in K8S_PROTECTED_NAMESPACES:
continue
logger.info("-----")
logger.info("----- Cleaning up k8s namespaces %s", namespace_name)
if ns.metadata.creation_timestamp > get_expire_timestamp():
logging.info(
"----- Skipped [resource is within expiry date]: %s",
namespace_name,
)
continue
if dry_run:
# Skip deletion for dry-runs
logging.info("----- Skipped [Dry Run]: %s", ns.metadata.name)
continue
rule: K8sResourceRule = _rule_match_k8s_namespace(
namespace_name, k8s_resource_rules
)
if not rule:
logging.info(
"----- Skipped [does not matching resource name templates]: %s",
namespace_name,
)
continue
# Cleaning up.
try:
rule.cleanup_ns_fn(
project,
network,
k8s_api_manager,
namespace_name,
gcp_api_manager,
gcp_service_account,
suffix=("alt" if SECONDARY.value else None),
)
except k8s.NotFound:
logging.warning("----- Skipped [not found]: %s", namespace_name)
except retryers.RetryError as err:
_CLEANUP_RESULT.add_error(
"Retries exhausted while waiting for the "
f"deletion of namespace {namespace_name}: "
f"{err}"
)
logging.exception(
"----- Skipped [cleanup timed out]: %s", namespace_name
)
except Exception as err: # noqa pylint: disable=broad-except
_CLEANUP_RESULT.add_error(
"Unexpected error while deleting "
f"namespace {namespace_name}: {err}"
)
logging.exception(
"----- Skipped [cleanup unexpected error]: %s", namespace_name
)
logger.info("-----")
def _rule_match_k8s_namespace(
namespace_name: str, k8s_resource_rules: List[K8sResourceRule]
) -> Optional[K8sResourceRule]:
for rule in k8s_resource_rules:
result = re.search(rule.expression, namespace_name)
if result is not None:
return rule
return None
def find_and_remove_leaked_k8s_resources(
dry_run, project, network, gcp_service_account, k8s_context
):
k8s_resource_rules: List[K8sResourceRule] = []
for prefix in CLIENT_PREFIXES.value:
k8s_resource_rules.append(
K8sResourceRule(f"{prefix}-client-(.*)", cleanup_client)
)
for prefix in SERVER_PREFIXES.value:
k8s_resource_rules.append(
K8sResourceRule(f"{prefix}-server-(.*)", cleanup_server)
)
# Delete leaked k8s namespaces; those usually mean there are leaked test
# clients/servers from the gke framework.
k8s_api_manager = k8s.KubernetesApiManager(k8s_context)
nss = k8s_api_manager.core.list_namespace()
delete_k8s_resources(
dry_run,
k8s_resource_rules,
project,
network,
k8s_api_manager,
gcp_service_account,
nss.items,
)
def find_and_remove_leaked_td_resources(dry_run, project, network):
cleanup_legacy: bool = MODE.value != "td_no_legacy"
td_resource_rules = [
# items in each tuple, in order:
# - regex to match
# - prefix of the resource (only used by gke resources)
# - function to check if the resource should be kept
# - function to delete the resource
]
if cleanup_legacy:
td_resource_rules += [
(
r"test-hc(.*)",
"",
is_marked_as_keep_gce,
cleanup_legacy_driver_resources,
),
(
r"test-template(.*)",
"",
is_marked_as_keep_gce,
cleanup_legacy_driver_resources,
),
]
for prefix in TD_RESOURCE_PREFIXES.value:
td_resource_rules.append(
(
f"{prefix}-health-check-(.*)",
prefix,
is_marked_as_keep_gke,
cleanup_td_for_gke,
),
)
# List resources older than KEEP_PERIOD. We only list health-checks and
# instance templates because these are leaves in the resource dependency
# tree.
#
# E.g. a forwarding-rule depends on the target-proxy. So a leaked
# forwarding-rule indicates there's a leaked target-proxy (because the
# target proxy cannot be deleted unless the forwarding rule is deleted). The
# leaked target-proxies are guaranteed to be a superset of the leaked
# forwarding-rules.
compute = gcp.compute.ComputeV1(gcp.api.GcpApiManager(), project)
leaked_health_checks = []
for item in compute.list_health_check()["items"]:
if (
dateutil.parser.isoparse(item["creationTimestamp"])
<= get_expire_timestamp()
):
leaked_health_checks.append(item)
delete_leaked_td_resources(
dry_run, td_resource_rules, project, network, leaked_health_checks
)
# Delete leaked instance templates; those usually mean there are leaked VMs
# from the gce framework. Also note that this is only needed for the gce
# resources.
if cleanup_legacy:
leaked_instance_templates = exec_gcloud(
project, "compute", "instance-templates", "list"
)
delete_leaked_td_resources(
dry_run,
td_resource_rules,
project,
network,
leaked_instance_templates,
)
def main(argv):
# TODO(sergiitk): instead, base on absltest so that result.xml is available.
if len(argv) > 1:
raise app.UsageError("Too many command-line arguments.")
load_keep_config()
# Must be called before KubernetesApiManager or GcpApiManager init.
xds_flags.set_socket_default_timeout_from_flag()
project: str = xds_flags.PROJECT.value
network: str = xds_flags.NETWORK.value
gcp_service_account: str = xds_k8s_flags.GCP_SERVICE_ACCOUNT.value
dry_run: bool = DRY_RUN.value
k8s_context: str = xds_k8s_flags.KUBE_CONTEXT.value
if MODE.value == "td" or MODE.value == "td_no_legacy":
find_and_remove_leaked_td_resources(dry_run, project, network)
elif MODE.value == "k8s":
# 'unset' value is used in td-only mode to bypass the validation
# for the required flag.
assert k8s_context != "unset"
find_and_remove_leaked_k8s_resources(
dry_run, project, network, gcp_service_account, k8s_context
)
logger.info("##################### Done cleaning up #####################")
if _CLEANUP_RESULT.error_count > 0:
logger.error(
"Cleanup failed for %i resource(s). Errors: [\n%s\n].\n"
"Please inspect the log files for stack traces corresponding "
"to these errors.",
_CLEANUP_RESULT.error_count,
_CLEANUP_RESULT.format_messages(),
)
sys.exit(1)
if __name__ == "__main__":
app.run(main)
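For reference, a dry-run invocation of this (now removed) cleanup tool, using the flags defined above and the project values from its own docstring, might have looked like:

```shell
# List the leaked k8s namespaces that would be deleted, without deleting them.
python3 -m bin.cleanup.cleanup \
  --project=grpc-testing \
  --network=default-vpc \
  --kube_context=gke_grpc-testing_us-central1-a_psm-interop-security \
  --mode=k8s \
  --dry_run
```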

@@ -1,8 +0,0 @@
{
"gce_framework": {
"suffix": []
},
"gke_framework": {
"suffix": []
}
}

@@ -1,95 +0,0 @@
#!/usr/bin/env bash
# Copyright 2023 gRPC authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
set -eo pipefail
SCRIPT_DIR="$( cd -- "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )"
readonly SCRIPT_DIR
readonly XDS_K8S_DRIVER_DIR="${SCRIPT_DIR}/.."
cd "${XDS_K8S_DRIVER_DIR}"
NO_SECURE="yes"
DATE_TO=$(date -Iseconds)
while [[ $# -gt 0 ]]; do
case $1 in
--secure) NO_SECURE=""; shift ;;
--date_to=*) DATE_TO="${1#*=}T00:00:00Z"; shift ;;
*) echo "Unknown argument $1"; exit 1 ;;
esac
done
jq_selector=$(cat <<- 'EOM'
.items[].metadata |
select(
(.name | test("-(client|server)-")) and
(.creationTimestamp < $date_to)
) | .name
EOM
)
mapfile -t namespaces < <(\
kubectl get namespaces --sort-by='{.metadata.creationTimestamp}'\
--selector='owner=xds-k8s-interop-test'\
-o json\
| jq --arg date_to "${DATE_TO}" -r "${jq_selector}"
)
if [[ -z "${namespaces[*]}" ]]; then
echo "All clean."
exit 0
fi
echo "Found namespaces:"
namespaces_joined=$(IFS=,; printf '%s' "${namespaces[*]}")
kubectl get namespaces --sort-by='{.metadata.creationTimestamp}' \
--selector="name in (${namespaces_joined})"
# Suffixes
mapfile -t suffixes < <(\
printf '%s\n' "${namespaces[@]}" | sed -E 's/^.+-(server|client)-//'
)
echo
echo "Found suffixes: ${suffixes[*]}"
echo "Count: ${#namespaces[@]}"
echo "Run plan:"
for suffix in "${suffixes[@]}"; do
echo ./bin/cleanup.sh ${NO_SECURE:+"--nosecure"} "--resource_suffix=${suffix}"
done
read -r -n 1 -p "Continue? (y/N) " answer
if [[ "$answer" != "${answer#[Yy]}" ]] ;then
echo
echo "Starting the cleanup."
else
echo
echo "Exit"
exit 0
fi
failed=0
for suffix in "${suffixes[@]}"; do
echo "-------------------- Cleaning suffix ${suffix} --------------------"
set -x
./bin/cleanup.sh ${NO_SECURE:+"--nosecure"} "--resource_suffix=${suffix}" || (( ++failed ))
set +x
echo "-------------------- Finished cleaning ${suffix} --------------------"
done
echo "Failed runs: ${failed}"
if (( failed > 0 )); then
exit 1
fi

@@ -1,29 +0,0 @@
#!/usr/bin/env bash
# Copyright 2021 gRPC authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Expected $XDS_K8S_DRIVER_DIR to be set by the file sourcing this.
readonly XDS_K8S_DRIVER_VENV_DIR="${XDS_K8S_DRIVER_VENV_DIR:-$XDS_K8S_DRIVER_DIR/venv}"
if [[ -z "${VIRTUAL_ENV}" ]]; then
if [[ -d "${XDS_K8S_DRIVER_VENV_DIR}" ]]; then
# Intentional: No need to check python venv activate script.
# shellcheck source=/dev/null
source "${XDS_K8S_DRIVER_VENV_DIR}/bin/activate"
else
echo "Missing python virtual environment directory: ${XDS_K8S_DRIVER_VENV_DIR}" >&2
echo "Follow README.md installation steps first." >&2
exit 1
fi
fi

@@ -1,28 +0,0 @@
#!/usr/bin/env bash
# Copyright 2023 gRPC authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
set -exo pipefail
VENV_NAME="venv-$(mktemp -d)"
readonly VENV_NAME
python3 -m virtualenv "${VENV_NAME}"
"${VENV_NAME}"/bin/pip install -r requirements.txt
"${VENV_NAME}"/bin/pip freeze --require-virtualenv --local -r requirements.txt \
> requirements.lock
rm -rf "${VENV_NAME}"

@@ -1,60 +0,0 @@
#!/usr/bin/env bash
# Copyright 2021 gRPC authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
set -eo pipefail
display_usage() {
cat <<EOF >/dev/stderr
A helper to run the isort import sorter.
USAGE: $0 [--diff]
--diff: Do not apply changes, only show the diff
ENVIRONMENT:
XDS_K8S_DRIVER_VENV_DIR: the path to python virtual environment directory
Default: $XDS_K8S_DRIVER_DIR/venv
EXAMPLES:
$0
$0 --diff
EOF
exit 1
}
if [[ "$1" == "-h" || "$1" == "--help" ]]; then
display_usage
fi
SCRIPT_DIR="$( cd -- "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )"
readonly SCRIPT_DIR
readonly XDS_K8S_DRIVER_DIR="${SCRIPT_DIR}/.."
cd "${XDS_K8S_DRIVER_DIR}"
# Relative paths not yet supported by shellcheck.
# shellcheck source=/dev/null
source "${XDS_K8S_DRIVER_DIR}/bin/ensure_venv.sh"
if [[ "$1" == "--diff" ]]; then
readonly MODE="--diff"
else
readonly MODE="--overwrite-in-place"
fi
# typing is the only module allowed to put imports on the same line:
# https://google.github.io/styleguide/pyguide.html#313-imports-formatting
exec python -m isort "${MODE}" \
--settings-path=../../../black.toml \
framework bin tests

@ -1,13 +0,0 @@
# Copyright 2023 gRPC authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

@ -1,191 +0,0 @@
# Copyright 2023 gRPC authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Common functionality for bin/ python helpers."""
import atexit
import signal
import sys
from typing import Optional
from absl import logging
from framework import xds_flags
from framework import xds_k8s_flags
from framework.infrastructure import gcp
from framework.infrastructure import k8s
from framework.test_app import client_app
from framework.test_app import server_app
from framework.test_app.runners.k8s import gamma_server_runner
from framework.test_app.runners.k8s import k8s_xds_client_runner
from framework.test_app.runners.k8s import k8s_xds_server_runner
logger = logging.get_absl_logger()
# Type aliases
KubernetesClientRunner = k8s_xds_client_runner.KubernetesClientRunner
KubernetesServerRunner = k8s_xds_server_runner.KubernetesServerRunner
GammaServerRunner = gamma_server_runner.GammaServerRunner
_XdsTestServer = server_app.XdsTestServer
_XdsTestClient = client_app.XdsTestClient
def make_client_namespace(
k8s_api_manager: k8s.KubernetesApiManager,
    namespace_name: Optional[str] = None,
) -> k8s.KubernetesNamespace:
if not namespace_name:
namespace_name: str = KubernetesClientRunner.make_namespace_name(
xds_flags.RESOURCE_PREFIX.value, xds_flags.RESOURCE_SUFFIX.value
)
return k8s.KubernetesNamespace(k8s_api_manager, namespace_name)
def make_client_runner(
namespace: k8s.KubernetesNamespace,
gcp_api_manager: gcp.api.GcpApiManager,
*,
port_forwarding: bool = False,
reuse_namespace: bool = True,
enable_workload_identity: bool = True,
mode: str = "default",
) -> KubernetesClientRunner:
# KubernetesClientRunner arguments.
runner_kwargs = dict(
deployment_name=xds_flags.CLIENT_NAME.value,
image_name=xds_k8s_flags.CLIENT_IMAGE.value,
td_bootstrap_image=xds_k8s_flags.TD_BOOTSTRAP_IMAGE.value,
gcp_project=xds_flags.PROJECT.value,
gcp_api_manager=gcp_api_manager,
gcp_service_account=xds_k8s_flags.GCP_SERVICE_ACCOUNT.value,
xds_server_uri=xds_flags.XDS_SERVER_URI.value,
network=xds_flags.NETWORK.value,
stats_port=xds_flags.CLIENT_PORT.value,
reuse_namespace=reuse_namespace,
debug_use_port_forwarding=port_forwarding,
enable_workload_identity=enable_workload_identity,
)
if mode == "secure":
runner_kwargs.update(
deployment_template="client-secure.deployment.yaml"
)
return KubernetesClientRunner(namespace, **runner_kwargs)
def make_server_namespace(
k8s_api_manager: k8s.KubernetesApiManager,
server_runner: KubernetesServerRunner = KubernetesServerRunner,
) -> k8s.KubernetesNamespace:
namespace_name: str = server_runner.make_namespace_name(
xds_flags.RESOURCE_PREFIX.value, xds_flags.RESOURCE_SUFFIX.value
)
return k8s.KubernetesNamespace(k8s_api_manager, namespace_name)
def make_server_runner(
namespace: k8s.KubernetesNamespace,
gcp_api_manager: gcp.api.GcpApiManager,
*,
port_forwarding: bool = False,
reuse_namespace: bool = True,
reuse_service: bool = False,
enable_workload_identity: bool = True,
mode: str = "default",
) -> KubernetesServerRunner:
# KubernetesServerRunner arguments.
runner_kwargs = dict(
deployment_name=xds_flags.SERVER_NAME.value,
image_name=xds_k8s_flags.SERVER_IMAGE.value,
td_bootstrap_image=xds_k8s_flags.TD_BOOTSTRAP_IMAGE.value,
xds_server_uri=xds_flags.XDS_SERVER_URI.value,
gcp_project=xds_flags.PROJECT.value,
gcp_api_manager=gcp_api_manager,
gcp_service_account=xds_k8s_flags.GCP_SERVICE_ACCOUNT.value,
network=xds_flags.NETWORK.value,
reuse_namespace=reuse_namespace,
reuse_service=reuse_service,
debug_use_port_forwarding=port_forwarding,
enable_workload_identity=enable_workload_identity,
)
server_runner = KubernetesServerRunner
if mode == "secure":
runner_kwargs["deployment_template"] = "server-secure.deployment.yaml"
elif mode == "gamma":
server_runner = GammaServerRunner
return server_runner(namespace, **runner_kwargs)
def _ensure_atexit(signum, frame):
"""Needed to handle signals or atexit handler won't be called."""
del frame
# Pylint is wrong about "Module 'signal' has no 'Signals' member":
# https://docs.python.org/3/library/signal.html#signal.Signals
sig = signal.Signals(signum) # pylint: disable=no-member
logger.warning("Caught %r, initiating graceful shutdown...\n", sig)
sys.exit(1)
def _graceful_exit(
server_runner: KubernetesServerRunner, client_runner: KubernetesClientRunner
):
"""Stop port forwarding processes."""
client_runner.stop_pod_dependencies()
server_runner.stop_pod_dependencies()
def register_graceful_exit(
server_runner: KubernetesServerRunner, client_runner: KubernetesClientRunner
):
atexit.register(_graceful_exit, server_runner, client_runner)
for signum in (signal.SIGTERM, signal.SIGHUP, signal.SIGINT):
signal.signal(signum, _ensure_atexit)
def get_client_pod(
client_runner: KubernetesClientRunner, deployment_name: str
) -> k8s.V1Pod:
client_deployment: k8s.V1Deployment
client_deployment = client_runner.k8s_namespace.get_deployment(
deployment_name
)
client_pod_name: str = client_runner._wait_deployment_pod_count(
client_deployment
)[0]
return client_runner._wait_pod_started(client_pod_name)
def get_server_pod(
server_runner: KubernetesServerRunner, deployment_name: str
) -> k8s.V1Pod:
server_deployment: k8s.V1Deployment
server_deployment = server_runner.k8s_namespace.get_deployment(
deployment_name
)
server_pod_name: str = server_runner._wait_deployment_pod_count(
server_deployment
)[0]
return server_runner._wait_pod_started(server_pod_name)
def get_test_server_for_pod(
server_runner: KubernetesServerRunner, server_pod: k8s.V1Pod, **kwargs
) -> _XdsTestServer:
return server_runner._xds_test_server_for_pod(server_pod, **kwargs)
def get_test_client_for_pod(
client_runner: KubernetesClientRunner, client_pod: k8s.V1Pod, **kwargs
) -> _XdsTestClient:
return client_runner._xds_test_client_for_pod(client_pod, **kwargs)
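
The signal handling above (register_graceful_exit and _ensure_atexit) is the
piece most easily reused outside the framework: atexit handlers only run on a
normal interpreter exit, so signals must first be converted into one. A
minimal standalone sketch of the same pattern, stdlib-only and POSIX-only;
the cleanup() callback is a hypothetical stand-in for stop_pod_dependencies():

import atexit
import signal
import sys

def cleanup():
    # Stand-in for stop_pod_dependencies(): release port forwards, etc.
    print("releasing resources...")

def ensure_atexit(signum, frame):
    del frame
    sig = signal.Signals(signum)
    print(f"Caught {sig!r}, initiating graceful shutdown...")
    sys.exit(1)  # Raises SystemExit, which triggers atexit handlers.

atexit.register(cleanup)
for signum in (signal.SIGTERM, signal.SIGHUP, signal.SIGINT):
    signal.signal(signum, ensure_atexit)
signal.pause()  # Block until a signal arrives (POSIX-only).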

@ -1,271 +0,0 @@
# Copyright 2020 gRPC authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Channelz debugging tool for xDS test client/server.
This is intended as a debugging / local development helper and not executed
as a part of interop test suites.
Typical usage examples:
# Show channel and server socket pair
python -m bin.run_channelz --flagfile=config/local-dev.cfg
# Evaluate setup for different security configurations
python -m bin.run_channelz --flagfile=config/local-dev.cfg --security=tls
python -m bin.run_channelz --flagfile=config/local-dev.cfg --security=mtls_error
# More information and usage options
python -m bin.run_channelz --helpfull
"""
import hashlib
from absl import app
from absl import flags
from absl import logging
from bin.lib import common
from framework import xds_flags
from framework import xds_k8s_flags
from framework.infrastructure import gcp
from framework.infrastructure import k8s
from framework.rpc import grpc_channelz
from framework.test_app import client_app
from framework.test_app import server_app
# Flags
_SECURITY = flags.DEFINE_enum(
"security",
default=None,
enum_values=[
"mtls",
"tls",
"plaintext",
"mtls_error",
"server_authz_error",
],
help="Show info for a security setup",
)
flags.adopt_module_key_flags(xds_flags)
flags.adopt_module_key_flags(xds_k8s_flags)
# Running outside of a test suite, so require explicit resource_suffix.
flags.mark_flag_as_required(xds_flags.RESOURCE_SUFFIX.name)
flags.register_validator(
xds_flags.SERVER_XDS_PORT.name,
lambda val: val > 0,
    message=(
        "When running outside of a test suite, the exact port value"
        " must be provided (greater than 0)."
    ),
)
logger = logging.get_absl_logger()
# Type aliases
_Channel = grpc_channelz.Channel
_Socket = grpc_channelz.Socket
_ChannelState = grpc_channelz.ChannelState
_XdsTestServer = server_app.XdsTestServer
_XdsTestClient = client_app.XdsTestClient
def debug_cert(cert):
if not cert:
return "<missing>"
sha1 = hashlib.sha1(cert)
return f"sha1={sha1.hexdigest()}, len={len(cert)}"
def debug_sock_tls(tls):
return (
f"local: {debug_cert(tls.local_certificate)}\n"
f"remote: {debug_cert(tls.remote_certificate)}"
)
def get_deployment_pods(k8s_ns, deployment_name):
deployment = k8s_ns.get_deployment(deployment_name)
return k8s_ns.list_deployment_pods(deployment)
def debug_security_setup_negative(test_client):
"""Debug negative cases: mTLS Error, Server AuthZ error
1) mTLS Error: Server expects client mTLS cert,
but client configured only for TLS.
2) AuthZ error: Client does not authorize server because of mismatched
SAN name.
"""
# Client side.
client_correct_setup = True
channel: _Channel = test_client.wait_for_server_channel_state(
state=_ChannelState.TRANSIENT_FAILURE
)
try:
subchannel, *subchannels = list(
test_client.channelz.list_channel_subchannels(channel)
)
except ValueError:
print(
"Client setup fail: subchannel not found. "
"Common causes: test client didn't connect to TD; "
"test client exhausted retries, and closed all subchannels."
)
return
# Client must have exactly one subchannel.
logger.debug("Found subchannel, %s", subchannel)
if subchannels:
client_correct_setup = False
print(f"Unexpected subchannels {subchannels}")
subchannel_state: _ChannelState = subchannel.data.state.state
if subchannel_state is not _ChannelState.TRANSIENT_FAILURE:
client_correct_setup = False
print(
"Subchannel expected to be in "
"TRANSIENT_FAILURE, same as its channel"
)
# Client subchannel must have no sockets.
sockets = list(test_client.channelz.list_subchannels_sockets(subchannel))
if sockets:
client_correct_setup = False
print(f"Unexpected subchannel sockets {sockets}")
# Results.
if client_correct_setup:
print(
"Client setup pass: the channel "
"to the server has exactly one subchannel "
"in TRANSIENT_FAILURE, and no sockets"
)
def debug_security_setup_positive(test_client, test_server):
"""Debug positive cases: mTLS, TLS, Plaintext."""
test_client.wait_for_server_channel_ready()
client_sock: _Socket = test_client.get_active_server_channel_socket()
server_sock: _Socket = test_server.get_server_socket_matching_client(
client_sock
)
server_tls = server_sock.security.tls
client_tls = client_sock.security.tls
print(f"\nServer certs:\n{debug_sock_tls(server_tls)}")
print(f"\nClient certs:\n{debug_sock_tls(client_tls)}")
print()
if server_tls.local_certificate:
eq = server_tls.local_certificate == client_tls.remote_certificate
print(f"(TLS) Server local matches client remote: {eq}")
else:
print("(TLS) Not detected")
if server_tls.remote_certificate:
eq = server_tls.remote_certificate == client_tls.local_certificate
print(f"(mTLS) Server remote matches client local: {eq}")
else:
print("(mTLS) Not detected")
def debug_basic_setup(test_client, test_server):
"""Show channel and server socket pair"""
test_client.wait_for_server_channel_ready()
client_sock: _Socket = test_client.get_active_server_channel_socket()
server_sock: _Socket = test_server.get_server_socket_matching_client(
client_sock
)
logger.debug("Client socket: %s\n", client_sock)
logger.debug("Matching server socket: %s\n", server_sock)
def main(argv):
if len(argv) > 1:
raise app.UsageError("Too many command-line arguments.")
# Must be called before KubernetesApiManager or GcpApiManager init.
xds_flags.set_socket_default_timeout_from_flag()
# Flags.
should_port_forward: bool = xds_k8s_flags.DEBUG_USE_PORT_FORWARDING.value
enable_workload_identity: bool = (
xds_k8s_flags.ENABLE_WORKLOAD_IDENTITY.value
)
is_secure: bool = bool(_SECURITY.value)
# Setup.
gcp_api_manager = gcp.api.GcpApiManager()
k8s_api_manager = k8s.KubernetesApiManager(xds_k8s_flags.KUBE_CONTEXT.value)
# Server.
server_namespace = common.make_server_namespace(k8s_api_manager)
server_runner = common.make_server_runner(
server_namespace,
gcp_api_manager,
port_forwarding=should_port_forward,
enable_workload_identity=enable_workload_identity,
mode="secure",
)
# Find server pod.
server_pod: k8s.V1Pod = common.get_server_pod(
server_runner, xds_flags.SERVER_NAME.value
)
# Client
client_namespace = common.make_client_namespace(k8s_api_manager)
client_runner = common.make_client_runner(
client_namespace,
gcp_api_manager,
port_forwarding=should_port_forward,
enable_workload_identity=enable_workload_identity,
mode="secure",
)
# Find client pod.
client_pod: k8s.V1Pod = common.get_client_pod(
client_runner, xds_flags.CLIENT_NAME.value
)
# Ensure port forwarding stopped.
common.register_graceful_exit(server_runner, client_runner)
# Create server app for the server pod.
test_server: _XdsTestServer = common.get_test_server_for_pod(
server_runner,
server_pod,
test_port=xds_flags.SERVER_PORT.value,
secure_mode=is_secure,
)
test_server.set_xds_address(
xds_flags.SERVER_XDS_HOST.value, xds_flags.SERVER_XDS_PORT.value
)
# Create client app for the client pod.
test_client: _XdsTestClient = common.get_test_client_for_pod(
client_runner, client_pod, server_target=test_server.xds_uri
)
with test_client, test_server:
if _SECURITY.value in ("mtls", "tls", "plaintext"):
debug_security_setup_positive(test_client, test_server)
elif _SECURITY.value in ("mtls_error", "server_authz_error"):
debug_security_setup_negative(test_client)
else:
debug_basic_setup(test_client, test_server)
logger.info("SUCCESS!")
if __name__ == "__main__":
app.run(main)
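
The positive-case check in debug_security_setup_positive boils down to
comparing raw certificate bytes across the two channelz sockets: for TLS the
server's local certificate must equal the client's remote one, and for mTLS
additionally the reverse. A condensed, self-contained sketch of that check;
the certificate bytes are hypothetical placeholders:

import hashlib

def fingerprint(cert: bytes) -> str:
    return hashlib.sha1(cert).hexdigest() if cert else "<missing>"

# Hypothetical certificates as seen from each side of the connection.
server_local, server_remote = b"server-cert", b"client-cert"
client_local, client_remote = b"client-cert", b"server-cert"

print("server:", fingerprint(server_local), fingerprint(server_remote))
print("client:", fingerprint(client_local), fingerprint(client_remote))
# (TLS) The server's own cert is what the client sees as its remote cert.
print("TLS ok:", server_local == client_remote)
# (mTLS) Additionally, the client's own cert is the server's remote cert.
print("mTLS ok:", server_remote == client_local)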

@ -1,169 +0,0 @@
# Copyright 2023 gRPC authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from absl import app
from absl import flags
from absl import logging
from bin.lib import common
from framework import xds_flags
from framework import xds_k8s_flags
from framework.helpers import grpc as helpers_grpc
import framework.helpers.highlighter
from framework.infrastructure import gcp
from framework.infrastructure import k8s
from framework.rpc import grpc_channelz
from framework.rpc import grpc_testing
from framework.test_app import client_app
from framework.test_app import server_app
# Flags
_MODE = flags.DEFINE_enum(
"mode",
default="default",
enum_values=["default", "secure", "gamma"],
help="Select a deployment of the client/server",
)
_NUM_RPCS = flags.DEFINE_integer(
"num_rpcs",
default=100,
lower_bound=1,
upper_bound=10_000,
help="The number of RPCs to check.",
)
flags.adopt_module_key_flags(xds_flags)
flags.adopt_module_key_flags(xds_k8s_flags)
# Running outside of a test suite, so require explicit resource_suffix.
flags.mark_flag_as_required(xds_flags.RESOURCE_SUFFIX.name)
flags.register_validator(
xds_flags.SERVER_XDS_PORT.name,
lambda val: val > 0,
    message=(
        "When running outside of a test suite, the exact port value"
        " must be provided (greater than 0)."
    ),
)
logger = logging.get_absl_logger()
# Type aliases
_Channel = grpc_channelz.Channel
_Socket = grpc_channelz.Socket
_ChannelState = grpc_channelz.ChannelState
_XdsTestServer = server_app.XdsTestServer
_XdsTestClient = client_app.XdsTestClient
LoadBalancerStatsResponse = grpc_testing.LoadBalancerStatsResponse
def get_client_rpc_stats(
test_client: _XdsTestClient, num_rpcs: int
) -> LoadBalancerStatsResponse:
lb_stats = test_client.get_load_balancer_stats(num_rpcs=num_rpcs)
hl = framework.helpers.highlighter.HighlighterYaml()
logger.info(
"[%s] Received LoadBalancerStatsResponse:\n%s",
test_client.hostname,
hl.highlight(helpers_grpc.lb_stats_pretty(lb_stats)),
)
return lb_stats
def run_ping_pong(test_client: _XdsTestClient, num_rpcs: int):
test_client.wait_for_active_xds_channel()
test_client.wait_for_server_channel_ready()
lb_stats = get_client_rpc_stats(test_client, num_rpcs)
for backend, rpcs_count in lb_stats.rpcs_by_peer.items():
if int(rpcs_count) < 1:
raise AssertionError(
f"Backend {backend} did not receive a single RPC"
)
    failed = int(lb_stats.num_failures)
    if failed > 0:
raise AssertionError(
f"Expected all RPCs to succeed: {failed} of {num_rpcs} failed"
)
def main(argv):
if len(argv) > 1:
raise app.UsageError("Too many command-line arguments.")
# Must be called before KubernetesApiManager or GcpApiManager init.
xds_flags.set_socket_default_timeout_from_flag()
# Flags.
should_port_forward: bool = xds_k8s_flags.DEBUG_USE_PORT_FORWARDING.value
enable_workload_identity: bool = (
xds_k8s_flags.ENABLE_WORKLOAD_IDENTITY.value
)
# Setup.
gcp_api_manager = gcp.api.GcpApiManager()
k8s_api_manager = k8s.KubernetesApiManager(xds_k8s_flags.KUBE_CONTEXT.value)
# Server.
server_namespace = common.make_server_namespace(k8s_api_manager)
server_runner = common.make_server_runner(
server_namespace,
gcp_api_manager,
port_forwarding=should_port_forward,
enable_workload_identity=enable_workload_identity,
mode=_MODE.value,
)
# Find server pod.
server_pod: k8s.V1Pod = common.get_server_pod(
server_runner, xds_flags.SERVER_NAME.value
)
# Client
client_namespace = common.make_client_namespace(k8s_api_manager)
client_runner = common.make_client_runner(
client_namespace,
gcp_api_manager,
port_forwarding=should_port_forward,
enable_workload_identity=enable_workload_identity,
mode=_MODE.value,
)
# Find client pod.
client_pod: k8s.V1Pod = common.get_client_pod(
client_runner, xds_flags.CLIENT_NAME.value
)
# Ensure port forwarding stopped.
common.register_graceful_exit(server_runner, client_runner)
# Create server app for the server pod.
test_server: _XdsTestServer = common.get_test_server_for_pod(
server_runner,
server_pod,
test_port=xds_flags.SERVER_PORT.value,
secure_mode=_MODE.value == "secure",
)
test_server.set_xds_address(
xds_flags.SERVER_XDS_HOST.value, xds_flags.SERVER_XDS_PORT.value
)
# Create client app for the client pod.
test_client: _XdsTestClient = common.get_test_client_for_pod(
client_runner, client_pod, server_target=test_server.xds_uri
)
with test_client, test_server:
run_ping_pong(test_client, _NUM_RPCS.value)
logger.info("SUCCESS!")
if __name__ == "__main__":
app.run(main)
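
The pass/fail logic of run_ping_pong depends on just two fields of the stats
response: the per-peer RPC counts and the failure count. The same checks,
extracted into a standalone sketch over plain dictionaries; the peer names
are made up:

def check_ping_pong(rpcs_by_peer: dict, num_failures: int, num_rpcs: int):
    # Every backend must have served at least one RPC.
    for backend, count in rpcs_by_peer.items():
        if count < 1:
            raise AssertionError(f"Backend {backend} did not receive a single RPC")
    # And no RPC may have failed.
    if num_failures > 0:
        raise AssertionError(
            f"Expected all RPCs to succeed: {num_failures} of {num_rpcs} failed"
        )

check_ping_pong({"server-a": 60, "server-b": 40}, num_failures=0, num_rpcs=100)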

@ -1,310 +0,0 @@
# Copyright 2020 gRPC authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Configure Traffic Director for different GRPC Proxyless.
This is intended as a debugging / local development helper and not executed
as a part of interop test suites.
Typical usage examples:
# Regular proxyless setup
python -m bin.run_td_setup --flagfile=config/local-dev.cfg
# Additional commands: cleanup, backend management, etc.
python -m bin.run_td_setup --flagfile=config/local-dev.cfg --cmd=cleanup
# PSM security setup options: mtls, tls, etc.
python -m bin.run_td_setup --flagfile=config/local-dev.cfg --security=mtls
# More information and usage options
python -m bin.run_td_setup --helpfull
"""
import logging
from absl import app
from absl import flags
from framework import xds_flags
from framework import xds_k8s_flags
from framework.helpers import rand
from framework.infrastructure import gcp
from framework.infrastructure import k8s
from framework.infrastructure import traffic_director
from framework.test_app.runners.k8s import k8s_xds_server_runner
logger = logging.getLogger(__name__)
# Flags
_CMD = flags.DEFINE_enum(
"cmd",
default="create",
enum_values=[
"cycle",
"create",
"cleanup",
"backends-add",
"backends-cleanup",
"unused-xds-port",
],
help="Command",
)
_SECURITY = flags.DEFINE_enum(
"security",
default=None,
enum_values=[
"mtls",
"tls",
"plaintext",
"mtls_error",
"server_authz_error",
],
help="Configure TD with security",
)
flags.adopt_module_key_flags(xds_flags)
flags.adopt_module_key_flags(xds_k8s_flags)
# Running outside of a test suite, so require explicit resource_suffix.
flags.mark_flag_as_required(xds_flags.RESOURCE_SUFFIX.name)
@flags.multi_flags_validator(
(xds_flags.SERVER_XDS_PORT.name, _CMD.name),
    message=(
        "When running outside of a test suite, the exact port value"
        " must be provided (greater than 0)."
    ),
)
def _check_server_xds_port_flag(flags_dict):
if flags_dict[_CMD.name] not in ("create", "cycle"):
return True
return flags_dict[xds_flags.SERVER_XDS_PORT.name] > 0
# Type aliases
_KubernetesServerRunner = k8s_xds_server_runner.KubernetesServerRunner
def main(
argv,
): # pylint: disable=too-many-locals,too-many-branches,too-many-statements
if len(argv) > 1:
raise app.UsageError("Too many command-line arguments.")
# Must be called before KubernetesApiManager or GcpApiManager init.
xds_flags.set_socket_default_timeout_from_flag()
command = _CMD.value
security_mode = _SECURITY.value
project: str = xds_flags.PROJECT.value
network: str = xds_flags.NETWORK.value
# Resource names.
resource_prefix: str = xds_flags.RESOURCE_PREFIX.value
resource_suffix: str = xds_flags.RESOURCE_SUFFIX.value
# Test server
server_name = xds_flags.SERVER_NAME.value
server_port = xds_flags.SERVER_PORT.value
server_maintenance_port = xds_flags.SERVER_MAINTENANCE_PORT.value
server_xds_host = xds_flags.SERVER_XDS_HOST.value
server_xds_port = xds_flags.SERVER_XDS_PORT.value
server_namespace = _KubernetesServerRunner.make_namespace_name(
resource_prefix, resource_suffix
)
gcp_api_manager = gcp.api.GcpApiManager()
if security_mode is None:
td = traffic_director.TrafficDirectorManager(
gcp_api_manager,
project=project,
network=network,
resource_prefix=resource_prefix,
resource_suffix=resource_suffix,
)
else:
td = traffic_director.TrafficDirectorSecureManager(
gcp_api_manager,
project=project,
network=network,
resource_prefix=resource_prefix,
resource_suffix=resource_suffix,
)
if server_maintenance_port is None:
server_maintenance_port = (
_KubernetesServerRunner.DEFAULT_SECURE_MODE_MAINTENANCE_PORT
)
try:
if command in ("create", "cycle"):
logger.info("Create mode")
if security_mode is None:
logger.info("No security")
td.setup_for_grpc(
server_xds_host,
server_xds_port,
health_check_port=server_maintenance_port,
)
elif security_mode == "mtls":
logger.info("Setting up mtls")
td.setup_for_grpc(
server_xds_host,
server_xds_port,
health_check_port=server_maintenance_port,
)
td.setup_server_security(
server_namespace=server_namespace,
server_name=server_name,
server_port=server_port,
tls=True,
mtls=True,
)
td.setup_client_security(
server_namespace=server_namespace,
server_name=server_name,
tls=True,
mtls=True,
)
elif security_mode == "tls":
logger.info("Setting up tls")
td.setup_for_grpc(
server_xds_host,
server_xds_port,
health_check_port=server_maintenance_port,
)
td.setup_server_security(
server_namespace=server_namespace,
server_name=server_name,
server_port=server_port,
tls=True,
mtls=False,
)
td.setup_client_security(
server_namespace=server_namespace,
server_name=server_name,
tls=True,
mtls=False,
)
elif security_mode == "plaintext":
logger.info("Setting up plaintext")
td.setup_for_grpc(
server_xds_host,
server_xds_port,
health_check_port=server_maintenance_port,
)
td.setup_server_security(
server_namespace=server_namespace,
server_name=server_name,
server_port=server_port,
tls=False,
mtls=False,
)
td.setup_client_security(
server_namespace=server_namespace,
server_name=server_name,
tls=False,
mtls=False,
)
elif security_mode == "mtls_error":
# Error case: server expects client mTLS cert,
# but client configured only for TLS
logger.info("Setting up mtls_error")
td.setup_for_grpc(
server_xds_host,
server_xds_port,
health_check_port=server_maintenance_port,
)
td.setup_server_security(
server_namespace=server_namespace,
server_name=server_name,
server_port=server_port,
tls=True,
mtls=True,
)
td.setup_client_security(
server_namespace=server_namespace,
server_name=server_name,
tls=True,
mtls=False,
)
elif security_mode == "server_authz_error":
# Error case: client does not authorize server
# because of mismatched SAN name.
logger.info("Setting up mtls_error")
td.setup_for_grpc(
server_xds_host,
server_xds_port,
health_check_port=server_maintenance_port,
)
                # Regular TLS setup, but with the client policy configured
                # using an intentionally incorrect server_namespace.
td.setup_server_security(
server_namespace=server_namespace,
server_name=server_name,
server_port=server_port,
tls=True,
mtls=False,
)
td.setup_client_security(
server_namespace=(
f"incorrect-namespace-{rand.rand_string()}"
),
server_name=server_name,
tls=True,
mtls=False,
)
logger.info("Works!")
except Exception: # noqa pylint: disable=broad-except
logger.exception("Got error during creation")
if command in ("cleanup", "cycle"):
logger.info("Cleaning up")
td.cleanup(force=True)
if command == "backends-add":
logger.info("Adding backends")
k8s_api_manager = k8s.KubernetesApiManager(
xds_k8s_flags.KUBE_CONTEXT.value
)
k8s_namespace = k8s.KubernetesNamespace(
k8s_api_manager, server_namespace
)
neg_name, neg_zones = k8s_namespace.parse_service_neg_status(
server_name, server_port
)
td.load_backend_service()
td.backend_service_add_neg_backends(neg_name, neg_zones)
td.wait_for_backends_healthy_status()
elif command == "backends-cleanup":
td.load_backend_service()
td.backend_service_remove_all_backends()
elif command == "unused-xds-port":
try:
unused_xds_port = td.find_unused_forwarding_rule_port()
logger.info(
"Found unused forwarding rule port: %s", unused_xds_port
)
except Exception: # noqa pylint: disable=broad-except
logger.exception("Couldn't find unused forwarding rule port")
if __name__ == "__main__":
app.run(main)
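
The long if/elif chain over --security reduces to a small matrix of (tls,
mtls) pairs passed to the server-side and client-side security policies. A
data-only sketch of that mapping, as encoded above:

# (server_tls, server_mtls, client_tls, client_mtls) per --security value.
SECURITY_MODES = {
    "mtls":               (True,  True,  True,  True),
    "tls":                (True,  False, True,  False),
    "plaintext":          (False, False, False, False),
    # The server demands a client cert, but the client only does TLS.
    "mtls_error":         (True,  True,  True,  False),
    # TLS on both sides, but the client authorizes the wrong server
    # namespace, so SAN validation fails.
    "server_authz_error": (True,  False, True,  False),
}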

@ -1,162 +0,0 @@
# Copyright 2020 gRPC authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Run test xds client.
Gamma example:
./run.sh bin/run_test_client.py --server_xds_host=psm-grpc-server \
--server_xds_port=80 \
--config_mesh=gketd-psm-grpc-server
"""
import logging
import signal
from absl import app
from absl import flags
from bin.lib import common
from framework import xds_flags
from framework import xds_k8s_flags
from framework.infrastructure import gcp
from framework.infrastructure import k8s
logger = logging.getLogger(__name__)
# Flags
_CMD = flags.DEFINE_enum(
"cmd", default="run", enum_values=["run", "cleanup"], help="Command"
)
_MODE = flags.DEFINE_enum(
"mode",
default="default",
enum_values=[
"default",
"secure",
        # Uncomment if gamma-specific changes are added to the client.
# "gamma",
],
help="Select client mode",
)
_QPS = flags.DEFINE_integer("qps", default=25, help="Queries per second")
_PRINT_RESPONSE = flags.DEFINE_bool(
"print_response", default=False, help="Client prints responses"
)
_FOLLOW = flags.DEFINE_bool(
"follow",
default=False,
help=(
"Follow pod logs. Requires --collect_app_logs or"
" --debug_use_port_forwarding"
),
)
_CONFIG_MESH = flags.DEFINE_string(
"config_mesh",
default=None,
help="Optional. Supplied to bootstrap generator to indicate AppNet mesh.",
)
_REUSE_NAMESPACE = flags.DEFINE_bool(
"reuse_namespace", default=True, help="Use existing namespace if exists"
)
_CLEANUP_NAMESPACE = flags.DEFINE_bool(
"cleanup_namespace",
default=False,
help="Delete namespace during resource cleanup",
)
flags.adopt_module_key_flags(xds_flags)
flags.adopt_module_key_flags(xds_k8s_flags)
# Running outside of a test suite, so require explicit resource_suffix.
flags.mark_flag_as_required(xds_flags.RESOURCE_SUFFIX.name)
@flags.multi_flags_validator(
(xds_flags.SERVER_XDS_PORT.name, _CMD.name),
    message=(
        "When running outside of a test suite, the exact port value"
        " must be provided (greater than 0)."
    ),
)
def _check_server_xds_port_flag(flags_dict):
if flags_dict[_CMD.name] == "cleanup":
return True
return flags_dict[xds_flags.SERVER_XDS_PORT.name] > 0
def _make_sigint_handler(client_runner: common.KubernetesClientRunner):
def sigint_handler(sig, frame):
del sig, frame
print("Caught Ctrl+C. Shutting down the logs")
client_runner.stop_pod_dependencies(log_drain_sec=3)
return sigint_handler
def main(argv):
if len(argv) > 1:
raise app.UsageError("Too many command-line arguments.")
# Must be called before KubernetesApiManager or GcpApiManager init.
xds_flags.set_socket_default_timeout_from_flag()
# Log following and port forwarding.
should_follow_logs = _FOLLOW.value and xds_flags.COLLECT_APP_LOGS.value
should_port_forward = (
should_follow_logs and xds_k8s_flags.DEBUG_USE_PORT_FORWARDING.value
)
enable_workload_identity: bool = (
xds_k8s_flags.ENABLE_WORKLOAD_IDENTITY.value
)
# Setup.
gcp_api_manager = gcp.api.GcpApiManager()
k8s_api_manager = k8s.KubernetesApiManager(xds_k8s_flags.KUBE_CONTEXT.value)
client_namespace = common.make_client_namespace(k8s_api_manager)
client_runner = common.make_client_runner(
client_namespace,
gcp_api_manager,
reuse_namespace=_REUSE_NAMESPACE.value,
mode=_MODE.value,
port_forwarding=should_port_forward,
enable_workload_identity=enable_workload_identity,
)
# Server target
server_target = f"xds:///{xds_flags.SERVER_XDS_HOST.value}"
if xds_flags.SERVER_XDS_PORT.value != 80:
server_target = f"{server_target}:{xds_flags.SERVER_XDS_PORT.value}"
if _CMD.value == "run":
logger.info("Run client, mode=%s", _MODE.value)
client_runner.run(
server_target=server_target,
qps=_QPS.value,
print_response=_PRINT_RESPONSE.value,
secure_mode=_MODE.value == "secure",
config_mesh=_CONFIG_MESH.value,
log_to_stdout=_FOLLOW.value,
)
if should_follow_logs:
print("Following pod logs. Press Ctrl+C top stop")
signal.signal(signal.SIGINT, _make_sigint_handler(client_runner))
signal.pause()
elif _CMD.value == "cleanup":
logger.info("Cleanup client")
client_runner.cleanup(
force=True, force_namespace=_CLEANUP_NAMESPACE.value
)
if __name__ == "__main__":
app.run(main)
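
The server target constructed above follows one rule worth calling out: port
80 is treated as the default for the xds scheme and omitted. A tiny sketch of
the same logic; the function name is illustrative:

def make_xds_target(host: str, port: int) -> str:
    target = f"xds:///{host}"
    # Port 80 is the implied default and omitted from the target.
    return target if port == 80 else f"{target}:{port}"

assert make_xds_target("psm-grpc-server", 80) == "xds:///psm-grpc-server"
assert make_xds_target("psm-grpc-server", 8848) == "xds:///psm-grpc-server:8848"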

@ -1,123 +0,0 @@
# Copyright 2020 gRPC authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Run test xds server.
Gamma example:
./run.sh bin/run_test_server.py --mode=gamma
"""
import logging
import signal
from absl import app
from absl import flags
from bin.lib import common
from framework import xds_flags
from framework import xds_k8s_flags
from framework.infrastructure import gcp
from framework.infrastructure import k8s
logger = logging.getLogger(__name__)
# Flags
_CMD = flags.DEFINE_enum(
"cmd", default="run", enum_values=["run", "cleanup"], help="Command"
)
_MODE = flags.DEFINE_enum(
"mode",
default="default",
enum_values=["default", "secure", "gamma"],
help="Select server mode",
)
_REUSE_NAMESPACE = flags.DEFINE_bool(
"reuse_namespace", default=True, help="Use existing namespace if exists"
)
_REUSE_SERVICE = flags.DEFINE_bool(
"reuse_service", default=False, help="Use existing service if exists"
)
_FOLLOW = flags.DEFINE_bool(
"follow", default=False, help="Follow pod logs. Requires --collect_app_logs"
)
_CLEANUP_NAMESPACE = flags.DEFINE_bool(
"cleanup_namespace",
default=False,
help="Delete namespace during resource cleanup",
)
flags.adopt_module_key_flags(xds_flags)
flags.adopt_module_key_flags(xds_k8s_flags)
# Running outside of a test suite, so require explicit resource_suffix.
flags.mark_flag_as_required("resource_suffix")
def _make_sigint_handler(server_runner: common.KubernetesServerRunner):
def sigint_handler(sig, frame):
del sig, frame
print("Caught Ctrl+C. Shutting down the logs")
server_runner.stop_pod_dependencies(log_drain_sec=3)
return sigint_handler
def main(argv):
if len(argv) > 1:
raise app.UsageError("Too many command-line arguments.")
# Must be called before KubernetesApiManager or GcpApiManager init.
xds_flags.set_socket_default_timeout_from_flag()
should_follow_logs = _FOLLOW.value and xds_flags.COLLECT_APP_LOGS.value
should_port_forward = (
should_follow_logs and xds_k8s_flags.DEBUG_USE_PORT_FORWARDING.value
)
enable_workload_identity: bool = (
xds_k8s_flags.ENABLE_WORKLOAD_IDENTITY.value
)
# Setup.
gcp_api_manager = gcp.api.GcpApiManager()
k8s_api_manager = k8s.KubernetesApiManager(xds_k8s_flags.KUBE_CONTEXT.value)
server_namespace = common.make_server_namespace(k8s_api_manager)
server_runner = common.make_server_runner(
server_namespace,
gcp_api_manager,
reuse_namespace=_REUSE_NAMESPACE.value,
reuse_service=_REUSE_SERVICE.value,
mode=_MODE.value,
port_forwarding=should_port_forward,
enable_workload_identity=enable_workload_identity,
)
if _CMD.value == "run":
logger.info("Run server, mode=%s", _MODE.value)
server_runner.run(
test_port=xds_flags.SERVER_PORT.value,
maintenance_port=xds_flags.SERVER_MAINTENANCE_PORT.value,
secure_mode=_MODE.value == "secure",
log_to_stdout=_FOLLOW.value,
)
if should_follow_logs:
print("Following pod logs. Press Ctrl+C top stop")
signal.signal(signal.SIGINT, _make_sigint_handler(server_runner))
signal.pause()
elif _CMD.value == "cleanup":
logger.info("Cleanup server")
server_runner.cleanup(
force=True, force_namespace=_CLEANUP_NAMESPACE.value
)
if __name__ == "__main__":
app.run(main)

@ -1,3 +0,0 @@
# Common config file for PSM CSM tests.
--resource_prefix=psm-csm
--noenable_workload_identity

@ -1,11 +0,0 @@
--resource_prefix=psm-interop
--td_bootstrap_image=gcr.io/grpc-testing/td-grpc-bootstrap:7d8d90477792e2e1bfe3a3da20b3dc9ef01d326c
# The canonical implementation of the xDS test server.
# Can be used in tests where a language-specific xDS test server does not
# exist, or is missing a feature required for the test.
# TODO(sergiitk): Update every ~ 6 months; next 2024-01.
--server_image_canonical=gcr.io/grpc-testing/xds-interop/java-server:canonical-v1.56
--logger_levels=__main__:DEBUG,framework:INFO
--verbosity=0

@ -1,4 +0,0 @@
# Common config file for GAMMA PSM tests.
# TODO(sergiitk): delete when confirmed it's not used
--resource_prefix=psm-gamma
--noenable_workload_identity

@ -1,9 +0,0 @@
--flagfile=config/common.cfg
--project=grpc-testing
--network=default-vpc
--gcp_service_account=xds-k8s-interop-tests@grpc-testing.iam.gserviceaccount.com
--private_api_key_secret_name=projects/830293263384/secrets/xds-interop-tests-private-api-access-key
# Randomize xds port.
--server_xds_port=0
# ResultStore UI doesn't support 256 colors.
--color_style=ansi16

@ -1,62 +0,0 @@
# Copy to local-dev.cfg; replace ${UPPERCASED_VARS}. Details in README.md.
## Import common settings
--flagfile=config/common.cfg
### --------------------------------- Project ----------------------------------
## Project settings
--project=${PROJECT_ID}
--gcp_service_account=${WORKLOAD_SA_EMAIL}
--private_api_key_secret_name=projects/${PROJECT_NUMBER}/secrets/xds-interop-tests-private-api-access-key
### --------------------------------- Clusters ---------------------------------
## The name of kube context to use (points to your GKE cluster).
--kube_context=${KUBE_CONTEXT}
### ------------------------------- App images ---------------------------------
## Test images, e.g. Java v1.57.x.
--server_image=gcr.io/grpc-testing/xds-interop/java-server:v1.57.x
--client_image=gcr.io/grpc-testing/xds-interop/java-client:v1.57.x
### ----------------------------------- App ------------------------------------
## Use a resource prefix to describe usage and ownership.
--resource_prefix=${USER}-psm
## Use a random port in the server xds address, e.g. xds:///my-test-server:42
--server_xds_port=0
## When running ./bin helpers, you might need to set randomly generated fields
## to a static value.
# --resource_suffix=dev
# --server_xds_port=1111
### --------------------------------- Logging ----------------------------------
## Verbosity: -3 (fatal/critical), -2 (error), -1 (warning), 0 (info), 1 (debug)
# --verbosity=1
## Uncomment and set different log levels per module. Examples:
# --logger_levels=__main__:DEBUG,framework:INFO
# --logger_levels=__main__:INFO,framework:DEBUG,urllib3.connectionpool:ERROR
## Uncomment to collect test client, server logs to out/test_app_logs/ folder.
# --collect_app_logs
# --log_dir=out
### ------------------------------- Local dev ---------------------------------
## Enable port forwarding in local dev.
--debug_use_port_forwarding
## (convenience) Allow specifying these flags even when they aren't defined.
--undefok=private_api_key_secret_name,gcp_ui_url
## Uncomment to create the firewall rule before test case runs.
# --ensure_firewall
## Uncomment if the health check port opened in the firewall differs from 8080.
# --server_port=50051

@ -1,15 +0,0 @@
--resource_prefix=interop-psm-url-map
--strategy=reuse
--server_xds_port=8848
# NOTE(lidiz) we pin the server image to java-server because:
# 1. Only Java server understands the rpc-behavior metadata.
# 2. All UrlMap tests today are testing client-side logic.
#
# TODO(sergiitk): Use --server_image_canonical instead.
--server_image=gcr.io/grpc-testing/xds-interop/java-server:canonical-v1.56
# Disables the GCP Workload Identity feature to simplify permission control
--gcp_service_account=None
--private_api_key_secret_name=None
--noenable_workload_identity

@ -1,13 +0,0 @@
# Copyright 2020 gRPC authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

@ -1,182 +0,0 @@
# Copyright 2022 gRPC authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import logging
from typing import Optional
from framework import xds_k8s_testcase
from framework.helpers import rand as helpers_rand
from framework.infrastructure import k8s
from framework.infrastructure import traffic_director
from framework.test_app.runners.k8s import k8s_xds_client_runner
from framework.test_app.runners.k8s import k8s_xds_server_runner
logger = logging.getLogger(__name__)
# Type aliases
TrafficDirectorManager = traffic_director.TrafficDirectorManager
XdsTestServer = xds_k8s_testcase.XdsTestServer
XdsTestClient = xds_k8s_testcase.XdsTestClient
KubernetesServerRunner = k8s_xds_server_runner.KubernetesServerRunner
KubernetesClientRunner = k8s_xds_client_runner.KubernetesClientRunner
class BootstrapGeneratorBaseTest(xds_k8s_testcase.XdsKubernetesBaseTestCase):
"""Common functionality to support testing of bootstrap generator versions
across gRPC clients and servers."""
@classmethod
def setUpClass(cls):
"""Hook method for setting up class fixture before running tests in
the class.
"""
super().setUpClass()
if cls.server_maintenance_port is None:
cls.server_maintenance_port = (
KubernetesServerRunner.DEFAULT_MAINTENANCE_PORT
)
# Bootstrap generator tests are run as parameterized tests which only
# perform steps specific to the parameterized version of the bootstrap
# generator under test.
#
# Here, we perform setup steps which are common across client and server
# side variants of the bootstrap generator test.
if cls.resource_suffix_randomize:
cls.resource_suffix = helpers_rand.random_resource_suffix()
logger.info(
"Test run resource prefix: %s, suffix: %s",
cls.resource_prefix,
cls.resource_suffix,
)
# TD Manager
cls.td = cls.initTrafficDirectorManager()
# Test namespaces for client and server.
cls.server_namespace = KubernetesServerRunner.make_namespace_name(
cls.resource_prefix, cls.resource_suffix
)
cls.client_namespace = KubernetesClientRunner.make_namespace_name(
cls.resource_prefix, cls.resource_suffix
)
        # Ensure the firewall rule exists.
if cls.ensure_firewall:
cls.td.create_firewall_rule(
allowed_ports=cls.firewall_allowed_ports
)
        # Randomize the xds port when it's set to 0.
        if cls.server_xds_port == 0:
            # TODO(sergiitk): this is prone to race conditions:
            # The port might not be taken now, but there's no guarantee
            # it won't be taken by the time the tests get to creating
            # the forwarding rule. This check is better than nothing,
            # but we should find a better approach.
cls.server_xds_port = cls.td.find_unused_forwarding_rule_port()
logger.info("Found unused xds port: %s", cls.server_xds_port)
# Common TD resources across client and server tests.
cls.td.setup_for_grpc(
cls.server_xds_host,
cls.server_xds_port,
health_check_port=cls.server_maintenance_port,
)
@classmethod
def tearDownClass(cls):
cls.td.cleanup(force=cls.force_cleanup)
super().tearDownClass()
@classmethod
def initTrafficDirectorManager(cls) -> TrafficDirectorManager:
return TrafficDirectorManager(
cls.gcp_api_manager,
project=cls.project,
resource_prefix=cls.resource_prefix,
resource_suffix=cls.resource_suffix,
network=cls.network,
compute_api_version=cls.compute_api_version,
)
@classmethod
def initKubernetesServerRunner(
cls, *, td_bootstrap_image: Optional[str] = None
) -> KubernetesServerRunner:
if not td_bootstrap_image:
td_bootstrap_image = cls.td_bootstrap_image
return KubernetesServerRunner(
k8s.KubernetesNamespace(cls.k8s_api_manager, cls.server_namespace),
deployment_name=cls.server_name,
image_name=cls.server_image,
td_bootstrap_image=td_bootstrap_image,
gcp_project=cls.project,
gcp_api_manager=cls.gcp_api_manager,
gcp_service_account=cls.gcp_service_account,
xds_server_uri=cls.xds_server_uri,
network=cls.network,
debug_use_port_forwarding=cls.debug_use_port_forwarding,
enable_workload_identity=cls.enable_workload_identity,
)
@staticmethod
def startTestServer(
server_runner,
port,
maintenance_port,
xds_host,
xds_port,
replica_count=1,
**kwargs,
) -> XdsTestServer:
test_server = server_runner.run(
replica_count=replica_count,
test_port=port,
maintenance_port=maintenance_port,
**kwargs,
)[0]
test_server.set_xds_address(xds_host, xds_port)
return test_server
def initKubernetesClientRunner(
self, td_bootstrap_image: Optional[str] = None
) -> KubernetesClientRunner:
if not td_bootstrap_image:
td_bootstrap_image = self.td_bootstrap_image
return KubernetesClientRunner(
k8s.KubernetesNamespace(
self.k8s_api_manager, self.client_namespace
),
deployment_name=self.client_name,
image_name=self.client_image,
td_bootstrap_image=td_bootstrap_image,
gcp_project=self.project,
gcp_api_manager=self.gcp_api_manager,
gcp_service_account=self.gcp_service_account,
xds_server_uri=self.xds_server_uri,
network=self.network,
debug_use_port_forwarding=self.debug_use_port_forwarding,
enable_workload_identity=self.enable_workload_identity,
stats_port=self.client_port,
reuse_namespace=self.server_namespace == self.client_namespace,
)
def startTestClient(
self, test_server: XdsTestServer, **kwargs
) -> XdsTestClient:
test_client = self.client_runner.run(
server_target=test_server.xds_uri, **kwargs
)
test_client.wait_for_server_channel_ready()
return test_client
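
A parameterized test built on this base class would typically vary only the
bootstrap generator image. A hypothetical sketch; the image tag, the test
method, and the server_port attribute are assumptions (the real subclasses
live in the migrated repo):

class BootstrapVersionTest(BootstrapGeneratorBaseTest):
    def test_bootstrap_version(self):
        # Hypothetical image tag pinning the generator version under test.
        server_runner = self.initKubernetesServerRunner(
            td_bootstrap_image="gcr.io/example/td-grpc-bootstrap:0.14.0"
        )
        test_server = self.startTestServer(
            server_runner,
            port=self.server_port,
            maintenance_port=self.server_maintenance_port,
            xds_host=self.server_xds_host,
            xds_port=self.server_xds_port,
        )
        self.client_runner = self.initKubernetesClientRunner()
        test_client = self.startTestClient(test_server)
        # RPC-level assertions would go here, e.g. checking LB stats.
        test_client.get_load_balancer_stats(num_rpcs=10)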

@ -1,58 +0,0 @@
# Copyright 2023 gRPC authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import Any
# TODO(sergiitk): All custom error classes should extend this.
class FrameworkError(Exception):
"""Base error class for framework errors."""
message: str
kwargs: dict[str, Any]
note: str = ""
def __init__(self, message: str, *args, **kwargs):
self.message = message
# Exception only stores args.
self.kwargs = kwargs
        # Pass to the Exception as if message were part of *args.
super().__init__(*[message, *args])
# TODO(sergiitk): Remove in py3.11, this will be built-in. See PEP 678.
def add_note(self, note: str):
self.note = note
def __str__(self):
return self.message if not self.note else f"{self.message}\n{self.note}"
@classmethod
def note_blanket_error(cls, reason: str) -> str:
return f"""
Reason: {reason}
{'#' * 80}
# IMPORTANT: This is not a root cause. This is an indication that
# _something_ -- literally _anything_ -- has gone wrong in the xDS flow.
# It is _your_ responsibility to look through the interop client and/or
# server logs to determine what exactly went wrong.
{'#' * 80}
"""
@classmethod
def note_blanket_error_info_below(
cls, reason: str, *, info_below: str
) -> str:
return (
f"{cls.note_blanket_error(reason)}"
f"# Please inspect the information below:\n{info_below}"
)
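
Typical use pairs the keyword-argument capture with a post-hoc note. A
minimal sketch, assuming the FrameworkError class above is in scope; the
resource name is made up:

try:
    raise FrameworkError("xDS resource rejected", resource="example-listener")
except FrameworkError as err:
    err.add_note(FrameworkError.note_blanket_error("Config not ACKed in time"))
    print(err)         # The message, followed by the blanket-error note.
    print(err.kwargs)  # {'resource': 'example-listener'}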

@ -1,13 +0,0 @@
# Copyright 2020 gRPC authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

@ -1,79 +0,0 @@
# Copyright 2021 gRPC authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""This contains common helpers for working with dates and time."""
import datetime
import re
from typing import Optional, Pattern
import dateutil.parser
RE_ZERO_OFFSET: Pattern[str] = re.compile(r"[+\-]00:?00$")
def utc_now() -> datetime.datetime:
"""Construct a datetime from current time in UTC timezone."""
return datetime.datetime.now(datetime.timezone.utc)
def shorten_utc_zone(utc_datetime_str: str) -> str:
"""Replace ±00:00 timezone designator with Z (zero offset AKA Zulu time)."""
return RE_ZERO_OFFSET.sub("Z", utc_datetime_str)
def iso8601_utc_time(time: Optional[datetime.datetime] = None) -> str:
    """Converts a datetime to UTC and formats it as ISO-8601 Zulu time."""
    if time is None:
        time = utc_now()
    utc_time = time.astimezone(tz=datetime.timezone.utc)
    return shorten_utc_zone(utc_time.isoformat())
def iso8601_to_datetime(date_str: str) -> datetime.datetime:
# TODO(sergiitk): use regular datetime.datetime when upgraded to py3.11.
return dateutil.parser.isoparse(date_str)
def datetime_suffix(*, seconds: bool = False) -> str:
"""Return current UTC date, and time in a format useful for resource naming.
Examples:
- 20210626-1859 (seconds=False)
- 20210626-185942 (seconds=True)
    Use in resource names incompatible with ISO 8601, e.g. some GCP resources
    that only allow lowercase alphanumeric chars and dashes.
    Hours and minutes are joined together for better readability, so the time
    is visually distinct from the dash-separated date.
"""
return utc_now().strftime("%Y%m%d-%H%M" + ("%S" if seconds else ""))
def ago(date_from: datetime.datetime, now: Optional[datetime.datetime] = None):
if not now:
now = utc_now()
# Round down microseconds.
date_from = date_from.replace(microsecond=0)
now = now.replace(microsecond=0)
# Calculate the diff.
delta: datetime.timedelta = now - date_from
if delta.days > 1:
result = f"{delta.days} days"
elif delta.days > 0:
result = f"{delta.days} day"
else:
# This case covers negative deltas too.
result = f"{delta} (h:mm:ss)"
return f"{result} ago"

@ -1,204 +0,0 @@
# Copyright 2023 gRPC authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""This contains common helpers for working with grpc data structures."""
import dataclasses
import functools
from typing import Dict, List, Optional
import grpc
import yaml
from framework.rpc import grpc_testing
# Type aliases
RpcsByPeer = Dict[str, int]
RpcMetadata = grpc_testing.LoadBalancerStatsResponse.RpcMetadata
MetadataByPeer = List[RpcMetadata]
@functools.cache # pylint: disable=no-member
def status_from_int(grpc_status_int: int) -> Optional[grpc.StatusCode]:
"""Converts the integer gRPC status code to the grpc.StatusCode enum."""
for grpc_status in grpc.StatusCode:
if grpc_status.value[0] == grpc_status_int:
return grpc_status
return None
def status_eq(grpc_status_int: int, grpc_status: grpc.StatusCode) -> bool:
"""Compares the integer gRPC status code with the grpc.StatusCode enum."""
return status_from_int(grpc_status_int) is grpc_status
def status_pretty(grpc_status: grpc.StatusCode) -> str:
"""Formats the status code as (int, NAME), f.e. (4, DEADLINE_EXCEEDED)"""
return f"({grpc_status.value[0]}, {grpc_status.name})"
@dataclasses.dataclass(frozen=True)
class PrettyStatsPerMethod:
# The name of the method.
method: str
# The number of RPCs started for this method, completed and in-flight.
rpcs_started: int
# The number of RPCs that completed with each status for this method.
    # Format: status code -> RPC count, e.g.:
# {
# "(0, OK)": 20,
# "(14, UNAVAILABLE)": 10
# }
result: Dict[str, int]
@functools.cached_property # pylint: disable=no-member
def rpcs_completed(self):
"""Returns the total count of competed RPCs across all statuses."""
return sum(self.result.values())
@staticmethod
def from_response(
method_name: str, method_stats: grpc_testing.MethodStats
) -> "PrettyStatsPerMethod":
stats: Dict[str, int] = dict()
for status_int, count in method_stats.result.items():
status: Optional[grpc.StatusCode] = status_from_int(status_int)
status_formatted = status_pretty(status) if status else "None"
stats[status_formatted] = count
return PrettyStatsPerMethod(
method=method_name,
rpcs_started=method_stats.rpcs_started,
result=stats,
)
def accumulated_stats_pretty(
accumulated_stats: grpc_testing.LoadBalancerAccumulatedStatsResponse,
*,
ignore_empty: bool = False,
) -> str:
"""Pretty print LoadBalancerAccumulatedStatsResponse.
Example:
- method: EMPTY_CALL
rpcs_started: 0
result:
(2, UNKNOWN): 20
- method: UNARY_CALL
rpcs_started: 31
result:
(0, OK): 10
(14, UNAVAILABLE): 20
"""
# Only look at stats_per_method, as the other fields are deprecated.
result: List[Dict] = []
for method_name, method_stats in accumulated_stats.stats_per_method.items():
pretty_stats = PrettyStatsPerMethod.from_response(
method_name, method_stats
)
# Skip methods with no RPCs reported when ignore_empty is True.
if ignore_empty and not pretty_stats.rpcs_started:
continue
result.append(dataclasses.asdict(pretty_stats))
return yaml.dump(result, sort_keys=False)
@dataclasses.dataclass(frozen=True)
class PrettyLoadBalancerStats:
# The number of RPCs that failed to record a remote peer.
num_failures: int
# The number of completed RPCs for each peer.
    # Format: a dictionary from the host name (str) to the RPC count (int), e.g.
# {"host-a": 10, "host-b": 20}
rpcs_by_peer: "RpcsByPeer"
    # The number of completed RPCs per method, per peer.
    # Format: a dictionary from the method name to RpcsByPeer (see above), e.g.:
# {
# "UNARY_CALL": {"host-a": 10, "host-b": 20},
# "EMPTY_CALL": {"host-a": 42},
# }
rpcs_by_method: Dict[str, "RpcsByPeer"]
metadatas_by_peer: Dict[str, "MetadataByPeer"]
@staticmethod
def _parse_rpcs_by_peer(
rpcs_by_peer: grpc_testing.RpcsByPeer,
) -> "RpcsByPeer":
result = dict()
for peer, count in rpcs_by_peer.items():
result[peer] = count
return result
@staticmethod
def _parse_metadatas_by_peer(
metadatas_by_peer: grpc_testing.LoadBalancerStatsResponse.MetadataByPeer,
) -> "MetadataByPeer":
result = dict()
for peer, metadatas in metadatas_by_peer.items():
pretty_metadata = ""
for rpc_metadatas in metadatas.rpc_metadata:
for metadata in rpc_metadatas.metadata:
pretty_metadata += (
metadata.key + ": " + metadata.value + ", "
)
result[peer] = pretty_metadata
return result
@classmethod
def from_response(
cls, lb_stats: grpc_testing.LoadBalancerStatsResponse
) -> "PrettyLoadBalancerStats":
rpcs_by_method: Dict[str, "RpcsByPeer"] = dict()
for method_name, stats in lb_stats.rpcs_by_method.items():
if stats:
rpcs_by_method[method_name] = cls._parse_rpcs_by_peer(
stats.rpcs_by_peer
)
return PrettyLoadBalancerStats(
num_failures=lb_stats.num_failures,
rpcs_by_peer=cls._parse_rpcs_by_peer(lb_stats.rpcs_by_peer),
rpcs_by_method=rpcs_by_method,
metadatas_by_peer=cls._parse_metadatas_by_peer(
lb_stats.metadatas_by_peer
),
)
def lb_stats_pretty(lb: grpc_testing.LoadBalancerStatsResponse) -> str:
"""Pretty print LoadBalancerStatsResponse.
Example:
num_failures: 13
rpcs_by_method:
UNARY_CALL:
psm-grpc-server-a: 100
psm-grpc-server-b: 42
EMPTY_CALL:
psm-grpc-server-a: 200
rpcs_by_peer:
psm-grpc-server-a: 200
psm-grpc-server-b: 42
"""
pretty_lb_stats = PrettyLoadBalancerStats.from_response(lb)
stats_as_dict = dataclasses.asdict(pretty_lb_stats)
# Don't print metadatas_by_peer unless it has data
if not stats_as_dict["metadatas_by_peer"]:
stats_as_dict.pop("metadatas_by_peer")
return yaml.dump(stats_as_dict, sort_keys=False)
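# A minimal usage sketch (illustrative; `lb_stats` is assumed to be a
# LoadBalancerStatsResponse obtained elsewhere, e.g. from a test client):
#
#   logger.info("Got client stats:\n%s", lb_stats_pretty(lb_stats))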

@@ -1,106 +0,0 @@
# Copyright 2021 gRPC authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""The module contains helpers to enable color output in terminals.
Use this to log resources dumped as a structured document (f.e. YAML),
and enable colorful syntax highlighting.
TODO(sergiitk): This can be used to output protobuf responses formatted as JSON.
"""
import logging
from typing import Optional
from absl import flags
import pygments
import pygments.formatter
import pygments.formatters.other
import pygments.formatters.terminal
import pygments.formatters.terminal256
import pygments.lexer
import pygments.lexers.data
import pygments.styles
# The style for terminals supporting 8/16 colors.
STYLE_ANSI_16 = "ansi16"
# Join with pygments styles for terminals supporting 88/256 colors.
ALL_COLOR_STYLES = [STYLE_ANSI_16] + list(pygments.styles.get_all_styles())
# Flags.
COLOR = flags.DEFINE_bool("color", default=True, help="Colorize the output")
COLOR_STYLE = flags.DEFINE_enum(
"color_style",
default="material",
enum_values=ALL_COLOR_STYLES,
help=(
"Color styles for terminals supporting 256 colors. "
f"Use {STYLE_ANSI_16} style for terminals supporting 8/16 colors"
),
)
logger = logging.getLogger(__name__)
# Type aliases.
Lexer = pygments.lexer.Lexer
YamlLexer = pygments.lexers.data.YamlLexer
Formatter = pygments.formatter.Formatter
NullFormatter = pygments.formatters.other.NullFormatter
TerminalFormatter = pygments.formatters.terminal.TerminalFormatter
Terminal256Formatter = pygments.formatters.terminal256.Terminal256Formatter
class Highlighter:
formatter: Formatter
lexer: Lexer
color: bool
color_style: Optional[str] = None
def __init__(
self,
*,
lexer: Lexer,
color: Optional[bool] = None,
color_style: Optional[str] = None,
):
self.lexer = lexer
self.color = color if color is not None else COLOR.value
if self.color:
color_style = color_style if color_style else COLOR_STYLE.value
if color_style not in ALL_COLOR_STYLES:
raise ValueError(
f"Unrecognized color style {color_style}, "
f"valid styles: {ALL_COLOR_STYLES}"
)
if color_style == STYLE_ANSI_16:
# 8/16 colors support only.
self.formatter = TerminalFormatter()
else:
# 88/256 colors.
self.formatter = Terminal256Formatter(style=color_style)
else:
self.formatter = NullFormatter()
def highlight(self, code: str) -> str:
return pygments.highlight(code, self.lexer, self.formatter)
class HighlighterYaml(Highlighter):
def __init__(
self, *, color: Optional[bool] = None, color_style: Optional[str] = None
):
super().__init__(
lexer=YamlLexer(encoding="utf-8"),
color=color,
color_style=color_style,
)
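# A minimal usage sketch (illustrative only). Assumes absl flags are already
# parsed, since the COLOR and COLOR_STYLE defaults are read lazily:
#
#   highlighter = HighlighterYaml()
#   print(highlighter.highlight("name: psm-grpc-server\nreplicas: 1\n"))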

@@ -1,48 +0,0 @@
# Copyright 2022 gRPC authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""The module contains helpers to initialize and configure logging."""
import functools
import pathlib
from absl import flags
from absl import logging
def _ensure_flags_parsed() -> None:
if not flags.FLAGS.is_parsed():
raise flags.UnparsedFlagAccessError("Must initialize absl flags first.")
@functools.lru_cache(None)
def log_get_root_dir() -> pathlib.Path:
_ensure_flags_parsed()
log_root = pathlib.Path(logging.find_log_dir()).absolute()
logging.info("Log root dir: %s", log_root)
return log_root
def log_dir_mkdir(name: str) -> pathlib.Path:
"""Creates and returns a subdir with the given name in the log folder."""
if len(pathlib.Path(name).parts) != 1:
raise ValueError(f"Dir name must be a single component; got: {name}")
if ".." in name:
raise ValueError(f"Dir name must not be above the log root.")
log_subdir = log_get_root_dir() / name
if log_subdir.exists() and log_subdir.is_dir():
logging.debug("Using existing log subdir: %s", log_subdir)
else:
log_subdir.mkdir()
logging.debug("Created log subdir: %s", log_subdir)
return log_subdir
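# A minimal usage sketch (illustrative; "test-logs" and the file name are
# placeholders):
#
#   pod_log_dir: pathlib.Path = log_dir_mkdir("test-logs")
#   (pod_log_dir / "client.log").write_text("...")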

@@ -1,49 +0,0 @@
# Copyright 2021 gRPC authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""This contains common helpers for generating randomized data."""
import random
import string
import framework.helpers.datetime
# Alphanumeric characters, similar to regex [:alnum:] class, [a-zA-Z0-9]
ALPHANUM = string.ascii_letters + string.digits
# Lowercase alphanumeric characters: [a-z0-9]
# Use ALPHANUM_LOWERCASE alphabet when case-sensitivity is a concern.
ALPHANUM_LOWERCASE = string.ascii_lowercase + string.digits
def rand_string(length: int = 8, *, lowercase: bool = False) -> str:
"""Return random alphanumeric string of given length.
Space for default arguments: alphabet^length
lowercase and uppercase = (26*2 + 10)^8 = 2.18e14 = 218 trillion.
lowercase only = (26 + 10)^8 = 2.8e12 = 2.8 trillion.
"""
alphabet = ALPHANUM_LOWERCASE if lowercase else ALPHANUM
return "".join(random.choices(population=alphabet, k=length))
def random_resource_suffix() -> str:
"""Return a ready-to-use resource suffix with datetime and nonce."""
    # Date and time suffix for debugging. Seconds are skipped as not relevant.
# Format example: 20210626-1859
datetime_suffix: str = framework.helpers.datetime.datetime_suffix()
# Use lowercase chars because some resource names won't allow uppercase.
# For len 5, total (26 + 10)^5 = 60,466,176 combinations.
# Approx. number of test runs needed to start at the same minute to
# produce a collision: math.sqrt(math.pi/2 * (26+10)**5) ≈ 9745.
# https://en.wikipedia.org/wiki/Birthday_attack#Mathematics
unique_hash: str = rand_string(5, lowercase=True)
return f"{datetime_suffix}-{unique_hash}"

@@ -1,273 +0,0 @@
# Copyright 2020 gRPC authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""This contains common retrying helpers (retryers).
We use tenacity as a general-purpose retrying library.
> It [tenacity] originates from a fork of retrying which is sadly no
> longer maintained. Tenacity isn't api compatible with retrying but
> adds significant new functionality and fixes a number of longstanding bugs.
> - https://tenacity.readthedocs.io/en/latest/index.html
"""
import datetime
import logging
from typing import Any, Callable, List, Optional, Tuple, Type
import tenacity
from tenacity import _utils as tenacity_utils
from tenacity import compat as tenacity_compat
from tenacity import stop
from tenacity import wait
from tenacity.retry import retry_base
retryers_logger = logging.getLogger(__name__)
# Type aliases
timedelta = datetime.timedelta
Retrying = tenacity.Retrying
CheckResultFn = Callable[[Any], bool]
_ExceptionClasses = Tuple[Type[Exception], ...]
def _build_retry_conditions(
*,
retry_on_exceptions: Optional[_ExceptionClasses] = None,
check_result: Optional[CheckResultFn] = None,
) -> List[retry_base]:
# Retry on all exceptions by default
if retry_on_exceptions is None:
retry_on_exceptions = (Exception,)
retry_conditions = [tenacity.retry_if_exception_type(retry_on_exceptions)]
if check_result is not None:
if retry_on_exceptions:
# When retry_on_exceptions is set, also catch them while executing
# check_result callback.
check_result = _safe_check_result(check_result, retry_on_exceptions)
retry_conditions.append(tenacity.retry_if_not_result(check_result))
return retry_conditions
def exponential_retryer_with_timeout(
*,
wait_min: timedelta,
wait_max: timedelta,
timeout: timedelta,
retry_on_exceptions: Optional[_ExceptionClasses] = None,
check_result: Optional[CheckResultFn] = None,
logger: Optional[logging.Logger] = None,
log_level: Optional[int] = logging.DEBUG,
) -> Retrying:
if logger is None:
logger = retryers_logger
if log_level is None:
log_level = logging.DEBUG
retry_conditions = _build_retry_conditions(
retry_on_exceptions=retry_on_exceptions, check_result=check_result
)
retry_error_callback = _on_error_callback(
timeout=timeout, check_result=check_result
)
return Retrying(
retry=tenacity.retry_any(*retry_conditions),
wait=wait.wait_exponential(
min=wait_min.total_seconds(), max=wait_max.total_seconds()
),
stop=stop.stop_after_delay(timeout.total_seconds()),
before_sleep=_before_sleep_log(logger, log_level),
retry_error_callback=retry_error_callback,
)
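# A minimal usage sketch (illustrative; `fetch_resource` stands in for any
# callable that may raise transient errors):
#
#   retryer = exponential_retryer_with_timeout(
#       wait_min=datetime.timedelta(seconds=1),
#       wait_max=datetime.timedelta(seconds=30),
#       timeout=datetime.timedelta(minutes=5),
#   )
#   resource = retryer(fetch_resource)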
def constant_retryer(
*,
wait_fixed: timedelta,
attempts: int = 0,
timeout: Optional[timedelta] = None,
retry_on_exceptions: Optional[_ExceptionClasses] = None,
check_result: Optional[CheckResultFn] = None,
logger: Optional[logging.Logger] = None,
log_level: Optional[int] = logging.DEBUG,
) -> Retrying:
if logger is None:
logger = retryers_logger
if log_level is None:
log_level = logging.DEBUG
if attempts < 1 and timeout is None:
raise ValueError("The number of attempts or the timeout must be set")
stops = []
if attempts > 0:
stops.append(stop.stop_after_attempt(attempts))
if timeout is not None:
stops.append(stop.stop_after_delay(timeout.total_seconds()))
retry_conditions = _build_retry_conditions(
retry_on_exceptions=retry_on_exceptions, check_result=check_result
)
retry_error_callback = _on_error_callback(
timeout=timeout, attempts=attempts, check_result=check_result
)
return Retrying(
retry=tenacity.retry_any(*retry_conditions),
wait=wait.wait_fixed(wait_fixed.total_seconds()),
stop=stop.stop_any(*stops),
before_sleep=_before_sleep_log(logger, log_level),
retry_error_callback=retry_error_callback,
)
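# A minimal usage sketch (illustrative; `fetch_status` stands in for any
# callable whose result is polled until it satisfies check_result):
#
#   retryer = constant_retryer(
#       wait_fixed=datetime.timedelta(seconds=10),
#       attempts=5,
#       check_result=lambda status: status == "DONE",
#   )
#   final_status = retryer(fetch_status)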
def _on_error_callback(
*,
timeout: Optional[timedelta] = None,
attempts: int = 0,
check_result: Optional[CheckResultFn] = None,
):
"""A helper to propagate the initial state to the RetryError, so that
it can assemble a helpful message containing timeout/number of attempts.
"""
def error_handler(retry_state: tenacity.RetryCallState):
raise RetryError(
retry_state,
timeout=timeout,
attempts=attempts,
check_result=check_result,
)
return error_handler
def _safe_check_result(
check_result: CheckResultFn, retry_on_exceptions: _ExceptionClasses
) -> CheckResultFn:
"""Wraps check_result callback to catch and handle retry_on_exceptions.
Normally tenacity doesn't retry when retry_if_result/retry_if_not_result
raise an error. This wraps the callback to automatically catch Exceptions
specified in the retry_on_exceptions argument.
    Ideally check_result callbacks should never throw, but when one does,
    we'd rather be annoying in the logs than break the test.
"""
def _check_result_wrapped(result):
try:
return check_result(result)
except retry_on_exceptions:
retryers_logger.warning(
(
"Result check callback %s raised an exception."
"This shouldn't happen, please handle any exceptions and "
"return return a boolean."
),
tenacity_utils.get_callback_name(check_result),
exc_info=True,
)
return False
return _check_result_wrapped
def _before_sleep_log(logger, log_level, exc_info=False):
"""Same as tenacity.before_sleep_log, but only logs primitive return values.
    Logging full values is not useful when the value is a large object dump.
"""
def log_it(retry_state):
if retry_state.outcome.failed:
ex = retry_state.outcome.exception()
verb, value = "raised", "%s: %s" % (type(ex).__name__, ex)
if exc_info:
local_exc_info = tenacity_compat.get_exc_info_from_future(
retry_state.outcome
)
else:
local_exc_info = False
else:
local_exc_info = False # exc_info does not apply when no exception
result = retry_state.outcome.result()
if isinstance(result, (int, bool, str)):
verb, value = "returned", result
else:
verb, value = "returned type", type(result)
logger.log(
log_level,
"Retrying %s in %s seconds as it %s %s.",
tenacity_utils.get_callback_name(retry_state.fn),
getattr(retry_state.next_action, "sleep"),
verb,
value,
exc_info=local_exc_info,
)
return log_it
class RetryError(tenacity.RetryError):
# Note: framework.errors.FrameworkError could be used as a mixin,
# but this would rely too much on tenacity.RetryError to not change.
last_attempt: tenacity.Future
note: str = ""
def __init__(
self,
retry_state,
*,
timeout: Optional[timedelta] = None,
attempts: int = 0,
check_result: Optional[CheckResultFn] = None,
):
last_attempt: tenacity.Future = retry_state.outcome
super().__init__(last_attempt)
callback_name = tenacity_utils.get_callback_name(retry_state.fn)
self.message = f"Retry error calling {callback_name}:"
if timeout:
self.message += f" timeout {timeout} (h:mm:ss) exceeded"
if attempts:
self.message += " or"
if attempts:
self.message += f" {attempts} attempts exhausted"
self.message += "."
if last_attempt.failed:
err = last_attempt.exception()
self.message += f" Last exception: {type(err).__name__}: {err}"
elif check_result:
self.message += " Check result callback returned False."
def result(self, *, default=None):
return (
self.last_attempt.result()
if not self.last_attempt.failed
else default
)
def exception(self, *, default=None):
return (
self.last_attempt.exception()
if self.last_attempt.failed
else default
)
# TODO(sergiitk): Remove in py3.11, this will be built-in. See PEP 678.
def add_note(self, note: str):
self.note = note
def __str__(self):
return self.message if not self.note else f"{self.message}\n{self.note}"

@@ -1,103 +0,0 @@
# Copyright 2022 The gRPC Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""The classes and predicates to assist validate test config for test cases."""
from dataclasses import dataclass
import enum
import logging
import re
from typing import Optional
from packaging import version as pkg_version
logger = logging.getLogger(__name__)
class Lang(enum.Flag):
UNKNOWN = enum.auto()
CPP = enum.auto()
GO = enum.auto()
JAVA = enum.auto()
PYTHON = enum.auto()
NODE = enum.auto()
def __str__(self):
return str(self.name).lower()
@classmethod
def from_string(cls, lang: str):
try:
return cls[lang.upper()]
except KeyError:
return cls.UNKNOWN
@dataclass
class TestConfig:
"""Describes the config for the test suite.
TODO(sergiitk): rename to LangSpec and rename skips.py to lang.py.
"""
client_lang: Lang
server_lang: Lang
version: Optional[str]
def version_gte(self, another: str) -> bool:
"""Returns a bool for whether this VERSION is >= then ANOTHER version.
Special cases:
1) Versions "master" or "dev" are always greater than ANOTHER:
- master > v1.999.x > v1.55.x
- dev > v1.999.x > v1.55.x
- dev == master
2) Versions "dev-VERSION" behave the same as the VERSION:
- dev-master > v1.999.x > v1.55.x
- dev-master == dev == master
- v1.55.x > dev-v1.54.x > v1.53.x
- dev-v1.54.x == v1.54.x
3) Unspecified version (self.version is None) is treated as "master".
"""
if self.version in ("master", "dev", "dev-master", None):
return True
# The left side is not master, so master on the right side wins.
if another == "master":
return False
# Treat "dev-VERSION" on the left side as "VERSION".
version: str = self.version
if version.startswith("dev-"):
version = version[4:]
return self._parse_version(version) >= self._parse_version(another)
def __str__(self):
return (
f"TestConfig(client_lang='{self.client_lang}', "
f"server_lang='{self.server_lang}', version={self.version!r})"
)
@staticmethod
def _parse_version(version: str) -> pkg_version.Version:
if version.endswith(".x"):
version = version[:-2]
return pkg_version.Version(version)
def get_lang(image_name: str) -> Lang:
return Lang.from_string(
re.search(r"/(\w+)-(client|server):", image_name).group(1)
)
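# A minimal usage sketch (illustrative; the image name and versions are
# placeholders). A test case could use this to skip itself when the client
# under test is too old for a feature:
#
#   config = TestConfig(
#       client_lang=get_lang("gcr.io/example/java-client:v1.55.x"),
#       server_lang=Lang.CPP,
#       version="v1.55.x",
#   )
#   if not config.version_gte("v1.56.x"):
#       ...  # skip the test case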

@@ -1,13 +0,0 @@
# Copyright 2020 gRPC authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

@@ -1,18 +0,0 @@
# Copyright 2020 gRPC authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from framework.infrastructure.gcp import api
from framework.infrastructure.gcp import compute
from framework.infrastructure.gcp import iam
from framework.infrastructure.gcp import network_security
from framework.infrastructure.gcp import network_services

@@ -1,542 +0,0 @@
# Copyright 2020 gRPC authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import abc
import contextlib
import functools
import json
import logging
from typing import Any, Dict, List, Optional
from absl import flags
from google.cloud import secretmanager_v1
from google.longrunning import operations_pb2
from google.protobuf import json_format
from google.rpc import code_pb2
from google.rpc import error_details_pb2
from google.rpc import status_pb2
from googleapiclient import discovery
import googleapiclient.errors
import googleapiclient.http
import tenacity
import yaml
import framework.helpers.highlighter
logger = logging.getLogger(__name__)
PRIVATE_API_KEY_SECRET_NAME = flags.DEFINE_string(
"private_api_key_secret_name",
default=None,
help=(
"Load Private API access key from the latest version of the secret "
"with the given name, in the format projects/*/secrets/*"
),
)
V1_DISCOVERY_URI = flags.DEFINE_string(
"v1_discovery_uri",
default=discovery.V1_DISCOVERY_URI,
help="Override v1 Discovery URI",
)
V2_DISCOVERY_URI = flags.DEFINE_string(
"v2_discovery_uri",
default=discovery.V2_DISCOVERY_URI,
help="Override v2 Discovery URI",
)
COMPUTE_V1_DISCOVERY_FILE = flags.DEFINE_string(
"compute_v1_discovery_file",
default=None,
help="Load compute v1 from discovery file",
)
GCP_UI_URL = flags.DEFINE_string(
"gcp_ui_url",
default="console.cloud.google.com",
help="Override GCP UI URL.",
)
# Type aliases
_HttpError = googleapiclient.errors.HttpError
_HttpLib2Error = googleapiclient.http.httplib2.HttpLib2Error
_HighlighterYaml = framework.helpers.highlighter.HighlighterYaml
Operation = operations_pb2.Operation
HttpRequest = googleapiclient.http.HttpRequest
class GcpApiManager:
def __init__(
self,
*,
v1_discovery_uri=None,
v2_discovery_uri=None,
compute_v1_discovery_file=None,
private_api_key_secret_name=None,
gcp_ui_url=None,
):
self.v1_discovery_uri = v1_discovery_uri or V1_DISCOVERY_URI.value
self.v2_discovery_uri = v2_discovery_uri or V2_DISCOVERY_URI.value
self.compute_v1_discovery_file = (
compute_v1_discovery_file or COMPUTE_V1_DISCOVERY_FILE.value
)
self.private_api_key_secret_name = (
private_api_key_secret_name or PRIVATE_API_KEY_SECRET_NAME.value
)
self.gcp_ui_url = gcp_ui_url or GCP_UI_URL.value
# TODO(sergiitk): add options to pass google Credentials
self._exit_stack = contextlib.ExitStack()
def close(self):
self._exit_stack.close()
@property
@functools.lru_cache(None)
def private_api_key(self):
"""
Private API key.
Return API key credential that identifies a GCP project allow-listed for
accessing private API discovery documents.
https://console.cloud.google.com/apis/credentials
This method lazy-loads the content of the key from the Secret Manager.
https://console.cloud.google.com/security/secret-manager
"""
if not self.private_api_key_secret_name:
raise ValueError(
"private_api_key_secret_name must be set to "
"access private_api_key."
)
secrets_api = self.secrets("v1")
version_resource_path = secrets_api.secret_version_path(
**secrets_api.parse_secret_path(self.private_api_key_secret_name),
secret_version="latest",
)
secret: secretmanager_v1.AccessSecretVersionResponse
secret = secrets_api.access_secret_version(name=version_resource_path)
return secret.payload.data.decode()
@functools.lru_cache(None)
def compute(self, version):
api_name = "compute"
if version == "v1":
if self.compute_v1_discovery_file:
return self._build_from_file(self.compute_v1_discovery_file)
else:
return self._build_from_discovery_v1(api_name, version)
elif version == "v1alpha":
return self._build_from_discovery_v1(api_name, "alpha")
raise NotImplementedError(f"Compute {version} not supported")
@functools.lru_cache(None)
def networksecurity(self, version):
api_name = "networksecurity"
if version == "v1alpha1":
return self._build_from_discovery_v2(
api_name,
version,
api_key=self.private_api_key,
visibility_labels=["NETWORKSECURITY_ALPHA"],
)
elif version == "v1beta1":
return self._build_from_discovery_v2(api_name, version)
raise NotImplementedError(f"Network Security {version} not supported")
@functools.lru_cache(None)
def networkservices(self, version):
api_name = "networkservices"
if version == "v1alpha1":
return self._build_from_discovery_v2(
api_name,
version,
api_key=self.private_api_key,
visibility_labels=["NETWORKSERVICES_ALPHA"],
)
elif version == "v1beta1":
return self._build_from_discovery_v2(api_name, version)
raise NotImplementedError(f"Network Services {version} not supported")
@staticmethod
@functools.lru_cache(None)
def secrets(version: str):
if version == "v1":
return secretmanager_v1.SecretManagerServiceClient()
raise NotImplementedError(f"Secret Manager {version} not supported")
@functools.lru_cache(None)
def iam(self, version: str) -> discovery.Resource:
"""Identity and Access Management (IAM) API.
https://cloud.google.com/iam/docs/reference/rest
https://googleapis.github.io/google-api-python-client/docs/dyn/iam_v1.html
"""
api_name = "iam"
if version == "v1":
return self._build_from_discovery_v1(api_name, version)
raise NotImplementedError(
f"Identity and Access Management (IAM) {version} not supported"
)
def _build_from_discovery_v1(self, api_name, version):
api = discovery.build(
api_name,
version,
cache_discovery=False,
discoveryServiceUrl=self.v1_discovery_uri,
)
self._exit_stack.enter_context(api)
return api
def _build_from_discovery_v2(
self,
api_name,
version,
*,
api_key: Optional[str] = None,
visibility_labels: Optional[List] = None,
):
params = {}
if api_key:
params["key"] = api_key
if visibility_labels:
            # Underscore-separated list of labels.
params["labels"] = "_".join(visibility_labels)
params_str = ""
if params:
params_str = "&" + "&".join(f"{k}={v}" for k, v in params.items())
api = discovery.build(
api_name,
version,
cache_discovery=False,
discoveryServiceUrl=f"{self.v2_discovery_uri}{params_str}",
)
self._exit_stack.enter_context(api)
return api
def _build_from_file(self, discovery_file):
with open(discovery_file, "r") as f:
api = discovery.build_from_document(f.read())
self._exit_stack.enter_context(api)
return api
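# A minimal usage sketch (illustrative; flag defaults are used for any
# constructor arguments omitted):
#
#   api_manager = GcpApiManager()
#   compute_v1 = api_manager.compute("v1")
#   ...
#   api_manager.close()  # Closes every API client built by this manager.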
class Error(Exception):
"""Base error class for GCP API errors."""
class ResponseError(Error):
"""The response was not a 2xx."""
reason: str
uri: str
error_details: Optional[str]
status: Optional[int]
cause: _HttpError
def __init__(self, cause: _HttpError):
# TODO(sergiitk): cleanup when we upgrade googleapiclient:
# - remove _get_reason()
# - remove error_details note
# - use status_code()
self.reason = cause._get_reason().strip() # noqa
self.uri = cause.uri
        # NOTE: error_details must be read after _get_reason().
        self.error_details = cause.error_details
self.status = None
if cause.resp and cause.resp.status:
self.status = cause.resp.status
self.cause = cause
super().__init__()
def __repr__(self):
return (
f"<ResponseError {self.status} when requesting {self.uri} "
f'returned "{self.reason}". Details: "{self.error_details}">'
)
class TransportError(Error):
"""A transport error has occurred."""
cause: _HttpLib2Error
def __init__(self, cause: _HttpLib2Error):
self.cause = cause
super().__init__()
def __repr__(self):
return f"<TransportError cause: {self.cause!r}>"
class OperationError(Error):
"""
Operation was not successful.
Assuming Operation based on Google API Style Guide:
https://cloud.google.com/apis/design/design_patterns#long_running_operations
https://github.com/googleapis/googleapis/blob/master/google/longrunning/operations.proto
"""
api_name: str
name: str
metadata: Any
code_name: code_pb2.Code
error: status_pb2.Status
def __init__(self, api_name: str, response: dict):
self.api_name = api_name
# Operation.metadata field is Any specific to the API. It may not be
# present in the default descriptor pool, and that's expected.
# To avoid json_format.ParseError, handle it separately.
self.metadata = response.pop("metadata", {})
# Must be after removing metadata field.
operation: Operation = self._parse_operation_response(response)
self.name = operation.name or "unknown"
self.code_name = code_pb2.Code.Name(operation.error.code)
self.error = operation.error
super().__init__()
@staticmethod
def _parse_operation_response(operation_response: dict) -> Operation:
try:
return json_format.ParseDict(
operation_response,
Operation(),
ignore_unknown_fields=True,
descriptor_pool=error_details_pb2.DESCRIPTOR.pool,
)
except (json_format.Error, TypeError) as e:
            # Swallow parsing errors, if any: building a correct
            # OperationError() is more important than preserving full debug
            # information. Details can still be extracted from the warning.
logger.warning(
(
"Can't parse response while processing OperationError:"
" '%r', error %r"
),
operation_response,
e,
)
return Operation()
def __str__(self):
indent_l1 = " " * 2
indent_l2 = indent_l1 * 2
result = (
f'{self.api_name} operation "{self.name}" failed.\n'
f"{indent_l1}code: {self.error.code} ({self.code_name})\n"
f'{indent_l1}message: "{self.error.message}"'
)
if self.error.details:
result += f"\n{indent_l1}details: [\n"
for any_error in self.error.details:
error_str = json_format.MessageToJson(any_error)
for line in error_str.splitlines():
result += indent_l2 + line + "\n"
result += f"{indent_l1}]"
if self.metadata:
result += f"\n metadata: \n"
metadata_str = json.dumps(self.metadata, indent=2)
for line in metadata_str.splitlines():
result += indent_l2 + line + "\n"
result = result.rstrip()
return result
class GcpProjectApiResource:
# TODO(sergiitk): move someplace better
_WAIT_FOR_OPERATION_SEC = 60 * 10
_WAIT_FIXED_SEC = 2
_GCP_API_RETRIES = 5
def __init__(self, api: discovery.Resource, project: str):
self.api: discovery.Resource = api
self.project: str = project
self._highlighter = _HighlighterYaml()
# TODO(sergiitk): in upcoming GCP refactoring, differentiate between
# _execute for LRO (Long Running Operations), and immediate operations.
def _execute(
self,
request: HttpRequest,
*,
num_retries: Optional[int] = _GCP_API_RETRIES,
) -> Dict[str, Any]:
"""Execute the immediate request.
Returns:
Unmarshalled response as a dictionary.
Raises:
ResponseError if the response was not a 2xx.
TransportError if a transport error has occurred.
"""
if num_retries is None:
num_retries = self._GCP_API_RETRIES
try:
return request.execute(num_retries=num_retries)
        except _HttpError as error:
            raise ResponseError(error) from error
        except _HttpLib2Error as error:
            raise TransportError(error) from error
def resource_pretty_format(
self,
resource: Any,
*,
highlight: bool = True,
) -> str:
"""Return a string with pretty-printed resource body."""
yaml_out: str = yaml.dump(
resource,
explicit_start=True,
explicit_end=True,
)
return self._highlighter.highlight(yaml_out) if highlight else yaml_out
def resources_pretty_format(
self,
resources: list[Any],
*,
highlight: bool = True,
) -> str:
out = []
for resource in resources:
if hasattr(resource, "name"):
out.append(f"{resource.name}:")
elif "name" in resource:
out.append(f"{resource['name']}:")
out.append(
self.resource_pretty_format(resource, highlight=highlight)
)
return "\n".join(out)
@staticmethod
def wait_for_operation(
operation_request,
test_success_fn,
timeout_sec=_WAIT_FOR_OPERATION_SEC,
wait_sec=_WAIT_FIXED_SEC,
):
retryer = tenacity.Retrying(
retry=(
tenacity.retry_if_not_result(test_success_fn)
| tenacity.retry_if_exception_type()
),
wait=tenacity.wait_fixed(wait_sec),
stop=tenacity.stop_after_delay(timeout_sec),
after=tenacity.after_log(logger, logging.DEBUG),
reraise=True,
)
return retryer(operation_request.execute)
class GcpStandardCloudApiResource(GcpProjectApiResource, metaclass=abc.ABCMeta):
GLOBAL_LOCATION = "global"
def parent(self, location: Optional[str] = GLOBAL_LOCATION):
if location is None:
location = self.GLOBAL_LOCATION
return f"projects/{self.project}/locations/{location}"
def resource_full_name(self, name, collection_name):
return f"{self.parent()}/{collection_name}/{name}"
def _create_resource(
self, collection: discovery.Resource, body: dict, **kwargs
):
logger.info(
"Creating %s resource:\n%s",
self.api_name,
self.resource_pretty_format(body),
)
create_req = collection.create(
parent=self.parent(), body=body, **kwargs
)
self._execute(create_req)
@property
@abc.abstractmethod
def api_name(self) -> str:
raise NotImplementedError
@property
@abc.abstractmethod
def api_version(self) -> str:
raise NotImplementedError
def _get_resource(self, collection: discovery.Resource, full_name):
resource = collection.get(name=full_name).execute()
logger.info(
"Loaded %s:\n%s", full_name, self.resource_pretty_format(resource)
)
return resource
def _delete_resource(
self, collection: discovery.Resource, full_name: str
) -> bool:
logger.debug("Deleting %s", full_name)
try:
self._execute(collection.delete(name=full_name))
return True
except _HttpError as error:
if error.resp and error.resp.status == 404:
logger.debug("%s not deleted since it doesn't exist", full_name)
else:
logger.warning("Failed to delete %s, %r", full_name, error)
return False
# TODO(sergiitk): Use ResponseError and TransportError
def _execute( # pylint: disable=arguments-differ
self,
request: HttpRequest,
timeout_sec: int = GcpProjectApiResource._WAIT_FOR_OPERATION_SEC,
):
operation = request.execute(num_retries=self._GCP_API_RETRIES)
logger.debug("Operation %s", operation)
self._wait(operation["name"], timeout_sec)
def _wait(
self,
operation_id: str,
timeout_sec: int = GcpProjectApiResource._WAIT_FOR_OPERATION_SEC,
):
logger.info(
"Waiting %s sec for %s operation id: %s",
timeout_sec,
self.api_name,
operation_id,
)
op_request = (
self.api.projects().locations().operations().get(name=operation_id)
)
operation = self.wait_for_operation(
operation_request=op_request,
test_success_fn=lambda result: result["done"],
timeout_sec=timeout_sec,
)
logger.debug("Completed operation: %s", operation)
if "error" in operation:
raise OperationError(self.api_name, operation)

@@ -1,637 +0,0 @@
# Copyright 2020 gRPC authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import dataclasses
import datetime
import enum
import logging
from typing import Any, Dict, List, Optional, Set
from googleapiclient import discovery
import googleapiclient.errors
import httplib2
import framework.errors
from framework.helpers import retryers
from framework.infrastructure import gcp
logger = logging.getLogger(__name__)
DEBUG_HEADER_IN_RESPONSE = "x-encrypted-debug-headers"
DEBUG_HEADER_KEY = "X-Return-Encrypted-Headers"
class ComputeV1(
gcp.api.GcpProjectApiResource
): # pylint: disable=too-many-public-methods
# TODO(sergiitk): move someplace better
_WAIT_FOR_BACKEND_SEC = 60 * 10
_WAIT_FOR_BACKEND_SLEEP_SEC = 4
_WAIT_FOR_OPERATION_SEC = 60 * 10
gfe_debug_header: Optional[str]
@dataclasses.dataclass(frozen=True)
class GcpResource:
name: str
url: str
@dataclasses.dataclass(frozen=True)
class ZonalGcpResource(GcpResource):
zone: str
def __init__(
self,
api_manager: gcp.api.GcpApiManager,
project: str,
gfe_debug_header: Optional[str] = None,
version: str = "v1",
):
super().__init__(api_manager.compute(version), project)
self.gfe_debug_header = gfe_debug_header
class HealthCheckProtocol(enum.Enum):
TCP = enum.auto()
GRPC = enum.auto()
class BackendServiceProtocol(enum.Enum):
HTTP2 = enum.auto()
GRPC = enum.auto()
def create_health_check(
self,
name: str,
protocol: HealthCheckProtocol,
*,
port: Optional[int] = None,
) -> "GcpResource":
if protocol is self.HealthCheckProtocol.TCP:
health_check_field = "tcpHealthCheck"
elif protocol is self.HealthCheckProtocol.GRPC:
health_check_field = "grpcHealthCheck"
else:
raise TypeError(f"Unexpected Health Check protocol: {protocol}")
health_check_settings = {}
if port is None:
health_check_settings["portSpecification"] = "USE_SERVING_PORT"
else:
health_check_settings["portSpecification"] = "USE_FIXED_PORT"
health_check_settings["port"] = port
return self._insert_resource(
self.api.healthChecks(),
{
"name": name,
"type": protocol.name,
health_check_field: health_check_settings,
},
)
def list_health_check(self):
return self._list_resource(self.api.healthChecks())
def delete_health_check(self, name):
self._delete_resource(self.api.healthChecks(), "healthCheck", name)
def create_firewall_rule(
self,
name: str,
network_url: str,
source_ranges: List[str],
ports: List[str],
) -> Optional["GcpResource"]:
try:
return self._insert_resource(
self.api.firewalls(),
{
"allowed": [{"IPProtocol": "tcp", "ports": ports}],
"direction": "INGRESS",
"name": name,
"network": network_url,
"priority": 1000,
"sourceRanges": source_ranges,
"targetTags": ["allow-health-checks"],
},
)
except googleapiclient.errors.HttpError as http_error:
# TODO(lidiz) use status_code() when we upgrade googleapiclient
if http_error.resp.status == 409:
logger.debug("Firewall rule %s already existed", name)
return None
else:
raise
def delete_firewall_rule(self, name):
self._delete_resource(self.api.firewalls(), "firewall", name)
def create_backend_service_traffic_director(
self,
name: str,
health_check: "GcpResource",
affinity_header: Optional[str] = None,
protocol: Optional[BackendServiceProtocol] = None,
subset_size: Optional[int] = None,
locality_lb_policies: Optional[List[dict]] = None,
outlier_detection: Optional[dict] = None,
) -> "GcpResource":
if not isinstance(protocol, self.BackendServiceProtocol):
raise TypeError(f"Unexpected Backend Service protocol: {protocol}")
body = {
"name": name,
"loadBalancingScheme": "INTERNAL_SELF_MANAGED", # Traffic Director
"healthChecks": [health_check.url],
"protocol": protocol.name,
}
        # If the affinity header is specified, configure the backend service
        # to support affinity, and set the affinity header to the one given.
if affinity_header:
body["sessionAffinity"] = "HEADER_FIELD"
body["localityLbPolicy"] = "RING_HASH"
body["consistentHash"] = {
"httpHeaderName": affinity_header,
}
if subset_size:
body["subsetting"] = {
"policy": "CONSISTENT_HASH_SUBSETTING",
"subsetSize": subset_size,
}
if locality_lb_policies:
body["localityLbPolicies"] = locality_lb_policies
if outlier_detection:
body["outlierDetection"] = outlier_detection
return self._insert_resource(self.api.backendServices(), body)
def get_backend_service_traffic_director(self, name: str) -> "GcpResource":
return self._get_resource(
self.api.backendServices(), backendService=name
)
def patch_backend_service(self, backend_service, body, **kwargs):
self._patch_resource(
collection=self.api.backendServices(),
backendService=backend_service.name,
body=body,
**kwargs,
)
def backend_service_patch_backends(
self,
backend_service,
backends,
max_rate_per_endpoint: Optional[int] = None,
):
if max_rate_per_endpoint is None:
max_rate_per_endpoint = 5
backend_list = [
{
"group": backend.url,
"balancingMode": "RATE",
"maxRatePerEndpoint": max_rate_per_endpoint,
}
for backend in backends
]
self._patch_resource(
collection=self.api.backendServices(),
body={"backends": backend_list},
backendService=backend_service.name,
)
def backend_service_remove_all_backends(self, backend_service):
self._patch_resource(
collection=self.api.backendServices(),
body={"backends": []},
backendService=backend_service.name,
)
def delete_backend_service(self, name):
self._delete_resource(
self.api.backendServices(), "backendService", name
)
def create_url_map(
self,
name: str,
matcher_name: str,
src_hosts,
dst_default_backend_service: "GcpResource",
dst_host_rule_match_backend_service: Optional["GcpResource"] = None,
) -> "GcpResource":
if dst_host_rule_match_backend_service is None:
dst_host_rule_match_backend_service = dst_default_backend_service
return self._insert_resource(
self.api.urlMaps(),
{
"name": name,
"defaultService": dst_default_backend_service.url,
"hostRules": [
{
"hosts": src_hosts,
"pathMatcher": matcher_name,
}
],
"pathMatchers": [
{
"name": matcher_name,
"defaultService": dst_host_rule_match_backend_service.url,
}
],
},
)
def create_url_map_with_content(self, url_map_body: Any) -> "GcpResource":
return self._insert_resource(self.api.urlMaps(), url_map_body)
def patch_url_map(self, url_map: "GcpResource", body, **kwargs):
self._patch_resource(
collection=self.api.urlMaps(),
urlMap=url_map.name,
body=body,
**kwargs,
)
def delete_url_map(self, name):
self._delete_resource(self.api.urlMaps(), "urlMap", name)
def create_target_grpc_proxy(
self,
name: str,
url_map: "GcpResource",
validate_for_proxyless: bool = True,
) -> "GcpResource":
return self._insert_resource(
self.api.targetGrpcProxies(),
{
"name": name,
"url_map": url_map.url,
"validate_for_proxyless": validate_for_proxyless,
},
)
def delete_target_grpc_proxy(self, name):
self._delete_resource(
self.api.targetGrpcProxies(), "targetGrpcProxy", name
)
def create_target_http_proxy(
self,
name: str,
url_map: "GcpResource",
) -> "GcpResource":
return self._insert_resource(
self.api.targetHttpProxies(),
{
"name": name,
"url_map": url_map.url,
},
)
def delete_target_http_proxy(self, name):
self._delete_resource(
self.api.targetHttpProxies(), "targetHttpProxy", name
)
def create_forwarding_rule(
self,
name: str,
src_port: int,
target_proxy: "GcpResource",
network_url: str,
*,
ip_address: str = "0.0.0.0",
) -> "GcpResource":
return self._insert_resource(
self.api.globalForwardingRules(),
{
"name": name,
"loadBalancingScheme": "INTERNAL_SELF_MANAGED", # Traffic Director
"portRange": src_port,
"IPAddress": ip_address,
"network": network_url,
"target": target_proxy.url,
},
)
def exists_forwarding_rule(self, src_port) -> bool:
# TODO(sergiitk): Better approach for confirming the port is available.
        # It's possible a rule allocates an actual port range, e.g. 8000-9000,
        # and this wouldn't catch it. For now, we assume there are no
        # port ranges used in the project.
filter_str = (
f'(portRange eq "{src_port}-{src_port}") '
            '(IPAddress eq "0.0.0.0") '
'(loadBalancingScheme eq "INTERNAL_SELF_MANAGED")'
)
return self._exists_resource(
self.api.globalForwardingRules(), resource_filter=filter_str
)
def delete_forwarding_rule(self, name):
self._delete_resource(
self.api.globalForwardingRules(), "forwardingRule", name
)
def wait_for_network_endpoint_group(
self,
name: str,
zone: str,
*,
timeout_sec=_WAIT_FOR_BACKEND_SEC,
wait_sec=_WAIT_FOR_BACKEND_SLEEP_SEC,
):
retryer = retryers.constant_retryer(
wait_fixed=datetime.timedelta(seconds=wait_sec),
timeout=datetime.timedelta(seconds=timeout_sec),
check_result=lambda neg: neg and neg.get("size", 0) > 0,
)
network_endpoint_group = retryer(
self._retry_load_network_endpoint_group, name, zone
)
# TODO(sergiitk): dataclass
return self.ZonalGcpResource(
network_endpoint_group["name"],
network_endpoint_group["selfLink"],
zone,
)
def _retry_load_network_endpoint_group(self, name: str, zone: str):
try:
neg = self.get_network_endpoint_group(name, zone)
logger.debug(
"Waiting for endpoints: NEG %s in zone %s, current count %s",
neg["name"],
zone,
neg.get("size"),
)
except googleapiclient.errors.HttpError as error:
# noinspection PyProtectedMember
reason = error._get_reason()
logger.debug(
"Retrying NEG load, got %s, details %s",
error.resp.status,
reason,
)
raise
return neg
def get_network_endpoint_group(self, name, zone):
neg = (
self.api.networkEndpointGroups()
.get(project=self.project, networkEndpointGroup=name, zone=zone)
.execute()
)
# TODO(sergiitk): dataclass
return neg
def wait_for_backends_healthy_status(
self,
backend_service: GcpResource,
backends: Set[ZonalGcpResource],
*,
timeout_sec: int = _WAIT_FOR_BACKEND_SEC,
wait_sec: int = _WAIT_FOR_BACKEND_SLEEP_SEC,
) -> None:
if not backends:
raise ValueError("The list of backends to wait on is empty")
timeout = datetime.timedelta(seconds=timeout_sec)
retryer = retryers.constant_retryer(
wait_fixed=datetime.timedelta(seconds=wait_sec),
timeout=timeout,
check_result=lambda result: result,
)
pending = set(backends)
try:
retryer(self._retry_backends_health, backend_service, pending)
except retryers.RetryError as retry_err:
unhealthy_backends: str = ",".join(
[backend.name for backend in pending]
)
# Attempt to load backend health info for better debug info.
try:
unhealthy = []
# Everything left in pending was unhealthy on the last retry.
for backend in pending:
# It's possible the health status has changed since we
# gave up retrying, but this should be very rare.
health_status = self.get_backend_service_backend_health(
backend_service,
backend,
)
unhealthy.append(
{"name": backend.name, "health_status": health_status}
)
                # Override the plain list of unhealthy backend names with
                # the one showing the latest backend statuses.
unhealthy_backends = self.resources_pretty_format(
unhealthy,
highlight=False,
)
except Exception as error: # noqa pylint: disable=broad-except
logger.debug(
"Couldn't load backend health info, plain list name"
"will be printed instead. Error: %r",
error,
)
retry_err.add_note(
framework.errors.FrameworkError.note_blanket_error_info_below(
"One or several NEGs (Network Endpoint Groups) didn't"
" report HEALTHY status within expected timeout.",
info_below=(
f"Timeout {timeout} (h:mm:ss) waiting for backend"
f" service '{backend_service.name}' to report all NEGs"
" in the HEALTHY status:"
f" {[backend.name for backend in backends]}."
f"\nUnhealthy backends:\n{unhealthy_backends}"
),
)
)
raise
def _retry_backends_health(
self, backend_service: GcpResource, pending: Set[ZonalGcpResource]
):
for backend in pending:
result = self.get_backend_service_backend_health(
backend_service, backend
)
if "healthStatus" not in result:
logger.debug(
"Waiting for instances: backend %s, zone %s",
backend.name,
backend.zone,
)
continue
backend_healthy = True
for instance in result["healthStatus"]:
logger.debug(
"Backend %s in zone %s: instance %s:%s health: %s",
backend.name,
backend.zone,
instance["ipAddress"],
instance["port"],
instance["healthState"],
)
if instance["healthState"] != "HEALTHY":
backend_healthy = False
if backend_healthy:
logger.info(
"Backend %s in zone %s reported healthy",
backend.name,
backend.zone,
)
pending.remove(backend)
return not pending
def get_backend_service_backend_health(self, backend_service, backend):
return (
self.api.backendServices()
.getHealth(
project=self.project,
backendService=backend_service.name,
body={"group": backend.url},
)
.execute()
)
def _get_resource(
self, collection: discovery.Resource, **kwargs
) -> "GcpResource":
resp = collection.get(project=self.project, **kwargs).execute()
logger.info(
"Loaded compute resource:\n%s", self.resource_pretty_format(resp)
)
return self.GcpResource(resp["name"], resp["selfLink"])
def _exists_resource(
self, collection: discovery.Resource, resource_filter: str
) -> bool:
resp = collection.list(
project=self.project, filter=resource_filter, maxResults=1
).execute(num_retries=self._GCP_API_RETRIES)
if "kind" not in resp:
# TODO(sergiitk): better error
raise ValueError('List response "kind" is missing')
return "items" in resp and resp["items"]
def _insert_resource(
self, collection: discovery.Resource, body: Dict[str, Any]
) -> "GcpResource":
logger.info(
"Creating compute resource:\n%s", self.resource_pretty_format(body)
)
resp = self._execute(collection.insert(project=self.project, body=body))
return self.GcpResource(body["name"], resp["targetLink"])
def _patch_resource(self, collection, body, **kwargs):
logger.info(
"Patching compute resource:\n%s", self.resource_pretty_format(body)
)
self._execute(
collection.patch(project=self.project, body=body, **kwargs)
)
def _list_resource(self, collection: discovery.Resource):
return collection.list(project=self.project).execute(
num_retries=self._GCP_API_RETRIES
)
def _delete_resource(
self,
collection: discovery.Resource,
resource_type: str,
resource_name: str,
) -> bool:
try:
params = {"project": self.project, resource_type: resource_name}
self._execute(collection.delete(**params))
return True
except googleapiclient.errors.HttpError as error:
if error.resp and error.resp.status == 404:
logger.debug(
"Resource %s %s not deleted since it doesn't exist",
resource_type,
resource_name,
)
else:
logger.warning(
'Failed to delete %s "%s", %r',
resource_type,
resource_name,
error,
)
return False
@staticmethod
def _operation_status_done(operation):
return "status" in operation and operation["status"] == "DONE"
@staticmethod
def _log_debug_header(resp: httplib2.Response):
if (
DEBUG_HEADER_IN_RESPONSE in resp
and resp.status >= 300
and resp.status != 404
):
logger.info(
"Received GCP debug headers: %s",
resp[DEBUG_HEADER_IN_RESPONSE],
)
def _execute( # pylint: disable=arguments-differ
self, request, *, timeout_sec=_WAIT_FOR_OPERATION_SEC
):
if self.gfe_debug_header:
logger.debug(
"Adding debug headers for method: %s", request.methodId
)
request.headers[DEBUG_HEADER_KEY] = self.gfe_debug_header
request.add_response_callback(self._log_debug_header)
operation = request.execute(num_retries=self._GCP_API_RETRIES)
logger.debug("Operation %s", operation)
return self._wait(operation["name"], timeout_sec)
def _wait(
self, operation_id: str, timeout_sec: int = _WAIT_FOR_OPERATION_SEC
) -> dict:
logger.info(
"Waiting %s sec for compute operation id: %s",
timeout_sec,
operation_id,
)
# TODO(sergiitk) try using wait() here
# https://googleapis.github.io/google-api-python-client/docs/dyn/compute_v1.globalOperations.html#wait
op_request = self.api.globalOperations().get(
project=self.project, operation=operation_id
)
operation = self.wait_for_operation(
operation_request=op_request,
test_success_fn=self._operation_status_done,
timeout_sec=timeout_sec,
)
logger.debug("Completed operation: %s", operation)
if "error" in operation:
# This shouldn't normally happen: gcp library raises on errors.
raise Exception(
f"Compute operation {operation_id} failed: {operation}"
)
return operation
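# A minimal usage sketch (illustrative; the project and resource names are
# placeholders). A typical Traffic Director setup starts with a health check
# and a backend service that references it:
#
#   compute = ComputeV1(gcp.api.GcpApiManager(), project="my-project")
#   health_check = compute.create_health_check(
#       "my-health-check", compute.HealthCheckProtocol.TCP, port=8080
#   )
#   backend_service = compute.create_backend_service_traffic_director(
#       "my-backend-service",
#       health_check=health_check,
#       protocol=compute.BackendServiceProtocol.GRPC,
#   )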

@@ -1,361 +0,0 @@
# Copyright 2021 gRPC authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import dataclasses
import datetime
import functools
import logging
from typing import Any, Dict, FrozenSet, Optional
from framework.helpers import retryers
from framework.infrastructure import gcp
logger = logging.getLogger(__name__)
# Type aliases
_timedelta = datetime.timedelta
_HttpRequest = gcp.api.HttpRequest
class EtagConflict(gcp.api.Error):
"""
Indicates concurrent policy changes.
https://cloud.google.com/iam/docs/policies#etag
"""
def handle_etag_conflict(func):
    @functools.wraps(func)
    def wrap_retry_on_etag_conflict(*args, **kwargs):
retryer = retryers.exponential_retryer_with_timeout(
retry_on_exceptions=(EtagConflict, gcp.api.TransportError),
wait_min=_timedelta(seconds=1),
wait_max=_timedelta(seconds=10),
timeout=_timedelta(minutes=2),
)
return retryer(func, *args, **kwargs)
return wrap_retry_on_etag_conflict
def _replace_binding(
policy: "Policy", binding: "Policy.Binding", new_binding: "Policy.Binding"
) -> "Policy":
new_bindings = set(policy.bindings)
new_bindings.discard(binding)
new_bindings.add(new_binding)
# pylint: disable=too-many-function-args # No idea why pylint is like that.
return dataclasses.replace(policy, bindings=frozenset(new_bindings))
@dataclasses.dataclass(frozen=True)
class ServiceAccount:
"""An IAM service account.
https://cloud.google.com/iam/docs/reference/rest/v1/projects.serviceAccounts
Note: "etag" field is skipped because it's deprecated
"""
name: str
projectId: str
uniqueId: str
email: str
oauth2ClientId: str
displayName: str = ""
description: str = ""
disabled: bool = False
@classmethod
def from_response(cls, response: Dict[str, Any]) -> "ServiceAccount":
return cls(
name=response["name"],
projectId=response["projectId"],
uniqueId=response["uniqueId"],
email=response["email"],
oauth2ClientId=response["oauth2ClientId"],
description=response.get("description", ""),
displayName=response.get("displayName", ""),
disabled=response.get("disabled", False),
)
def as_dict(self) -> Dict[str, Any]:
return dataclasses.asdict(self)
@dataclasses.dataclass(frozen=True)
class Expr:
"""
Represents a textual expression in the Common Expression Language syntax.
https://cloud.google.com/iam/docs/reference/rest/v1/Expr
"""
expression: str
title: str = ""
description: str = ""
location: str = ""
@classmethod
def from_response(cls, response: Dict[str, Any]) -> "Expr":
return cls(**response)
def as_dict(self) -> Dict[str, Any]:
return dataclasses.asdict(self)
@dataclasses.dataclass(frozen=True)
class Policy:
"""An Identity and Access Management (IAM) policy, which specifies
access controls for Google Cloud resources.
https://cloud.google.com/iam/docs/reference/rest/v1/Policy
Note: auditConfigs not supported by this implementation.
"""
@dataclasses.dataclass(frozen=True)
class Binding:
"""Policy Binding. Associates members with a role.
https://cloud.google.com/iam/docs/reference/rest/v1/Policy#binding
"""
role: str
members: FrozenSet[str]
condition: Optional[Expr] = None
@classmethod
def from_response(cls, response: Dict[str, Any]) -> "Policy.Binding":
fields = {
"role": response["role"],
"members": frozenset(response.get("members", [])),
}
if "condition" in response:
fields["condition"] = Expr.from_response(response["condition"])
return cls(**fields)
def as_dict(self) -> Dict[str, Any]:
result = {
"role": self.role,
"members": list(self.members),
}
if self.condition is not None:
result["condition"] = self.condition.as_dict()
return result
bindings: FrozenSet[Binding]
etag: str
version: Optional[int] = None
@functools.lru_cache(maxsize=128)
def find_binding_for_role(
self, role: str, condition: Optional[Expr] = None
) -> Optional["Policy.Binding"]:
results = (
binding
for binding in self.bindings
if binding.role == role and binding.condition == condition
)
return next(results, None)
@classmethod
def from_response(cls, response: Dict[str, Any]) -> "Policy":
bindings = frozenset(
cls.Binding.from_response(b) for b in response.get("bindings", [])
)
return cls(
bindings=bindings,
etag=response["etag"],
version=response.get("version"),
)
def as_dict(self) -> Dict[str, Any]:
result = {
"bindings": [binding.as_dict() for binding in self.bindings],
"etag": self.etag,
}
if self.version is not None:
result["version"] = self.version
return result
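# A minimal round-trip sketch (illustrative; the etag and member values are
# placeholders): a REST policy response parses into these frozen dataclasses
# and serializes back into a setIamPolicy request body.
#
#   policy = Policy.from_response({
#       "etag": "BwXkR2fXkXY=",
#       "bindings": [{
#           "role": "roles/iam.workloadIdentityUser",
#           "members": ["serviceAccount:sa@example.iam.gserviceaccount.com"],
#       }],
#   })
#   body = {"policy": policy.as_dict()}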
class IamV1(gcp.api.GcpProjectApiResource):
"""
Identity and Access Management (IAM) API.
https://cloud.google.com/iam/docs/reference/rest
"""
_service_accounts: gcp.api.discovery.Resource
# Operations that affect conditional role bindings must specify version 3.
# Otherwise conditions are omitted, and role names returned with a suffix,
# f.e. roles/iam.workloadIdentityUser_withcond_f1ec33c9beb41857dbf0
# https://cloud.google.com/iam/docs/reference/rest/v1/Policy#FIELDS.version
POLICY_VERSION: int = 3
def __init__(self, api_manager: gcp.api.GcpApiManager, project: str):
super().__init__(api_manager.iam("v1"), project)
# Shortcut to projects/*/serviceAccounts/ endpoints
self._service_accounts = self.api.projects().serviceAccounts()
def service_account_resource_name(self, account) -> str:
"""
Returns full resource name of the service account.
The resource name of the service account in the following format:
projects/{PROJECT_ID}/serviceAccounts/{ACCOUNT}.
The ACCOUNT value can be the email address or the uniqueId of the
service account.
Ref https://cloud.google.com/iam/docs/reference/rest/v1/projects.serviceAccounts/get
Args:
account: The ACCOUNT value
"""
return f"projects/{self.project}/serviceAccounts/{account}"
def get_service_account(self, account: str) -> ServiceAccount:
resource_name = self.service_account_resource_name(account)
request: _HttpRequest = self._service_accounts.get(name=resource_name)
response: Dict[str, Any] = self._execute(request)
logger.debug(
"Loaded Service Account:\n%s", self.resource_pretty_format(response)
)
return ServiceAccount.from_response(response)
def get_service_account_iam_policy(self, account: str) -> Policy:
resource_name = self.service_account_resource_name(account)
request: _HttpRequest = self._service_accounts.getIamPolicy(
resource=resource_name,
options_requestedPolicyVersion=self.POLICY_VERSION,
)
response: Dict[str, Any] = self._execute(request)
logger.debug(
"Loaded Service Account Policy:\n%s",
self.resource_pretty_format(response),
)
return Policy.from_response(response)
def set_service_account_iam_policy(
self, account: str, policy: Policy
) -> Policy:
"""Sets the IAM policy that is attached to a service account.
https://cloud.google.com/iam/docs/reference/rest/v1/projects.serviceAccounts/setIamPolicy
"""
resource_name = self.service_account_resource_name(account)
body = {"policy": policy.as_dict()}
logger.debug(
"Updating Service Account %s policy:\n%s",
account,
self.resource_pretty_format(body),
)
try:
request: _HttpRequest = self._service_accounts.setIamPolicy(
resource=resource_name, body=body
)
response: Dict[str, Any] = self._execute(request)
return Policy.from_response(response)
except gcp.api.ResponseError as error:
if error.status == 409:
# https://cloud.google.com/iam/docs/policies#etag
logger.debug(error)
raise EtagConflict from error
raise
@handle_etag_conflict
def add_service_account_iam_policy_binding(
self, account: str, role: str, member: str
) -> None:
"""Add an IAM policy binding to an IAM service account.
See for details on updating policy bindings:
https://cloud.google.com/iam/docs/reference/rest/v1/projects.serviceAccounts/setIamPolicy
"""
policy: Policy = self.get_service_account_iam_policy(account)
binding: Optional[Policy.Binding] = policy.find_binding_for_role(role)
if binding and member in binding.members:
logger.debug(
"Member %s already has role %s for Service Account %s",
member,
role,
account,
)
return
if binding is None:
updated_binding = Policy.Binding(role, frozenset([member]))
else:
updated_members: FrozenSet[str] = binding.members.union({member})
updated_binding: Policy.Binding = (
dataclasses.replace( # pylint: disable=too-many-function-args
binding, members=updated_members
)
)
updated_policy: Policy = _replace_binding(
policy, binding, updated_binding
)
self.set_service_account_iam_policy(account, updated_policy)
logger.debug(
"Role %s granted to member %s for Service Account %s",
role,
member,
account,
)
@handle_etag_conflict
def remove_service_account_iam_policy_binding(
self, account: str, role: str, member: str
) -> None:
"""Remove an IAM policy binding from the IAM policy of a service
account.
See for details on updating policy bindings:
https://cloud.google.com/iam/docs/reference/rest/v1/projects.serviceAccounts/setIamPolicy
"""
policy: Policy = self.get_service_account_iam_policy(account)
binding: Optional[Policy.Binding] = policy.find_binding_for_role(role)
if binding is None:
logger.debug(
"Noop: Service Account %s has no bindings for role %s",
account,
role,
)
return
if member not in binding.members:
logger.debug(
"Noop: Service Account %s binding for role %s has no member %s",
account,
role,
member,
)
return
updated_members: FrozenSet[str] = binding.members.difference({member})
updated_binding: Policy.Binding = (
dataclasses.replace( # pylint: disable=too-many-function-args
binding, members=updated_members
)
)
updated_policy: Policy = _replace_binding(
policy, binding, updated_binding
)
self.set_service_account_iam_policy(account, updated_policy)
logger.debug(
"Role %s revoked from member %s for Service Account %s",
role,
member,
account,
)
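
A minimal usage sketch of the IamV1 wrapper above, assuming GcpApiManager constructs with defaults; the project, account, and member values are hypothetical placeholders:

from framework.infrastructure import gcp

api_manager = gcp.api.GcpApiManager()
iam = IamV1(api_manager, project="example-project")
account = "psm-server@example-project.iam.gserviceaccount.com"
member = "serviceAccount:example-project.svc.id.goog[psm-ns/psm-server]"
# Etag conflicts are retried by the @handle_etag_conflict decorator.
iam.add_service_account_iam_policy_binding(
    account, "roles/iam.workloadIdentityUser", member
)
# ... run tests that rely on the workload identity binding ...
iam.remove_service_account_iam_policy_binding(
    account, "roles/iam.workloadIdentityUser", member
)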

@ -1,221 +0,0 @@
# Copyright 2020 gRPC authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import abc
import dataclasses
import logging
from typing import Any, Dict
from google.rpc import code_pb2
import tenacity
from framework.infrastructure import gcp
logger = logging.getLogger(__name__)
# Type aliases
GcpResource = gcp.compute.ComputeV1.GcpResource
@dataclasses.dataclass(frozen=True)
class ServerTlsPolicy:
url: str
name: str
server_certificate: dict
mtls_policy: dict
update_time: str
create_time: str
@classmethod
def from_response(
cls, name: str, response: Dict[str, Any]
) -> "ServerTlsPolicy":
return cls(
name=name,
url=response["name"],
server_certificate=response.get("serverCertificate", {}),
mtls_policy=response.get("mtlsPolicy", {}),
create_time=response["createTime"],
update_time=response["updateTime"],
)
@dataclasses.dataclass(frozen=True)
class ClientTlsPolicy:
url: str
name: str
client_certificate: dict
server_validation_ca: list
update_time: str
create_time: str
@classmethod
def from_response(
cls, name: str, response: Dict[str, Any]
) -> "ClientTlsPolicy":
return cls(
name=name,
url=response["name"],
client_certificate=response.get("clientCertificate", {}),
server_validation_ca=response.get("serverValidationCa", []),
create_time=response["createTime"],
update_time=response["updateTime"],
)
@dataclasses.dataclass(frozen=True)
class AuthorizationPolicy:
url: str
name: str
update_time: str
create_time: str
action: str
rules: list
@classmethod
def from_response(
cls, name: str, response: Dict[str, Any]
) -> "AuthorizationPolicy":
return cls(
name=name,
url=response["name"],
create_time=response["createTime"],
update_time=response["updateTime"],
action=response["action"],
rules=response.get("rules", []),
)
class _NetworkSecurityBase(
gcp.api.GcpStandardCloudApiResource, metaclass=abc.ABCMeta
):
"""Base class for NetworkSecurity APIs."""
# TODO(https://github.com/grpc/grpc/issues/29532) remove pylint disable
# pylint: disable=abstract-method
def __init__(self, api_manager: gcp.api.GcpApiManager, project: str):
super().__init__(api_manager.networksecurity(self.api_version), project)
# Shortcut to projects/*/locations/ endpoints
self._api_locations = self.api.projects().locations()
@property
def api_name(self) -> str:
return "networksecurity"
def _execute(
self, *args, **kwargs
): # pylint: disable=signature-differs,arguments-differ
# Workaround TD bug: throttled operations are reported as internal.
# Ref b/175345578
retryer = tenacity.Retrying(
retry=tenacity.retry_if_exception(self._operation_internal_error),
wait=tenacity.wait_fixed(10),
stop=tenacity.stop_after_delay(5 * 60),
before_sleep=tenacity.before_sleep_log(logger, logging.DEBUG),
reraise=True,
)
retryer(super()._execute, *args, **kwargs)
@staticmethod
def _operation_internal_error(exception):
return (
isinstance(exception, gcp.api.OperationError)
and exception.error.code == code_pb2.INTERNAL
)
class NetworkSecurityV1Beta1(_NetworkSecurityBase):
"""NetworkSecurity API v1beta1."""
SERVER_TLS_POLICIES = "serverTlsPolicies"
CLIENT_TLS_POLICIES = "clientTlsPolicies"
AUTHZ_POLICIES = "authorizationPolicies"
@property
def api_version(self) -> str:
return "v1beta1"
def create_server_tls_policy(self, name: str, body: dict) -> GcpResource:
return self._create_resource(
collection=self._api_locations.serverTlsPolicies(),
body=body,
serverTlsPolicyId=name,
)
def get_server_tls_policy(self, name: str) -> ServerTlsPolicy:
response = self._get_resource(
collection=self._api_locations.serverTlsPolicies(),
full_name=self.resource_full_name(name, self.SERVER_TLS_POLICIES),
)
return ServerTlsPolicy.from_response(name, response)
def delete_server_tls_policy(self, name: str) -> bool:
return self._delete_resource(
collection=self._api_locations.serverTlsPolicies(),
full_name=self.resource_full_name(name, self.SERVER_TLS_POLICIES),
)
def create_client_tls_policy(self, name: str, body: dict) -> GcpResource:
return self._create_resource(
collection=self._api_locations.clientTlsPolicies(),
body=body,
clientTlsPolicyId=name,
)
def get_client_tls_policy(self, name: str) -> ClientTlsPolicy:
response = self._get_resource(
collection=self._api_locations.clientTlsPolicies(),
full_name=self.resource_full_name(name, self.CLIENT_TLS_POLICIES),
)
return ClientTlsPolicy.from_response(name, response)
def delete_client_tls_policy(self, name: str) -> bool:
return self._delete_resource(
collection=self._api_locations.clientTlsPolicies(),
full_name=self.resource_full_name(name, self.CLIENT_TLS_POLICIES),
)
def create_authz_policy(self, name: str, body: dict) -> GcpResource:
return self._create_resource(
collection=self._api_locations.authorizationPolicies(),
body=body,
authorizationPolicyId=name,
)
    def get_authz_policy(self, name: str) -> AuthorizationPolicy:
        response = self._get_resource(
            collection=self._api_locations.authorizationPolicies(),
            full_name=self.resource_full_name(name, self.AUTHZ_POLICIES),
        )
        return AuthorizationPolicy.from_response(name, response)
def delete_authz_policy(self, name: str) -> bool:
return self._delete_resource(
collection=self._api_locations.authorizationPolicies(),
full_name=self.resource_full_name(name, self.AUTHZ_POLICIES),
)
class NetworkSecurityV1Alpha1(NetworkSecurityV1Beta1):
"""NetworkSecurity API v1alpha1.
    Note: extending the v1beta1 class presumes that v1beta1 is just the
    v1alpha1 API graduated into a more stable version. This is true in most
    cases. However, the v1alpha1 class can always override and reimplement
    incompatible methods.
"""
@property
def api_version(self) -> str:
return "v1alpha1"

@ -1,461 +0,0 @@
# Copyright 2020 gRPC authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import abc
import dataclasses
import logging
from typing import Any, Dict, List, Optional, Tuple
from google.rpc import code_pb2
import tenacity
from framework.infrastructure import gcp
logger = logging.getLogger(__name__)
# Type aliases
GcpResource = gcp.compute.ComputeV1.GcpResource
@dataclasses.dataclass(frozen=True)
class EndpointPolicy:
url: str
name: str
type: str
traffic_port_selector: dict
endpoint_matcher: dict
update_time: str
create_time: str
http_filters: Optional[dict] = None
server_tls_policy: Optional[str] = None
@classmethod
def from_response(
cls, name: str, response: Dict[str, Any]
) -> "EndpointPolicy":
return cls(
name=name,
url=response["name"],
type=response["type"],
server_tls_policy=response.get("serverTlsPolicy", None),
traffic_port_selector=response["trafficPortSelector"],
endpoint_matcher=response["endpointMatcher"],
http_filters=response.get("httpFilters", None),
update_time=response["updateTime"],
create_time=response["createTime"],
)
@dataclasses.dataclass(frozen=True)
class Mesh:
name: str
url: str
routes: Optional[List[str]]
@classmethod
def from_response(cls, name: str, d: Dict[str, Any]) -> "Mesh":
return cls(
name=name,
url=d["name"],
routes=list(d["routes"]) if "routes" in d else None,
)
@dataclasses.dataclass(frozen=True)
class GrpcRoute:
@dataclasses.dataclass(frozen=True)
class MethodMatch:
type: Optional[str]
grpc_service: Optional[str]
grpc_method: Optional[str]
case_sensitive: Optional[bool]
@classmethod
def from_response(cls, d: Dict[str, Any]) -> "GrpcRoute.MethodMatch":
return cls(
type=d.get("type"),
grpc_service=d.get("grpcService"),
grpc_method=d.get("grpcMethod"),
case_sensitive=d.get("caseSensitive"),
)
@dataclasses.dataclass(frozen=True)
class HeaderMatch:
type: Optional[str]
key: str
value: str
@classmethod
def from_response(cls, d: Dict[str, Any]) -> "GrpcRoute.HeaderMatch":
return cls(
type=d.get("type"),
key=d["key"],
value=d["value"],
)
@dataclasses.dataclass(frozen=True)
class RouteMatch:
method: Optional["GrpcRoute.MethodMatch"]
headers: Tuple["GrpcRoute.HeaderMatch"]
@classmethod
def from_response(cls, d: Dict[str, Any]) -> "GrpcRoute.RouteMatch":
return cls(
method=GrpcRoute.MethodMatch.from_response(d["method"])
if "method" in d
else None,
headers=tuple(
GrpcRoute.HeaderMatch.from_response(h) for h in d["headers"]
)
if "headers" in d
else (),
)
@dataclasses.dataclass(frozen=True)
class Destination:
service_name: str
weight: Optional[int]
@classmethod
def from_response(cls, d: Dict[str, Any]) -> "GrpcRoute.Destination":
return cls(
service_name=d["serviceName"],
weight=d.get("weight"),
)
@dataclasses.dataclass(frozen=True)
    class RouteAction:
        destinations: List["GrpcRoute.Destination"]
        @classmethod
def from_response(cls, d: Dict[str, Any]) -> "GrpcRoute.RouteAction":
destinations = (
[
GrpcRoute.Destination.from_response(dest)
for dest in d["destinations"]
]
if "destinations" in d
else []
)
return cls(destinations=destinations)
@dataclasses.dataclass(frozen=True)
class RouteRule:
matches: List["GrpcRoute.RouteMatch"]
action: "GrpcRoute.RouteAction"
@classmethod
def from_response(cls, d: Dict[str, Any]) -> "GrpcRoute.RouteRule":
matches = (
[GrpcRoute.RouteMatch.from_response(m) for m in d["matches"]]
if "matches" in d
else []
)
return cls(
matches=matches,
action=GrpcRoute.RouteAction.from_response(d["action"]),
)
name: str
url: str
    hostnames: Tuple[str, ...]
    rules: Tuple["GrpcRoute.RouteRule", ...]
    meshes: Optional[Tuple[str, ...]]
@classmethod
def from_response(
cls, name: str, d: Dict[str, Any]
) -> "GrpcRoute.RouteRule":
return cls(
name=name,
url=d["name"],
hostnames=tuple(d["hostnames"]),
rules=tuple(d["rules"]),
meshes=None if d.get("meshes") is None else tuple(d["meshes"]),
)
@dataclasses.dataclass(frozen=True)
class HttpRoute:
@dataclasses.dataclass(frozen=True)
class MethodMatch:
type: Optional[str]
http_service: Optional[str]
http_method: Optional[str]
case_sensitive: Optional[bool]
@classmethod
def from_response(cls, d: Dict[str, Any]) -> "HttpRoute.MethodMatch":
return cls(
type=d.get("type"),
http_service=d.get("httpService"),
http_method=d.get("httpMethod"),
case_sensitive=d.get("caseSensitive"),
)
@dataclasses.dataclass(frozen=True)
class HeaderMatch:
type: Optional[str]
key: str
value: str
@classmethod
def from_response(cls, d: Dict[str, Any]) -> "HttpRoute.HeaderMatch":
return cls(
type=d.get("type"),
key=d["key"],
value=d["value"],
)
@dataclasses.dataclass(frozen=True)
class RouteMatch:
method: Optional["HttpRoute.MethodMatch"]
headers: Tuple["HttpRoute.HeaderMatch"]
@classmethod
def from_response(cls, d: Dict[str, Any]) -> "HttpRoute.RouteMatch":
return cls(
method=HttpRoute.MethodMatch.from_response(d["method"])
if "method" in d
else None,
headers=tuple(
HttpRoute.HeaderMatch.from_response(h) for h in d["headers"]
)
if "headers" in d
else (),
)
@dataclasses.dataclass(frozen=True)
class Destination:
service_name: str
weight: Optional[int]
@classmethod
def from_response(cls, d: Dict[str, Any]) -> "HttpRoute.Destination":
return cls(
service_name=d["serviceName"],
weight=d.get("weight"),
)
@dataclasses.dataclass(frozen=True)
class RouteAction:
destinations: List["HttpRoute.Destination"]
stateful_session_affinity: Optional["HttpRoute.StatefulSessionAffinity"]
@classmethod
def from_response(cls, d: Dict[str, Any]) -> "HttpRoute.RouteAction":
destinations = (
[
HttpRoute.Destination.from_response(dest)
for dest in d["destinations"]
]
if "destinations" in d
else []
)
stateful_session_affinity = (
HttpRoute.StatefulSessionAffinity.from_response(
d["statefulSessionAffinity"]
)
if "statefulSessionAffinity" in d
else None
)
return cls(
destinations=destinations,
stateful_session_affinity=stateful_session_affinity,
)
@dataclasses.dataclass(frozen=True)
class StatefulSessionAffinity:
cookie_ttl: Optional[str]
@classmethod
def from_response(
cls, d: Dict[str, Any]
) -> "HttpRoute.StatefulSessionAffinity":
return cls(cookie_ttl=d.get("cookieTtl"))
@dataclasses.dataclass(frozen=True)
class RouteRule:
matches: List["HttpRoute.RouteMatch"]
action: "HttpRoute.RouteAction"
@classmethod
def from_response(cls, d: Dict[str, Any]) -> "HttpRoute.RouteRule":
matches = (
[HttpRoute.RouteMatch.from_response(m) for m in d["matches"]]
if "matches" in d
else []
)
return cls(
matches=matches,
action=HttpRoute.RouteAction.from_response(d["action"]),
)
name: str
url: str
    hostnames: Tuple[str, ...]
    rules: Tuple["HttpRoute.RouteRule", ...]
    meshes: Optional[Tuple[str, ...]]
@classmethod
def from_response(cls, name: str, d: Dict[str, Any]) -> "HttpRoute":
return cls(
name=name,
url=d["name"],
hostnames=tuple(d["hostnames"]),
rules=tuple(d["rules"]),
meshes=None if d.get("meshes") is None else tuple(d["meshes"]),
)
class _NetworkServicesBase(
gcp.api.GcpStandardCloudApiResource, metaclass=abc.ABCMeta
):
"""Base class for NetworkServices APIs."""
# TODO(https://github.com/grpc/grpc/issues/29532) remove pylint disable
# pylint: disable=abstract-method
def __init__(self, api_manager: gcp.api.GcpApiManager, project: str):
super().__init__(api_manager.networkservices(self.api_version), project)
# Shortcut to projects/*/locations/ endpoints
self._api_locations = self.api.projects().locations()
@property
def api_name(self) -> str:
return "networkservices"
def _execute(
self, *args, **kwargs
): # pylint: disable=signature-differs,arguments-differ
# Workaround TD bug: throttled operations are reported as internal.
# Ref b/175345578
retryer = tenacity.Retrying(
retry=tenacity.retry_if_exception(self._operation_internal_error),
wait=tenacity.wait_fixed(10),
stop=tenacity.stop_after_delay(5 * 60),
before_sleep=tenacity.before_sleep_log(logger, logging.DEBUG),
reraise=True,
)
retryer(super()._execute, *args, **kwargs)
@staticmethod
def _operation_internal_error(exception):
return (
isinstance(exception, gcp.api.OperationError)
and exception.error.code == code_pb2.INTERNAL
)
class NetworkServicesV1Beta1(_NetworkServicesBase):
"""NetworkServices API v1beta1."""
ENDPOINT_POLICIES = "endpointPolicies"
@property
def api_version(self) -> str:
return "v1beta1"
def create_endpoint_policy(self, name, body: dict) -> GcpResource:
return self._create_resource(
collection=self._api_locations.endpointPolicies(),
body=body,
endpointPolicyId=name,
)
def get_endpoint_policy(self, name: str) -> EndpointPolicy:
response = self._get_resource(
collection=self._api_locations.endpointPolicies(),
full_name=self.resource_full_name(name, self.ENDPOINT_POLICIES),
)
return EndpointPolicy.from_response(name, response)
def delete_endpoint_policy(self, name: str) -> bool:
return self._delete_resource(
collection=self._api_locations.endpointPolicies(),
full_name=self.resource_full_name(name, self.ENDPOINT_POLICIES),
)
class NetworkServicesV1Alpha1(NetworkServicesV1Beta1):
"""NetworkServices API v1alpha1.
    Note: extending the v1beta1 class presumes that v1beta1 is just the
    v1alpha1 API graduated into a more stable version. This is true in most
    cases. However, the v1alpha1 class can always override and reimplement
    incompatible methods.
"""
HTTP_ROUTES = "httpRoutes"
GRPC_ROUTES = "grpcRoutes"
MESHES = "meshes"
@property
def api_version(self) -> str:
return "v1alpha1"
def create_mesh(self, name: str, body: dict) -> GcpResource:
return self._create_resource(
collection=self._api_locations.meshes(), body=body, meshId=name
)
def get_mesh(self, name: str) -> Mesh:
full_name = self.resource_full_name(name, self.MESHES)
result = self._get_resource(
collection=self._api_locations.meshes(), full_name=full_name
)
return Mesh.from_response(name, result)
def delete_mesh(self, name: str) -> bool:
return self._delete_resource(
collection=self._api_locations.meshes(),
full_name=self.resource_full_name(name, self.MESHES),
)
def create_grpc_route(self, name: str, body: dict) -> GcpResource:
return self._create_resource(
collection=self._api_locations.grpcRoutes(),
body=body,
grpcRouteId=name,
)
def create_http_route(self, name: str, body: dict) -> GcpResource:
return self._create_resource(
collection=self._api_locations.httpRoutes(),
body=body,
httpRouteId=name,
)
def get_grpc_route(self, name: str) -> GrpcRoute:
full_name = self.resource_full_name(name, self.GRPC_ROUTES)
result = self._get_resource(
collection=self._api_locations.grpcRoutes(), full_name=full_name
)
return GrpcRoute.from_response(name, result)
    def get_http_route(self, name: str) -> HttpRoute:
full_name = self.resource_full_name(name, self.HTTP_ROUTES)
result = self._get_resource(
collection=self._api_locations.httpRoutes(), full_name=full_name
)
return HttpRoute.from_response(name, result)
def delete_grpc_route(self, name: str) -> bool:
return self._delete_resource(
collection=self._api_locations.grpcRoutes(),
full_name=self.resource_full_name(name, self.GRPC_ROUTES),
)
def delete_http_route(self, name: str) -> bool:
return self._delete_resource(
collection=self._api_locations.httpRoutes(),
full_name=self.resource_full_name(name, self.HTTP_ROUTES),
)
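
A minimal sketch of the v1alpha1 mesh and route calls above, assuming a default-constructed GcpApiManager; all names and the backend-service path are hypothetical:

from framework.infrastructure import gcp

api_manager = gcp.api.GcpApiManager()
netsvc = NetworkServicesV1Alpha1(api_manager, project="example-project")
netsvc.create_mesh("psm-mesh", {})
mesh = netsvc.get_mesh("psm-mesh")
netsvc.create_grpc_route(
    "psm-grpc-route",
    {
        "meshes": [mesh.url],
        "hostnames": ["xds-test-server:8080"],
        "rules": [{"action": {"destinations": [{
            "serviceName": (
                "projects/example-project/locations/global"
                "/backendServices/psm-backend-service"
            ),
        }]}}],
    },
)
route = netsvc.get_grpc_route("psm-grpc-route")
netsvc.delete_grpc_route("psm-grpc-route")
netsvc.delete_mesh("psm-mesh")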

@ -1,13 +0,0 @@
# Copyright 2022 gRPC authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

@ -1,142 +0,0 @@
# Copyright 2022 gRPC authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import logging
import os
import pathlib
import threading
from typing import Any, Callable, Optional, TextIO
from kubernetes import client
from kubernetes.watch import watch
logger = logging.getLogger(__name__)
class PodLogCollector(threading.Thread):
"""A thread that streams logs from the remote pod to a local file."""
pod_name: str
namespace_name: str
stop_event: threading.Event
drain_event: threading.Event
log_path: pathlib.Path
log_to_stdout: bool
log_timestamps: bool
error_backoff_sec: int
_out_stream: Optional[TextIO]
_watcher: Optional[watch.Watch]
_read_pod_log_fn: Callable[..., Any]
def __init__(
self,
*,
pod_name: str,
namespace_name: str,
read_pod_log_fn: Callable[..., Any],
stop_event: threading.Event,
log_path: pathlib.Path,
log_to_stdout: bool = False,
log_timestamps: bool = False,
error_backoff_sec: int = 5,
):
self.pod_name = pod_name
self.namespace_name = namespace_name
self.stop_event = stop_event
        # Used to indicate that log draining happened. In practice it's less
        # useful when logging happens rarely: the thread blocks in native
        # code, which doesn't yield until the next log message arrives.
self.drain_event = threading.Event()
self.log_path = log_path
self.log_to_stdout = log_to_stdout
self.log_timestamps = log_timestamps
self.error_backoff_sec = error_backoff_sec
self._read_pod_log_fn = read_pod_log_fn
self._out_stream = None
self._watcher = None
super().__init__(name=f"pod-log-{pod_name}", daemon=True)
def run(self):
logger.info(
"Starting log collection thread %i for %s",
self.ident,
self.pod_name,
)
try:
self._out_stream = open(
self.log_path, "w", errors="ignore", encoding="utf-8"
)
while not self.stop_event.is_set():
self._stream_log()
finally:
self._stop()
def flush(self):
"""Flushes the log file buffer. May be called from the main thread."""
if self._out_stream:
self._out_stream.flush()
os.fsync(self._out_stream.fileno())
def _stop(self):
if self._watcher is not None:
self._watcher.stop()
self._watcher = None
if self._out_stream is not None:
self._write(
f"Finished log collection for pod {self.pod_name}",
force_flush=True,
)
self._out_stream.close()
self._out_stream = None
self.drain_event.set()
def _stream_log(self):
try:
self._restart_stream()
except client.ApiException as e:
self._write(f"Exception fetching logs: {e}")
self._write(
(
f"Restarting log fetching in {self.error_backoff_sec} sec. "
"Will attempt to read from the beginning, but log "
"truncation may occur."
),
force_flush=True,
)
finally:
# Instead of time.sleep(), we're waiting on the stop event
# in case it gets set earlier.
self.stop_event.wait(timeout=self.error_backoff_sec)
def _restart_stream(self):
self._watcher = watch.Watch()
for msg in self._watcher.stream(
self._read_pod_log_fn,
name=self.pod_name,
namespace=self.namespace_name,
timestamps=self.log_timestamps,
follow=True,
):
self._write(msg)
            # After every message, check whether a stop is requested.
if self.stop_event.is_set():
self._stop()
return
def _write(self, msg: str, force_flush: bool = False):
self._out_stream.write(msg)
self._out_stream.write("\n")
if force_flush:
self.flush()
if self.log_to_stdout:
logger.info(msg)
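
A minimal sketch of wiring PodLogCollector to the Kubernetes client, assuming a local kubeconfig is already set up; pod, namespace, and path names are hypothetical:

import pathlib
import threading
from kubernetes import client, config

config.load_kube_config()  # assumes a usable local kubeconfig
core_v1 = client.CoreV1Api()
stop = threading.Event()
collector = PodLogCollector(
    pod_name="psm-grpc-client",
    namespace_name="psm-interop",
    read_pod_log_fn=core_v1.read_namespaced_pod_log,
    stop_event=stop,
    log_path=pathlib.Path("/tmp/psm-grpc-client.log"),
)
collector.start()
# ... run the test while logs stream to the file ...
stop.set()
collector.drain_event.wait(timeout=30)
collector.join(timeout=30)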

@ -1,133 +0,0 @@
# Copyright 2022 gRPC authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import logging
import re
import subprocess
import time
from typing import Optional
logger = logging.getLogger(__name__)
class PortForwardingError(Exception):
"""Error forwarding port"""
class PortForwarder:
PORT_FORWARD_LOCAL_ADDRESS: str = "127.0.0.1"
def __init__(
self,
context: str,
namespace: str,
destination: str,
remote_port: int,
local_port: Optional[int] = None,
local_address: Optional[str] = None,
):
self.context = context
self.namespace = namespace
self.destination = destination
self.remote_port = remote_port
self.local_address = local_address or self.PORT_FORWARD_LOCAL_ADDRESS
self.local_port: Optional[int] = local_port
self.subprocess: Optional[subprocess.Popen] = None
def connect(self) -> None:
if self.local_port:
port_mapping = f"{self.local_port}:{self.remote_port}"
else:
port_mapping = f":{self.remote_port}"
cmd = [
"kubectl",
"--context",
self.context,
"--namespace",
self.namespace,
"port-forward",
"--address",
self.local_address,
self.destination,
port_mapping,
]
logger.debug(
"Executing port forwarding subprocess cmd: %s", " ".join(cmd)
)
self.subprocess = subprocess.Popen(
cmd,
stdout=subprocess.PIPE,
stderr=subprocess.STDOUT,
universal_newlines=True,
)
# Wait for stdout line indicating successful start.
if self.local_port:
local_port_expected = (
f"Forwarding from {self.local_address}:{self.local_port}"
f" -> {self.remote_port}"
)
else:
local_port_re = re.compile(
f"Forwarding from {self.local_address}:([0-9]+) ->"
f" {self.remote_port}"
)
try:
while True:
time.sleep(0.05)
output = self.subprocess.stdout.readline().strip()
if not output:
return_code = self.subprocess.poll()
if return_code is not None:
                        errors = self.subprocess.stdout.readlines()
raise PortForwardingError(
"Error forwarding port, kubectl return "
f"code {return_code}, output {errors}"
)
# If there is no output, and the subprocess is not exiting,
# continue waiting for the log line.
continue
# Validate output log
if self.local_port:
if output != local_port_expected:
raise PortForwardingError(
f"Error forwarding port, unexpected output {output}"
)
else:
groups = local_port_re.search(output)
if groups is None:
raise PortForwardingError(
f"Error forwarding port, unexpected output {output}"
)
# Update local port to the randomly picked one
self.local_port = int(groups[1])
logger.info(output)
break
except Exception:
self.close()
raise
def close(self) -> None:
if self.subprocess is not None:
logger.info(
"Shutting down port forwarding, pid %s", self.subprocess.pid
)
self.subprocess.kill()
stdout, _ = self.subprocess.communicate(timeout=5)
logger.info("Port forwarding stopped")
logger.debug("Port forwarding remaining stdout: %s", stdout)
self.subprocess = None
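
A minimal sketch of the forwarder above; the kubectl context, namespace, destination, and remote port are hypothetical:

forwarder = PortForwarder(
    context="gke_example-project_us-central1-a_psm-cluster",
    namespace="psm-interop",
    destination="deployment/psm-grpc-client",
    remote_port=8079,  # hypothetical admin port on the pod
)
forwarder.connect()  # blocks until kubectl reports the forwarding line
try:
    # local_port is filled in from kubectl's output when not given above.
    print(f"forwarding 127.0.0.1:{forwarder.local_port} -> 8079")
finally:
    forwarder.close()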

@ -1,22 +0,0 @@
# Copyright 2023 gRPC authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import framework.infrastructure.traffic_director as td_base
# TODO(sergiitk): [GAMMA] make a TD-manager-less base test case.
class TrafficDirectorGammaManager(td_base.TrafficDirectorManager):
"""Gamma."""
def cleanup(self, *, force=False): # pylint: disable=unused-argument
return True

@ -1,14 +0,0 @@
# Copyright 2020 gRPC authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from framework.rpc import grpc

@ -1,117 +0,0 @@
# Copyright 2020 gRPC authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import logging
import re
from typing import Any, Dict, Optional
from google.protobuf import json_format
import google.protobuf.message
import grpc
import framework.errors
logger = logging.getLogger(__name__)
# Type aliases
Message = google.protobuf.message.Message
RpcError = grpc.RpcError
class GrpcClientHelper:
DEFAULT_RPC_DEADLINE_SEC = 90
channel: grpc.Channel
# This is purely cosmetic to make RPC logs look like method calls.
log_service_name: str
    # This is purely cosmetic to output the RPC target. Normally set to the
    # hostname:port of the remote service, but it doesn't have to be the
    # real target. This way, when RPCs are routed through a proxy or port
    # forwarding, it is still set to a useful name.
log_target: str
def __init__(
self,
channel: grpc.Channel,
stub_class: Any,
*,
log_target: Optional[str] = "",
):
self.channel = channel
self.stub = stub_class(channel)
self.log_service_name = re.sub(
"Stub$", "", self.stub.__class__.__name__
)
self.log_target = log_target or ""
def call_unary_with_deadline(
self,
*,
rpc: str,
req: Message,
deadline_sec: Optional[int] = DEFAULT_RPC_DEADLINE_SEC,
log_level: Optional[int] = logging.DEBUG,
) -> Message:
if deadline_sec is None:
deadline_sec = self.DEFAULT_RPC_DEADLINE_SEC
call_kwargs = dict(wait_for_ready=True, timeout=deadline_sec)
self._log_rpc_request(rpc, req, call_kwargs, log_level)
# Call RPC, e.g. RpcStub(channel).RpcMethod(req, ...options)
rpc_callable: grpc.UnaryUnaryMultiCallable = getattr(self.stub, rpc)
return rpc_callable(req, **call_kwargs)
def _log_rpc_request(self, rpc, req, call_kwargs, log_level=logging.DEBUG):
logger.log(
logging.DEBUG if log_level is None else log_level,
"[%s] >> RPC %s.%s(request=%s(%r), %s)",
self.log_target,
self.log_service_name,
rpc,
req.__class__.__name__,
json_format.MessageToDict(req),
", ".join({f"{k}={v}" for k, v in call_kwargs.items()}),
)
class GrpcApp:
channels: Dict[int, grpc.Channel]
class NotFound(framework.errors.FrameworkError):
"""Requested resource not found"""
def __init__(self, rpc_host):
self.rpc_host = rpc_host
# Cache gRPC channels per port
self.channels = dict()
def _make_channel(self, port) -> grpc.Channel:
if port not in self.channels:
target = f"{self.rpc_host}:{port}"
self.channels[port] = grpc.insecure_channel(target)
return self.channels[port]
def close(self):
# Close all channels
for channel in self.channels.values():
channel.close()
def __enter__(self):
return self
def __exit__(self, exc_type, exc_val, exc_tb):
self.close()
return False
def __del__(self):
self.close()
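
A minimal sketch of wrapping a generated stub with GrpcClientHelper via GrpcApp, assuming the standard gRPC health service is reachable on a hypothetical local port (_make_channel is private; it's used here only for brevity):

from grpc_health.v1 import health_pb2
from grpc_health.v1 import health_pb2_grpc

with GrpcApp(rpc_host="127.0.0.1") as app:
    channel = app._make_channel(8080)  # hypothetical port
    helper = GrpcClientHelper(
        channel, health_pb2_grpc.HealthStub, log_target="127.0.0.1:8080"
    )
    response = helper.call_unary_with_deadline(
        rpc="Check", req=health_pb2.HealthCheckRequest(), deadline_sec=5
    )
    print(response)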

@ -1,273 +0,0 @@
# Copyright 2020 gRPC authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This contains helpers for gRPC services defined in
https://github.com/grpc/grpc-proto/blob/master/grpc/channelz/v1/channelz.proto
"""
import ipaddress
import logging
from typing import Iterator, Optional
import grpc
from grpc_channelz.v1 import channelz_pb2
from grpc_channelz.v1 import channelz_pb2_grpc
import framework.rpc
logger = logging.getLogger(__name__)
# Type aliases
# Channel
Channel = channelz_pb2.Channel
ChannelData = channelz_pb2.ChannelData
ChannelConnectivityState = channelz_pb2.ChannelConnectivityState
ChannelState = ChannelConnectivityState.State # pylint: disable=no-member
_GetTopChannelsRequest = channelz_pb2.GetTopChannelsRequest
_GetTopChannelsResponse = channelz_pb2.GetTopChannelsResponse
# Subchannel
Subchannel = channelz_pb2.Subchannel
_GetSubchannelRequest = channelz_pb2.GetSubchannelRequest
_GetSubchannelResponse = channelz_pb2.GetSubchannelResponse
# Server
Server = channelz_pb2.Server
_GetServersRequest = channelz_pb2.GetServersRequest
_GetServersResponse = channelz_pb2.GetServersResponse
# Sockets
Socket = channelz_pb2.Socket
SocketRef = channelz_pb2.SocketRef
_GetSocketRequest = channelz_pb2.GetSocketRequest
_GetSocketResponse = channelz_pb2.GetSocketResponse
Address = channelz_pb2.Address
Security = channelz_pb2.Security
# Server Sockets
_GetServerSocketsRequest = channelz_pb2.GetServerSocketsRequest
_GetServerSocketsResponse = channelz_pb2.GetServerSocketsResponse
class ChannelzServiceClient(framework.rpc.grpc.GrpcClientHelper):
stub: channelz_pb2_grpc.ChannelzStub
def __init__(
self, channel: grpc.Channel, *, log_target: Optional[str] = ""
):
super().__init__(
channel, channelz_pb2_grpc.ChannelzStub, log_target=log_target
)
@staticmethod
def is_sock_tcpip_address(address: Address):
return address.WhichOneof("address") == "tcpip_address"
@staticmethod
def is_ipv4(tcpip_address: Address.TcpIpAddress):
# According to proto, tcpip_address.ip_address is either IPv4 or IPv6.
# Correspondingly, it's either 4 bytes or 16 bytes in length.
return len(tcpip_address.ip_address) == 4
@classmethod
def sock_address_to_str(cls, address: Address):
if cls.is_sock_tcpip_address(address):
tcpip_address: Address.TcpIpAddress = address.tcpip_address
if cls.is_ipv4(tcpip_address):
ip = ipaddress.IPv4Address(tcpip_address.ip_address)
else:
ip = ipaddress.IPv6Address(tcpip_address.ip_address)
return f"{ip}:{tcpip_address.port}"
else:
raise NotImplementedError("Only tcpip_address implemented")
@classmethod
def sock_addresses_pretty(cls, socket: Socket):
return (
f"local={cls.sock_address_to_str(socket.local)}, "
f"remote={cls.sock_address_to_str(socket.remote)}"
)
@staticmethod
def find_server_socket_matching_client(
server_sockets: Iterator[Socket], client_socket: Socket
    ) -> Optional[Socket]:
for server_socket in server_sockets:
if server_socket.remote == client_socket.local:
return server_socket
return None
@staticmethod
def channel_repr(channel: Channel) -> str:
result = f"<Channel channel_id={channel.ref.channel_id}"
if channel.data.target:
result += f" target={channel.data.target}"
result += (
f" call_started={channel.data.calls_started}"
+ f" calls_succeeded={channel.data.calls_succeeded}"
+ f" calls_failed={channel.data.calls_failed}"
)
result += f" state={ChannelState.Name(channel.data.state.state)}>"
return result
@staticmethod
def subchannel_repr(subchannel: Subchannel) -> str:
result = f"<Subchannel subchannel_id={subchannel.ref.subchannel_id}"
if subchannel.data.target:
result += f" target={subchannel.data.target}"
result += f" state={ChannelState.Name(subchannel.data.state.state)}>"
return result
def find_channels_for_target(
self, target: str, **kwargs
) -> Iterator[Channel]:
return (
channel
for channel in self.list_channels(**kwargs)
if channel.data.target == target
)
def find_server_listening_on_port(
self, port: int, **kwargs
) -> Optional[Server]:
for server in self.list_servers(**kwargs):
listen_socket_ref: SocketRef
for listen_socket_ref in server.listen_socket:
listen_socket = self.get_socket(
listen_socket_ref.socket_id, **kwargs
)
listen_address: Address = listen_socket.local
if (
self.is_sock_tcpip_address(listen_address)
and listen_address.tcpip_address.port == port
):
return server
return None
def list_channels(self, **kwargs) -> Iterator[Channel]:
"""
Iterate over all pages of all root channels.
        Root channels are those which the application has directly created.
        This includes neither subchannels nor non-top-level channels.
"""
start: int = -1
response: Optional[_GetTopChannelsResponse] = None
while start < 0 or not response.end:
# From proto: To request subsequent pages, the client generates this
# value by adding 1 to the highest seen result ID.
start += 1
response = self.call_unary_with_deadline(
rpc="GetTopChannels",
req=_GetTopChannelsRequest(start_channel_id=start),
**kwargs,
)
for channel in response.channel:
start = max(start, channel.ref.channel_id)
yield channel
def get_channel(self, channel_id, **kwargs) -> Channel:
"""Return a single Channel, otherwise raises RpcError."""
response: channelz_pb2.GetChannelResponse
try:
response = self.call_unary_with_deadline(
rpc="GetChannel",
req=channelz_pb2.GetChannelRequest(channel_id=channel_id),
**kwargs,
)
return response.channel
except grpc.RpcError as err:
if isinstance(err, grpc.Call):
# Translate NOT_FOUND into GrpcApp.NotFound.
if err.code() is grpc.StatusCode.NOT_FOUND:
raise framework.rpc.grpc.GrpcApp.NotFound(
f"Channel with channel_id {channel_id} not found",
)
raise
def list_servers(self, **kwargs) -> Iterator[Server]:
"""Iterate over all pages of all servers that exist in the process."""
start: int = -1
response: Optional[_GetServersResponse] = None
while start < 0 or not response.end:
# From proto: To request subsequent pages, the client generates this
# value by adding 1 to the highest seen result ID.
start += 1
response = self.call_unary_with_deadline(
rpc="GetServers",
req=_GetServersRequest(start_server_id=start),
**kwargs,
)
for server in response.server:
start = max(start, server.ref.server_id)
yield server
def list_server_sockets(self, server: Server, **kwargs) -> Iterator[Socket]:
"""List all server sockets that exist in server process.
Iterating over the results will resolve additional pages automatically.
"""
start: int = -1
response: Optional[_GetServerSocketsResponse] = None
while start < 0 or not response.end:
# From proto: To request subsequent pages, the client generates this
# value by adding 1 to the highest seen result ID.
start += 1
response = self.call_unary_with_deadline(
rpc="GetServerSockets",
req=_GetServerSocketsRequest(
server_id=server.ref.server_id, start_socket_id=start
),
**kwargs,
)
socket_ref: SocketRef
for socket_ref in response.socket_ref:
start = max(start, socket_ref.socket_id)
# Yield actual socket
yield self.get_socket(socket_ref.socket_id, **kwargs)
def list_channel_sockets(
self, channel: Channel, **kwargs
) -> Iterator[Socket]:
"""List all sockets of all subchannels of a given channel."""
for subchannel in self.list_channel_subchannels(channel, **kwargs):
yield from self.list_subchannels_sockets(subchannel, **kwargs)
def list_channel_subchannels(
self, channel: Channel, **kwargs
) -> Iterator[Subchannel]:
"""List all subchannels of a given channel."""
for subchannel_ref in channel.subchannel_ref:
yield self.get_subchannel(subchannel_ref.subchannel_id, **kwargs)
def list_subchannels_sockets(
self, subchannel: Subchannel, **kwargs
) -> Iterator[Socket]:
"""List all sockets of a given subchannel."""
for socket_ref in subchannel.socket_ref:
yield self.get_socket(socket_ref.socket_id, **kwargs)
def get_subchannel(self, subchannel_id, **kwargs) -> Subchannel:
"""Return a single Subchannel, otherwise raises RpcError."""
response: _GetSubchannelResponse = self.call_unary_with_deadline(
rpc="GetSubchannel",
req=_GetSubchannelRequest(subchannel_id=subchannel_id),
**kwargs,
)
return response.subchannel
def get_socket(self, socket_id, **kwargs) -> Socket:
"""Return a single Socket, otherwise raises RpcError."""
response: _GetSocketResponse = self.call_unary_with_deadline(
rpc="GetSocket",
req=_GetSocketRequest(socket_id=socket_id),
**kwargs,
)
return response.socket
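
A minimal sketch of the channelz client above, assuming channelz is reachable on a hypothetical local port (e.g. through the PortForwarder); the xDS target URI is hypothetical too:

import grpc

channel = grpc.insecure_channel("127.0.0.1:8079")
channelz = ChannelzServiceClient(channel, log_target="127.0.0.1:8079")
# list_channels()/list_servers() page transparently by bumping the start id.
for ch in channelz.find_channels_for_target("xds:///psm-test-server:8080"):
    print(ChannelzServiceClient.channel_repr(ch))
server = channelz.find_server_listening_on_port(8080)
if server is not None:
    for sock in channelz.list_server_sockets(server):
        print(ChannelzServiceClient.sock_addresses_pretty(sock))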

@ -1,66 +0,0 @@
# Copyright 2021 The gRPC Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This contains helpers for gRPC services defined in
https://github.com/envoyproxy/envoy/blob/main/api/envoy/service/status/v3/csds.proto
"""
import logging
from typing import Optional
# Needed to load the descriptors so that Any is parsed
# TODO(sergiitk): replace with import xds_protos when it works
# isort: off
# pylint: disable=unused-import,ungrouped-imports
import framework.rpc.xds_protos_imports
# pylint: enable=unused-import,ungrouped-imports
# isort: on
from envoy.service.status.v3 import csds_pb2
from envoy.service.status.v3 import csds_pb2_grpc
import grpc
import framework.rpc
logger = logging.getLogger(__name__)
# Type aliases
ClientConfig = csds_pb2.ClientConfig
_ClientStatusRequest = csds_pb2.ClientStatusRequest
class CsdsClient(framework.rpc.grpc.GrpcClientHelper):
stub: csds_pb2_grpc.ClientStatusDiscoveryServiceStub
def __init__(
self, channel: grpc.Channel, *, log_target: Optional[str] = ""
):
super().__init__(
channel,
csds_pb2_grpc.ClientStatusDiscoveryServiceStub,
log_target=log_target,
)
def fetch_client_status(self, **kwargs) -> Optional[ClientConfig]:
"""Fetches the active xDS configurations."""
response = self.call_unary_with_deadline(
rpc="FetchClientStatus", req=_ClientStatusRequest(), **kwargs
)
if len(response.config) != 1:
logger.debug(
"Unexpected number of client configs: %s", len(response.config)
)
return None
return response.config[0]
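
A minimal sketch of fetching the client's active xDS config via CsdsClient, over a hypothetical forwarded port:

import grpc

csds = CsdsClient(grpc.insecure_channel("127.0.0.1:8079"))
client_config = csds.fetch_client_status(deadline_sec=30)
if client_config is not None:
    print(client_config.node.id)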

@ -1,176 +0,0 @@
# Copyright 2020 gRPC authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This contains helpers for gRPC services defined in
https://github.com/grpc/grpc/blob/master/src/proto/grpc/testing/test.proto
"""
import logging
from typing import Iterable, Optional, Tuple
import grpc
from grpc_health.v1 import health_pb2
from grpc_health.v1 import health_pb2_grpc
import framework.rpc
from src.proto.grpc.testing import empty_pb2
from src.proto.grpc.testing import messages_pb2
from src.proto.grpc.testing import test_pb2_grpc
# Type aliases
_LoadBalancerStatsRequest = messages_pb2.LoadBalancerStatsRequest
LoadBalancerStatsResponse = messages_pb2.LoadBalancerStatsResponse
_LoadBalancerAccumulatedStatsRequest = (
messages_pb2.LoadBalancerAccumulatedStatsRequest
)
LoadBalancerAccumulatedStatsResponse = (
messages_pb2.LoadBalancerAccumulatedStatsResponse
)
MethodStats = messages_pb2.LoadBalancerAccumulatedStatsResponse.MethodStats
RpcsByPeer = messages_pb2.LoadBalancerStatsResponse.RpcsByPeer
class LoadBalancerStatsServiceClient(framework.rpc.grpc.GrpcClientHelper):
stub: test_pb2_grpc.LoadBalancerStatsServiceStub
STATS_PARTIAL_RESULTS_TIMEOUT_SEC = 1200
STATS_ACCUMULATED_RESULTS_TIMEOUT_SEC = 600
def __init__(
self, channel: grpc.Channel, *, log_target: Optional[str] = ""
):
super().__init__(
channel,
test_pb2_grpc.LoadBalancerStatsServiceStub,
log_target=log_target,
)
def get_client_stats(
self,
*,
num_rpcs: int,
timeout_sec: Optional[int] = STATS_PARTIAL_RESULTS_TIMEOUT_SEC,
metadata_keys: Optional[tuple[str, ...]] = None,
) -> LoadBalancerStatsResponse:
if timeout_sec is None:
timeout_sec = self.STATS_PARTIAL_RESULTS_TIMEOUT_SEC
return self.call_unary_with_deadline(
rpc="GetClientStats",
req=_LoadBalancerStatsRequest(
num_rpcs=num_rpcs,
timeout_sec=timeout_sec,
metadata_keys=metadata_keys or None,
),
deadline_sec=timeout_sec,
log_level=logging.INFO,
)
def get_client_accumulated_stats(
self, *, timeout_sec: Optional[int] = None
) -> LoadBalancerAccumulatedStatsResponse:
if timeout_sec is None:
timeout_sec = self.STATS_ACCUMULATED_RESULTS_TIMEOUT_SEC
return self.call_unary_with_deadline(
rpc="GetClientAccumulatedStats",
req=_LoadBalancerAccumulatedStatsRequest(),
deadline_sec=timeout_sec,
log_level=logging.INFO,
)
class XdsUpdateClientConfigureServiceClient(
framework.rpc.grpc.GrpcClientHelper
):
stub: test_pb2_grpc.XdsUpdateClientConfigureServiceStub
CONFIGURE_TIMEOUT_SEC: int = 5
def __init__(
self, channel: grpc.Channel, *, log_target: Optional[str] = ""
):
super().__init__(
channel,
test_pb2_grpc.XdsUpdateClientConfigureServiceStub,
log_target=log_target,
)
def configure(
self,
*,
rpc_types: Iterable[str],
metadata: Optional[Iterable[Tuple[str, str, str]]] = None,
app_timeout: Optional[int] = None,
timeout_sec: int = CONFIGURE_TIMEOUT_SEC,
) -> None:
request = messages_pb2.ClientConfigureRequest()
for rpc_type in rpc_types:
request.types.append(
messages_pb2.ClientConfigureRequest.RpcType.Value(rpc_type)
)
if metadata:
for entry in metadata:
request.metadata.append(
messages_pb2.ClientConfigureRequest.Metadata(
type=messages_pb2.ClientConfigureRequest.RpcType.Value(
entry[0]
),
key=entry[1],
value=entry[2],
)
)
if app_timeout:
request.timeout_sec = app_timeout
# Configure's response is empty
self.call_unary_with_deadline(
rpc="Configure",
req=request,
deadline_sec=timeout_sec,
log_level=logging.INFO,
)
class XdsUpdateHealthServiceClient(framework.rpc.grpc.GrpcClientHelper):
stub: test_pb2_grpc.XdsUpdateHealthServiceStub
def __init__(self, channel: grpc.Channel, log_target: Optional[str] = ""):
super().__init__(
channel,
test_pb2_grpc.XdsUpdateHealthServiceStub,
log_target=log_target,
)
def set_serving(self):
self.call_unary_with_deadline(
rpc="SetServing", req=empty_pb2.Empty(), log_level=logging.INFO
)
def set_not_serving(self):
self.call_unary_with_deadline(
rpc="SetNotServing", req=empty_pb2.Empty(), log_level=logging.INFO
)
class HealthClient(framework.rpc.grpc.GrpcClientHelper):
stub: health_pb2_grpc.HealthStub
def __init__(self, channel: grpc.Channel, log_target: Optional[str] = ""):
super().__init__(
channel, health_pb2_grpc.HealthStub, log_target=log_target
)
def check_health(self):
return self.call_unary_with_deadline(
rpc="Check",
req=health_pb2.HealthCheckRequest(),
log_level=logging.INFO,
)
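
A minimal sketch of the configure and stats clients above, over a hypothetical forwarded port; the RPC type names follow ClientConfigureRequest.RpcType:

import grpc

channel = grpc.insecure_channel("127.0.0.1:8079")
configure = XdsUpdateClientConfigureServiceClient(channel)
configure.configure(rpc_types=["UNARY_CALL", "EMPTY_CALL"])
stats = LoadBalancerStatsServiceClient(channel, log_target="127.0.0.1:8079")
response = stats.get_client_stats(num_rpcs=100, timeout_sec=120)
print(dict(response.rpcs_by_peer))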

@ -1,849 +0,0 @@
# Copyright 2023 The gRPC Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Envoy protos provided by PyPI package xds-protos"""
# pylint: disable=unused-import,ungrouped-imports,reimported
# isort: off
from envoy.config.health_checker.redis.v2 import redis_pb2
from envoy.config.listener.v3 import listener_components_pb2
from envoy.config.listener.v3 import udp_listener_config_pb2
from envoy.config.listener.v3 import quic_config_pb2
from envoy.config.listener.v3 import api_listener_pb2
from envoy.config.listener.v3 import listener_pb2
from envoy.config.listener.v2 import api_listener_pb2
from envoy.config.transport_socket.alts.v2alpha import alts_pb2
from envoy.config.transport_socket.raw_buffer.v2 import raw_buffer_pb2
from envoy.config.transport_socket.tap.v2alpha import tap_pb2
from envoy.config.core.v3 import base_pb2
from envoy.config.core.v3 import substitution_format_string_pb2
from envoy.config.core.v3 import backoff_pb2
from envoy.config.core.v3 import grpc_service_pb2
from envoy.config.core.v3 import proxy_protocol_pb2
from envoy.config.core.v3 import protocol_pb2
from envoy.config.core.v3 import address_pb2
from envoy.config.core.v3 import health_check_pb2
from envoy.config.core.v3 import udp_socket_config_pb2
from envoy.config.core.v3 import grpc_method_list_pb2
from envoy.config.core.v3 import socket_option_pb2
from envoy.config.core.v3 import extension_pb2
from envoy.config.core.v3 import config_source_pb2
from envoy.config.core.v3 import event_service_config_pb2
from envoy.config.core.v3 import http_uri_pb2
from envoy.config.core.v3 import resolver_pb2
from envoy.config.retry.previous_hosts.v2 import previous_hosts_pb2
from envoy.config.retry.previous_priorities import (
previous_priorities_config_pb2,
)
from envoy.config.retry.omit_host_metadata.v2 import (
omit_host_metadata_config_pb2,
)
from envoy.config.retry.omit_canary_hosts.v2 import omit_canary_hosts_pb2
from envoy.config.common.dynamic_forward_proxy.v2alpha import dns_cache_pb2
from envoy.config.common.mutation_rules.v3 import mutation_rules_pb2
from envoy.config.common.matcher.v3 import matcher_pb2
from envoy.config.common.tap.v2alpha import common_pb2
from envoy.config.common.key_value.v3 import config_pb2
from envoy.config.metrics.v3 import stats_pb2
from envoy.config.metrics.v3 import metrics_service_pb2
from envoy.config.metrics.v2 import stats_pb2
from envoy.config.metrics.v2 import metrics_service_pb2
from envoy.config.ratelimit.v3 import rls_pb2
from envoy.config.ratelimit.v2 import rls_pb2
from envoy.config.trace.v2alpha import xray_pb2
from envoy.config.trace.v3 import http_tracer_pb2
from envoy.config.trace.v3 import zipkin_pb2
from envoy.config.trace.v3 import trace_pb2
from envoy.config.trace.v3 import lightstep_pb2
from envoy.config.trace.v3 import datadog_pb2
from envoy.config.trace.v3 import opentelemetry_pb2
from envoy.config.trace.v3 import opencensus_pb2
from envoy.config.trace.v3 import dynamic_ot_pb2
from envoy.config.trace.v3 import xray_pb2
from envoy.config.trace.v3 import service_pb2
from envoy.config.trace.v3 import skywalking_pb2
from envoy.config.trace.v2 import http_tracer_pb2
from envoy.config.trace.v2 import zipkin_pb2
from envoy.config.trace.v2 import trace_pb2
from envoy.config.trace.v2 import lightstep_pb2
from envoy.config.trace.v2 import datadog_pb2
from envoy.config.trace.v2 import opencensus_pb2
from envoy.config.trace.v2 import dynamic_ot_pb2
from envoy.config.trace.v2 import service_pb2
from envoy.config.cluster.dynamic_forward_proxy.v2alpha import cluster_pb2
from envoy.config.cluster.redis import redis_cluster_pb2
from envoy.config.cluster.v3 import filter_pb2
from envoy.config.cluster.v3 import cluster_pb2
from envoy.config.cluster.v3 import circuit_breaker_pb2
from envoy.config.cluster.v3 import outlier_detection_pb2
from envoy.config.cluster.aggregate.v2alpha import cluster_pb2
from envoy.config.rbac.v3 import rbac_pb2
from envoy.config.rbac.v2 import rbac_pb2
from envoy.config.endpoint.v3 import endpoint_components_pb2
from envoy.config.endpoint.v3 import load_report_pb2
from envoy.config.endpoint.v3 import endpoint_pb2
from envoy.config.resource_monitor.fixed_heap.v2alpha import fixed_heap_pb2
from envoy.config.resource_monitor.injected_resource.v2alpha import (
injected_resource_pb2,
)
from envoy.config.grpc_credential.v2alpha import aws_iam_pb2
from envoy.config.grpc_credential.v2alpha import file_based_metadata_pb2
from envoy.config.grpc_credential.v3 import aws_iam_pb2
from envoy.config.grpc_credential.v3 import file_based_metadata_pb2
from envoy.config.accesslog.v3 import accesslog_pb2
from envoy.config.accesslog.v2 import als_pb2
from envoy.config.accesslog.v2 import file_pb2
from envoy.config.tap.v3 import common_pb2
from envoy.config.route.v3 import route_components_pb2
from envoy.config.route.v3 import scoped_route_pb2
from envoy.config.route.v3 import route_pb2
from envoy.config.filter.listener.original_dst.v2 import original_dst_pb2
from envoy.config.filter.listener.tls_inspector.v2 import tls_inspector_pb2
from envoy.config.filter.listener.proxy_protocol.v2 import proxy_protocol_pb2
from envoy.config.filter.listener.http_inspector.v2 import http_inspector_pb2
from envoy.config.filter.listener.original_src.v2alpha1 import original_src_pb2
from envoy.config.filter.network.mongo_proxy.v2 import mongo_proxy_pb2
from envoy.config.filter.network.rate_limit.v2 import rate_limit_pb2
from envoy.config.filter.network.ext_authz.v2 import ext_authz_pb2
from envoy.config.filter.network.client_ssl_auth.v2 import client_ssl_auth_pb2
from envoy.config.filter.network.thrift_proxy.v2alpha1 import thrift_proxy_pb2
from envoy.config.filter.network.thrift_proxy.v2alpha1 import route_pb2
from envoy.config.filter.network.kafka_broker.v2alpha1 import kafka_broker_pb2
from envoy.config.filter.network.zookeeper_proxy.v1alpha1 import (
zookeeper_proxy_pb2,
)
from envoy.config.filter.network.dubbo_proxy.v2alpha1 import route_pb2
from envoy.config.filter.network.dubbo_proxy.v2alpha1 import dubbo_proxy_pb2
from envoy.config.filter.network.rbac.v2 import rbac_pb2
from envoy.config.filter.network.tcp_proxy.v2 import tcp_proxy_pb2
from envoy.config.filter.network.echo.v2 import echo_pb2
from envoy.config.filter.network.direct_response.v2 import config_pb2
from envoy.config.filter.network.local_rate_limit.v2alpha import (
local_rate_limit_pb2,
)
from envoy.config.filter.network.sni_cluster.v2 import sni_cluster_pb2
from envoy.config.filter.network.redis_proxy.v2 import redis_proxy_pb2
from envoy.config.filter.network.http_connection_manager.v2 import (
http_connection_manager_pb2,
)
from envoy.config.filter.network.mysql_proxy.v1alpha1 import mysql_proxy_pb2
from envoy.config.filter.dubbo.router.v2alpha1 import router_pb2
from envoy.config.filter.http.dynamic_forward_proxy.v2alpha import (
dynamic_forward_proxy_pb2,
)
from envoy.config.filter.http.gzip.v2 import gzip_pb2
from envoy.config.filter.http.grpc_http1_reverse_bridge.v2alpha1 import (
config_pb2,
)
from envoy.config.filter.http.buffer.v2 import buffer_pb2
from envoy.config.filter.http.cors.v2 import cors_pb2
from envoy.config.filter.http.rate_limit.v2 import rate_limit_pb2
from envoy.config.filter.http.health_check.v2 import health_check_pb2
from envoy.config.filter.http.ext_authz.v2 import ext_authz_pb2
from envoy.config.filter.http.compressor.v2 import compressor_pb2
from envoy.config.filter.http.cache.v2alpha import cache_pb2
from envoy.config.filter.http.adaptive_concurrency.v2alpha import (
adaptive_concurrency_pb2,
)
from envoy.config.filter.http.on_demand.v2 import on_demand_pb2
from envoy.config.filter.http.header_to_metadata.v2 import (
header_to_metadata_pb2,
)
from envoy.config.filter.http.aws_request_signing.v2alpha import (
aws_request_signing_pb2,
)
from envoy.config.filter.http.rbac.v2 import rbac_pb2
from envoy.config.filter.http.transcoder.v2 import transcoder_pb2
from envoy.config.filter.http.dynamo.v2 import dynamo_pb2
from envoy.config.filter.http.csrf.v2 import csrf_pb2
from envoy.config.filter.http.aws_lambda.v2alpha import aws_lambda_pb2
from envoy.config.filter.http.tap.v2alpha import tap_pb2
from envoy.config.filter.http.grpc_http1_bridge.v2 import config_pb2
from envoy.config.filter.http.lua.v2 import lua_pb2
from envoy.config.filter.http.ip_tagging.v2 import ip_tagging_pb2
from envoy.config.filter.http.grpc_stats.v2alpha import config_pb2
from envoy.config.filter.http.router.v2 import router_pb2
from envoy.config.filter.http.fault.v2 import fault_pb2
from envoy.config.filter.http.jwt_authn.v2alpha import config_pb2
from envoy.config.filter.http.grpc_web.v2 import grpc_web_pb2
from envoy.config.filter.http.squash.v2 import squash_pb2
from envoy.config.filter.http.original_src.v2alpha1 import original_src_pb2
from envoy.config.filter.accesslog.v2 import accesslog_pb2
from envoy.config.filter.thrift.rate_limit.v2alpha1 import rate_limit_pb2
from envoy.config.filter.thrift.router.v2alpha1 import router_pb2
from envoy.config.filter.udp.udp_proxy.v2alpha import udp_proxy_pb2
from envoy.config.filter.fault.v2 import fault_pb2
from envoy.config.bootstrap.v3 import bootstrap_pb2
from envoy.config.bootstrap.v2 import bootstrap_pb2
from envoy.config.overload.v2alpha import overload_pb2
from envoy.config.overload.v3 import overload_pb2
from envoy.extensions.internal_redirect.previous_routes.v3 import (
previous_routes_config_pb2,
)
from envoy.extensions.internal_redirect.allow_listed_routes.v3 import (
allow_listed_routes_config_pb2,
)
from envoy.extensions.internal_redirect.safe_cross_scheme.v3 import (
safe_cross_scheme_config_pb2,
)
from envoy.extensions.rate_limit_descriptors.expr.v3 import expr_pb2
from envoy.extensions.udp_packet_writer.v3 import (
udp_gso_batch_writer_factory_pb2,
)
from envoy.extensions.udp_packet_writer.v3 import udp_default_writer_factory_pb2
from envoy.extensions.transport_sockets.s2a.v3 import s2a_pb2
from envoy.extensions.transport_sockets.alts.v3 import alts_pb2
from envoy.extensions.transport_sockets.raw_buffer.v3 import raw_buffer_pb2
from envoy.extensions.transport_sockets.quic.v3 import quic_transport_pb2
from envoy.extensions.transport_sockets.tls.v3 import cert_pb2
from envoy.extensions.transport_sockets.tls.v3 import common_pb2
from envoy.extensions.transport_sockets.tls.v3 import (
tls_spiffe_validator_config_pb2,
)
from envoy.extensions.transport_sockets.tls.v3 import tls_pb2
from envoy.extensions.transport_sockets.tls.v3 import secret_pb2
from envoy.extensions.transport_sockets.http_11_proxy.v3 import (
upstream_http_11_connect_pb2,
)
from envoy.extensions.transport_sockets.starttls.v3 import starttls_pb2
from envoy.extensions.transport_sockets.proxy_protocol.v3 import (
upstream_proxy_protocol_pb2,
)
from envoy.extensions.transport_sockets.tap.v3 import tap_pb2
from envoy.extensions.transport_sockets.internal_upstream.v3 import (
internal_upstream_pb2,
)
from envoy.extensions.transport_sockets.tcp_stats.v3 import tcp_stats_pb2
from envoy.extensions.config.validators.minimum_clusters.v3 import (
minimum_clusters_pb2,
)
from envoy.extensions.stat_sinks.open_telemetry.v3 import open_telemetry_pb2
from envoy.extensions.stat_sinks.graphite_statsd.v3 import graphite_statsd_pb2
from envoy.extensions.stat_sinks.wasm.v3 import wasm_pb2
from envoy.extensions.retry.host.previous_hosts.v3 import previous_hosts_pb2
from envoy.extensions.retry.host.omit_host_metadata.v3 import (
omit_host_metadata_config_pb2,
)
from envoy.extensions.retry.host.omit_canary_hosts.v3 import (
omit_canary_hosts_pb2,
)
from envoy.extensions.retry.priority.previous_priorities.v3 import (
previous_priorities_config_pb2,
)
from envoy.extensions.common.dynamic_forward_proxy.v3 import dns_cache_pb2
from envoy.extensions.common.matching.v3 import extension_matcher_pb2
from envoy.extensions.common.ratelimit.v3 import ratelimit_pb2
from envoy.extensions.common.tap.v3 import common_pb2
from envoy.extensions.common.async_files.v3 import async_file_manager_pb2
from envoy.extensions.network.dns_resolver.cares.v3 import (
cares_dns_resolver_pb2,
)
from envoy.extensions.network.dns_resolver.getaddrinfo.v3 import (
getaddrinfo_dns_resolver_pb2,
)
from envoy.extensions.network.dns_resolver.apple.v3 import (
apple_dns_resolver_pb2,
)
from envoy.extensions.network.socket_interface.v3 import (
default_socket_interface_pb2,
)
from envoy.extensions.matching.common_inputs.network.v3 import (
network_inputs_pb2,
)
from envoy.extensions.matching.common_inputs.environment_variable.v3 import (
input_pb2,
)
from envoy.extensions.matching.common_inputs.ssl.v3 import ssl_inputs_pb2
from envoy.extensions.matching.input_matchers.consistent_hashing.v3 import (
consistent_hashing_pb2,
)
from envoy.extensions.matching.input_matchers.ip.v3 import ip_pb2
from envoy.extensions.matching.input_matchers.runtime_fraction.v3 import (
runtime_fraction_pb2,
)
from envoy.extensions.load_balancing_policies.common.v3 import common_pb2
from envoy.extensions.load_balancing_policies.random.v3 import random_pb2
from envoy.extensions.load_balancing_policies.subset.v3 import subset_pb2
from envoy.extensions.load_balancing_policies.pick_first.v3 import (
pick_first_pb2,
)
from envoy.extensions.load_balancing_policies.ring_hash.v3 import ring_hash_pb2
from envoy.extensions.load_balancing_policies.cluster_provided.v3 import (
cluster_provided_pb2,
)
from envoy.extensions.load_balancing_policies.maglev.v3 import maglev_pb2
from envoy.extensions.load_balancing_policies.least_request.v3 import (
least_request_pb2,
)
from envoy.extensions.load_balancing_policies.round_robin.v3 import (
round_robin_pb2,
)
from envoy.extensions.load_balancing_policies.client_side_weighted_round_robin.v3 import (
client_side_weighted_round_robin_pb2,
)
from envoy.extensions.load_balancing_policies.wrr_locality.v3 import (
wrr_locality_pb2,
)
from envoy.extensions.health_check.event_sinks.file.v3 import file_pb2
from envoy.extensions.early_data.v3 import default_early_data_policy_pb2
from envoy.extensions.watchdog.profile_action.v3 import profile_action_pb2
from envoy.extensions.http.custom_response.local_response_policy.v3 import (
local_response_policy_pb2,
)
from envoy.extensions.http.custom_response.redirect_policy.v3 import (
redirect_policy_pb2,
)
from envoy.extensions.http.stateful_session.cookie.v3 import cookie_pb2
from envoy.extensions.http.stateful_session.header.v3 import header_pb2
from envoy.extensions.http.early_header_mutation.header_mutation.v3 import (
header_mutation_pb2,
)
from envoy.extensions.http.header_formatters.preserve_case.v3 import (
preserve_case_pb2,
)
from envoy.extensions.http.original_ip_detection.custom_header.v3 import (
custom_header_pb2,
)
from envoy.extensions.http.original_ip_detection.xff.v3 import xff_pb2
from envoy.extensions.http.cache.simple_http_cache.v3 import config_pb2
from envoy.extensions.http.cache.file_system_http_cache.v3 import (
file_system_http_cache_pb2,
)
from envoy.extensions.http.header_validators.envoy_default.v3 import (
header_validator_pb2,
)
from envoy.extensions.request_id.uuid.v3 import uuid_pb2
from envoy.extensions.formatter.req_without_query.v3 import (
req_without_query_pb2,
)
from envoy.extensions.formatter.metadata.v3 import metadata_pb2
from envoy.extensions.formatter.cel.v3 import cel_pb2
from envoy.extensions.filters.listener.original_dst.v3 import original_dst_pb2
from envoy.extensions.filters.listener.tls_inspector.v3 import tls_inspector_pb2
from envoy.extensions.filters.listener.local_ratelimit.v3 import (
local_ratelimit_pb2,
)
from envoy.extensions.filters.listener.proxy_protocol.v3 import (
proxy_protocol_pb2,
)
from envoy.extensions.filters.listener.http_inspector.v3 import (
http_inspector_pb2,
)
from envoy.extensions.filters.listener.original_src.v3 import original_src_pb2
from envoy.extensions.filters.common.matcher.action.v3 import skip_action_pb2
from envoy.extensions.filters.common.dependency.v3 import dependency_pb2
from envoy.extensions.filters.common.fault.v3 import fault_pb2
from envoy.extensions.filters.network.mongo_proxy.v3 import mongo_proxy_pb2
from envoy.extensions.filters.network.ext_authz.v3 import ext_authz_pb2
from envoy.extensions.filters.network.ratelimit.v3 import rate_limit_pb2
from envoy.extensions.filters.network.sni_dynamic_forward_proxy.v3 import (
sni_dynamic_forward_proxy_pb2,
)
from envoy.extensions.filters.network.thrift_proxy.v3 import thrift_proxy_pb2
from envoy.extensions.filters.network.thrift_proxy.v3 import route_pb2
from envoy.extensions.filters.network.thrift_proxy.filters.ratelimit.v3 import (
rate_limit_pb2,
)
from envoy.extensions.filters.network.thrift_proxy.filters.header_to_metadata.v3 import (
header_to_metadata_pb2,
)
from envoy.extensions.filters.network.thrift_proxy.filters.payload_to_metadata.v3 import (
payload_to_metadata_pb2,
)
from envoy.extensions.filters.network.thrift_proxy.router.v3 import router_pb2
from envoy.extensions.filters.network.zookeeper_proxy.v3 import (
zookeeper_proxy_pb2,
)
from envoy.extensions.filters.network.dubbo_proxy.v3 import route_pb2
from envoy.extensions.filters.network.dubbo_proxy.v3 import dubbo_proxy_pb2
from envoy.extensions.filters.network.dubbo_proxy.router.v3 import router_pb2
from envoy.extensions.filters.network.rbac.v3 import rbac_pb2
from envoy.extensions.filters.network.local_ratelimit.v3 import (
local_rate_limit_pb2,
)
from envoy.extensions.filters.network.connection_limit.v3 import (
connection_limit_pb2,
)
from envoy.extensions.filters.network.tcp_proxy.v3 import tcp_proxy_pb2
from envoy.extensions.filters.network.echo.v3 import echo_pb2
from envoy.extensions.filters.network.direct_response.v3 import config_pb2
from envoy.extensions.filters.network.sni_cluster.v3 import sni_cluster_pb2
from envoy.extensions.filters.network.redis_proxy.v3 import redis_proxy_pb2
from envoy.extensions.filters.network.http_connection_manager.v3 import (
http_connection_manager_pb2,
)
from envoy.extensions.filters.network.wasm.v3 import wasm_pb2
from envoy.extensions.filters.http.custom_response.v3 import custom_response_pb2
from envoy.extensions.filters.http.dynamic_forward_proxy.v3 import (
dynamic_forward_proxy_pb2,
)
from envoy.extensions.filters.http.oauth2.v3 import oauth_pb2
from envoy.extensions.filters.http.gzip.v3 import gzip_pb2
from envoy.extensions.filters.http.grpc_http1_reverse_bridge.v3 import (
config_pb2,
)
from envoy.extensions.filters.http.buffer.v3 import buffer_pb2
from envoy.extensions.filters.http.cors.v3 import cors_pb2
from envoy.extensions.filters.http.decompressor.v3 import decompressor_pb2
from envoy.extensions.filters.http.stateful_session.v3 import (
stateful_session_pb2,
)
from envoy.extensions.filters.http.health_check.v3 import health_check_pb2
from envoy.extensions.filters.http.ext_authz.v3 import ext_authz_pb2
from envoy.extensions.filters.http.ratelimit.v3 import rate_limit_pb2
from envoy.extensions.filters.http.geoip.v3 import geoip_pb2
from envoy.extensions.filters.http.compressor.v3 import compressor_pb2
from envoy.extensions.filters.http.cache.v3 import cache_pb2
from envoy.extensions.filters.http.adaptive_concurrency.v3 import (
adaptive_concurrency_pb2,
)
from envoy.extensions.filters.http.kill_request.v3 import kill_request_pb2
from envoy.extensions.filters.http.admission_control.v3 import (
admission_control_pb2,
)
from envoy.extensions.filters.http.on_demand.v3 import on_demand_pb2
from envoy.extensions.filters.http.header_to_metadata.v3 import (
header_to_metadata_pb2,
)
from envoy.extensions.filters.http.aws_request_signing.v3 import (
aws_request_signing_pb2,
)
from envoy.extensions.filters.http.rbac.v3 import rbac_pb2
from envoy.extensions.filters.http.cdn_loop.v3 import cdn_loop_pb2
from envoy.extensions.filters.http.composite.v3 import composite_pb2
from envoy.extensions.filters.http.csrf.v3 import csrf_pb2
from envoy.extensions.filters.http.local_ratelimit.v3 import (
local_rate_limit_pb2,
)
from envoy.extensions.filters.http.aws_lambda.v3 import aws_lambda_pb2
from envoy.extensions.filters.http.tap.v3 import tap_pb2
from envoy.extensions.filters.http.connect_grpc_bridge.v3 import config_pb2
from envoy.extensions.filters.http.header_mutation.v3 import header_mutation_pb2
from envoy.extensions.filters.http.ext_proc.v3 import processing_mode_pb2
from envoy.extensions.filters.http.ext_proc.v3 import ext_proc_pb2
from envoy.extensions.filters.http.grpc_http1_bridge.v3 import config_pb2
from envoy.extensions.filters.http.gcp_authn.v3 import gcp_authn_pb2
from envoy.extensions.filters.http.alternate_protocols_cache.v3 import (
alternate_protocols_cache_pb2,
)
from envoy.extensions.filters.http.lua.v3 import lua_pb2
from envoy.extensions.filters.http.ip_tagging.v3 import ip_tagging_pb2
from envoy.extensions.filters.http.grpc_stats.v3 import config_pb2
from envoy.extensions.filters.http.set_metadata.v3 import set_metadata_pb2
from envoy.extensions.filters.http.router.v3 import router_pb2
from envoy.extensions.filters.http.fault.v3 import fault_pb2
from envoy.extensions.filters.http.bandwidth_limit.v3 import bandwidth_limit_pb2
from envoy.extensions.filters.http.file_system_buffer.v3 import (
file_system_buffer_pb2,
)
from envoy.extensions.filters.http.jwt_authn.v3 import config_pb2
from envoy.extensions.filters.http.grpc_web.v3 import grpc_web_pb2
from envoy.extensions.filters.http.grpc_json_transcoder.v3 import transcoder_pb2
from envoy.extensions.filters.http.wasm.v3 import wasm_pb2
from envoy.extensions.filters.http.original_src.v3 import original_src_pb2
from envoy.extensions.filters.http.rate_limit_quota.v3 import (
rate_limit_quota_pb2,
)
from envoy.extensions.filters.http.upstream_codec.v3 import upstream_codec_pb2
from envoy.extensions.filters.udp.udp_proxy.v3 import route_pb2
from envoy.extensions.filters.udp.udp_proxy.v3 import udp_proxy_pb2
from envoy.extensions.filters.udp.dns_filter.v3 import dns_filter_pb2
from envoy.extensions.quic.proof_source.v3 import proof_source_pb2
from envoy.extensions.quic.crypto_stream.v3 import crypto_stream_pb2
from envoy.extensions.quic.server_preferred_address.v3 import (
fixed_server_preferred_address_config_pb2,
)
from envoy.extensions.quic.connection_id_generator.v3 import (
envoy_deterministic_connection_id_generator_pb2,
)
from envoy.extensions.rbac.audit_loggers.stream.v3 import stream_pb2
from envoy.extensions.rbac.matchers.upstream_ip_port.v3 import (
upstream_ip_port_matcher_pb2,
)
from envoy.extensions.path.match.uri_template.v3 import uri_template_match_pb2
from envoy.extensions.path.rewrite.uri_template.v3 import (
uri_template_rewrite_pb2,
)
from envoy.extensions.upstreams.tcp.v3 import tcp_protocol_options_pb2
from envoy.extensions.upstreams.tcp.generic.v3 import (
generic_connection_pool_pb2,
)
from envoy.extensions.upstreams.http.v3 import http_protocol_options_pb2
from envoy.extensions.upstreams.http.generic.v3 import (
generic_connection_pool_pb2,
)
from envoy.extensions.upstreams.http.tcp.v3 import tcp_connection_pool_pb2
from envoy.extensions.upstreams.http.http.v3 import http_connection_pool_pb2
from envoy.extensions.compression.gzip.decompressor.v3 import gzip_pb2
from envoy.extensions.compression.gzip.compressor.v3 import gzip_pb2
from envoy.extensions.compression.brotli.decompressor.v3 import brotli_pb2
from envoy.extensions.compression.brotli.compressor.v3 import brotli_pb2
from envoy.extensions.compression.zstd.decompressor.v3 import zstd_pb2
from envoy.extensions.compression.zstd.compressor.v3 import zstd_pb2
from envoy.extensions.resource_monitors.downstream_connections.v3 import (
downstream_connections_pb2,
)
from envoy.extensions.resource_monitors.fixed_heap.v3 import fixed_heap_pb2
from envoy.extensions.resource_monitors.injected_resource.v3 import (
injected_resource_pb2,
)
from envoy.extensions.key_value.file_based.v3 import config_pb2
from envoy.extensions.health_checkers.redis.v3 import redis_pb2
from envoy.extensions.health_checkers.thrift.v3 import thrift_pb2
from envoy.extensions.access_loggers.open_telemetry.v3 import logs_service_pb2
from envoy.extensions.access_loggers.grpc.v3 import als_pb2
from envoy.extensions.access_loggers.stream.v3 import stream_pb2
from envoy.extensions.access_loggers.filters.cel.v3 import cel_pb2
from envoy.extensions.access_loggers.file.v3 import file_pb2
from envoy.extensions.access_loggers.wasm.v3 import wasm_pb2
from envoy.extensions.regex_engines.v3 import google_re2_pb2
from envoy.extensions.clusters.dynamic_forward_proxy.v3 import cluster_pb2
from envoy.extensions.clusters.redis.v3 import redis_cluster_pb2
from envoy.extensions.clusters.aggregate.v3 import cluster_pb2
from envoy.extensions.bootstrap.internal_listener.v3 import (
internal_listener_pb2,
)
from envoy.extensions.wasm.v3 import wasm_pb2
from envoy.data.core.v2alpha import health_check_event_pb2
from envoy.data.core.v3 import health_check_event_pb2
from envoy.data.cluster.v2alpha import outlier_detection_event_pb2
from envoy.data.cluster.v3 import outlier_detection_event_pb2
from envoy.data.dns.v2alpha import dns_table_pb2
from envoy.data.dns.v3 import dns_table_pb2
from envoy.data.accesslog.v3 import accesslog_pb2
from envoy.data.accesslog.v2 import accesslog_pb2
from envoy.data.tap.v2alpha import common_pb2
from envoy.data.tap.v2alpha import http_pb2
from envoy.data.tap.v2alpha import wrapper_pb2
from envoy.data.tap.v2alpha import transport_pb2
from envoy.data.tap.v3 import common_pb2
from envoy.data.tap.v3 import http_pb2
from envoy.data.tap.v3 import wrapper_pb2
from envoy.data.tap.v3 import transport_pb2
from envoy.watchdog.v3 import abort_action_pb2
from envoy.admin.v2alpha import mutex_stats_pb2
from envoy.admin.v2alpha import memory_pb2
from envoy.admin.v2alpha import server_info_pb2
from envoy.admin.v2alpha import certs_pb2
from envoy.admin.v2alpha import tap_pb2
from envoy.admin.v2alpha import metrics_pb2
from envoy.admin.v2alpha import config_dump_pb2
from envoy.admin.v2alpha import clusters_pb2
from envoy.admin.v2alpha import listeners_pb2
from envoy.admin.v3 import mutex_stats_pb2
from envoy.admin.v3 import memory_pb2
from envoy.admin.v3 import server_info_pb2
from envoy.admin.v3 import certs_pb2
from envoy.admin.v3 import tap_pb2
from envoy.admin.v3 import metrics_pb2
from envoy.admin.v3 import config_dump_pb2
from envoy.admin.v3 import clusters_pb2
from envoy.admin.v3 import init_dump_pb2
from envoy.admin.v3 import listeners_pb2
from envoy.admin.v3 import config_dump_shared_pb2
from envoy.service.load_stats.v3 import lrs_pb2
from envoy.service.load_stats.v3 import lrs_pb2_grpc
from envoy.service.load_stats.v2 import lrs_pb2
from envoy.service.load_stats.v2 import lrs_pb2_grpc
from envoy.service.listener.v3 import lds_pb2
from envoy.service.listener.v3 import lds_pb2_grpc
from envoy.service.extension.v3 import config_discovery_pb2
from envoy.service.extension.v3 import config_discovery_pb2_grpc
from envoy.service.ratelimit.v3 import rls_pb2
from envoy.service.ratelimit.v3 import rls_pb2_grpc
from envoy.service.ratelimit.v2 import rls_pb2
from envoy.service.ratelimit.v2 import rls_pb2_grpc
from envoy.service.trace.v3 import trace_service_pb2
from envoy.service.trace.v3 import trace_service_pb2_grpc
from envoy.service.trace.v2 import trace_service_pb2
from envoy.service.trace.v2 import trace_service_pb2_grpc
from envoy.service.cluster.v3 import cds_pb2
from envoy.service.cluster.v3 import cds_pb2_grpc
from envoy.service.endpoint.v3 import leds_pb2
from envoy.service.endpoint.v3 import leds_pb2_grpc
from envoy.service.endpoint.v3 import eds_pb2
from envoy.service.endpoint.v3 import eds_pb2_grpc
from envoy.service.auth.v2alpha import external_auth_pb2
from envoy.service.auth.v2alpha import external_auth_pb2_grpc
from envoy.service.auth.v3 import external_auth_pb2
from envoy.service.auth.v3 import external_auth_pb2_grpc
from envoy.service.auth.v3 import attribute_context_pb2
from envoy.service.auth.v3 import attribute_context_pb2_grpc
from envoy.service.auth.v2 import external_auth_pb2
from envoy.service.auth.v2 import external_auth_pb2_grpc
from envoy.service.auth.v2 import attribute_context_pb2
from envoy.service.auth.v2 import attribute_context_pb2_grpc
from envoy.service.accesslog.v3 import als_pb2
from envoy.service.accesslog.v3 import als_pb2_grpc
from envoy.service.accesslog.v2 import als_pb2
from envoy.service.accesslog.v2 import als_pb2_grpc
from envoy.service.tap.v2alpha import tap_pb2
from envoy.service.tap.v2alpha import tap_pb2_grpc
from envoy.service.tap.v2alpha import common_pb2
from envoy.service.tap.v2alpha import common_pb2_grpc
from envoy.service.tap.v3 import tap_pb2
from envoy.service.tap.v3 import tap_pb2_grpc
from envoy.service.ext_proc.v3 import external_processor_pb2
from envoy.service.ext_proc.v3 import external_processor_pb2_grpc
from envoy.service.route.v3 import rds_pb2
from envoy.service.route.v3 import rds_pb2_grpc
from envoy.service.route.v3 import srds_pb2
from envoy.service.route.v3 import srds_pb2_grpc
from envoy.service.event_reporting.v2alpha import event_reporting_service_pb2
from envoy.service.event_reporting.v2alpha import (
event_reporting_service_pb2_grpc,
)
from envoy.service.event_reporting.v3 import event_reporting_service_pb2
from envoy.service.event_reporting.v3 import event_reporting_service_pb2_grpc
from envoy.service.runtime.v3 import rtds_pb2
from envoy.service.runtime.v3 import rtds_pb2_grpc
from envoy.service.health.v3 import hds_pb2
from envoy.service.health.v3 import hds_pb2_grpc
from envoy.service.status.v3 import csds_pb2
from envoy.service.status.v3 import csds_pb2_grpc
from envoy.service.status.v2 import csds_pb2
from envoy.service.status.v2 import csds_pb2_grpc
from envoy.service.rate_limit_quota.v3 import rlqs_pb2
from envoy.service.rate_limit_quota.v3 import rlqs_pb2_grpc
from envoy.service.discovery.v3 import ads_pb2
from envoy.service.discovery.v3 import ads_pb2_grpc
from envoy.service.discovery.v3 import discovery_pb2
from envoy.service.discovery.v3 import discovery_pb2_grpc
from envoy.service.discovery.v2 import ads_pb2
from envoy.service.discovery.v2 import ads_pb2_grpc
from envoy.service.discovery.v2 import sds_pb2
from envoy.service.discovery.v2 import sds_pb2_grpc
from envoy.service.discovery.v2 import hds_pb2
from envoy.service.discovery.v2 import hds_pb2_grpc
from envoy.service.discovery.v2 import rtds_pb2
from envoy.service.discovery.v2 import rtds_pb2_grpc
from envoy.service.secret.v3 import sds_pb2
from envoy.service.secret.v3 import sds_pb2_grpc
from envoy.type import range_pb2
from envoy.type import token_bucket_pb2
from envoy.type import hash_policy_pb2
from envoy.type import semantic_version_pb2
from envoy.type import http_status_pb2
from envoy.type import http_pb2
from envoy.type import percent_pb2
from envoy.type.v3 import range_pb2
from envoy.type.v3 import token_bucket_pb2
from envoy.type.v3 import ratelimit_strategy_pb2
from envoy.type.v3 import hash_policy_pb2
from envoy.type.v3 import ratelimit_unit_pb2
from envoy.type.v3 import semantic_version_pb2
from envoy.type.v3 import http_status_pb2
from envoy.type.v3 import http_pb2
from envoy.type.v3 import percent_pb2
from envoy.type.http.v3 import path_transformation_pb2
from envoy.type.http.v3 import cookie_pb2
from envoy.type.matcher import struct_pb2
from envoy.type.matcher import path_pb2
from envoy.type.matcher import regex_pb2
from envoy.type.matcher import number_pb2
from envoy.type.matcher import metadata_pb2
from envoy.type.matcher import string_pb2
from envoy.type.matcher import node_pb2
from envoy.type.matcher import value_pb2
from envoy.type.matcher.v3 import struct_pb2
from envoy.type.matcher.v3 import http_inputs_pb2
from envoy.type.matcher.v3 import path_pb2
from envoy.type.matcher.v3 import regex_pb2
from envoy.type.matcher.v3 import status_code_input_pb2
from envoy.type.matcher.v3 import number_pb2
from envoy.type.matcher.v3 import metadata_pb2
from envoy.type.matcher.v3 import string_pb2
from envoy.type.matcher.v3 import node_pb2
from envoy.type.matcher.v3 import value_pb2
from envoy.type.matcher.v3 import filter_state_pb2
from envoy.type.metadata.v3 import metadata_pb2
from envoy.type.metadata.v2 import metadata_pb2
from envoy.type.tracing.v3 import custom_tag_pb2
from envoy.type.tracing.v2 import custom_tag_pb2
from envoy.annotations import deprecation_pb2
from envoy.annotations import resource_pb2
from envoy.api.v2 import rds_pb2
from envoy.api.v2 import lds_pb2
from envoy.api.v2 import scoped_route_pb2
from envoy.api.v2 import route_pb2
from envoy.api.v2 import discovery_pb2
from envoy.api.v2 import cds_pb2
from envoy.api.v2 import cluster_pb2
from envoy.api.v2 import eds_pb2
from envoy.api.v2 import srds_pb2
from envoy.api.v2 import listener_pb2
from envoy.api.v2 import endpoint_pb2
from envoy.api.v2.listener import listener_components_pb2
from envoy.api.v2.listener import udp_listener_config_pb2
from envoy.api.v2.listener import quic_config_pb2
from envoy.api.v2.listener import listener_pb2
from envoy.api.v2.core import base_pb2
from envoy.api.v2.core import backoff_pb2
from envoy.api.v2.core import grpc_service_pb2
from envoy.api.v2.core import protocol_pb2
from envoy.api.v2.core import address_pb2
from envoy.api.v2.core import health_check_pb2
from envoy.api.v2.core import grpc_method_list_pb2
from envoy.api.v2.core import socket_option_pb2
from envoy.api.v2.core import config_source_pb2
from envoy.api.v2.core import event_service_config_pb2
from envoy.api.v2.core import http_uri_pb2
from envoy.api.v2.ratelimit import ratelimit_pb2
from envoy.api.v2.cluster import filter_pb2
from envoy.api.v2.cluster import circuit_breaker_pb2
from envoy.api.v2.cluster import outlier_detection_pb2
from envoy.api.v2.endpoint import endpoint_components_pb2
from envoy.api.v2.endpoint import load_report_pb2
from envoy.api.v2.endpoint import endpoint_pb2
from envoy.api.v2.auth import cert_pb2
from envoy.api.v2.auth import common_pb2
from envoy.api.v2.auth import tls_pb2
from envoy.api.v2.auth import secret_pb2
from envoy.api.v2.route import route_components_pb2
from envoy.api.v2.route import route_pb2
from xds.core.v3 import cidr_pb2
from xds.core.v3 import authority_pb2
from xds.core.v3 import resource_locator_pb2
from xds.core.v3 import resource_name_pb2
from xds.core.v3 import context_params_pb2
from xds.core.v3 import resource_pb2
from xds.core.v3 import extension_pb2
from xds.core.v3 import collection_entry_pb2
from xds.data.orca.v3 import orca_load_report_pb2
from xds.service.orca.v3 import orca_pb2
from xds.type.v3 import range_pb2
from xds.type.v3 import cel_pb2
from xds.type.v3 import typed_struct_pb2
from xds.type.matcher.v3 import range_pb2
from xds.type.matcher.v3 import http_inputs_pb2
from xds.type.matcher.v3 import domain_pb2
from xds.type.matcher.v3 import regex_pb2
from xds.type.matcher.v3 import cel_pb2
from xds.type.matcher.v3 import matcher_pb2
from xds.type.matcher.v3 import string_pb2
from xds.type.matcher.v3 import ip_pb2
from xds.annotations.v3 import versioning_pb2
from xds.annotations.v3 import migrate_pb2
from xds.annotations.v3 import sensitive_pb2
from xds.annotations.v3 import status_pb2
from xds.annotations.v3 import security_pb2
from udpa.data.orca.v1 import orca_load_report_pb2
from udpa.service.orca.v1 import orca_pb2
from udpa.type.v1 import typed_struct_pb2
from udpa.annotations import versioning_pb2
from udpa.annotations import migrate_pb2
from udpa.annotations import sensitive_pb2
from udpa.annotations import status_pb2
from udpa.annotations import security_pb2
from google.api import context_pb2
from google.api import visibility_pb2
from google.api import config_change_pb2
from google.api import source_info_pb2
from google.api import field_behavior_pb2
from google.api import monitored_resource_pb2
from google.api import metric_pb2
from google.api import usage_pb2
from google.api import backend_pb2
from google.api import monitoring_pb2
from google.api import control_pb2
from google.api import billing_pb2
from google.api import system_parameter_pb2
from google.api import auth_pb2
from google.api import quota_pb2
from google.api import client_pb2
from google.api import documentation_pb2
from google.api import http_pb2
from google.api import resource_pb2
from google.api import annotations_pb2
from google.api import log_pb2
from google.api import httpbody_pb2
from google.api import service_pb2
from google.api import launch_stage_pb2
from google.api import consumer_pb2
from google.api import endpoint_pb2
from google.api import label_pb2
from google.api import distribution_pb2
from google.api import logging_pb2
from google.api import error_reason_pb2
from google.api.servicecontrol.v1 import log_entry_pb2
from google.api.servicecontrol.v1 import metric_value_pb2
from google.api.servicecontrol.v1 import operation_pb2
from google.api.servicecontrol.v1 import service_controller_pb2
from google.api.servicecontrol.v1 import http_request_pb2
from google.api.servicecontrol.v1 import quota_controller_pb2
from google.api.servicecontrol.v1 import check_error_pb2
from google.api.servicecontrol.v1 import distribution_pb2
from google.api.servicemanagement.v1 import resources_pb2
from google.api.servicemanagement.v1 import servicemanager_pb2
from google.api.expr.v1beta1 import source_pb2
from google.api.expr.v1beta1 import eval_pb2
from google.api.expr.v1beta1 import expr_pb2
from google.api.expr.v1beta1 import value_pb2
from google.api.expr.v1beta1 import decl_pb2
from google.api.expr.v1alpha1 import explain_pb2
from google.api.expr.v1alpha1 import eval_pb2
from google.api.expr.v1alpha1 import syntax_pb2
from google.api.expr.v1alpha1 import checked_pb2
from google.api.expr.v1alpha1 import conformance_service_pb2
from google.api.expr.v1alpha1 import value_pb2
from google.api.serviceusage.v1 import resources_pb2
from google.api.serviceusage.v1 import serviceusage_pb2
from google.api.serviceusage.v1beta1 import resources_pb2
from google.api.serviceusage.v1beta1 import serviceusage_pb2
from google.rpc import code_pb2
from google.rpc import error_details_pb2
from google.rpc import status_pb2
from google.rpc.context import attribute_context_pb2
from google.longrunning import operations_pb2
from google.logging.v2 import logging_metrics_pb2
from google.logging.v2 import log_entry_pb2
from google.logging.v2 import logging_config_pb2
from google.logging.v2 import logging_pb2
from google.logging.type import http_request_pb2
from google.logging.type import log_severity_pb2
from google.type import calendar_period_pb2
from google.type import datetime_pb2
from google.type import color_pb2
from google.type import phone_number_pb2
from google.type import money_pb2
from google.type import timeofday_pb2
from google.type import decimal_pb2
from google.type import postal_address_pb2
from google.type import date_pb2
from google.type import expr_pb2
from google.type import interval_pb2
from google.type import localized_text_pb2
from google.type import dayofweek_pb2
from google.type import quaternion_pb2
from google.type import month_pb2
from google.type import latlng_pb2
from google.type import fraction_pb2
from validate import validate_pb2
from opencensus.proto.metrics.v1 import metrics_pb2
from opencensus.proto.agent.common.v1 import common_pb2
from opencensus.proto.agent.metrics.v1 import metrics_service_pb2
from opencensus.proto.agent.trace.v1 import trace_service_pb2
from opencensus.proto.trace.v1 import trace_config_pb2
from opencensus.proto.trace.v1 import trace_pb2
from opencensus.proto.stats.v1 import stats_pb2
from opencensus.proto.resource.v1 import resource_pb2
from opentelemetry.proto.common.v1 import common_pb2
from opentelemetry.proto.metrics.v1 import metrics_pb2
from opentelemetry.proto.metrics.experimental import metrics_config_service_pb2
from opentelemetry.proto.trace.v1 import trace_config_pb2
from opentelemetry.proto.trace.v1 import trace_pb2
from opentelemetry.proto.logs.v1 import logs_pb2
from opentelemetry.proto.collector.metrics.v1 import metrics_service_pb2
from opentelemetry.proto.collector.trace.v1 import trace_service_pb2
from opentelemetry.proto.collector.logs.v1 import logs_service_pb2
from opentelemetry.proto.resource.v1 import resource_pb2

@@ -1,13 +0,0 @@
# Copyright 2020 gRPC authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

@@ -1,535 +0,0 @@
# Copyright 2020 gRPC authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Provides an interface to an xDS Test Client running remotely.
"""
import datetime
import functools
import logging
import time
from typing import Iterable, List, Optional
import framework.errors
from framework.helpers import retryers
import framework.rpc
from framework.rpc import grpc_channelz
from framework.rpc import grpc_csds
from framework.rpc import grpc_testing
logger = logging.getLogger(__name__)
# Type aliases
_timedelta = datetime.timedelta
_LoadBalancerStatsServiceClient = grpc_testing.LoadBalancerStatsServiceClient
_XdsUpdateClientConfigureServiceClient = (
grpc_testing.XdsUpdateClientConfigureServiceClient
)
_ChannelzServiceClient = grpc_channelz.ChannelzServiceClient
_ChannelzChannel = grpc_channelz.Channel
_ChannelzChannelData = grpc_channelz.ChannelData
_ChannelzChannelState = grpc_channelz.ChannelState
_ChannelzSubchannel = grpc_channelz.Subchannel
_ChannelzSocket = grpc_channelz.Socket
_CsdsClient = grpc_csds.CsdsClient
# Used in the get_load_balancer_stats request to request all metadata.
REQ_LB_STATS_METADATA_ALL = ("*",)
DEFAULT_TD_XDS_URI = "trafficdirector.googleapis.com:443"
class XdsTestClient(framework.rpc.grpc.GrpcApp):
"""
Represents the RPC services implemented in the Client component of the xDS test app.
https://github.com/grpc/grpc/blob/master/doc/xds-test-descriptions.md#client
"""
# A unique string identifying each client replica. Used in logging.
hostname: str
def __init__(
self,
*,
ip: str,
rpc_port: int,
server_target: str,
hostname: str,
rpc_host: Optional[str] = None,
maintenance_port: Optional[int] = None,
):
super().__init__(rpc_host=(rpc_host or ip))
self.ip = ip
self.rpc_port = rpc_port
self.server_target = server_target
self.maintenance_port = maintenance_port or rpc_port
self.hostname = hostname
@property
@functools.lru_cache(None)
def load_balancer_stats(self) -> _LoadBalancerStatsServiceClient:
return _LoadBalancerStatsServiceClient(
self._make_channel(self.rpc_port),
log_target=f"{self.hostname}:{self.rpc_port}",
)
@property
@functools.lru_cache(None)
def update_config(self):
return _XdsUpdateClientConfigureServiceClient(
self._make_channel(self.rpc_port),
log_target=f"{self.hostname}:{self.rpc_port}",
)
@property
@functools.lru_cache(None)
def channelz(self) -> _ChannelzServiceClient:
return _ChannelzServiceClient(
self._make_channel(self.maintenance_port),
log_target=f"{self.hostname}:{self.maintenance_port}",
)
@property
@functools.lru_cache(None)
def csds(self) -> _CsdsClient:
return _CsdsClient(
self._make_channel(self.maintenance_port),
log_target=f"{self.hostname}:{self.maintenance_port}",
)
def get_load_balancer_stats(
self,
*,
num_rpcs: int,
metadata_keys: Optional[tuple[str, ...]] = None,
timeout_sec: Optional[int] = None,
) -> grpc_testing.LoadBalancerStatsResponse:
"""
Shortcut to LoadBalancerStatsServiceClient.get_client_stats()
"""
return self.load_balancer_stats.get_client_stats(
num_rpcs=num_rpcs,
timeout_sec=timeout_sec,
metadata_keys=metadata_keys,
)
def get_load_balancer_accumulated_stats(
self,
*,
timeout_sec: Optional[int] = None,
) -> grpc_testing.LoadBalancerAccumulatedStatsResponse:
"""Shortcut to LoadBalancerStatsServiceClient.get_client_accumulated_stats()"""
return self.load_balancer_stats.get_client_accumulated_stats(
timeout_sec=timeout_sec
)
def wait_for_server_channel_ready(
self,
*,
timeout: Optional[_timedelta] = None,
rpc_deadline: Optional[_timedelta] = None,
) -> _ChannelzChannel:
"""Wait for the channel to the server to transition to READY.
Raises:
GrpcApp.NotFound: If the channel never transitioned to READY.
"""
try:
return self.wait_for_server_channel_state(
_ChannelzChannelState.READY,
timeout=timeout,
rpc_deadline=rpc_deadline,
)
except retryers.RetryError as retry_err:
if isinstance(retry_err.exception(), self.ChannelNotFound):
retry_err.add_note(
framework.errors.FrameworkError.note_blanket_error(
"The client couldn't connect to the server."
)
)
raise
def wait_for_active_xds_channel(
self,
*,
xds_server_uri: Optional[str] = None,
timeout: Optional[_timedelta] = None,
rpc_deadline: Optional[_timedelta] = None,
) -> _ChannelzChannel:
"""Wait until the xds channel is active or timeout.
Raises:
GrpcApp.NotFound: If the channel to the xDS server never transitioned to active.
"""
try:
return self.wait_for_xds_channel_active(
xds_server_uri=xds_server_uri,
timeout=timeout,
rpc_deadline=rpc_deadline,
)
except retryers.RetryError as retry_err:
if isinstance(retry_err.exception(), self.ChannelNotFound):
retry_err.add_note(
framework.errors.FrameworkError.note_blanket_error(
"The client couldn't connect to the xDS control plane."
)
)
raise
def get_active_server_channel_socket(self) -> _ChannelzSocket:
channel = self.find_server_channel_with_state(
_ChannelzChannelState.READY
)
# Get the first subchannel of the active channel to the server.
logger.debug(
(
"[%s] Retrieving client -> server socket, "
"channel_id: %s, subchannel: %s"
),
self.hostname,
channel.ref.channel_id,
channel.subchannel_ref[0].name,
)
subchannel, *subchannels = list(
self.channelz.list_channel_subchannels(channel)
)
if subchannels:
logger.warning(
"[%s] Unexpected subchannels: %r", self.hostname, subchannels
)
# Get the first socket of the subchannel
socket, *sockets = list(
self.channelz.list_subchannels_sockets(subchannel)
)
if sockets:
logger.warning(
"[%s] Unexpected sockets: %r", self.hostname, subchannels
)
logger.debug(
"[%s] Found client -> server socket: %s",
self.hostname,
socket.ref.name,
)
return socket
def wait_for_server_channel_state(
self,
state: _ChannelzChannelState,
*,
timeout: Optional[_timedelta] = None,
rpc_deadline: Optional[_timedelta] = None,
) -> _ChannelzChannel:
# When polling for a state, prefer smaller wait times to avoid
# exhausting all allowed time on a single long RPC.
if rpc_deadline is None:
rpc_deadline = _timedelta(seconds=30)
# Fine-tuned to wait for the channel to the server.
retryer = retryers.exponential_retryer_with_timeout(
wait_min=_timedelta(seconds=10),
wait_max=_timedelta(seconds=25),
timeout=_timedelta(minutes=5) if timeout is None else timeout,
)
logger.info(
"[%s] Waiting to report a %s channel to %s",
self.hostname,
_ChannelzChannelState.Name(state),
self.server_target,
)
channel = retryer(
self.find_server_channel_with_state,
state,
rpc_deadline=rpc_deadline,
)
logger.info(
"[%s] Channel to %s transitioned to state %s: %s",
self.hostname,
self.server_target,
_ChannelzChannelState.Name(state),
_ChannelzServiceClient.channel_repr(channel),
)
return channel
def wait_for_xds_channel_active(
self,
*,
xds_server_uri: Optional[str] = None,
timeout: Optional[_timedelta] = None,
rpc_deadline: Optional[_timedelta] = None,
) -> _ChannelzChannel:
if not xds_server_uri:
xds_server_uri = DEFAULT_TD_XDS_URI
# When polling for a state, prefer smaller wait times to avoid
# exhausting all allowed time on a single long RPC.
if rpc_deadline is None:
rpc_deadline = _timedelta(seconds=30)
retryer = retryers.exponential_retryer_with_timeout(
wait_min=_timedelta(seconds=10),
wait_max=_timedelta(seconds=25),
timeout=_timedelta(minutes=5) if timeout is None else timeout,
)
logger.info(
"[%s] ADS: Waiting for active calls to xDS control plane to %s",
self.hostname,
xds_server_uri,
)
channel = retryer(
self.find_active_xds_channel,
xds_server_uri=xds_server_uri,
rpc_deadline=rpc_deadline,
)
logger.info(
"[%s] ADS: Detected active calls to xDS control plane %s",
self.hostname,
xds_server_uri,
)
return channel
def find_active_xds_channel(
self,
xds_server_uri: str,
*,
rpc_deadline: Optional[_timedelta] = None,
) -> _ChannelzChannel:
rpc_params = {}
if rpc_deadline is not None:
rpc_params["deadline_sec"] = rpc_deadline.total_seconds()
for channel in self.find_channels(xds_server_uri, **rpc_params):
logger.info(
"[%s] xDS control plane channel: %s",
self.hostname,
_ChannelzServiceClient.channel_repr(channel),
)
try:
channel_upd = self.check_channel_in_flight_calls(
channel, **rpc_params
)
logger.info(
"[%s] Detected active calls to xDS control plane %s,"
" channel: %s",
self.hostname,
xds_server_uri,
_ChannelzServiceClient.channel_repr(channel_upd),
)
return channel_upd
except self.NotFound:
# Not found: continue checking other channels to the
# same target.
continue
except framework.rpc.grpc.RpcError as err:
# Logged at 'info' and not at 'warning' because this method is
# expected to be called in a retryer. If this error eventually
# causes the retryer to fail, it will be logged fully at 'error'.
logger.info(
"[%s] Unexpected error while checking xDS control plane"
" channel %s: %r",
self.hostname,
_ChannelzServiceClient.channel_repr(channel),
err,
)
raise
raise self.ChannelNotActive(
f"[{self.hostname}] Client has no"
f" active channel with xDS control plane {xds_server_uri}",
src=self.hostname,
dst=xds_server_uri,
)
def find_server_channel_with_state(
self,
expected_state: _ChannelzChannelState,
*,
rpc_deadline: Optional[_timedelta] = None,
check_subchannel=True,
) -> _ChannelzChannel:
rpc_params = {}
if rpc_deadline is not None:
rpc_params["deadline_sec"] = rpc_deadline.total_seconds()
expected_state_name: str = _ChannelzChannelState.Name(expected_state)
target: str = self.server_target
for channel in self.find_channels(target, **rpc_params):
channel_state: _ChannelzChannelState = channel.data.state.state
logger.info(
"[%s] Server channel: %s",
self.hostname,
_ChannelzServiceClient.channel_repr(channel),
)
if channel_state is expected_state:
if check_subchannel:
# When requested, check if the channel has at least
# one subchannel in the requested state.
try:
subchannel = self.find_subchannel_with_state(
channel, expected_state, **rpc_params
)
logger.info(
"[%s] Found subchannel in state %s: %s",
self.hostname,
expected_state_name,
_ChannelzServiceClient.subchannel_repr(subchannel),
)
except self.NotFound as e:
# Otherwise, keep searching.
logger.info(e.message)
continue
return channel
raise self.ChannelNotFound(
f"[{self.hostname}] Client has no"
f" {expected_state_name} channel with server {target}",
src=self.hostname,
dst=target,
expected_state=expected_state,
)
def find_channels(
self,
target: str,
**rpc_params,
) -> Iterable[_ChannelzChannel]:
return self.channelz.find_channels_for_target(target, **rpc_params)
def find_subchannel_with_state(
self, channel: _ChannelzChannel, state: _ChannelzChannelState, **kwargs
) -> _ChannelzSubchannel:
subchannels = self.channelz.list_channel_subchannels(channel, **kwargs)
for subchannel in subchannels:
if subchannel.data.state.state is state:
return subchannel
raise self.NotFound(
f"[{self.hostname}] Not found "
f"a {_ChannelzChannelState.Name(state)} subchannel "
f"for channel_id {channel.ref.channel_id}"
)
def find_subchannels_with_state(
self, state: _ChannelzChannelState, **kwargs
) -> List[_ChannelzSubchannel]:
subchannels = []
for channel in self.channelz.find_channels_for_target(
self.server_target, **kwargs
):
for subchannel in self.channelz.list_channel_subchannels(
channel, **kwargs
):
if subchannel.data.state.state is state:
subchannels.append(subchannel)
return subchannels
def check_channel_in_flight_calls(
self,
channel: _ChannelzChannel,
*,
wait_between_checks: Optional[_timedelta] = None,
**rpc_params,
) -> Optional[_ChannelzChannel]:
"""Checks if the channel has calls that started, but didn't complete.
We consider the channel is active if channel is in READY state and
calls_started is greater than calls_failed.
This method address race where a call to the xDS control plane server
has just started and a channelz request comes in before the call has
had a chance to fail.
With channels to the xDS control plane, the channel can be READY but the
calls could be failing to initialize, f.e. due to a failure to fetch
OAUTH2 token. To increase the confidence that we have a valid channel
with working OAUTH2 tokens, we check whether the channel is in a READY
state with active calls twice with an interval of 2 seconds between the
two attempts. If the OAUTH2 token is not valid, the call would fail and
be caught in either the first attempt, or the second attempt. It is
possible that between the two attempts, a call fails and a new call is
started, so we also test for equality between the started calls of the
two channelz results.
There still exists a possibility that a call fails on fetching OAUTH2
token after 2 seconds (maybe because there is a slowdown in the
system.) If such a case is observed, consider increasing the interval
from 2 seconds to 5 seconds.
Returns updated channel on success, or None on failure.
"""
if not self.calc_calls_in_flight(channel):
return None
if not wait_between_checks:
wait_between_checks = _timedelta(seconds=2)
# Load the channel second time after the timeout.
time.sleep(wait_between_checks.total_seconds())
channel_upd: _ChannelzChannel = self.channelz.get_channel(
channel.ref.channel_id, **rpc_params
)
if (
not self.calc_calls_in_flight(channel_upd)
or channel.data.calls_started != channel_upd.data.calls_started
):
return None
return channel_upd
@classmethod
def calc_calls_in_flight(cls, channel: _ChannelzChannel) -> int:
cdata: _ChannelzChannelData = channel.data
if cdata.state.state is not _ChannelzChannelState.READY:
return 0
return cdata.calls_started - cdata.calls_succeeded - cdata.calls_failed
class ChannelNotFound(framework.rpc.grpc.GrpcApp.NotFound):
"""Channel with expected status not found"""
src: str
dst: str
expected_state: object
def __init__(
self,
message: str,
*,
src: str,
dst: str,
expected_state: _ChannelzChannelState,
**kwargs,
):
self.src = src
self.dst = dst
self.expected_state = expected_state
super().__init__(message, src, dst, expected_state, **kwargs)
class ChannelNotActive(framework.rpc.grpc.GrpcApp.NotFound):
"""No active channel was found"""
src: str
dst: str
def __init__(
self,
message: str,
*,
src: str,
dst: str,
**kwargs,
):
self.src = src
self.dst = dst
super().__init__(message, src, dst, **kwargs)
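
For orientation, a minimal usage sketch of the XdsTestClient API above. All addresses, ports, and names below are hypothetical; in the framework, the Kubernetes runners derive them from the deployed pods.

import datetime

# Hypothetical values; normally supplied by the client runner.
client = XdsTestClient(
    ip="10.0.0.1",
    rpc_port=8079,
    server_target="xds:///psm-grpc-server",
    hostname="psm-grpc-client-0",
)
# Poll channelz until a READY channel to the server is found
# (exponential backoff, 5-minute timeout by default).
channel = client.wait_for_server_channel_ready(
    timeout=datetime.timedelta(minutes=3)
)
# Ask the client to send 100 RPCs and report the per-peer distribution.
stats = client.get_load_balancer_stats(num_rpcs=100)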

@@ -1,13 +0,0 @@
# Copyright 2022 gRPC authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

@@ -1,105 +0,0 @@
# Copyright 2022 gRPC authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Common functionality for running xDS Test Client and Server remotely.
"""
from abc import ABCMeta
from abc import abstractmethod
import functools
import pathlib
import threading
from typing import Dict, Optional
import urllib.parse
from absl import flags
from framework import xds_flags
from framework.helpers import logs
flags.adopt_module_key_flags(logs)
_LOGS_SUBDIR = "test_app_logs"
class RunnerError(Exception):
"""Error running xDS Test App running remotely."""
class BaseRunner(metaclass=ABCMeta):
_logs_subdir: Optional[pathlib.Path] = None
_log_stop_event: Optional[threading.Event] = None
def __init__(self):
if xds_flags.COLLECT_APP_LOGS.value:
self._logs_subdir = logs.log_dir_mkdir(_LOGS_SUBDIR)
self._log_stop_event = threading.Event()
@property
@functools.lru_cache(None)
def should_collect_logs(self) -> bool:
return self._logs_subdir is not None
@property
@functools.lru_cache(None)
def logs_subdir(self) -> pathlib.Path:
if not self.should_collect_logs:
raise FileNotFoundError("Log collection is not enabled.")
return self._logs_subdir
@property
def log_stop_event(self) -> threading.Event:
if not self.should_collect_logs:
raise ValueError("Log collection is not enabled.")
return self._log_stop_event
def maybe_stop_logging(self):
if self.should_collect_logs and not self.log_stop_event.is_set():
self.log_stop_event.set()
@abstractmethod
def run(self, **kwargs):
pass
@abstractmethod
def cleanup(self, *, force=False):
pass
@classmethod
def _logs_explorer_link_from_params(
cls,
*,
gcp_ui_url: str,
gcp_project: str,
query: Dict[str, str],
request: Optional[Dict[str, str]] = None,
) -> str:
req_merged = {"query": cls._logs_explorer_query(query)}
if request is not None:
req_merged.update(request)
req = cls._logs_explorer_request(req_merged)
return f"https://{gcp_ui_url}/logs/query;{req}?project={gcp_project}"
@classmethod
def _logs_explorer_query(cls, query: Dict[str, str]) -> str:
return "\n".join(f'{k}="{v}"' for k, v in query.items())
@classmethod
def _logs_explorer_request(cls, req: Dict[str, str]) -> str:
return ";".join(
f"{k}={cls._logs_explorer_quote(v)}" for k, v in req.items()
)
@classmethod
def _logs_explorer_quote(cls, value: str) -> str:
return urllib.parse.quote_plus(value, safe=":")
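
As an illustration (not part of the deleted source), a self-contained sketch of how the helpers above compose a Logs Explorer link; the console host, project, and query values are hypothetical.

import urllib.parse

def logs_explorer_link(gcp_ui_url: str, gcp_project: str, query: dict) -> str:
    # Mirrors the class methods above: one key="value" line per query
    # entry, then the request dict percent-encoded (":" kept) and
    # joined with ";".
    query_str = "\n".join(f'{k}="{v}"' for k, v in query.items())
    req = ";".join(
        f"{k}={urllib.parse.quote_plus(v, safe=':')}"
        for k, v in {"query": query_str}.items()
    )
    return f"https://{gcp_ui_url}/logs/query;{req}?project={gcp_project}"

print(
    logs_explorer_link(
        "console.cloud.google.com",
        "example-project",
        {"resource.type": "k8s_container"},
    )
)
# -> https://console.cloud.google.com/logs/query;query=resource.type%3D%22k8s_container%22?project=example-project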

@@ -1,13 +0,0 @@
# Copyright 2022 gRPC authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

@@ -1,303 +0,0 @@
# Copyright 2023 gRPC authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Run xDS Test Server on Kubernetes using GAMMA
"""
import datetime
import logging
from typing import List, Optional
from framework.infrastructure import gcp
from framework.infrastructure import k8s
from framework.test_app.runners.k8s import k8s_xds_server_runner
from framework.test_app.server_app import XdsTestServer
logger = logging.getLogger(__name__)
KubernetesServerRunner = k8s_xds_server_runner.KubernetesServerRunner
class GammaServerRunner(KubernetesServerRunner):
# Mutable state.
route: Optional[k8s.GammaHttpRoute] = None
frontend_service: Optional[k8s.V1Service] = None
sa_filter: Optional[k8s.GcpSessionAffinityFilter] = None
sa_policy: Optional[k8s.GcpSessionAffinityPolicy] = None
be_policy: Optional[k8s.GcpBackendPolicy] = None
termination_grace_period_seconds: Optional[int] = None
pre_stop_hook: bool = False
route_name: str
frontend_service_name: str
def __init__(
self,
k8s_namespace: k8s.KubernetesNamespace,
frontend_service_name: str,
*,
deployment_name: str,
image_name: str,
td_bootstrap_image: str,
network: str = "default",
xds_server_uri: Optional[str] = None,
gcp_api_manager: gcp.api.GcpApiManager,
gcp_project: str,
gcp_service_account: str,
service_account_name: Optional[str] = None,
service_name: Optional[str] = None,
route_name: Optional[str] = None,
neg_name: Optional[str] = None,
deployment_template: str = "server.deployment.yaml",
service_account_template: str = "service-account.yaml",
service_template: str = "gamma/service.yaml",
reuse_service: bool = False,
reuse_namespace: bool = False,
namespace_template: Optional[str] = None,
debug_use_port_forwarding: bool = False,
enable_workload_identity: bool = True,
safilter_name: str = "ssa-filter",
sapolicy_name: str = "ssa-policy",
bepolicy_name: str = "backend-policy",
termination_grace_period_seconds: int = 0,
pre_stop_hook: bool = False,
):
# pylint: disable=too-many-locals
super().__init__(
k8s_namespace,
deployment_name=deployment_name,
image_name=image_name,
td_bootstrap_image=td_bootstrap_image,
network=network,
xds_server_uri=xds_server_uri,
gcp_api_manager=gcp_api_manager,
gcp_project=gcp_project,
gcp_service_account=gcp_service_account,
service_account_name=service_account_name,
service_name=service_name,
neg_name=neg_name,
deployment_template=deployment_template,
service_account_template=service_account_template,
service_template=service_template,
reuse_service=reuse_service,
reuse_namespace=reuse_namespace,
namespace_template=namespace_template,
debug_use_port_forwarding=debug_use_port_forwarding,
enable_workload_identity=enable_workload_identity,
)
self.frontend_service_name = frontend_service_name
self.route_name = route_name or f"route-{deployment_name}"
self.safilter_name = safilter_name
self.sapolicy_name = sapolicy_name
self.bepolicy_name = bepolicy_name
self.termination_grace_period_seconds = termination_grace_period_seconds
self.pre_stop_hook = pre_stop_hook
def run( # pylint: disable=arguments-differ
self,
*,
test_port: int = KubernetesServerRunner.DEFAULT_TEST_PORT,
maintenance_port: Optional[int] = None,
secure_mode: bool = False,
replica_count: int = 1,
log_to_stdout: bool = False,
bootstrap_version: Optional[str] = None,
route_template: str = "gamma/route_http.yaml",
enable_csm_observability: bool = False,
) -> List[XdsTestServer]:
if not maintenance_port:
maintenance_port = self._get_default_maintenance_port(secure_mode)
logger.info(
(
'Deploying GAMMA xDS test server "%s" to k8s namespace %s:'
" test_port=%s maintenance_port=%s secure_mode=%s"
" replica_count=%s"
),
self.deployment_name,
self.k8s_namespace.name,
test_port,
maintenance_port,
secure_mode,
replica_count,
)
# super(k8s_base_runner.KubernetesBaseRunner, self).run()
if self.reuse_namespace:
self.namespace = self._reuse_namespace()
if not self.namespace:
self.namespace = self._create_namespace(
self.namespace_template, namespace_name=self.k8s_namespace.name
)
# Reuse the existing service if requested; create a new one when missing.
# Useful for debugging, to avoid the NEG losing its relation to a deleted service.
if self.reuse_service:
self.service = self._reuse_service(self.service_name)
if not self.service:
self.service = self._create_service(
self.service_template,
service_name=self.service_name,
namespace_name=self.k8s_namespace.name,
deployment_name=self.deployment_name,
neg_name=self.gcp_neg_name,
test_port=test_port,
)
# Create the parentRef service.
self.frontend_service = self._create_service(
"gamma/frontend_service.yaml",
service_name=self.frontend_service_name,
namespace_name=self.k8s_namespace.name,
)
# Create the route.
self.route = self._create_gamma_route(
route_template,
route_name=self.route_name,
service_name=self.service_name,
namespace_name=self.k8s_namespace.name,
test_port=test_port,
frontend_service_name=self.frontend_service_name,
)
if self.enable_workload_identity:
# Allow Kubernetes service account to use the GCP service account
# identity.
self._grant_workload_identity_user(
gcp_iam=self.gcp_iam,
gcp_service_account=self.gcp_service_account,
service_account_name=self.service_account_name,
)
# Create service account
self.service_account = self._create_service_account(
self.service_account_template,
service_account_name=self.service_account_name,
namespace_name=self.k8s_namespace.name,
gcp_service_account=self.gcp_service_account,
)
# Always create a new deployment
self.deployment = self._create_deployment(
self.deployment_template,
deployment_name=self.deployment_name,
image_name=self.image_name,
namespace_name=self.k8s_namespace.name,
service_account_name=self.service_account_name,
td_bootstrap_image=self.td_bootstrap_image,
xds_server_uri=self.xds_server_uri,
network=self.network,
replica_count=replica_count,
test_port=test_port,
maintenance_port=maintenance_port,
secure_mode=secure_mode,
bootstrap_version=bootstrap_version,
termination_grace_period_seconds=self.termination_grace_period_seconds,
pre_stop_hook=self.pre_stop_hook,
enable_csm_observability=enable_csm_observability,
)
servers = self._make_servers_for_deployment(
replica_count,
test_port=test_port,
maintenance_port=maintenance_port,
log_to_stdout=log_to_stdout,
secure_mode=secure_mode,
)
        # The controller will not populate the NEGs until there are
        # endpoint slices. For this reason, we run this check after the
        # servers are created, and increase the wait time from 1 minute to 3.
self._wait_service_neg_status_annotation(
self.service_name,
test_port,
timeout_sec=datetime.timedelta(minutes=3).total_seconds(),
)
return servers
def createSessionAffinityPolicy(self, manifest):
self.sa_policy = self._create_session_affinity_policy(
manifest,
session_affinity_policy_name=self.sapolicy_name,
namespace_name=self.k8s_namespace.name,
route_name=self.route_name,
service_name=self.service_name,
)
def createSessionAffinityFilter(self):
self.sa_filter = self._create_session_affinity_filter(
"gamma/session_affinity_filter.yaml",
session_affinity_filter_name=self.safilter_name,
namespace_name=self.k8s_namespace.name,
)
def createBackendPolicy(self):
self.be_policy = self._create_backend_policy(
"gamma/backend_policy.yaml",
be_policy_name=self.bepolicy_name,
namespace_name=self.k8s_namespace.name,
service_name=self.service_name,
)
# pylint: disable=arguments-differ
def cleanup(self, *, force=False, force_namespace=False):
try:
if self.route or force:
self._delete_gamma_route(self.route_name)
self.route = None
if self.frontend_service or force:
self._delete_service(self.frontend_service_name)
self.frontend_service = None
if (self.service and not self.reuse_service) or force:
self._delete_service(self.service_name)
self.service = None
if self.deployment or force:
self._delete_deployment(self.deployment_name)
self.deployment = None
if self.sa_policy or force:
self._delete_session_affinity_policy(self.sapolicy_name)
self.sa_policy = None
if self.sa_filter or force:
self._delete_session_affinity_filter(self.safilter_name)
self.sa_filter = None
if self.be_policy or force:
self._delete_backend_policy(self.bepolicy_name)
self.be_policy = None
if self.enable_workload_identity and (
self.service_account or force
):
self._revoke_workload_identity_user(
gcp_iam=self.gcp_iam,
gcp_service_account=self.gcp_service_account,
service_account_name=self.service_account_name,
)
self._delete_service_account(self.service_account_name)
self.service_account = None
self._cleanup_namespace(force=(force_namespace and force))
finally:
self._stop()
# pylint: enable=arguments-differ

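For orientation, here is a minimal usage sketch of the GAMMA runner above, assuming an already-constructed instance (constructor arguments elided) and a hypothetical session-affinity policy manifest path. run() provisions the namespace, Service, HTTPRoute, and Deployment; cleanup() tears them down along with any session-affinity resources.

# Sketch only: `runner` stands for a constructed GAMMA server runner instance;
# the policy manifest path below is hypothetical.
servers = runner.run(replica_count=1)
runner.createSessionAffinityFilter()
runner.createSessionAffinityPolicy("gamma/session_affinity_policy_route.yaml")
runner.createBackendPolicy()
try:
    servers[0].set_serving()
finally:
    runner.cleanup(force_namespace=True)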
@@ -1,953 +0,0 @@
# Copyright 2022 gRPC authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Common functionality for running xDS Test Client and Server on Kubernetes.
"""
from abc import ABCMeta
import contextlib
import dataclasses
import datetime
import logging
import pathlib
from typing import List, Optional
import mako.template
import yaml
from framework.helpers import retryers
import framework.helpers.datetime
import framework.helpers.highlighter
import framework.helpers.rand
from framework.infrastructure import gcp
from framework.infrastructure import k8s
from framework.test_app.runners import base_runner
logger = logging.getLogger(__name__)
# Type aliases
_RunnerError = base_runner.RunnerError
_HighlighterYaml = framework.helpers.highlighter.HighlighterYaml
_helper_datetime = framework.helpers.datetime
_datetime = datetime.datetime
_timedelta = datetime.timedelta
@dataclasses.dataclass(frozen=True)
class RunHistory:
deployment_id: str
time_start_requested: _datetime
time_start_completed: Optional[_datetime]
time_stopped: _datetime
class KubernetesBaseRunner(base_runner.BaseRunner, metaclass=ABCMeta):
# Pylint wants abstract classes to override abstract methods.
# pylint: disable=abstract-method
TEMPLATE_DIR_NAME = "kubernetes-manifests"
TEMPLATE_DIR_RELATIVE_PATH = f"../../../../{TEMPLATE_DIR_NAME}"
ROLE_WORKLOAD_IDENTITY_USER = "roles/iam.workloadIdentityUser"
pod_port_forwarders: List[k8s.PortForwarder]
pod_log_collectors: List[k8s.PodLogCollector]
# Required fields.
k8s_namespace: k8s.KubernetesNamespace
deployment_name: str
image_name: str
gcp_project: str
gcp_service_account: str
gcp_ui_url: str
# Fields with default values.
namespace_template: str = "namespace.yaml"
reuse_namespace: bool = False
# Mutable state. Describes the current run.
namespace: Optional[k8s.V1Namespace] = None
deployment: Optional[k8s.V1Deployment] = None
deployment_id: Optional[str] = None
service_account: Optional[k8s.V1ServiceAccount] = None
time_start_requested: Optional[_datetime] = None
time_start_completed: Optional[_datetime] = None
time_stopped: Optional[_datetime] = None
# The history of all runs performed by this runner.
run_history: List[RunHistory]
def __init__(
self,
k8s_namespace: k8s.KubernetesNamespace,
*,
deployment_name: str,
image_name: str,
gcp_project: str,
gcp_service_account: str,
gcp_ui_url: str,
namespace_template: Optional[str] = "namespace.yaml",
reuse_namespace: bool = False,
):
super().__init__()
# Required fields.
self.deployment_name = deployment_name
self.image_name = image_name
self.gcp_project = gcp_project
# Maps GCP service account to Kubernetes service account
self.gcp_service_account = gcp_service_account
self.gcp_ui_url = gcp_ui_url
# Kubernetes namespace resources manager.
self.k8s_namespace = k8s_namespace
if namespace_template:
self.namespace_template = namespace_template
self.reuse_namespace = reuse_namespace
# Mutable state
self.run_history = []
self.pod_port_forwarders = []
self.pod_log_collectors = []
# Highlighter.
self._highlighter = _HighlighterYaml()
def run(self, **kwargs):
del kwargs
if not self.time_stopped and self.time_start_requested:
if self.time_start_completed:
raise RuntimeError(
f"Deployment {self.deployment_name}: has already been"
f" started at {self.time_start_completed.isoformat()}"
)
else:
raise RuntimeError(
f"Deployment {self.deployment_name}: start has already been"
f" requested at {self.time_start_requested.isoformat()}"
)
self._reset_state()
self.time_start_requested = _datetime.now()
self.logs_explorer_link()
if self.reuse_namespace:
self.namespace = self._reuse_namespace()
if not self.namespace:
self.namespace = self._create_namespace(
self.namespace_template, namespace_name=self.k8s_namespace.name
)
def _start_completed(self):
self.time_start_completed = _datetime.now()
def _stop(self):
self.time_stopped = _datetime.now()
if self.time_start_requested and self.deployment_id:
run_history = RunHistory(
deployment_id=self.deployment_id,
time_start_requested=self.time_start_requested,
time_start_completed=self.time_start_completed,
time_stopped=self.time_stopped,
)
self.run_history.append(run_history)
def _reset_state(self):
"""Reset the mutable state of the previous run."""
if self.pod_port_forwarders:
logger.warning(
"Port forwarders weren't cleaned up from the past run: %s",
len(self.pod_port_forwarders),
)
if self.pod_log_collectors:
logger.warning(
"Pod log collectors weren't cleaned up from the past run: %s",
len(self.pod_log_collectors),
)
self.namespace = None
self.deployment = None
self.deployment_id = None
self.service_account = None
self.time_start_requested = None
self.time_start_completed = None
self.time_stopped = None
self.pod_port_forwarders = []
self.pod_log_collectors = []
def _cleanup_namespace(self, *, force=False):
if (self.namespace and not self.reuse_namespace) or force:
self.delete_namespace()
self.namespace = None
def stop_pod_dependencies(self, *, log_drain_sec: int = 0):
        # Signal to stop logging early so that less drain time is needed.
self.maybe_stop_logging()
# Stop port forwarders if any.
for pod_port_forwarder in self.pod_port_forwarders:
pod_port_forwarder.close()
self.pod_port_forwarders = []
for pod_log_collector in self.pod_log_collectors:
if log_drain_sec > 0 and not pod_log_collector.drain_event.is_set():
logger.info(
"Draining logs for %s, timeout %i sec",
pod_log_collector.pod_name,
log_drain_sec,
)
# The close will happen normally at the next message.
pod_log_collector.drain_event.wait(timeout=log_drain_sec)
# Note this will be called from the main thread and may cause
# a race for the log file. Still, at least it'll flush the buffers.
pod_log_collector.flush()
self.pod_log_collectors = []
def get_pod_restarts(self, deployment: k8s.V1Deployment) -> int:
if not self.k8s_namespace or not deployment:
return 0
total_restart: int = 0
pods: List[k8s.V1Pod] = self.k8s_namespace.list_deployment_pods(
deployment
)
for pod in pods:
total_restart += sum(
status.restart_count for status in pod.status.container_statuses
)
return total_restart
@classmethod
def _render_template(cls, template_file, **kwargs):
template = mako.template.Template(filename=str(template_file))
return template.render(**kwargs)
@classmethod
def _manifests_from_yaml_file(cls, yaml_file):
with open(yaml_file) as f:
with contextlib.closing(yaml.safe_load_all(f)) as yml:
for manifest in yml:
yield manifest
@classmethod
def _manifests_from_str(cls, document):
with contextlib.closing(yaml.safe_load_all(document)) as yml:
for manifest in yml:
yield manifest
@classmethod
def _template_file_from_name(cls, template_name):
templates_path = (
pathlib.Path(__file__).parent / cls.TEMPLATE_DIR_RELATIVE_PATH
)
return templates_path.joinpath(template_name).resolve()
def _create_from_template(
self,
template_name,
*,
custom_object: bool = False,
**kwargs,
) -> object:
template_file = self._template_file_from_name(template_name)
logger.debug("Loading k8s manifest template: %s", template_file)
yaml_doc = self._render_template(template_file, **kwargs)
logger.info(
"Rendered template %s/%s:\n%s",
self.TEMPLATE_DIR_NAME,
template_name,
self._highlighter.highlight(yaml_doc),
)
manifests = self._manifests_from_str(yaml_doc)
manifest = next(manifests)
# Error out on multi-document yaml
if next(manifests, False):
raise _RunnerError(
f"Exactly one document expected in manifest {template_file}"
)
k8s_object = self.k8s_namespace.create_single_resource(
manifest,
custom_object=custom_object,
)
logger.info("%s %s created", k8s_object.kind, k8s_object.metadata.name)
return k8s_object
def _reuse_deployment(self, deployment_name) -> k8s.V1Deployment:
deployment = self.k8s_namespace.get_deployment(deployment_name)
# TODO(sergiitk): check if good or must be recreated
return deployment
def _reuse_service(self, service_name) -> k8s.V1Service:
service = self.k8s_namespace.get_service(service_name)
logger.info("Reusing service: %s", service_name)
# TODO(sergiitk): check if good or must be recreated
return service
def _reuse_namespace(self) -> k8s.V1Namespace:
logger.info("Reusing namespace: %s", self.k8s_namespace.name)
return self.k8s_namespace.get()
def _create_pod_monitoring(self, template, **kwargs) -> None:
if not kwargs["namespace_name"]:
raise _RunnerError(
"namespace_name required to create PodMonitoring resource"
)
if not kwargs["deployment_id"]:
raise _RunnerError(
"deployment_id required to create PodMonitoring resource"
)
if not kwargs["pod_monitoring_name"]:
raise _RunnerError(
"pod_monitoring_name required to create PodMonitoring resource"
)
pod_monitoring = self._create_from_template(
template, custom_object=True, **kwargs
)
if pod_monitoring.metadata.namespace != kwargs["namespace_name"]:
raise _RunnerError(
"PodMonitoring resource created with unexpected namespace: "
f"{pod_monitoring.metadata.namespace}"
)
logger.debug(
"PodMonitoring %s created at %s",
pod_monitoring.metadata.name,
pod_monitoring.metadata.creation_timestamp,
)
return pod_monitoring
def _create_namespace(self, template, **kwargs) -> k8s.V1Namespace:
namespace = self._create_from_template(template, **kwargs)
if not isinstance(namespace, k8s.V1Namespace):
raise _RunnerError(
f"Expected V1Namespace to be created from manifest {template}"
)
if namespace.metadata.name != kwargs["namespace_name"]:
raise _RunnerError(
"V1Namespace created with unexpected name: "
f"{namespace.metadata.name}"
)
logger.debug(
"V1Namespace %s created at %s",
namespace.metadata.self_link,
namespace.metadata.creation_timestamp,
)
return namespace
@classmethod
def _get_workload_identity_member_name(
cls, project, namespace_name, service_account_name
):
"""
Returns workload identity member name used to authenticate Kubernetes
service accounts.
https://cloud.google.com/kubernetes-engine/docs/how-to/workload-identity
"""
return (
f"serviceAccount:{project}.svc.id.goog"
f"[{namespace_name}/{service_account_name}]"
)
def _grant_workload_identity_user(
self, *, gcp_iam, gcp_service_account, service_account_name
):
workload_identity_member = self._get_workload_identity_member_name(
gcp_iam.project, self.k8s_namespace.name, service_account_name
)
logger.info(
"Granting %s to %s for GCP Service Account %s",
self.ROLE_WORKLOAD_IDENTITY_USER,
workload_identity_member,
gcp_service_account,
)
gcp_iam.add_service_account_iam_policy_binding(
gcp_service_account,
self.ROLE_WORKLOAD_IDENTITY_USER,
workload_identity_member,
)
def _revoke_workload_identity_user(
self, *, gcp_iam, gcp_service_account, service_account_name
):
workload_identity_member = self._get_workload_identity_member_name(
gcp_iam.project, self.k8s_namespace.name, service_account_name
)
logger.info(
"Revoking %s from %s for GCP Service Account %s",
self.ROLE_WORKLOAD_IDENTITY_USER,
workload_identity_member,
gcp_service_account,
)
try:
gcp_iam.remove_service_account_iam_policy_binding(
gcp_service_account,
self.ROLE_WORKLOAD_IDENTITY_USER,
workload_identity_member,
)
except gcp.api.Error as error:
            logger.warning(
                "Failed to revoke %s from %s for GCP Service Account %s: %r",
                self.ROLE_WORKLOAD_IDENTITY_USER,
                workload_identity_member,
                gcp_service_account,
                error,
            )
def _create_service_account(
self, template, **kwargs
) -> k8s.V1ServiceAccount:
resource = self._create_from_template(template, **kwargs)
if not isinstance(resource, k8s.V1ServiceAccount):
raise _RunnerError(
"Expected V1ServiceAccount to be created "
f"from manifest {template}"
)
if resource.metadata.name != kwargs["service_account_name"]:
raise _RunnerError(
"V1ServiceAccount created with unexpected name: "
f"{resource.metadata.name}"
)
logger.debug(
"V1ServiceAccount %s created at %s",
resource.metadata.self_link,
resource.metadata.creation_timestamp,
)
return resource
def delete_pod_async(self, pod_name: str):
logger.info(
"Initiating deletion of pod %s in namespace %s",
pod_name,
self.k8s_namespace.name,
)
self.k8s_namespace.delete_pod_async(pod_name)
def _create_deployment(self, template, **kwargs) -> k8s.V1Deployment:
# Not making deployment_name an explicit kwarg to be consistent with
# the rest of the _create_* methods, which pass kwargs as-is
# to _create_from_template(), so that the kwargs dict is unpacked into
# template variables and their values.
if "deployment_name" not in kwargs:
raise TypeError(
"Missing required keyword-only argument: deployment_name"
)
# Automatically apply random deployment_id to use in the matchLabels
# to prevent selecting pods in the same namespace belonging to
# a different deployment.
if "deployment_id" not in kwargs:
rand_id: str = framework.helpers.rand.rand_string(lowercase=True)
            # Fun edge case: when rand_string() happens to generate digits
            # only, yaml interprets the deployment_id label value as an
            # integer, but k8s expects label values to be strings, and
            # responds with a barely readable 400 Bad Request error:
            # 'ReadString: expects \" or n, but found 9, error found in
            # #10 byte of ...|ent_id'.
            # Prepending the deployment name forces deployment_id into
            # a string, and also makes it a better description.
self.deployment_id = f'{kwargs["deployment_name"]}-{rand_id}'
kwargs["deployment_id"] = self.deployment_id
else:
self.deployment_id = kwargs["deployment_id"]
deployment = self._create_from_template(template, **kwargs)
if not isinstance(deployment, k8s.V1Deployment):
raise _RunnerError(
f"Expected V1Deployment to be created from manifest {template}"
)
if deployment.metadata.name != kwargs["deployment_name"]:
raise _RunnerError(
"V1Deployment created with unexpected name: "
f"{deployment.metadata.name}"
)
logger.debug(
"V1Deployment %s created at %s",
deployment.metadata.self_link,
deployment.metadata.creation_timestamp,
)
return deployment
def _create_gamma_route(self, template, **kwargs) -> k8s.GammaHttpRoute:
route = self._create_from_template(
template,
custom_object=True,
**kwargs,
)
if not (
isinstance(route, k8s.GammaHttpRoute) and route.kind == "HTTPRoute"
):
raise _RunnerError(
f"Expected ResourceInstance[HTTPRoute] to be created from"
f" manifest {template}"
)
if route.metadata.name != kwargs["route_name"]:
raise _RunnerError(
"ResourceInstance[HTTPRoute] created with unexpected name: "
f"{route.metadata.name}"
)
logger.debug(
"ResourceInstance[HTTPRoute] %s created at %s",
route.metadata.name,
route.metadata.creation_timestamp,
)
return route
def _create_session_affinity_policy(
self, template, **kwargs
) -> k8s.GcpSessionAffinityPolicy:
saPolicy = self._create_from_template(
template,
custom_object=True,
**kwargs,
)
if not (
isinstance(saPolicy, k8s.GcpSessionAffinityPolicy)
and saPolicy.kind == "GCPSessionAffinityPolicy"
):
raise _RunnerError(
f"Expected ResourceInstance[GCPSessionAffinityPolicy] to be"
f" created from manifest {template}"
)
if saPolicy.metadata.name != kwargs["session_affinity_policy_name"]:
raise _RunnerError(
"ResourceInstance[GCPSessionAffinityPolicy] created with"
f" unexpected name: {saPolicy.metadata.name}"
)
logger.debug(
"ResourceInstance[GCPSessionAffinityPolicy] %s created at %s",
saPolicy.metadata.name,
saPolicy.metadata.creation_timestamp,
)
return saPolicy
def _create_session_affinity_filter(
self, template, **kwargs
) -> k8s.GcpSessionAffinityFilter:
saFilter = self._create_from_template(
template,
custom_object=True,
**kwargs,
)
if not (
isinstance(saFilter, k8s.GcpSessionAffinityFilter)
and saFilter.kind == "GCPSessionAffinityFilter"
):
raise _RunnerError(
f"Expected ResourceInstance[GCPSessionAffinityFilter] to be"
f" created from manifest {template}"
)
if saFilter.metadata.name != kwargs["session_affinity_filter_name"]:
raise _RunnerError(
"ResourceInstance[GCPSessionAffinityFilter] created with"
f" unexpected name: {saFilter.metadata.name}"
)
logger.debug(
"ResourceInstance[GCPSessionAffinityFilter] %s created at %s",
saFilter.metadata.name,
saFilter.metadata.creation_timestamp,
)
return saFilter
def _create_backend_policy(
self, template, **kwargs
) -> k8s.GcpBackendPolicy:
be_policy = self._create_from_template(
template,
custom_object=True,
**kwargs,
)
if not (
isinstance(be_policy, k8s.GcpBackendPolicy)
and be_policy.kind == "GCPBackendPolicy"
):
raise _RunnerError(
f"Expected ResourceInstance[GCPBackendPolicy] to be"
f" created from manifest {template}"
)
if be_policy.metadata.name != kwargs["be_policy_name"]:
raise _RunnerError(
"ResourceInstance[GCPBackendPolicy] created with"
f" unexpected name: {be_policy.metadata.name}"
)
logger.debug(
"ResourceInstance[GCPBackendPolicy] %s created at %s",
be_policy.metadata.name,
be_policy.metadata.creation_timestamp,
)
return be_policy
def _create_service(self, template, **kwargs) -> k8s.V1Service:
service = self._create_from_template(template, **kwargs)
if not isinstance(service, k8s.V1Service):
raise _RunnerError(
f"Expected V1Service to be created from manifest {template}"
)
if service.metadata.name != kwargs["service_name"]:
raise _RunnerError(
"V1Service created with unexpected name: "
f"{service.metadata.name}"
)
logger.debug(
"V1Service %s created at %s",
service.metadata.self_link,
service.metadata.creation_timestamp,
)
return service
def _delete_gamma_route(self, name, wait_for_deletion=True):
logger.info("Deleting HTTPRoute %s", name)
try:
self.k8s_namespace.delete_gamma_route(name)
except k8s.NotFound:
logger.debug(
"HTTPRoute %s not deleted since it doesn't exist", name
)
return
except retryers.RetryError as e:
logger.warning("HTTPRoute %s deletion failed: %s", name, e)
return
if wait_for_deletion:
self.k8s_namespace.wait_for_get_gamma_route_deleted(name)
logger.info("HTTPRoute %s deleted", name)
def _delete_session_affinity_policy(self, name, wait_for_deletion=True):
logger.info("Deleting GCPSessionAffinityPolicy %s", name)
try:
self.k8s_namespace.delete_session_affinity_policy(name)
except k8s.NotFound:
logger.debug(
"GCPSessionAffinityPolicy %s not deleted since it"
" doesn't exist",
name,
)
return
except retryers.RetryError as e:
logger.warning(
"GCPSessionAffinityPolicy %s deletion failed: %s", name, e
)
return
if wait_for_deletion:
self.k8s_namespace.wait_for_get_session_affinity_policy_deleted(
name
)
logger.info("GCPSessionAffinityPolicy %s deleted", name)
def _delete_session_affinity_filter(self, name, wait_for_deletion=True):
logger.info("Deleting GCPSessionAffinityFilter %s", name)
try:
self.k8s_namespace.delete_session_affinity_filter(name)
except k8s.NotFound:
logger.debug(
"GCPSessionAffinityFilter %s not deleted since it"
" doesn't exist",
name,
)
return
except retryers.RetryError as e:
logger.warning(
"GCPSessionAffinityFilter %s deletion failed: %s", name, e
)
return
if wait_for_deletion:
self.k8s_namespace.wait_for_get_session_affinity_filter_deleted(
name
)
logger.info("GCPSessionAffinityFilter %s deleted", name)
def _delete_backend_policy(self, name, wait_for_deletion=True):
logger.info("Deleting GCPBackendPolicy %s", name)
try:
self.k8s_namespace.delete_backend_policy(name)
except k8s.NotFound:
            logger.debug(
                "GCPBackendPolicy %s not deleted since it doesn't exist", name
            )
return
except retryers.RetryError as e:
logger.warning("GGCPBackendPolicy %s deletion failed: %s", name, e)
return
if wait_for_deletion:
self.k8s_namespace.wait_for_get_backend_policy_deleted(name)
logger.info("GCPBackendPolicy %s deleted", name)
def _delete_deployment(self, name, wait_for_deletion=True):
logger.info("Deleting deployment %s", name)
self.stop_pod_dependencies()
try:
self.k8s_namespace.delete_deployment(name)
except k8s.NotFound:
logger.debug(
"Deployment %s not deleted since it doesn't exist", name
)
return
except retryers.RetryError as e:
logger.warning("Deployment %s deletion failed: %s", name, e)
return
if wait_for_deletion:
self.k8s_namespace.wait_for_deployment_deleted(name)
logger.info("Deployment %s deleted", name)
def _delete_service(self, name, wait_for_deletion=True):
logger.info("Deleting service %s", name)
try:
self.k8s_namespace.delete_service(name)
except k8s.NotFound:
logger.debug("Service %s not deleted since it doesn't exist", name)
return
except retryers.RetryError as e:
logger.warning("Service %s deletion failed: %s", name, e)
return
if wait_for_deletion:
self.k8s_namespace.wait_for_service_deleted(name)
logger.info("Service %s deleted", name)
def _delete_service_account(self, name, wait_for_deletion=True):
logger.info("Deleting service account %s", name)
try:
self.k8s_namespace.delete_service_account(name)
except k8s.NotFound:
logger.debug(
"Service account %s not deleted since it doesn't exist", name
)
return
except retryers.RetryError as e:
logger.warning("Service account %s deletion failed: %s", name, e)
return
if wait_for_deletion:
self.k8s_namespace.wait_for_service_account_deleted(name)
logger.info("Service account %s deleted", name)
def delete_namespace(self, wait_for_deletion=True):
logger.info("Deleting namespace %s", self.k8s_namespace.name)
try:
self.k8s_namespace.delete()
except k8s.NotFound:
logger.debug(
"Namespace %s not deleted since it doesn't exist",
self.k8s_namespace.name,
)
return
except retryers.RetryError as e:
logger.warning(
"Namespace %s deletion failed: %s", self.k8s_namespace.name, e
)
return
if wait_for_deletion:
self.k8s_namespace.wait_for_namespace_deleted()
logger.info("Namespace %s deleted", self.k8s_namespace.name)
def _wait_deployment_with_available_replicas(self, name, count=1, **kwargs):
logger.info(
"Waiting for deployment %s to report %s available replica(s)",
name,
count,
)
self.k8s_namespace.wait_for_deployment_available_replicas(
name, count, **kwargs
)
deployment = self.k8s_namespace.get_deployment(name)
logger.info(
"Deployment %s has %i replicas available",
deployment.metadata.name,
deployment.status.available_replicas,
)
def _wait_deployment_pod_count(
self, deployment: k8s.V1Deployment, count: int = 1, **kwargs
) -> List[str]:
logger.info(
"Waiting for deployment %s to initialize %s pod(s)",
deployment.metadata.name,
count,
)
self.k8s_namespace.wait_for_deployment_replica_count(
deployment, count, **kwargs
)
pods = self.k8s_namespace.list_deployment_pods(deployment)
pod_names = [pod.metadata.name for pod in pods]
logger.info(
"Deployment %s initialized %i pod(s): %s",
deployment.metadata.name,
count,
pod_names,
)
# Pods may not be started yet, just return the names.
return pod_names
def _wait_pod_started(self, name, **kwargs) -> k8s.V1Pod:
logger.info("Waiting for pod %s to start", name)
self.k8s_namespace.wait_for_pod_started(name, **kwargs)
pod = self.k8s_namespace.get_pod(name)
logger.info(
"Pod %s ready, IP: %s", pod.metadata.name, pod.status.pod_ip
)
return pod
def _start_port_forwarding_pod(
self, pod: k8s.V1Pod, remote_port: int
) -> k8s.PortForwarder:
logger.info(
"LOCAL DEV MODE: Enabling port forwarding to %s:%s",
pod.status.pod_ip,
remote_port,
)
port_forwarder = self.k8s_namespace.port_forward_pod(pod, remote_port)
self.pod_port_forwarders.append(port_forwarder)
return port_forwarder
def _start_logging_pod(
self, pod: k8s.V1Pod, *, log_to_stdout: bool = False
) -> k8s.PodLogCollector:
pod_name = pod.metadata.name
logfile_name = f"{self.k8s_namespace.name}_{pod_name}.log"
log_path = self.logs_subdir / logfile_name
logger.info(
"Enabling log collection from pod %s to %s",
pod_name,
log_path.relative_to(self.logs_subdir.parent.parent),
)
pod_log_collector = self.k8s_namespace.pod_start_logging(
pod_name=pod_name,
log_path=log_path,
log_stop_event=self.log_stop_event,
log_to_stdout=log_to_stdout,
# Timestamps are enabled because not all language implementations
# include them.
# TODO(sergiitk): Make this setting language-specific.
log_timestamps=True,
)
self.pod_log_collectors.append(pod_log_collector)
return pod_log_collector
def _wait_service_neg_status_annotation(
self,
service_name: str,
service_port: int,
**kwargs,
) -> None:
logger.info(
"Waiting for '%s' annotation for a NEG at port %s to be assigned to"
" Kubernetes Service %s in namespace %s",
self.k8s_namespace.NEG_STATUS_ANNOTATION,
service_port,
service_name,
self.k8s_namespace.name,
)
self.k8s_namespace.wait_for_service_neg_status_annotation(
service_name, **kwargs
)
neg_name, neg_zones = self.k8s_namespace.parse_service_neg_status(
service_name, service_port
)
logger.info(
"Detected '%s' annotation for Kubernetes Service %s, namespace %s:"
" neg_name=%s, port=%s, zones=%s",
self.k8s_namespace.NEG_STATUS_ANNOTATION,
service_name,
self.k8s_namespace.name,
neg_name,
service_port,
neg_zones,
)
def logs_explorer_link(self):
"""Prints GCP Logs Explorer link to all runs of the deployment."""
self._logs_explorer_link(
deployment_name=self.deployment_name,
namespace_name=self.k8s_namespace.name,
gcp_project=self.gcp_project,
gcp_ui_url=self.gcp_ui_url,
)
def logs_explorer_run_history_links(self):
"""Prints a separate GCP Logs Explorer link for each run *completed* by
the runner.
This excludes the current run, if it hasn't been completed.
"""
if not self.run_history:
logger.info("No completed deployments of %s", self.deployment_name)
return
for run in self.run_history:
self._logs_explorer_link(
deployment_name=self.deployment_name,
namespace_name=self.k8s_namespace.name,
gcp_project=self.gcp_project,
gcp_ui_url=self.gcp_ui_url,
deployment_id=run.deployment_id,
start_time=run.time_start_requested,
end_time=run.time_stopped,
)
@classmethod
def _logs_explorer_link(
cls,
*,
deployment_name: str,
namespace_name: str,
gcp_project: str,
gcp_ui_url: str,
deployment_id: Optional[str] = None,
start_time: Optional[_datetime] = None,
end_time: Optional[_datetime] = None,
):
"""Output the link to test server/client logs in GCP Logs Explorer."""
if not start_time:
start_time = _datetime.now()
if not end_time:
end_time = start_time + _timedelta(minutes=30)
logs_start = _helper_datetime.iso8601_utc_time(start_time)
logs_end = _helper_datetime.iso8601_utc_time(end_time)
request = {"timeRange": f"{logs_start}/{logs_end}"}
query = {
"resource.type": "k8s_container",
"resource.labels.project_id": gcp_project,
"resource.labels.container_name": deployment_name,
"resource.labels.namespace_name": namespace_name,
}
if deployment_id:
query['labels."k8s-pod/deployment_id"'] = deployment_id
link = cls._logs_explorer_link_from_params(
gcp_ui_url=gcp_ui_url,
gcp_project=gcp_project,
query=query,
request=request,
)
link_to = deployment_id if deployment_id else deployment_name
        # A trailing whitespace to indicate the end of the URL.
logger.info("GCP Logs Explorer link to %s:\n%s ", link_to, link)
@classmethod
def _make_namespace_name(
cls, resource_prefix: str, resource_suffix: str, name: str
) -> str:
"""A helper to make consistent test app kubernetes namespace name
for given resource prefix and suffix."""
parts = [resource_prefix, name]
# Avoid trailing dash when the suffix is empty.
if resource_suffix:
parts.append(resource_suffix)
return "-".join(parts)

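The template-to-manifest pipeline implemented by _render_template() and _manifests_from_str() above can be exercised standalone. The sketch below inlines a Mako template instead of loading one from the kubernetes-manifests/ directory, and applies the same single-document rule as _create_from_template(); the template text and namespace name are illustrative.

import contextlib

import mako.template
import yaml

# Inlined stand-in for a kubernetes-manifests/ template file.
NAMESPACE_TEMPLATE = """\
apiVersion: v1
kind: Namespace
metadata:
  name: ${namespace_name}
"""

def render_single_manifest(template_text: str, **kwargs) -> dict:
    # Render Mako template variables, then parse exactly one YAML document.
    rendered = mako.template.Template(template_text).render(**kwargs)
    with contextlib.closing(yaml.safe_load_all(rendered)) as docs:
        manifest = next(docs)
        if next(docs, None) is not None:
            raise ValueError("Exactly one document expected in manifest")
    return manifest

print(render_single_manifest(NAMESPACE_TEMPLATE, namespace_name="psm-demo"))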
@@ -1,238 +0,0 @@
# Copyright 2022 gRPC authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Run xDS Test Client on Kubernetes.
"""
import logging
from typing import Optional
from framework.infrastructure import gcp
from framework.infrastructure import k8s
from framework.test_app.client_app import XdsTestClient
from framework.test_app.runners.k8s import k8s_base_runner
logger = logging.getLogger(__name__)
class KubernetesClientRunner(k8s_base_runner.KubernetesBaseRunner):
# Required fields.
xds_server_uri: str
stats_port: int
deployment_template: str
enable_workload_identity: bool
debug_use_port_forwarding: bool
td_bootstrap_image: str
network: str
# Optional fields.
service_account_name: Optional[str] = None
service_account_template: Optional[str] = None
gcp_iam: Optional[gcp.iam.IamV1] = None
def __init__( # pylint: disable=too-many-locals
self,
k8s_namespace: k8s.KubernetesNamespace,
*,
deployment_name: str,
image_name: str,
td_bootstrap_image: str,
network="default",
xds_server_uri: Optional[str] = None,
gcp_api_manager: gcp.api.GcpApiManager,
gcp_project: str,
gcp_service_account: str,
service_account_name: Optional[str] = None,
stats_port: int = 8079,
deployment_template: str = "client.deployment.yaml",
service_account_template: str = "service-account.yaml",
reuse_namespace: bool = False,
namespace_template: Optional[str] = None,
debug_use_port_forwarding: bool = False,
enable_workload_identity: bool = True,
):
super().__init__(
k8s_namespace,
deployment_name=deployment_name,
image_name=image_name,
gcp_project=gcp_project,
gcp_service_account=gcp_service_account,
gcp_ui_url=gcp_api_manager.gcp_ui_url,
namespace_template=namespace_template,
reuse_namespace=reuse_namespace,
)
# Settings
self.stats_port = stats_port
self.deployment_template = deployment_template
self.enable_workload_identity = enable_workload_identity
self.debug_use_port_forwarding = debug_use_port_forwarding
# Used by the TD bootstrap generator.
self.td_bootstrap_image = td_bootstrap_image
self.network = network
self.xds_server_uri = xds_server_uri
# Workload identity settings:
if self.enable_workload_identity:
# Kubernetes service account.
self.service_account_name = service_account_name or deployment_name
self.service_account_template = service_account_template
            # GCP IAM API used to grant workload service accounts
            # permission to use the GCP service account identity.
self.gcp_iam = gcp.iam.IamV1(gcp_api_manager, gcp_project)
def run( # pylint: disable=arguments-differ
self,
*,
server_target,
rpc="UnaryCall",
qps=25,
metadata="",
secure_mode=False,
config_mesh=None,
generate_mesh_id=False,
print_response=False,
log_to_stdout: bool = False,
enable_csm_observability: bool = False,
) -> XdsTestClient:
logger.info(
(
'Deploying xDS test client "%s" to k8s namespace %s: '
"server_target=%s rpc=%s qps=%s metadata=%r secure_mode=%s "
"print_response=%s"
),
self.deployment_name,
self.k8s_namespace.name,
server_target,
rpc,
qps,
metadata,
secure_mode,
print_response,
)
super().run()
if self.enable_workload_identity:
# Allow Kubernetes service account to use the GCP service account
# identity.
self._grant_workload_identity_user(
gcp_iam=self.gcp_iam,
gcp_service_account=self.gcp_service_account,
service_account_name=self.service_account_name,
)
# Create service account
self.service_account = self._create_service_account(
self.service_account_template,
service_account_name=self.service_account_name,
namespace_name=self.k8s_namespace.name,
gcp_service_account=self.gcp_service_account,
)
# Always create a new deployment
self.deployment = self._create_deployment(
self.deployment_template,
deployment_name=self.deployment_name,
image_name=self.image_name,
namespace_name=self.k8s_namespace.name,
service_account_name=self.service_account_name,
td_bootstrap_image=self.td_bootstrap_image,
xds_server_uri=self.xds_server_uri,
network=self.network,
stats_port=self.stats_port,
server_target=server_target,
rpc=rpc,
qps=qps,
metadata=metadata,
secure_mode=secure_mode,
config_mesh=config_mesh,
generate_mesh_id=generate_mesh_id,
print_response=print_response,
enable_csm_observability=enable_csm_observability,
)
# Create a PodMonitoring resource if CSM Observability is enabled
# This is GMP (Google Managed Prometheus)
if enable_csm_observability:
self._create_pod_monitoring(
"csm/pod-monitoring.yaml",
namespace_name=self.k8s_namespace.name,
deployment_id=self.deployment_id,
pod_monitoring_name="%s-gmp" % self.deployment_id,
)
        # Load the test client pod. We need only one client at the moment.
pod_name = self._wait_deployment_pod_count(self.deployment)[0]
pod: k8s.V1Pod = self._wait_pod_started(pod_name)
if self.should_collect_logs:
self._start_logging_pod(pod, log_to_stdout=log_to_stdout)
# Verify the deployment reports all pods started as well.
self._wait_deployment_with_available_replicas(self.deployment_name)
self._start_completed()
return self._xds_test_client_for_pod(pod, server_target=server_target)
def _xds_test_client_for_pod(
self, pod: k8s.V1Pod, *, server_target: str
) -> XdsTestClient:
if self.debug_use_port_forwarding:
pf = self._start_port_forwarding_pod(pod, self.stats_port)
rpc_port, rpc_host = pf.local_port, pf.local_address
else:
rpc_port, rpc_host = self.stats_port, None
return XdsTestClient(
ip=pod.status.pod_ip,
rpc_port=rpc_port,
server_target=server_target,
hostname=pod.metadata.name,
rpc_host=rpc_host,
)
# pylint: disable=arguments-differ
def cleanup(self, *, force=False, force_namespace=False):
# TODO(sergiitk): rename to stop().
try:
if self.deployment or force:
self._delete_deployment(self.deployment_name)
self.deployment = None
if self.enable_workload_identity and (
self.service_account or force
):
self._revoke_workload_identity_user(
gcp_iam=self.gcp_iam,
gcp_service_account=self.gcp_service_account,
service_account_name=self.service_account_name,
)
self._delete_service_account(self.service_account_name)
self.service_account = None
self._cleanup_namespace(force=force_namespace and force)
finally:
self._stop()
# pylint: enable=arguments-differ
@classmethod
def make_namespace_name(
cls, resource_prefix: str, resource_suffix: str, name: str = "client"
) -> str:
"""A helper to make consistent XdsTestClient kubernetes namespace name
for given resource prefix and suffix.
Note: the idea is to intentionally produce different namespace name for
the test server, and the test client, as that closely mimics real-world
deployments.
"""
return cls._make_namespace_name(resource_prefix, resource_suffix, name)

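A usage sketch for KubernetesClientRunner follows; every concrete value (kubectl context, image URIs, project, service account) is a placeholder assumption, and the runner module path is inferred from the imports above.

from framework.infrastructure import gcp, k8s
from framework.test_app.runners.k8s import k8s_client_runner  # module path assumed

gcp_api = gcp.api.GcpApiManager()
# Assumed constructor: takes a kubectl context name.
k8s_api = k8s.KubernetesApiManager("gke_example-project_us-central1-a_psm-demo")
client_runner = k8s_client_runner.KubernetesClientRunner(
    k8s.KubernetesNamespace(k8s_api, "psm-client-demo"),
    deployment_name="psm-grpc-client",
    image_name="gcr.io/example-project/xds-test-client:latest",
    td_bootstrap_image="gcr.io/example-project/td-grpc-bootstrap:latest",
    gcp_api_manager=gcp_api,
    gcp_project="example-project",
    gcp_service_account="psm@example-project.iam.gserviceaccount.com",
)
test_client = client_runner.run(server_target="xds:///xds-test-server:8080")
try:
    pass  # drive channelz/stats assertions against test_client here
finally:
    client_runner.cleanup()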
@@ -1,324 +0,0 @@
# Copyright 2022 gRPC authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Run xDS Test Client on Kubernetes.
"""
import logging
from typing import List, Optional
from framework.infrastructure import gcp
from framework.infrastructure import k8s
from framework.test_app.runners.k8s import k8s_base_runner
from framework.test_app.server_app import XdsTestServer
logger = logging.getLogger(__name__)
class KubernetesServerRunner(k8s_base_runner.KubernetesBaseRunner):
DEFAULT_TEST_PORT = 8080
DEFAULT_MAINTENANCE_PORT = 8080
DEFAULT_SECURE_MODE_MAINTENANCE_PORT = 8081
# Required fields.
deployment_template: str
service_name: str
service_template: str
reuse_service: bool
enable_workload_identity: bool
debug_use_port_forwarding: bool
gcp_neg_name: str
td_bootstrap_image: str
xds_server_uri: str
network: str
# Optional fields.
service_account_name: Optional[str] = None
service_account_template: Optional[str] = None
gcp_iam: Optional[gcp.iam.IamV1] = None
# Mutable state.
service: Optional[k8s.V1Service] = None
def __init__( # pylint: disable=too-many-locals
self,
k8s_namespace: k8s.KubernetesNamespace,
*,
deployment_name: str,
image_name: str,
td_bootstrap_image: str,
network: str = "default",
xds_server_uri: Optional[str] = None,
gcp_api_manager: gcp.api.GcpApiManager,
gcp_project: str,
gcp_service_account: str,
service_account_name: Optional[str] = None,
service_name: Optional[str] = None,
neg_name: Optional[str] = None,
deployment_template: str = "server.deployment.yaml",
service_account_template: str = "service-account.yaml",
service_template: str = "server.service.yaml",
reuse_service: bool = False,
reuse_namespace: bool = False,
namespace_template: Optional[str] = None,
debug_use_port_forwarding: bool = False,
enable_workload_identity: bool = True,
):
super().__init__(
k8s_namespace,
deployment_name=deployment_name,
image_name=image_name,
gcp_project=gcp_project,
gcp_service_account=gcp_service_account,
gcp_ui_url=gcp_api_manager.gcp_ui_url,
namespace_template=namespace_template,
reuse_namespace=reuse_namespace,
)
# Settings
self.deployment_template = deployment_template
self.service_name = service_name or deployment_name
self.service_template = service_template
self.reuse_service = reuse_service
self.enable_workload_identity = enable_workload_identity
self.debug_use_port_forwarding = debug_use_port_forwarding
# GCP Network Endpoint Group.
self.gcp_neg_name = neg_name or (
f"{self.k8s_namespace.name}-{self.service_name}"
)
# Used by the TD bootstrap generator.
self.td_bootstrap_image = td_bootstrap_image
self.network = network
self.xds_server_uri = xds_server_uri
# Workload identity settings:
if self.enable_workload_identity:
# Kubernetes service account.
self.service_account_name = service_account_name or deployment_name
self.service_account_template = service_account_template
            # GCP IAM API used to grant workload service accounts
            # permission to use the GCP service account identity.
self.gcp_iam = gcp.iam.IamV1(gcp_api_manager, gcp_project)
def run( # pylint: disable=arguments-differ,too-many-branches
self,
*,
test_port: int = DEFAULT_TEST_PORT,
maintenance_port: Optional[int] = None,
secure_mode: bool = False,
replica_count: int = 1,
log_to_stdout: bool = False,
bootstrap_version: Optional[str] = None,
) -> List[XdsTestServer]:
if not maintenance_port:
maintenance_port = self._get_default_maintenance_port(secure_mode)
        # Implementation detail: in secure mode, the maintenance
        # ("backchannel") port must be different from the test port so that
        # the maintenance services can be reached independently of the
        # security configuration under test.
if secure_mode and maintenance_port == test_port:
raise ValueError(
"port and maintenance_port must be different "
"when running test server in secure mode"
)
        # Guard against bugs caused by comparing ports of mismatched types.
        if not (
            isinstance(test_port, int) and isinstance(maintenance_port, int)
        ):
            raise TypeError("Port numbers must be integers")
if secure_mode and not self.enable_workload_identity:
raise ValueError("Secure mode requires Workload Identity enabled.")
logger.info(
(
'Deploying xDS test server "%s" to k8s namespace %s:'
" test_port=%s maintenance_port=%s secure_mode=%s"
" replica_count=%s"
),
self.deployment_name,
self.k8s_namespace.name,
test_port,
maintenance_port,
secure_mode,
replica_count,
)
super().run()
        # Reuse the existing service if requested; create a new one when
        # missing. Useful for debugging, to avoid the NEG losing its relation
        # to a deleted service.
if self.reuse_service:
self.service = self._reuse_service(self.service_name)
if not self.service:
self.service = self._create_service(
self.service_template,
service_name=self.service_name,
namespace_name=self.k8s_namespace.name,
deployment_name=self.deployment_name,
neg_name=self.gcp_neg_name,
test_port=test_port,
)
self._wait_service_neg_status_annotation(self.service_name, test_port)
if self.enable_workload_identity:
# Allow Kubernetes service account to use the GCP service account
# identity.
self._grant_workload_identity_user(
gcp_iam=self.gcp_iam,
gcp_service_account=self.gcp_service_account,
service_account_name=self.service_account_name,
)
# Create service account
self.service_account = self._create_service_account(
self.service_account_template,
service_account_name=self.service_account_name,
namespace_name=self.k8s_namespace.name,
gcp_service_account=self.gcp_service_account,
)
# Always create a new deployment
self.deployment = self._create_deployment(
self.deployment_template,
deployment_name=self.deployment_name,
image_name=self.image_name,
namespace_name=self.k8s_namespace.name,
service_account_name=self.service_account_name,
td_bootstrap_image=self.td_bootstrap_image,
xds_server_uri=self.xds_server_uri,
network=self.network,
replica_count=replica_count,
test_port=test_port,
maintenance_port=maintenance_port,
secure_mode=secure_mode,
bootstrap_version=bootstrap_version,
)
return self._make_servers_for_deployment(
replica_count,
test_port=test_port,
maintenance_port=maintenance_port,
log_to_stdout=log_to_stdout,
secure_mode=secure_mode,
)
def _make_servers_for_deployment(
self,
replica_count,
*,
test_port: int,
maintenance_port: int,
log_to_stdout: bool,
secure_mode: bool = False,
) -> List[XdsTestServer]:
pod_names = self._wait_deployment_pod_count(
self.deployment, replica_count
)
pods = []
for pod_name in pod_names:
pod = self._wait_pod_started(pod_name)
pods.append(pod)
if self.should_collect_logs:
self._start_logging_pod(pod, log_to_stdout=log_to_stdout)
# Verify the deployment reports all pods started as well.
self._wait_deployment_with_available_replicas(
self.deployment_name, replica_count
)
self._start_completed()
servers: List[XdsTestServer] = []
for pod in pods:
servers.append(
self._xds_test_server_for_pod(
pod,
test_port=test_port,
maintenance_port=maintenance_port,
secure_mode=secure_mode,
)
)
return servers
def _get_default_maintenance_port(self, secure_mode: bool) -> int:
if not secure_mode:
maintenance_port = self.DEFAULT_MAINTENANCE_PORT
else:
maintenance_port = self.DEFAULT_SECURE_MODE_MAINTENANCE_PORT
return maintenance_port
def _xds_test_server_for_pod(
self,
pod: k8s.V1Pod,
*,
test_port: int = DEFAULT_TEST_PORT,
maintenance_port: Optional[int] = None,
secure_mode: bool = False,
) -> XdsTestServer:
if maintenance_port is None:
maintenance_port = self._get_default_maintenance_port(secure_mode)
if self.debug_use_port_forwarding:
pf = self._start_port_forwarding_pod(pod, maintenance_port)
rpc_port, rpc_host = pf.local_port, pf.local_address
else:
rpc_port, rpc_host = maintenance_port, None
return XdsTestServer(
ip=pod.status.pod_ip,
rpc_port=test_port,
hostname=pod.metadata.name,
maintenance_port=rpc_port,
secure_mode=secure_mode,
rpc_host=rpc_host,
)
# pylint: disable=arguments-differ
def cleanup(self, *, force=False, force_namespace=False):
# TODO(sergiitk): rename to stop().
try:
if self.deployment or force:
self._delete_deployment(self.deployment_name)
self.deployment = None
if (self.service and not self.reuse_service) or force:
self._delete_service(self.service_name)
self.service = None
if self.enable_workload_identity and (
self.service_account or force
):
self._revoke_workload_identity_user(
gcp_iam=self.gcp_iam,
gcp_service_account=self.gcp_service_account,
service_account_name=self.service_account_name,
)
self._delete_service_account(self.service_account_name)
self.service_account = None
self._cleanup_namespace(force=(force_namespace and force))
finally:
self._stop()
# pylint: enable=arguments-differ
@classmethod
def make_namespace_name(
cls, resource_prefix: str, resource_suffix: str, name: str = "server"
) -> str:
"""A helper to make consistent XdsTestServer kubernetes namespace name
for given resource prefix and suffix.
Note: the idea is to intentionally produce different namespace name for
the test server, and the test client, as that closely mimics real-world
deployments.
"""
return cls._make_namespace_name(resource_prefix, resource_suffix, name)

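A companion to the client-runner sketch earlier, using the KubernetesServerRunner constructor above; values remain placeholders, with gcp_api and k8s_api as in the previous sketch. The XdsTestServer helpers used at the end (set_xds_address(), xds_uri) are defined in the next file.

from framework.test_app.runners.k8s import k8s_server_runner  # module path assumed

server_runner = k8s_server_runner.KubernetesServerRunner(
    k8s.KubernetesNamespace(k8s_api, "psm-server-demo"),
    deployment_name="psm-grpc-server",
    image_name="gcr.io/example-project/xds-test-server:latest",
    td_bootstrap_image="gcr.io/example-project/td-grpc-bootstrap:latest",
    gcp_api_manager=gcp_api,
    gcp_project="example-project",
    gcp_service_account="psm@example-project.iam.gserviceaccount.com",
)
servers = server_runner.run(replica_count=1)
# Attach the xDS name clients will use to reach the server.
servers[0].set_xds_address("xds-test-server", 8080)
target = servers[0].xds_uri  # "xds:///xds-test-server:8080"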
@@ -1,180 +0,0 @@
# Copyright 2020 gRPC authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Provides an interface to xDS Test Server running remotely.
"""
import functools
import logging
from typing import Iterator, Optional
import framework.rpc
from framework.rpc import grpc_channelz
from framework.rpc import grpc_testing
logger = logging.getLogger(__name__)
# Type aliases
_ChannelzServiceClient = grpc_channelz.ChannelzServiceClient
_XdsUpdateHealthServiceClient = grpc_testing.XdsUpdateHealthServiceClient
_HealthClient = grpc_testing.HealthClient
class XdsTestServer(framework.rpc.grpc.GrpcApp):
"""
Represents RPC services implemented in Server component of the xDS test app.
https://github.com/grpc/grpc/blob/master/doc/xds-test-descriptions.md#server
"""
# A unique host name identifying each server replica.
    # The server implementation must return it in SimpleResponse.hostname,
    # which the client uses as the key in the rpcs_by_peer map.
hostname: str
def __init__(
self,
*,
ip: str,
rpc_port: int,
hostname: str,
maintenance_port: Optional[int] = None,
secure_mode: Optional[bool] = False,
xds_host: Optional[str] = None,
xds_port: Optional[int] = None,
rpc_host: Optional[str] = None,
):
super().__init__(rpc_host=(rpc_host or ip))
self.ip = ip
self.rpc_port = rpc_port
self.hostname = hostname
self.maintenance_port = maintenance_port or rpc_port
self.secure_mode = secure_mode
self.xds_host, self.xds_port = xds_host, xds_port
@property
@functools.lru_cache(None)
def channelz(self) -> _ChannelzServiceClient:
return _ChannelzServiceClient(
self._make_channel(self.maintenance_port),
log_target=f"{self.hostname}:{self.maintenance_port}",
)
@property
@functools.lru_cache(None)
def update_health_service_client(self) -> _XdsUpdateHealthServiceClient:
return _XdsUpdateHealthServiceClient(
self._make_channel(self.maintenance_port),
log_target=f"{self.hostname}:{self.maintenance_port}",
)
@property
@functools.lru_cache(None)
def health_client(self) -> _HealthClient:
return _HealthClient(
self._make_channel(self.maintenance_port),
log_target=f"{self.hostname}:{self.maintenance_port}",
)
def set_serving(self):
logger.info("[%s] >> Setting health status to SERVING", self.hostname)
self.update_health_service_client.set_serving()
logger.info(
"[%s] << Health status %s",
self.hostname,
self.health_client.check_health(),
)
def set_not_serving(self):
logger.info(
"[%s] >> Setting health status to NOT_SERVING", self.hostname
)
self.update_health_service_client.set_not_serving()
logger.info(
"[%s] << Health status %s",
self.hostname,
self.health_client.check_health(),
)
def set_xds_address(self, xds_host, xds_port: Optional[int] = None):
self.xds_host, self.xds_port = xds_host, xds_port
@property
def xds_address(self) -> str:
if not self.xds_host:
return ""
if not self.xds_port:
return self.xds_host
return f"{self.xds_host}:{self.xds_port}"
@property
def xds_uri(self) -> str:
if not self.xds_host:
return ""
return f"xds:///{self.xds_address}"
def get_test_server(self) -> grpc_channelz.Server:
"""Return channelz representation of a server running TestService.
Raises:
GrpcApp.NotFound: Test server not found.
"""
server = self.channelz.find_server_listening_on_port(self.rpc_port)
if not server:
            raise self.NotFound(
                f"[{self.hostname}] Server "
                f"listening on port {self.rpc_port} not found"
            )
return server
def get_test_server_sockets(self) -> Iterator[grpc_channelz.Socket]:
"""List all sockets of the test server.
Raises:
GrpcApp.NotFound: Test server not found.
"""
server = self.get_test_server()
return self.channelz.list_server_sockets(server)
def get_server_socket_matching_client(
self, client_socket: grpc_channelz.Socket
):
"""Find test server socket that matches given test client socket.
        Sockets are matched by their TCP endpoints (ip:port), referred to
        below as "address": the server socket's remote address is matched
        against the client socket's local address.
Raises:
GrpcApp.NotFound: Server socket matching client socket not found.
"""
client_local = self.channelz.sock_address_to_str(client_socket.local)
logger.debug(
"[%s] Looking for a server socket connected to the client %s",
self.hostname,
client_local,
)
server_socket = self.channelz.find_server_socket_matching_client(
self.get_test_server_sockets(), client_socket
)
if not server_socket:
raise self.NotFound(
f"[{self.hostname}] Socket to client {client_local} not found"
)
logger.info(
"[%s] Found matching socket pair: server(%s) <-> client(%s)",
self.hostname,
self.channelz.sock_addresses_pretty(server_socket),
self.channelz.sock_addresses_pretty(client_socket),
)
return server_socket

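A tiny illustration of the xds_address/xds_uri derivation above; the constructor values are arbitrary samples.

server = XdsTestServer(ip="10.0.0.5", rpc_port=8080, hostname="server-pod-a")
assert server.xds_uri == ""  # no xDS host assigned yet
server.set_xds_address("xds-test-server", 8080)
assert server.xds_address == "xds-test-server:8080"
assert server.xds_uri == "xds:///xds-test-server:8080"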
@@ -1,13 +0,0 @@
# Copyright 2023 gRPC authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

@@ -1,65 +0,0 @@
# Copyright 2023 gRPC authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Base test case used for xds test suites."""
from typing import Optional
import unittest
from absl import logging
from absl.testing import absltest
class BaseTestCase(absltest.TestCase):
def run(self, result: Optional[unittest.TestResult] = None) -> None:
super().run(result)
test_errors = [error for test, error in result.errors if test is self]
test_failures = [
failure for test, failure in result.failures if test is self
]
test_unexpected_successes = [
test for test in result.unexpectedSuccesses if test is self
]
test_skipped = next(
(reason for test, reason in result.skipped if test is self),
None,
)
# Assume one test case will only have one status.
if test_errors or test_failures:
logging.info("----- TestCase %s FAILED -----", self.id())
if test_errors:
self._print_error_list(test_errors, is_unexpected_error=True)
if test_failures:
self._print_error_list(test_failures)
elif test_unexpected_successes:
logging.info(
"----- TestCase %s UNEXPECTEDLY SUCCEEDED -----", self.id()
)
elif test_skipped:
logging.info("----- TestCase %s SKIPPED -----", self.id())
logging.info("Reason for skipping: %s", test_skipped)
else:
logging.info("----- TestCase %s PASSED -----", self.id())
def _print_error_list(
self, errors: list[str], is_unexpected_error: bool = False
) -> None:
# FAILUREs are those errors explicitly signalled using
# the TestCase.assert*() methods.
for err in errors:
logging.error(
"%s Traceback in %s:\n%s",
"ERROR" if is_unexpected_error else "FAILURE",
self.id(),
err,
)

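A hypothetical subclass showing the per-status banners that BaseTestCase.run() logs; the test body is an arbitrary example.

class DemoTest(BaseTestCase):
    def test_addition(self):
        # On success, run() logs:
        # ----- TestCase __main__.DemoTest.test_addition PASSED -----
        self.assertEqual(2 + 2, 4)

if __name__ == "__main__":
    absltest.main()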
@@ -1,90 +0,0 @@
# Copyright 2023 gRPC authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Utilities for stateful session affinity tests.
These utilities must be shared between test environments that configure SSA
via Kubernetes CRDs and environments that configure SSA directly through the
networkservices.googleapis.com API.
"""
import datetime
import logging
from typing import Sequence, Tuple
from framework import xds_k8s_testcase
from framework.helpers import retryers
_XdsKubernetesBaseTestCase = xds_k8s_testcase.XdsKubernetesBaseTestCase
_XdsTestServer = xds_k8s_testcase.XdsTestServer
_XdsTestClient = xds_k8s_testcase.XdsTestClient
_SET_COOKIE_MAX_WAIT_SEC = 300
def get_setcookie_headers(
metadatas_by_peer: dict[str, "MetadataByPeer"]
) -> dict[str, str]:
cookies = dict()
for peer, metadatas in metadatas_by_peer.items():
for rpc_metadatas in metadatas.rpc_metadata:
for metadata in rpc_metadatas.metadata:
if metadata.key.lower() == "set-cookie":
cookies[peer] = metadata.value
return cookies
def assert_eventually_retrieve_cookie_and_server(
test: _XdsKubernetesBaseTestCase,
test_client: _XdsTestClient,
servers: Sequence[_XdsTestServer],
) -> Tuple[str, _XdsTestServer]:
"""Retrieves the initial cookie and corresponding server.
Given a test client and set of backends for which SSA is enabled, samples
a single RPC from the test client to the backends, with metadata collection enabled.
The "set-cookie" header is retrieved and its contents are returned along with the
server to which it corresponds.
    Since the SSA config is supplied as a separate resource from the Route
    resource, there will be periods of time when the SSA config has not yet
    been applied. This function is therefore eventually consistent.
"""
def _assert_retrieve_cookie_and_server():
lb_stats = test.assertSuccessfulRpcs(test_client, 1)
cookies = get_setcookie_headers(lb_stats.metadatas_by_peer)
test.assertLen(cookies, 1)
hostname = next(iter(cookies.keys()))
cookie = cookies[hostname]
chosen_server_candidates = tuple(
srv for srv in servers if srv.hostname == hostname
)
test.assertLen(chosen_server_candidates, 1)
chosen_server = chosen_server_candidates[0]
return cookie, chosen_server
retryer = retryers.constant_retryer(
wait_fixed=datetime.timedelta(seconds=10),
timeout=datetime.timedelta(seconds=_SET_COOKIE_MAX_WAIT_SEC),
log_level=logging.INFO,
)
try:
return retryer(_assert_retrieve_cookie_and_server)
except retryers.RetryError as retry_error:
logging.exception(
"Rpcs did not go to expected servers before timeout %s",
_SET_COOKIE_MAX_WAIT_SEC,
)
raise retry_error

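A minimal illustration of get_setcookie_headers() above, using ad-hoc stand-ins for the metadata protos; the attribute names mirror exactly the fields the function reads, and the cookie value is a sample.

from types import SimpleNamespace

set_cookie = SimpleNamespace(key="Set-Cookie", value="GSSA=abc123")  # sample cookie
rpc = SimpleNamespace(metadata=[set_cookie])
metadatas_by_peer = {"psm-grpc-server-pod-a": SimpleNamespace(rpc_metadata=[rpc])}
assert get_setcookie_headers(metadatas_by_peer) == {
    "psm-grpc-server-pod-a": "GSSA=abc123"
}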
@@ -1,212 +0,0 @@
# Copyright 2020 gRPC authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import socket
from absl import flags
from framework.helpers import highlighter
# GCP
PROJECT = flags.DEFINE_string(
"project", default=None, help="(required) GCP Project ID."
)
RESOURCE_PREFIX = flags.DEFINE_string(
"resource_prefix",
default=None,
help=(
"(required) The prefix used to name GCP resources.\n"
"Together with `resource_suffix` used to create unique "
"resource names."
),
)
RESOURCE_SUFFIX = flags.DEFINE_string(
"resource_suffix",
default=None,
help=(
"The suffix used to name GCP resources.\n"
"Together with `resource_prefix` used to create unique "
"resource names.\n"
"(default: test suite will generate a random suffix, based on suite "
"resource management preferences)"
),
)
NETWORK = flags.DEFINE_string(
"network", default="default", help="GCP Network ID"
)
COMPUTE_API_VERSION = flags.DEFINE_string(
"compute_api_version",
default="v1",
help="The version of the GCP Compute API, e.g., v1, v1alpha",
)
# Mirrors --xds-server-uri argument of Traffic Director gRPC Bootstrap
XDS_SERVER_URI = flags.DEFINE_string(
"xds_server_uri", default=None, help="Override Traffic Director server URI."
)
ENSURE_FIREWALL = flags.DEFINE_bool(
"ensure_firewall",
default=False,
help="Ensure the allow-health-check firewall exists before each test case",
)
FIREWALL_SOURCE_RANGE = flags.DEFINE_list(
"firewall_source_range",
default=["35.191.0.0/16", "130.211.0.0/22"],
help="Update the source range of the firewall rule.",
)
FIREWALL_ALLOWED_PORTS = flags.DEFINE_list(
"firewall_allowed_ports",
default=["8080-8100"],
help="Update the allowed ports of the firewall rule.",
)
# Test server
SERVER_NAME = flags.DEFINE_string(
"server_name",
default="psm-grpc-server",
help="The name to use for test server deployments.",
)
SERVER_PORT = flags.DEFINE_integer(
"server_port",
default=8080,
lower_bound=1,
upper_bound=65535,
help="Server test port.\nMust be within --firewall_allowed_ports.",
)
SERVER_MAINTENANCE_PORT = flags.DEFINE_integer(
"server_maintenance_port",
default=None,
lower_bound=1,
upper_bound=65535,
help=(
"Server port running maintenance services: Channelz, CSDS, Health, "
"XdsUpdateHealth, and ProtoReflection (optional).\n"
"Must be within --firewall_allowed_ports.\n"
"(default: the port is chosen automatically based on "
"the security configuration)"
),
)
SERVER_XDS_HOST = flags.DEFINE_string(
"server_xds_host",
default="xds-test-server",
help=(
"The xDS hostname of the test server.\n"
"Together with `server_xds_port` makes test server target URI, "
"xds:///hostname:port"
),
)
# Note: port 0 is known to represent a request for a dynamically-allocated port
# https://en.wikipedia.org/wiki/List_of_TCP_and_UDP_port_numbers#Well-known_ports
SERVER_XDS_PORT = flags.DEFINE_integer(
"server_xds_port",
default=8080,
lower_bound=0,
upper_bound=65535,
help=(
"The xDS port of the test server.\n"
"Together with `server_xds_host` makes test server target URI, "
"xds:///hostname:port\n"
"Must be unique within a GCP project.\n"
"Set to 0 to select any unused port."
),
)
# Test client
CLIENT_NAME = flags.DEFINE_string(
"client_name",
default="psm-grpc-client",
help="The name to use for test client deployments",
)
CLIENT_PORT = flags.DEFINE_integer(
"client_port",
default=8079,
lower_bound=1,
upper_bound=65535,
help=(
"The port test client uses to run gRPC services: Channelz, CSDS, "
"XdsStats, XdsUpdateClientConfigure, and ProtoReflection (optional).\n"
"Doesn't have to be within --firewall_allowed_ports."
),
)
# Testing metadata
TESTING_VERSION = flags.DEFINE_string(
"testing_version",
default=None,
help="The testing gRPC version branch name. Like master, dev, v1.55.x",
)
FORCE_CLEANUP = flags.DEFINE_bool(
"force_cleanup",
default=False,
help="Force resource cleanup, even if not created by this test run",
)
COLLECT_APP_LOGS = flags.DEFINE_bool(
"collect_app_logs",
default=False,
help=(
f"Collect the logs of the xDS Test Client and Server\n"
f"into the test_app_logs/ directory under the log directory.\n"
f"See --log_dir description for configuring the log directory."
),
)
# Needed to configure urllib3 socket timeout, which is infinity by default.
SOCKET_DEFAULT_TIMEOUT = flags.DEFINE_float(
"socket_default_timeout",
default=60,
lower_bound=0,
help=(
"Set the default timeout in seconds on blocking socket operations.\n"
"If zero is given, the new sockets have no timeout. "
),
)
GFE_DEBUG_HEADER = flags.DEFINE_enum(
"gfe_debug_header",
default=None,
enum_values=[
"gfe_response_only",
"all_response",
"request_and_response",
],
help="Whether to enable GFE debug headers and what value to use.",
)
def set_socket_default_timeout_from_flag() -> None:
"""A helper to configure default socket timeout from a flag.
This is known to affect the following pip packages:
- google-api-python-client: has the default timeout set to 60:
https://googleapis.github.io/google-api-python-client/docs/epy/googleapiclient.http-module.html#build_http
- kubernetes: falls back to urllib3 timeout, which is infinity by default:
https://urllib3.readthedocs.io/en/stable/reference/urllib3.util.html#urllib3.util.Timeout
    NOTE: Must be called _after_ the flags are parsed by absl, but before
    KubernetesApiManager or GcpApiManager is initialized.
"""
timeout: float = SOCKET_DEFAULT_TIMEOUT.value
    # A None timeout means infinity, which is represented by 0 in the flag.
socket.setdefaulttimeout(None if timeout == 0 else timeout)
flags.adopt_module_key_flags(highlighter)
flags.mark_flags_as_required(
[
"project",
"resource_prefix",
]
)
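A sketch of how a driver script might consume this flags module; only `set_socket_default_timeout_from_flag()` and the flag holders come from this file, while the `main()` wiring is an assumption for illustration:

from absl import app
from framework import xds_flags

def main(argv):
    del argv  # Unused.
    # Must run after absl parses flags, before any API manager is created.
    xds_flags.set_socket_default_timeout_from_flag()
    print(f"Project: {xds_flags.PROJECT.value}, "
          f"resource prefix: {xds_flags.RESOURCE_PREFIX.value}")

if __name__ == "__main__":
    app.run(main)  # Parses flags, then calls main().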

@@ -1,137 +0,0 @@
# Copyright 2023 gRPC authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import datetime
import logging
from typing import Optional
from framework.infrastructure import k8s
import framework.infrastructure.traffic_director_gamma as td_gamma
from framework.test_app import client_app
from framework.test_app import server_app
from framework.test_app.runners.k8s import gamma_server_runner
from framework.test_app.runners.k8s import k8s_xds_client_runner
import framework.xds_k8s_testcase as xds_k8s_testcase
GammaServerRunner = gamma_server_runner.GammaServerRunner
KubernetesClientRunner = k8s_xds_client_runner.KubernetesClientRunner
XdsTestClient = client_app.XdsTestClient
XdsTestServer = server_app.XdsTestServer
logger = logging.getLogger(__name__)
# TODO(sergiitk): [GAMMA] Move into framework/test_cases
class GammaXdsKubernetesTestCase(xds_k8s_testcase.RegularXdsKubernetesTestCase):
server_runner: GammaServerRunner
frontend_service_name: str
pre_stop_hook: Optional[bool] = None
termination_grace_period_seconds: int = 0
def setUp(self):
"""Hook method for setting up the test fixture before exercising it."""
# TODO(sergiitk): [GAMMA] Remove when refactored to be TD-manager-less.
# pylint: disable=bad-super-call
# Skips RegularXdsKubernetesTestCase and IsolatedXdsKubernetesTestCase
# and calls setUp on XdsKubernetesBaseTestCase.
# IsolatedXdsKubernetesTestCase randomizes server_xds_port when it's 0,
# and in GAMMA we always need it unset.
# Calls XdsKubernetesBaseTestCase.setUp():
super(xds_k8s_testcase.IsolatedXdsKubernetesTestCase, self).setUp()
# pylint: enable=bad-super-call
if self.pre_stop_hook is None:
self.pre_stop_hook = False
# Random suffix per test.
self.createRandomSuffix()
# TODO(sergiitk): [GAMMA] Make a TD-manager-less base test case
# TD Manager
self.td = self.initTrafficDirectorManager()
# Generate unique mesh name too.
self.frontend_service_name = (
f"{self.resource_prefix}-{self.resource_suffix.lower()}"
)
# Test Server runner
self.server_namespace = GammaServerRunner.make_namespace_name(
self.resource_prefix, self.resource_suffix
)
self.server_runner = self.initKubernetesServerRunner()
# Test Client runner
self.client_namespace = KubernetesClientRunner.make_namespace_name(
self.resource_prefix, self.resource_suffix
)
self.client_runner = self.initKubernetesClientRunner()
# Cleanup.
self.force_cleanup = True
self.force_cleanup_namespace = True
# TODO(sergiitk): [GAMMA] Make a TD-manager-less base test case
def initTrafficDirectorManager(
self,
) -> td_gamma.TrafficDirectorGammaManager:
return td_gamma.TrafficDirectorGammaManager(
self.gcp_api_manager,
project=self.project,
resource_prefix=self.resource_prefix,
resource_suffix=self.resource_suffix,
network=self.network,
compute_api_version=self.compute_api_version,
)
def initKubernetesServerRunner(self) -> GammaServerRunner:
return GammaServerRunner(
k8s.KubernetesNamespace(
self.k8s_api_manager, self.server_namespace
),
self.frontend_service_name,
deployment_name=self.server_name,
image_name=self.server_image,
td_bootstrap_image=self.td_bootstrap_image,
gcp_project=self.project,
gcp_api_manager=self.gcp_api_manager,
gcp_service_account=self.gcp_service_account,
xds_server_uri=self.xds_server_uri,
network=self.network,
debug_use_port_forwarding=self.debug_use_port_forwarding,
enable_workload_identity=self.enable_workload_identity,
termination_grace_period_seconds=self.termination_grace_period_seconds,
pre_stop_hook=self.pre_stop_hook,
)
def startTestClient(
self, test_server: XdsTestServer, **kwargs
) -> XdsTestClient:
server_target = (
f"xds:///{self.frontend_service_name}"
f".{self.server_namespace}.svc.cluster.local"
f":{test_server.rpc_port}"
)
kwargs.setdefault("generate_mesh_id", True)
        # Waiting for an active channel takes less time in non-GAMMA
        # test suites because they only start waiting after having already
        # waited for the TD backends to be created and report healthy.
# In GAMMA, these resources are created asynchronously by Kubernetes.
# To compensate for this, we double the timeout for GAMMA tests.
return self._start_test_client(
server_target,
wait_for_server_channel_ready_timeout=datetime.timedelta(
minutes=10
),
**kwargs,
)
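A minimal sketch of a concrete GAMMA test built on this base class; the class name and test body are hypothetical, and `startTestServers`, `startTestClient`, and `assertSuccessfulRpcs` are assumed to be provided by the framework base classes:

class TestGammaBaseline(GammaXdsKubernetesTestCase):
    def test_ping_pong(self):
        # With GAMMA, routing resources are created by the server runner
        # from Kubernetes CRDs instead of Traffic Director APIs.
        test_server = self.startTestServers()[0]
        test_client = self.startTestClient(test_server)
        self.assertSuccessfulRpcs(test_client)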

@@ -1,94 +0,0 @@
# Copyright 2020 gRPC authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from absl import flags
# GCP
KUBE_CONTEXT = flags.DEFINE_string(
"kube_context", default=None, help="Kubectl context to use"
)
SECONDARY_KUBE_CONTEXT = flags.DEFINE_string(
"secondary_kube_context",
default=None,
help="Secondary kubectl context to use for cluster in another region",
)
GCP_SERVICE_ACCOUNT = flags.DEFINE_string(
"gcp_service_account",
default=None,
help="GCP Service account for GKE workloads to impersonate",
)
TD_BOOTSTRAP_IMAGE = flags.DEFINE_string(
"td_bootstrap_image",
default=None,
help="Traffic Director gRPC Bootstrap Docker image",
)
# Test app
SERVER_IMAGE = flags.DEFINE_string(
"server_image", default=None, help="Server Docker image name"
)
SERVER_IMAGE_CANONICAL = flags.DEFINE_string(
"server_image_canonical",
default=None,
help=(
"The canonical implementation of the xDS test server.\n"
"Can be used in tests where language-specific xDS test server"
"does not exist, or missing a feature required for the test."
),
)
CLIENT_IMAGE = flags.DEFINE_string(
"client_image", default=None, help="Client Docker image name"
)
DEBUG_USE_PORT_FORWARDING = flags.DEFINE_bool(
"debug_use_port_forwarding",
default=False,
help="Development only: use kubectl port-forward to connect to test app",
)
ENABLE_WORKLOAD_IDENTITY = flags.DEFINE_bool(
"enable_workload_identity",
default=True,
help="Enable the WorkloadIdentity feature",
)
flags.mark_flags_as_required(
[
"kube_context",
"td_bootstrap_image",
"server_image",
"client_image",
]
)
def require_secondary_context(filename: str):
"""
Makes secondary_kube_context flag required and adds the non-empty validator.
Typical usage example:
xds_k8s_flags.require_secondary_context(__file__)
"""
flags.mark_flag_as_required("secondary_kube_context")
def _val_not_empty(val: str) -> bool:
        # Disallow whitespace-only values, so the error message is clearer.
return bool(val.strip())
flags.register_validator(
"secondary_kube_context",
_val_not_empty,
message=(
f"{filename} requires non-empty secondary_kube_context to access"
" the secondary k8s cluster"
),
)
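A sketch of how a dual-cluster test module would opt into this requirement, following the docstring above (the module context is hypothetical):

# At the top of a hypothetical dual-cluster test module:
from framework import xds_k8s_flags

xds_k8s_flags.require_secondary_context(__file__)
# Running without --secondary_kube_context (or with a whitespace-only
# value) now fails fast at flag-parsing time.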

@@ -1,378 +0,0 @@
# Copyright 2021 The gRPC Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""A test framework built for urlMap related xDS test cases."""
import functools
import inspect
from typing import Any, Iterable, Mapping, Tuple
from absl import flags
from absl import logging
from framework import xds_flags
from framework import xds_k8s_flags
import framework.helpers.rand
from framework.infrastructure import gcp
from framework.infrastructure import k8s
from framework.infrastructure import traffic_director
from framework.test_app.runners.k8s import k8s_xds_client_runner
from framework.test_app.runners.k8s import k8s_xds_server_runner
flags.adopt_module_key_flags(xds_flags)
flags.adopt_module_key_flags(xds_k8s_flags)
STRATEGY = flags.DEFINE_enum(
"strategy",
default="reuse",
enum_values=["create", "keep", "reuse"],
help="Strategy of GCP resources management",
)
# Type alias
_KubernetesServerRunner = k8s_xds_server_runner.KubernetesServerRunner
_KubernetesClientRunner = k8s_xds_client_runner.KubernetesClientRunner
UrlMapType = Any
HostRule = Any
PathMatcher = Any
class _UrlMapChangeAggregator:
"""Where all the urlMap change happens."""
def __init__(self, url_map_name: str):
self._map = {
"name": url_map_name,
"defaultService": GcpResourceManager().default_backend_service(),
"hostRules": [],
"pathMatchers": [],
}
def get_map(self) -> UrlMapType:
return self._map
def apply_change(self, test_case: "XdsUrlMapTestCase") -> None:
logging.info(
"Apply urlMap change for test case: %s.%s",
test_case.short_module_name,
test_case.__name__,
)
url_map_parts = test_case.url_map_change(
*self._get_test_case_url_map(test_case)
)
self._set_test_case_url_map(*url_map_parts)
@staticmethod
def _get_test_case_url_map(
test_case: "XdsUrlMapTestCase",
) -> Tuple[HostRule, PathMatcher]:
host_rule = {
"hosts": [test_case.hostname()],
"pathMatcher": test_case.path_matcher_name(),
}
path_matcher = {
"name": test_case.path_matcher_name(),
"defaultService": GcpResourceManager().default_backend_service(),
}
return host_rule, path_matcher
def _set_test_case_url_map(
self, host_rule: HostRule, path_matcher: PathMatcher
) -> None:
self._map["hostRules"].append(host_rule)
self._map["pathMatchers"].append(path_matcher)
def _package_flags() -> Mapping[str, Any]:
"""Automatically parse Abseil flags into a dictionary.
    Abseil flags are only available after Abseil app initialization. If we
    used __new__ in our metaclass, flag values would be parsed during module
    initialization and hence fail. That's why we use __call__ to inject the
    metaclass magic: flag parsing is delayed until the class is about to be
    instantiated.
"""
res = {}
for flag_module in [xds_flags, xds_k8s_flags]:
for key, value in inspect.getmembers(flag_module):
if isinstance(value, flags.FlagHolder):
res[key.lower()] = value.value
res["strategy"] = STRATEGY.value
return res
class _MetaSingletonAndAbslFlags(type):
"""Ensures singleton and injects flag values."""
# Allow different subclasses to create different singletons.
_instances = {}
# But we only parse Abseil flags once.
_flags = None
def __call__(cls, *args, **kwargs):
if cls not in cls._instances:
if cls._flags is None:
cls._flags = _package_flags()
obj = super().__call__(cls._flags, *args, **kwargs)
cls._instances[cls] = obj
return obj
return cls._instances[cls]
class GcpResourceManager(metaclass=_MetaSingletonAndAbslFlags):
"""Manages the lifecycle of GCP resources.
    The GCP resources include:
    - 3 K8s deployments (client, default backends, alternative backends)
    - The full set of Traffic Director resources
    - A merged gigantic urlMap from all imported test cases
All resources are intended to be used across test cases and multiple runs
(except the client K8s deployment).
"""
    # Attributes of this class are set dynamically, so disable the "no-member" check.
# pylint: disable=no-member
def __init__(self, absl_flags: Mapping[str, Any] = None):
if absl_flags is not None:
for key in absl_flags:
setattr(self, key, absl_flags[key])
# Pick a client_namespace_suffix if not set
if getattr(self, "resource_suffix", None) is None:
self.resource_suffix = ""
else:
raise NotImplementedError(
"Predefined resource_suffix is not supported for UrlMap tests"
)
logging.info(
"GcpResourceManager: resource prefix=%s, suffix=%s",
self.resource_prefix,
self.resource_suffix,
)
# Must be called before KubernetesApiManager or GcpApiManager init.
xds_flags.set_socket_default_timeout_from_flag()
# API managers
self.k8s_api_manager = k8s.KubernetesApiManager(self.kube_context)
self.gcp_api_manager = gcp.api.GcpApiManager()
self.td = traffic_director.TrafficDirectorManager(
self.gcp_api_manager,
self.project,
resource_prefix=self.resource_prefix,
resource_suffix=(self.resource_suffix or ""),
network=self.network,
compute_api_version=self.compute_api_version,
)
# Kubernetes namespace
self.k8s_namespace = k8s.KubernetesNamespace(
self.k8s_api_manager, self.resource_prefix
)
# Kubernetes Test Servers
self.test_server_runner = _KubernetesServerRunner(
self.k8s_namespace,
deployment_name=self.server_name,
image_name=self.server_image,
gcp_project=self.project,
gcp_api_manager=self.gcp_api_manager,
gcp_service_account=self.gcp_service_account,
td_bootstrap_image=self.td_bootstrap_image,
xds_server_uri=self.xds_server_uri,
network=self.network,
enable_workload_identity=self.enable_workload_identity,
)
self.test_server_alternative_runner = _KubernetesServerRunner(
self.k8s_namespace,
deployment_name=self.server_name + "-alternative",
image_name=self.server_image,
gcp_project=self.project,
gcp_api_manager=self.gcp_api_manager,
gcp_service_account=self.gcp_service_account,
td_bootstrap_image=self.td_bootstrap_image,
xds_server_uri=self.xds_server_uri,
network=self.network,
enable_workload_identity=self.enable_workload_identity,
reuse_namespace=True,
)
self.test_server_affinity_runner = _KubernetesServerRunner(
self.k8s_namespace,
deployment_name=self.server_name + "-affinity",
image_name=self.server_image,
gcp_project=self.project,
gcp_api_manager=self.gcp_api_manager,
gcp_service_account=self.gcp_service_account,
td_bootstrap_image=self.td_bootstrap_image,
xds_server_uri=self.xds_server_uri,
network=self.network,
enable_workload_identity=self.enable_workload_identity,
reuse_namespace=True,
)
logging.info("Strategy of GCP resources management: %s", self.strategy)
def create_test_client_runner(self):
if self.resource_suffix:
client_namespace_suffix = self.resource_suffix
else:
client_namespace_suffix = (
framework.helpers.rand.random_resource_suffix()
)
logging.info(
"GcpResourceManager: client_namespace_suffix=%s",
client_namespace_suffix,
)
# Kubernetes Test Client
namespace_name = _KubernetesClientRunner.make_namespace_name(
self.resource_prefix, client_namespace_suffix
)
return _KubernetesClientRunner(
k8s.KubernetesNamespace(self.k8s_api_manager, namespace_name),
deployment_name=self.client_name,
image_name=self.client_image,
gcp_project=self.project,
gcp_api_manager=self.gcp_api_manager,
gcp_service_account=self.gcp_service_account,
td_bootstrap_image=self.td_bootstrap_image,
xds_server_uri=self.xds_server_uri,
network=self.network,
debug_use_port_forwarding=self.debug_use_port_forwarding,
enable_workload_identity=self.enable_workload_identity,
stats_port=self.client_port,
)
def _pre_cleanup(self):
# Cleanup existing debris
logging.info("GcpResourceManager: pre clean-up")
self.td.cleanup(force=True)
self.test_server_runner.delete_namespace()
def setup(self, test_case_classes: Iterable["XdsUrlMapTestCase"]) -> None:
if self.strategy not in ["create", "keep"]:
logging.info(
"GcpResourceManager: skipping setup for strategy [%s]",
self.strategy,
)
return
# Clean up debris from previous runs
self._pre_cleanup()
# Start creating GCP resources
logging.info("GcpResourceManager: start setup")
# Firewall
if self.ensure_firewall:
self.td.create_firewall_rule(
allowed_ports=self.firewall_allowed_ports
)
# Health Checks
self.td.create_health_check()
# Backend Services
self.td.create_backend_service()
self.td.create_alternative_backend_service()
self.td.create_affinity_backend_service()
# Construct UrlMap from test classes
aggregator = _UrlMapChangeAggregator(
url_map_name=self.td.make_resource_name(self.td.URL_MAP_NAME)
)
for test_case_class in test_case_classes:
aggregator.apply_change(test_case_class)
final_url_map = aggregator.get_map()
# UrlMap
self.td.create_url_map_with_content(final_url_map)
# Target Proxy
self.td.create_target_proxy()
# Forwarding Rule
self.td.create_forwarding_rule(self.server_xds_port)
# Kubernetes Test Server
self.test_server_runner.run(
test_port=self.server_port,
maintenance_port=self.server_maintenance_port,
)
# Kubernetes Test Server Alternative
self.test_server_alternative_runner.run(
test_port=self.server_port,
maintenance_port=self.server_maintenance_port,
)
# Kubernetes Test Server Affinity. 3 endpoints to test that only the
# picked sub-channel is connected.
self.test_server_affinity_runner.run(
test_port=self.server_port,
maintenance_port=self.server_maintenance_port,
replica_count=3,
)
# Add backend to default backend service
neg_name, neg_zones = self.k8s_namespace.parse_service_neg_status(
self.test_server_runner.service_name, self.server_port
)
self.td.backend_service_add_neg_backends(neg_name, neg_zones)
# Add backend to alternative backend service
(
neg_name_alt,
neg_zones_alt,
) = self.k8s_namespace.parse_service_neg_status(
self.test_server_alternative_runner.service_name, self.server_port
)
self.td.alternative_backend_service_add_neg_backends(
neg_name_alt, neg_zones_alt
)
# Add backend to affinity backend service
(
neg_name_affinity,
neg_zones_affinity,
) = self.k8s_namespace.parse_service_neg_status(
self.test_server_affinity_runner.service_name, self.server_port
)
self.td.affinity_backend_service_add_neg_backends(
neg_name_affinity, neg_zones_affinity
)
# Wait for healthy backends
self.td.wait_for_backends_healthy_status()
self.td.wait_for_alternative_backends_healthy_status()
self.td.wait_for_affinity_backends_healthy_status()
def cleanup(self) -> None:
if self.strategy not in ["create"]:
logging.info(
"GcpResourceManager: skipping tear down for strategy [%s]",
self.strategy,
)
return
logging.info("GcpResourceManager: start tear down")
if hasattr(self, "td"):
self.td.cleanup(force=True)
if hasattr(self, "test_server_runner"):
self.test_server_runner.cleanup(force=True)
if hasattr(self, "test_server_alternative_runner"):
self.test_server_alternative_runner.cleanup(
force=True, force_namespace=True
)
if hasattr(self, "test_server_affinity_runner"):
self.test_server_affinity_runner.cleanup(
force=True, force_namespace=True
)
@functools.lru_cache(None)
def default_backend_service(self) -> str:
"""Returns default backend service URL."""
self.td.load_backend_service()
return self.td.backend_service.url
@functools.lru_cache(None)
def alternative_backend_service(self) -> str:
"""Returns alternative backend service URL."""
self.td.load_alternative_backend_service()
return self.td.alternative_backend_service.url
@functools.lru_cache(None)
def affinity_backend_service(self) -> str:
"""Returns affinity backend service URL."""
self.td.load_affinity_backend_service()
return self.td.affinity_backend_service.url
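A standalone reproduction of the deferred-flags singleton pattern used by `_MetaSingletonAndAbslFlags` (a sketch; the real metaclass additionally snapshots parsed flag values on first instantiation):

class _MetaSingleton(type):
    _instances = {}

    def __call__(cls, *args, **kwargs):
        # __call__ runs at instantiation time, not at class-creation time,
        # so absl flags are already parsed by the time we get here.
        if cls not in cls._instances:
            cls._instances[cls] = super().__call__(*args, **kwargs)
        return cls._instances[cls]

class Manager(metaclass=_MetaSingleton):
    def __init__(self):
        print("created once")

assert Manager() is Manager()  # The second call returns the cached instance.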

@@ -1,607 +0,0 @@
# Copyright 2021 The gRPC Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""A test framework built for urlMap related xDS test cases."""
import abc
from dataclasses import dataclass
import datetime
import json
import os
import re
import sys
import time
from typing import Any, Iterable, Mapping, Optional, Tuple
import unittest
from absl import flags
from absl import logging
from google.protobuf import json_format
import grpc
from framework import xds_k8s_testcase
from framework import xds_url_map_test_resources
from framework.helpers import grpc as helpers_grpc
from framework.helpers import retryers
from framework.helpers import skips
from framework.infrastructure import k8s
from framework.test_app import client_app
from framework.test_app.runners.k8s import k8s_xds_client_runner
from framework.test_cases import base_testcase
# Load existing flags
flags.adopt_module_key_flags(xds_k8s_testcase)
flags.adopt_module_key_flags(xds_url_map_test_resources)
# Define urlMap specific flags
QPS = flags.DEFINE_integer("qps", default=25, help="The QPS client is sending")
# Test configs
_URL_MAP_PROPAGATE_TIMEOUT_SEC = 600
# With the per-run IAM change, the first xDS response is delayed by several
# minutes. We increase the check interval to reduce the log spam.
_URL_MAP_PROPAGATE_CHECK_INTERVAL_SEC = 15
URL_MAP_TESTCASE_FILE_SUFFIX = "_test.py"
_CLIENT_CONFIGURE_WAIT_SEC = 2
# Type aliases
XdsTestClient = client_app.XdsTestClient
GcpResourceManager = xds_url_map_test_resources.GcpResourceManager
HostRule = xds_url_map_test_resources.HostRule
PathMatcher = xds_url_map_test_resources.PathMatcher
_KubernetesClientRunner = k8s_xds_client_runner.KubernetesClientRunner
JsonType = Any
_timedelta = datetime.timedelta
# ProtoBuf translatable RpcType enums
RpcTypeUnaryCall = "UNARY_CALL"
RpcTypeEmptyCall = "EMPTY_CALL"
def _split_camel(s: str, delimiter: str = "-") -> str:
"""Turn camel case name to snake-case-like name."""
return "".join(
delimiter + c.lower() if c.isupper() else c for c in s
).lstrip(delimiter)
class DumpedXdsConfig(dict):
"""A convenience class to check xDS config.
    Feel free to add more pre-computed fields.
"""
def __init__(self, xds_json: JsonType): # pylint: disable=too-many-branches
super().__init__(xds_json)
self.json_config = xds_json
self.lds = None
self.rds = None
self.rds_version = None
self.cds = []
self.eds = []
self.endpoints = []
for xds_config in self.get("xdsConfig", []):
try:
if "listenerConfig" in xds_config:
self.lds = xds_config["listenerConfig"]["dynamicListeners"][
0
]["activeState"]["listener"]
elif "routeConfig" in xds_config:
self.rds = xds_config["routeConfig"]["dynamicRouteConfigs"][
0
]["routeConfig"]
self.rds_version = xds_config["routeConfig"][
"dynamicRouteConfigs"
][0]["versionInfo"]
elif "clusterConfig" in xds_config:
for cluster in xds_config["clusterConfig"][
"dynamicActiveClusters"
]:
self.cds.append(cluster["cluster"])
elif "endpointConfig" in xds_config:
for endpoint in xds_config["endpointConfig"][
"dynamicEndpointConfigs"
]:
self.eds.append(endpoint["endpointConfig"])
# TODO(lidiz) reduce the catch to LookupError
except Exception as e: # pylint: disable=broad-except
logging.debug(
"Parsing dumped xDS config failed with %s: %s", type(e), e
)
for generic_xds_config in self.get("genericXdsConfigs", []):
try:
if re.search(r"\.Listener$", generic_xds_config["typeUrl"]):
self.lds = generic_xds_config["xdsConfig"]
elif re.search(
r"\.RouteConfiguration$", generic_xds_config["typeUrl"]
):
self.rds = generic_xds_config["xdsConfig"]
self.rds_version = generic_xds_config["versionInfo"]
elif re.search(r"\.Cluster$", generic_xds_config["typeUrl"]):
self.cds.append(generic_xds_config["xdsConfig"])
elif re.search(
r"\.ClusterLoadAssignment$", generic_xds_config["typeUrl"]
):
self.eds.append(generic_xds_config["xdsConfig"])
# TODO(lidiz) reduce the catch to LookupError
except Exception as e: # pylint: disable=broad-except
logging.debug(
"Parsing dumped xDS config failed with %s: %s", type(e), e
)
for endpoint_config in self.eds:
for endpoint in endpoint_config.get("endpoints", {}):
for lb_endpoint in endpoint.get("lbEndpoints", {}):
try:
if lb_endpoint["healthStatus"] == "HEALTHY":
self.endpoints.append(
"%s:%s"
% (
lb_endpoint["endpoint"]["address"][
"socketAddress"
]["address"],
lb_endpoint["endpoint"]["address"][
"socketAddress"
]["portValue"],
)
)
# TODO(lidiz) reduce the catch to LookupError
except Exception as e: # pylint: disable=broad-except
logging.debug(
"Parse endpoint failed with %s: %s", type(e), e
)
def __str__(self) -> str:
return json.dumps(self, indent=2)
class RpcDistributionStats:
"""A convenience class to check RPC distribution.
    Feel free to add more pre-computed fields.
"""
num_failures: int
num_oks: int
default_service_rpc_count: int
alternative_service_rpc_count: int
unary_call_default_service_rpc_count: int
empty_call_default_service_rpc_count: int
unary_call_alternative_service_rpc_count: int
empty_call_alternative_service_rpc_count: int
def __init__(self, json_lb_stats: JsonType):
self.num_failures = json_lb_stats.get("numFailures", 0)
self.num_peers = 0
self.num_oks = 0
self.default_service_rpc_count = 0
self.alternative_service_rpc_count = 0
self.unary_call_default_service_rpc_count = 0
self.empty_call_default_service_rpc_count = 0
self.unary_call_alternative_service_rpc_count = 0
self.empty_call_alternative_service_rpc_count = 0
self.raw = json_lb_stats
if "rpcsByPeer" in json_lb_stats:
self.num_peers = len(json_lb_stats["rpcsByPeer"])
if "rpcsByMethod" in json_lb_stats:
for rpc_type in json_lb_stats["rpcsByMethod"]:
for peer in json_lb_stats["rpcsByMethod"][rpc_type][
"rpcsByPeer"
]:
count = json_lb_stats["rpcsByMethod"][rpc_type][
"rpcsByPeer"
][peer]
self.num_oks += count
if rpc_type == "UnaryCall":
if "alternative" in peer:
self.unary_call_alternative_service_rpc_count = (
count
)
self.alternative_service_rpc_count += count
else:
self.unary_call_default_service_rpc_count = count
self.default_service_rpc_count += count
else:
if "alternative" in peer:
self.empty_call_alternative_service_rpc_count = (
count
)
self.alternative_service_rpc_count += count
else:
self.empty_call_default_service_rpc_count = count
self.default_service_rpc_count += count
@dataclass
class ExpectedResult:
"""Describes the expected result of assertRpcStatusCode method below."""
rpc_type: str = RpcTypeUnaryCall
status_code: grpc.StatusCode = grpc.StatusCode.OK
ratio: float = 1
class _MetaXdsUrlMapTestCase(type):
"""Tracking test case subclasses."""
# Automatic discover of all subclasses
_test_case_classes = []
_test_case_names = set()
    # Keep track of started and finished test cases, so we know when to set up
# and tear down GCP resources.
_started_test_cases = set()
_finished_test_cases = set()
def __new__(
cls, name: str, bases: Iterable[Any], attrs: Mapping[str, Any]
) -> Any:
# Hand over the tracking objects
attrs["test_case_classes"] = cls._test_case_classes
attrs["test_case_names"] = cls._test_case_names
attrs["started_test_cases"] = cls._started_test_cases
attrs["finished_test_cases"] = cls._finished_test_cases
# Handle the test name reflection
module_name = os.path.split(sys.modules[attrs["__module__"]].__file__)[
-1
]
if module_name.endswith(URL_MAP_TESTCASE_FILE_SUFFIX):
module_name = module_name.replace(URL_MAP_TESTCASE_FILE_SUFFIX, "")
attrs["short_module_name"] = module_name.replace("_", "-")
# Create the class and track
new_class = type.__new__(cls, name, bases, attrs)
if name.startswith("Test"):
cls._test_case_names.add(name)
cls._test_case_classes.append(new_class)
else:
logging.debug("Skipping test case class: %s", name)
return new_class
class XdsUrlMapTestCase(
base_testcase.BaseTestCase, metaclass=_MetaXdsUrlMapTestCase
):
"""XdsUrlMapTestCase is the base class for urlMap related tests.
The subclass is expected to implement 3 methods:
- url_map_change: Updates the urlMap components for this test case
- xds_config_validate: Validates if the client received legit xDS configs
- rpc_distribution_validate: Validates if the routing behavior is correct
"""
test_client_runner: Optional[_KubernetesClientRunner] = None
@staticmethod
def is_supported(config: skips.TestConfig) -> bool:
"""Allow the test case to decide whether it supports the given config.
Returns:
A bool indicates if the given config is supported.
"""
del config
return True
@staticmethod
def client_init_config(rpc: str, metadata: str) -> Tuple[str, str]:
"""Updates the initial RPC configs for this test case.
Each test case will start a test client. The client takes RPC configs
and starts to send RPCs immediately. The config returned by this
function will be used to replace the default configs.
The default configs are passed in as arguments, so this method can
modify part of them.
Args:
rpc: The default rpc config, specifying RPCs to send, format
'UnaryCall,EmptyCall'
metadata: The metadata config, specifying metadata to send with each
RPC, format 'EmptyCall:key1:value1,UnaryCall:key2:value2'.
Returns:
            A tuple containing the updated rpc and metadata config.
"""
return rpc, metadata
@staticmethod
@abc.abstractmethod
def url_map_change(
host_rule: HostRule, path_matcher: PathMatcher
) -> Tuple[HostRule, PathMatcher]:
"""Updates the dedicated urlMap components for this test case.
Each test case will have a dedicated HostRule, where the hostname is
        generated from the test case name. The HostRule will be linked to a
        PathMatcher, which stores the routing logic.
Args:
host_rule: A HostRule GCP resource as a JSON dict.
path_matcher: A PathMatcher GCP resource as a JSON dict.
Returns:
            A tuple containing the updated versions of the given HostRule and
            PathMatcher.
"""
@abc.abstractmethod
def xds_config_validate(self, xds_config: DumpedXdsConfig) -> None:
"""Validates received xDS config, if anything is wrong, raise.
This stage only ends when the control plane failed to send a valid
config within a given time range, like 600s.
Args:
xds_config: A DumpedXdsConfig instance can be used as a JSON dict,
but also provides helper fields for commonly checked xDS config.
"""
@abc.abstractmethod
def rpc_distribution_validate(self, test_client: XdsTestClient) -> None:
"""Validates the routing behavior, if any is wrong, raise.
Args:
test_client: A XdsTestClient instance for all sorts of end2end testing.
"""
@classmethod
def hostname(cls):
return "%s.%s:%s" % (
cls.short_module_name,
_split_camel(cls.__name__),
GcpResourceManager().server_xds_port,
)
@classmethod
def path_matcher_name(cls):
# Path matcher name must match r'(?:[a-z](?:[-a-z0-9]{0,61}[a-z0-9])?)'
return "%s-%s-pm" % (cls.short_module_name, _split_camel(cls.__name__))
@classmethod
def setUpClass(cls):
logging.info("----- Testing %s -----", cls.__name__)
logging.info("Logs timezone: %s", time.localtime().tm_zone)
# Raises unittest.SkipTest if given client/server/version does not
# support current test case.
xds_k8s_testcase.evaluate_test_config(cls.is_supported)
# Configure cleanup to run after all tests regardless of
# whether setUpClass failed.
cls.addClassCleanup(cls.cleanupAfterTests)
if not cls.started_test_cases:
# Create the GCP resource once before the first test start
GcpResourceManager().setup(cls.test_case_classes)
cls.started_test_cases.add(cls.__name__)
        # Create the test case's own client runner with its own namespace,
        # which enables concurrent running with other test cases.
cls.test_client_runner = (
GcpResourceManager().create_test_client_runner()
)
# Start the client, and allow the test to override the initial RPC config.
rpc, metadata = cls.client_init_config(
rpc="UnaryCall,EmptyCall", metadata=""
)
cls.test_client = cls.test_client_runner.run(
server_target=f"xds:///{cls.hostname()}",
rpc=rpc,
metadata=metadata,
qps=QPS.value,
print_response=True,
)
@classmethod
def cleanupAfterTests(cls):
logging.info("----- TestCase %s teardown -----", cls.__name__)
client_restarts: int = 0
if cls.test_client_runner:
try:
logging.debug("Getting pods restart times")
client_restarts = cls.test_client_runner.get_pod_restarts(
cls.test_client_runner.deployment
)
except (retryers.RetryError, k8s.NotFound) as e:
logging.exception(e)
cls.finished_test_cases.add(cls.__name__)
# Whether to clean up shared pre-provisioned infrastructure too.
# We only do it after all tests are finished.
cleanup_all = cls.finished_test_cases == cls.test_case_names
# Graceful cleanup: try three times, and don't fail the test on
# a cleanup failure.
retryer = retryers.constant_retryer(
wait_fixed=_timedelta(seconds=10),
attempts=3,
log_level=logging.INFO,
)
try:
retryer(cls._cleanup, cleanup_all)
except retryers.RetryError:
logging.exception("Got error during teardown")
finally:
if hasattr(cls, "test_client_runner") and cls.test_client_runner:
logging.info("----- Test client logs -----")
cls.test_client_runner.logs_explorer_run_history_links()
# Fail if any of the pods restarted.
error_msg = (
"Client container unexpectedly restarted"
f" {client_restarts} times during test."
" In most cases, this is caused by the test client app crash."
)
assert client_restarts == 0, error_msg
@classmethod
def _cleanup(cls, cleanup_all: bool = False):
if cls.test_client_runner:
cls.test_client_runner.cleanup(force=True, force_namespace=True)
if cleanup_all:
GcpResourceManager().cleanup()
def _fetch_and_check_xds_config(self):
# TODO(lidiz) find another way to store last seen xDS config
# Cleanup state for this attempt
# pylint: disable=attribute-defined-outside-init
self._xds_json_config = None
# Fetch client config
config = self.test_client.csds.fetch_client_status(
log_level=logging.INFO
)
self.assertIsNotNone(config)
# Found client config, test it.
self._xds_json_config = json_format.MessageToDict(config)
# pylint: enable=attribute-defined-outside-init
# Execute the child class provided validation logic
self.xds_config_validate(DumpedXdsConfig(self._xds_json_config))
def run(self, result: unittest.TestResult = None) -> None:
"""Abort this test case if CSDS check is failed.
This prevents the test runner to waste time on RPC distribution test,
and yields clearer signal.
"""
if result.failures or result.errors:
logging.info("Aborting %s", self.__class__.__name__)
else:
super().run(result)
def test_client_config(self):
self.test_client.wait_for_active_xds_channel(
xds_server_uri=GcpResourceManager().xds_server_uri,
)
retryer = retryers.constant_retryer(
wait_fixed=datetime.timedelta(
seconds=_URL_MAP_PROPAGATE_CHECK_INTERVAL_SEC
),
timeout=datetime.timedelta(seconds=_URL_MAP_PROPAGATE_TIMEOUT_SEC),
logger=logging,
log_level=logging.INFO,
)
try:
retryer(self._fetch_and_check_xds_config)
finally:
logging.info(
"latest xDS config:\n%s",
GcpResourceManager().td.compute.resource_pretty_format(
self._xds_json_config
),
)
def test_rpc_distribution(self):
self.rpc_distribution_validate(self.test_client)
@classmethod
def configure_and_send(
cls,
test_client: XdsTestClient,
*,
rpc_types: Iterable[str],
metadata: Optional[Iterable[Tuple[str, str, str]]] = None,
app_timeout: Optional[int] = None,
num_rpcs: int,
) -> RpcDistributionStats:
test_client.update_config.configure(
rpc_types=rpc_types, metadata=metadata, app_timeout=app_timeout
)
# Configure RPC might race with get stats RPC on slower machines.
time.sleep(_CLIENT_CONFIGURE_WAIT_SEC)
lb_stats = test_client.get_load_balancer_stats(num_rpcs=num_rpcs)
logging.info(
"[%s] << Received LoadBalancerStatsResponse:\n%s",
test_client.hostname,
helpers_grpc.lb_stats_pretty(lb_stats),
)
return RpcDistributionStats(json_format.MessageToDict(lb_stats))
def assertNumEndpoints(self, xds_config: DumpedXdsConfig, k: int) -> None:
self.assertLen(
xds_config.endpoints,
k,
(
"insufficient endpoints in EDS:"
f" want={k} seen={xds_config.endpoints}"
),
)
def assertRpcStatusCode( # pylint: disable=too-many-locals
self,
test_client: XdsTestClient,
*,
expected: Iterable[ExpectedResult],
length: int,
tolerance: float,
) -> None:
"""Assert the distribution of RPC statuses over a period of time."""
# Sending with pre-set QPS for a period of time
before_stats = test_client.get_load_balancer_accumulated_stats()
logging.info(
(
"Received LoadBalancerAccumulatedStatsResponse from test client"
" %s: before:\n%s"
),
test_client.hostname,
helpers_grpc.accumulated_stats_pretty(before_stats),
)
time.sleep(length)
after_stats = test_client.get_load_balancer_accumulated_stats()
logging.info(
(
"Received LoadBalancerAccumulatedStatsResponse from test client"
" %s: after: \n%s"
),
test_client.hostname,
helpers_grpc.accumulated_stats_pretty(after_stats),
)
# Validate the diff
for expected_result in expected:
rpc = expected_result.rpc_type
status = expected_result.status_code.value[0]
# Compute observation
# ProtoBuf messages has special magic dictionary that we don't need
# to catch exceptions:
# https://developers.google.com/protocol-buffers/docs/reference/python-generated#undefined
seen_after = after_stats.stats_per_method[rpc].result[status]
seen_before = before_stats.stats_per_method[rpc].result[status]
seen = seen_after - seen_before
# Compute total number of RPC started
stats_per_method_after = after_stats.stats_per_method.get(
rpc, {}
).result.items()
total_after = sum(
x[1] for x in stats_per_method_after
) # (status_code, count)
stats_per_method_before = before_stats.stats_per_method.get(
rpc, {}
).result.items()
total_before = sum(
x[1] for x in stats_per_method_before
) # (status_code, count)
total = total_after - total_before
# Compute and validate the number
want = total * expected_result.ratio
diff_ratio = abs(seen - want) / total
self.assertLessEqual(
diff_ratio,
tolerance,
(
f"Expect rpc [{rpc}] to return "
f"[{expected_result.status_code}] at "
f"{expected_result.ratio:.2f} ratio: "
f"seen={seen} want={want} total={total} "
f"diff_ratio={diff_ratio:.4f} > {tolerance:.2f}"
),
)
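A hypothetical test case built on this base class, showing the three methods a subclass implements (the route rule and assertions are illustrative, not from this file):

class TestHeaderExactMatch(XdsUrlMapTestCase):
    @staticmethod
    def url_map_change(host_rule, path_matcher):
        # Route RPCs carrying header env:canary to the alternative service.
        path_matcher["routeRules"] = [{
            "priority": 0,
            "matchRules": [{
                "prefixMatch": "/",
                "headerMatches": [
                    {"headerName": "env", "exactMatch": "canary"}
                ],
            }],
            "service": GcpResourceManager().alternative_backend_service(),
        }]
        return host_rule, path_matcher

    def xds_config_validate(self, xds_config: DumpedXdsConfig):
        self.assertNumEndpoints(xds_config, 1)

    def rpc_distribution_validate(self, test_client: XdsTestClient):
        stats = self.configure_and_send(
            test_client, rpc_types=[RpcTypeUnaryCall], num_rpcs=100
        )
        self.assertEqual(0, stats.num_failures)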

@@ -1,94 +0,0 @@
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: ${deployment_name}
namespace: ${namespace_name}
labels:
app: ${deployment_name}
deployment_id: ${deployment_id}
owner: xds-k8s-interop-test
spec:
replicas: 1
selector:
matchLabels:
app: ${deployment_name}
deployment_id: ${deployment_id}
template:
metadata:
labels:
app: ${deployment_name}
deployment_id: ${deployment_id}
owner: xds-k8s-interop-test
annotations:
security.cloud.google.com/use-workload-certificates: ""
spec:
serviceAccountName: ${service_account_name}
containers:
- name: ${deployment_name}
image: ${image_name}
imagePullPolicy: Always
startupProbe:
tcpSocket:
port: ${stats_port}
periodSeconds: 3
## Extend the number of probes well beyond the duration of the test
## driver waiting for the container to start.
failureThreshold: 1000
args:
- "--server=${server_target}"
- "--stats_port=${stats_port}"
- "--secure_mode=${secure_mode}"
- "--qps=${qps}"
- "--rpc=${rpc}"
- "--print_response=${print_response}"
ports:
- containerPort: ${stats_port}
env:
- name: GRPC_XDS_BOOTSTRAP
value: "/tmp/grpc-xds/td-grpc-bootstrap.json"
- name: GRPC_XDS_EXPERIMENTAL_SECURITY_SUPPORT
value: "true"
- name: GRPC_XDS_EXPERIMENTAL_V3_SUPPORT
value: "true"
- name: GRPC_EXPERIMENTAL_XDS_ENABLE_OVERRIDE_HOST
value: "true"
volumeMounts:
- mountPath: /tmp/grpc-xds/
name: grpc-td-conf
readOnly: true
resources:
limits:
cpu: 800m
memory: 512Mi
requests:
cpu: 100m
memory: 512Mi
initContainers:
- name: grpc-td-init
image: ${td_bootstrap_image}
imagePullPolicy: Always
args:
- "--output=/tmp/bootstrap/td-grpc-bootstrap.json"
- "--vpc-network-name=${network}"
% if xds_server_uri:
- "--xds-server-uri=${xds_server_uri}"
% endif
% if config_mesh:
- "--config-mesh-experimental=${config_mesh}"
% endif
resources:
limits:
cpu: 100m
memory: 100Mi
requests:
cpu: 10m
memory: 100Mi
volumeMounts:
- mountPath: /tmp/bootstrap/
name: grpc-td-conf
volumes:
- name: grpc-td-conf
emptyDir:
medium: Memory
...

@@ -1,104 +0,0 @@
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: ${deployment_name}
namespace: ${namespace_name}
labels:
app: ${deployment_name}
deployment_id: ${deployment_id}
owner: xds-k8s-interop-test
spec:
replicas: 1
selector:
matchLabels:
app: ${deployment_name}
deployment_id: ${deployment_id}
template:
metadata:
labels:
app: ${deployment_name}
deployment_id: ${deployment_id}
owner: xds-k8s-interop-test
spec:
% if service_account_name:
serviceAccountName: ${service_account_name}
% endif
containers:
- name: ${deployment_name}
image: ${image_name}
imagePullPolicy: Always
startupProbe:
tcpSocket:
port: ${stats_port}
periodSeconds: 3
## Extend the number of probes well beyond the duration of the test
## driver waiting for the container to start.
failureThreshold: 1000
args:
- "--server=${server_target}"
- "--stats_port=${stats_port}"
- "--qps=${qps}"
- "--rpc=${rpc}"
- "--metadata=${metadata}"
- "--print_response=${print_response}"
% if enable_csm_observability:
- "--enable_csm_observability"
% endif
ports:
- containerPort: ${stats_port}
env:
- name: GRPC_XDS_BOOTSTRAP
value: "/tmp/grpc-xds/td-grpc-bootstrap.json"
- name: GRPC_XDS_EXPERIMENTAL_ENABLE_RING_HASH
value: "true"
- name: GRPC_XDS_EXPERIMENTAL_ENABLE_RETRY
value: "true"
- name: GRPC_EXPERIMENTAL_ENABLE_OUTLIER_DETECTION
value: "true"
- name: GRPC_EXPERIMENTAL_XDS_CUSTOM_LB_CONFIG
value: "true"
- name: GRPC_EXPERIMENTAL_XDS_ENABLE_OVERRIDE_HOST
value: "true"
volumeMounts:
- mountPath: /tmp/grpc-xds/
name: grpc-td-conf
readOnly: true
resources:
limits:
cpu: 800m
memory: 512Mi
requests:
cpu: 100m
memory: 512Mi
initContainers:
- name: grpc-td-init
image: ${td_bootstrap_image}
imagePullPolicy: Always
args:
- "--output=/tmp/bootstrap/td-grpc-bootstrap.json"
- "--vpc-network-name=${network}"
% if xds_server_uri:
- "--xds-server-uri=${xds_server_uri}"
% endif
% if config_mesh:
- "--config-mesh-experimental=${config_mesh}"
% endif
% if generate_mesh_id:
- "--generate-mesh-id-experimental"
% endif
resources:
limits:
cpu: 100m
memory: 100Mi
requests:
cpu: 10m
memory: 100Mi
volumeMounts:
- mountPath: /tmp/bootstrap/
name: grpc-td-conf
volumes:
- name: grpc-td-conf
emptyDir:
medium: Memory
...
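The `${...}` and `% if` constructs in these manifests are Mako template syntax. A sketch of rendering the client deployment template outside the framework, assuming Mako is installed; the filename and all argument values here are illustrative assumptions:

from mako.template import Template

manifest = Template(filename="client.deployment.yaml").render(
    deployment_name="psm-grpc-client",
    namespace_name="psm-test",
    deployment_id="client-1",
    service_account_name="psm-sa",
    image_name="example.com/grpc-client:latest",
    td_bootstrap_image="example.com/td-bootstrap:latest",
    server_target="xds:///xds-test-server:8080",
    stats_port=8079,
    qps=25,
    rpc="UnaryCall,EmptyCall",
    metadata="",
    print_response=True,
    network="default",
    xds_server_uri=None,
    config_mesh=None,
    generate_mesh_id=False,
    enable_csm_observability=False,
)
print(manifest)  # Rendered YAML, ready to be applied to the cluster.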

@@ -1,15 +0,0 @@
---
apiVersion: monitoring.googleapis.com/v1
kind: PodMonitoring
metadata:
namespace: ${namespace_name}
name: ${pod_monitoring_name}
labels:
owner: xds-k8s-interop-test
spec:
selector:
matchLabels:
deployment_id: ${deployment_id}
endpoints:
- port: 9464
interval: 10s

@@ -1,17 +0,0 @@
---
kind: GCPBackendPolicy
apiVersion: networking.gke.io/v1
metadata:
name: ${be_policy_name}
namespace: ${namespace_name}
labels:
owner: xds-k8s-interop-test
spec:
targetRef:
group: ""
kind: Service
name: ${service_name}
default:
connectionDraining:
drainingTimeoutSec: 600
...

@@ -1,10 +0,0 @@
---
apiVersion: v1
kind: Service
metadata:
name: ${service_name}
namespace: ${namespace_name}
spec:
ports:
- port: 8080
targetPort: 8080

@@ -1,22 +0,0 @@
---
kind: GRPCRoute
apiVersion: gateway.networking.k8s.io/v1alpha2
metadata:
name: ${route_name}
namespace: ${namespace_name}
labels:
owner: xds-k8s-interop-test
spec:
parentRefs:
- name: ${mesh_name}
namespace: ${namespace_name}
group: net.gke.io
kind: TDMesh
hostnames:
- ${xds_server_uri}
rules:
- backendRefs:
- name: ${service_name}
port: ${test_port}
namespace: ${namespace_name}
...

@@ -1,23 +0,0 @@
---
kind: HTTPRoute
apiVersion: gateway.networking.k8s.io/v1beta1
metadata:
name: ${route_name}
namespace: ${namespace_name}
labels:
owner: xds-k8s-interop-test
spec:
parentRefs:
- name: ${frontend_service_name}
namespace: ${namespace_name}
group: ""
kind: Service
rules:
- matches:
- path:
type: Exact
value: /grpc.testing.TestService/UnaryCall
backendRefs:
- name: ${service_name}
port: 8080
...

@@ -1,29 +0,0 @@
---
kind: HTTPRoute
apiVersion: gateway.networking.k8s.io/v1beta1
metadata:
name: ${route_name}
namespace: ${namespace_name}
labels:
owner: xds-k8s-interop-test
spec:
parentRefs:
- name: ${frontend_service_name}
namespace: ${namespace_name}
group: ""
kind: Service
rules:
- matches:
- path:
type: Exact
value: /grpc.testing.TestService/UnaryCall
filters:
- type: ExtensionRef
extensionRef:
group: networking.gke.io
kind: GCPSessionAffinityFilter
name: ssa-filter
backendRefs:
- name: ${service_name}
port: 8080
...
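A sketch of creating this custom resource with the official kubernetes Python client; this is illustrative only (the framework's own k8s wrappers are not shown here), and `manifest` is assumed to hold the rendered YAML from a template like the one above:

import yaml
from kubernetes import client, config

config.load_kube_config()  # Uses the current kubectl context.
route = yaml.safe_load(manifest)
# HTTPRoute is a custom resource, so it goes through CustomObjectsApi.
client.CustomObjectsApi().create_namespaced_custom_object(
    group="gateway.networking.k8s.io",
    version="v1beta1",
    namespace=route["metadata"]["namespace"],
    plural="httproutes",
    body=route,
)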

@@ -1,17 +0,0 @@
---
apiVersion: v1
kind: Service
metadata:
name: ${service_name}
namespace: ${namespace_name}
labels:
owner: xds-k8s-interop-test
spec:
type: ClusterIP
selector:
app: ${deployment_name}
ports:
- port: ${test_port}
protocol: TCP
targetPort: ${test_port}
...

@@ -1,10 +0,0 @@
---
apiVersion: networking.gke.io/v1
kind: GCPSessionAffinityFilter
metadata:
name: ${session_affinity_filter_name}
namespace: ${namespace_name}
spec:
statefulGeneratedCookie:
cookieTtlSeconds: 50
...

@@ -1,15 +0,0 @@
---
apiVersion: networking.gke.io/v1
kind: GCPSessionAffinityPolicy
metadata:
name: ${session_affinity_policy_name}
namespace: ${namespace_name}
spec:
statefulGeneratedCookie:
cookieTtlSeconds: 50
targetRef:
name: ${route_name}
group: gateway.networking.k8s.io
kind: HTTPRoute
namespace: ${namespace_name}
...

@@ -1,15 +0,0 @@
---
apiVersion: networking.gke.io/v1
kind: GCPSessionAffinityPolicy
metadata:
name: ${session_affinity_policy_name}
namespace: ${namespace_name}
spec:
statefulGeneratedCookie:
cookieTtlSeconds: 50
targetRef:
name: ${service_name}
kind: Service
namespace: ${namespace_name}
group: ""
...

@@ -1,9 +0,0 @@
---
apiVersion: v1
kind: Namespace
metadata:
name: ${namespace_name}
labels:
name: ${namespace_name}
owner: xds-k8s-interop-test
...

@@ -1,95 +0,0 @@
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: ${deployment_name}
namespace: ${namespace_name}
labels:
app: ${deployment_name}
deployment_id: ${deployment_id}
owner: xds-k8s-interop-test
spec:
replicas: ${replica_count}
selector:
matchLabels:
app: ${deployment_name}
deployment_id: ${deployment_id}
template:
metadata:
annotations:
security.cloud.google.com/use-workload-certificates: ""
labels:
app: ${deployment_name}
deployment_id: ${deployment_id}
owner: xds-k8s-interop-test
spec:
serviceAccountName: ${service_account_name}
containers:
- name: ${deployment_name}
image: ${image_name}
imagePullPolicy: Always
startupProbe:
tcpSocket:
port: ${maintenance_port}
periodSeconds: 3
## Extend the number of probes well beyond the duration of the test
## driver waiting for the container to start.
failureThreshold: 1000
args:
- "--port=${test_port}"
- "--maintenance_port=${maintenance_port}"
- "--secure_mode=${secure_mode}"
ports:
- containerPort: ${test_port}
- containerPort: ${maintenance_port}
env:
- name: GRPC_XDS_BOOTSTRAP
value: "/tmp/grpc-xds/td-grpc-bootstrap.json"
- name: GRPC_XDS_EXPERIMENTAL_SECURITY_SUPPORT
value: "true"
- name: GRPC_XDS_EXPERIMENTAL_V3_SUPPORT
value: "true"
## TODO(sergiitk): this should be conditional for if version < v1.37.x
- name: GRPC_XDS_EXPERIMENTAL_NEW_SERVER_API
value: "true"
- name: GRPC_XDS_EXPERIMENTAL_RBAC
value: "true"
- name: GRPC_EXPERIMENTAL_XDS_ENABLE_OVERRIDE_HOST
value: "true"
volumeMounts:
- mountPath: /tmp/grpc-xds/
name: grpc-td-conf
readOnly: true
resources:
limits:
cpu: 800m
memory: 512Mi
requests:
cpu: 100m
memory: 512Mi
initContainers:
- name: grpc-td-init
image: ${td_bootstrap_image}
imagePullPolicy: Always
args:
- "--output=/tmp/bootstrap/td-grpc-bootstrap.json"
- "--vpc-network-name=${network}"
% if xds_server_uri:
- "--xds-server-uri=${xds_server_uri}"
% endif
- "--node-metadata=app=${namespace_name}-${deployment_name}"
resources:
limits:
cpu: 100m
memory: 100Mi
requests:
cpu: 10m
memory: 100Mi
volumeMounts:
- mountPath: /tmp/bootstrap/
name: grpc-td-conf
volumes:
- name: grpc-td-conf
emptyDir:
medium: Memory
...

@@ -1,101 +0,0 @@
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: ${deployment_name}
namespace: ${namespace_name}
labels:
app: ${deployment_name}
deployment_id: ${deployment_id}
owner: xds-k8s-interop-test
spec:
replicas: ${replica_count}
selector:
matchLabels:
app: ${deployment_name}
deployment_id: ${deployment_id}
template:
metadata:
labels:
app: ${deployment_name}
deployment_id: ${deployment_id}
owner: xds-k8s-interop-test
spec:
% if service_account_name:
serviceAccountName: ${service_account_name}
% endif
% if termination_grace_period_seconds:
terminationGracePeriodSeconds: ${termination_grace_period_seconds}
% endif
containers:
- name: ${deployment_name}
image: ${image_name}
imagePullPolicy: Always
startupProbe:
tcpSocket:
port: ${test_port}
periodSeconds: 3
## Extend the number of probes well beyond the duration of the test
## driver waiting for the container to start.
failureThreshold: 1000
args:
- "--port=${test_port}"
% if enable_csm_observability:
- "--enable_csm_observability"
% endif
ports:
- containerPort: ${test_port}
env:
- name: GRPC_XDS_BOOTSTRAP
value: "/tmp/grpc-xds/td-grpc-bootstrap.json"
- name: GRPC_XDS_EXPERIMENTAL_V3_SUPPORT
value: "true"
- name: GRPC_EXPERIMENTAL_XDS_ENABLE_OVERRIDE_HOST
value: "true"
volumeMounts:
- mountPath: /tmp/grpc-xds/
name: grpc-td-conf
readOnly: true
resources:
limits:
cpu: 800m
memory: 512Mi
requests:
cpu: 100m
memory: 512Mi
% if pre_stop_hook:
lifecycle:
preStop:
exec:
command: ["tail", "-f", "/dev/null"]
% endif
initContainers:
- name: grpc-td-init
image: ${td_bootstrap_image}
imagePullPolicy: Always
args:
- "--output=/tmp/bootstrap/td-grpc-bootstrap.json"
- "--vpc-network-name=${network}"
% if xds_server_uri:
- "--xds-server-uri=${xds_server_uri}"
% endif
% if bootstrap_version=="v0.11.0" or bootstrap_version=="v0.12.0":
- "--node-metadata-experimental=app=${namespace_name}-${deployment_name}"
% else:
- "--node-metadata=app=${namespace_name}-${deployment_name}"
% endif
resources:
limits:
cpu: 100m
memory: 100Mi
requests:
cpu: 10m
memory: 100Mi
volumeMounts:
- mountPath: /tmp/bootstrap/
name: grpc-td-conf
volumes:
- name: grpc-td-conf
emptyDir:
medium: Memory
...
