[PSM Interop] Extend clean-up script to 2 other GKE clusters (#28983)

* [PSM Interop] Extend clean-up script to 2 other GKE clusters

* Use a safer approach to invoke the cleanup script

* Handle the readonly issue

* Make sanity test happy
pull/28119/head
Lidi Zheng 3 years ago committed by GitHub
parent 089bab32c2
commit 0966536dc1
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 4
      tools/internal_ci/linux/grpc_xds_k8s_install_test_driver.sh
  2. 31
      tools/internal_ci/linux/grpc_xds_resource_cleanup.sh
  3. 44
      tools/run_tests/xds_k8s_test_driver/bin/cleanup/cleanup.py
  4. 38
      tools/run_tests/xds_k8s_test_driver/bin/cleanup/namespace.py

@ -52,7 +52,7 @@ activate_gke_cluster() {
GKE_CLUSTER_PSM_BASIC)
GKE_CLUSTER_NAME="interop-test-psm-basic"
GKE_CLUSTER_ZONE="us-central1-c"
;;
;;
*)
echo "Unknown GKE cluster: ${1}"
exit 1
@ -413,7 +413,7 @@ kokoro_setup_test_driver() {
# TEST_XML_OUTPUT_DIR: Populated with the path to test xUnit XML report
# GIT_ORIGIN_URL: Populated with the origin URL of git repo used for the build
# GIT_COMMIT: Populated with the SHA-1 of git commit being built
# GIT_COMMIT_SHORT: Populated with the short SHA-1 of git commit being built
# GIT_COMMIT_SHORT: Populated with the short SHA-1 of git commit being built
# SECONDARY_KUBE_CONTEXT: Populated with name of kubectl context with secondary GKE cluster access, if any
# Arguments:
# The path to the folder containing the build script

@ -37,3 +37,34 @@ python3 -m bin.cleanup.cleanup \
--kube_context="${KUBE_CONTEXT}" \
--resource_prefix='required-but-does-not-matter' \
--td_bootstrap_image='required-but-does-not-matter' --server_image='required-but-does-not-matter' --client_image='required-but-does-not-matter'
# The BASIC cluster is used by url-map tests. Only the GKE client namespaces
# are cleaned here; they won't provide much value in debugging, so the keep
# hours are reduced to 6.
activate_gke_cluster GKE_CLUSTER_PSM_BASIC
# Invoke get-credentials directly, because gcloud_get_cluster_credentials
# re-sets readonly Bash variables. That readonly protection is a nice safety
# mechanism to keep.
gcloud container clusters get-credentials "${GKE_CLUSTER_NAME}" --zone "${GKE_CLUSTER_ZONE}"
# Pass whichever kubectl context is current after get-credentials to the
# namespace cleanup below.
TARGET_KUBE_CONTEXT="$(kubectl config current-context)"
python3 -m bin.cleanup.namespace \
--project=grpc-testing \
--network=default-vpc \
--keep_hours=6 \
--kube_context="${TARGET_KUBE_CONTEXT}" \
--resource_prefix='required-but-does-not-matter' \
--td_bootstrap_image='required-but-does-not-matter' --server_image='required-but-does-not-matter' --client_image='required-but-does-not-matter'
# The PSM_LB cluster is used by k8s_lb tests. Only the GKE client namespaces
# are cleaned here; they won't provide much value in debugging, so the keep
# hours are reduced to 6.
activate_gke_cluster GKE_CLUSTER_PSM_LB
# Same direct get-credentials invocation as for the BASIC cluster above.
gcloud container clusters get-credentials "${GKE_CLUSTER_NAME}" --zone "${GKE_CLUSTER_ZONE}"
TARGET_KUBE_CONTEXT="$(kubectl config current-context)"
python3 -m bin.cleanup.namespace \
--project=grpc-testing \
--network=default-vpc \
--keep_hours=6 \
--kube_context="${TARGET_KUBE_CONTEXT}" \
--resource_prefix='required-but-does-not-matter' \
--td_bootstrap_image='required-but-does-not-matter' --server_image='required-but-does-not-matter' --client_image='required-but-does-not-matter'

@ -308,6 +308,29 @@ def delete_k8s_resources(dry_run, k8s_resource_rules, project, network,
logging.info('----- Skipped [resource is within expiry date]')
def find_and_remove_leaked_k8s_resources(dry_run, project, network,
                                         gcp_service_account):
    """Find leaked test namespaces in the target cluster and clean them up.

    Builds one matching rule per configured client prefix and per configured
    server prefix, lists every namespace visible through the kube context
    given by the KUBE_CONTEXT flag, and hands both to delete_k8s_resources.

    Args:
        dry_run: Forwarded unchanged to delete_k8s_resources.
        project: Forwarded unchanged to delete_k8s_resources.
        network: Forwarded unchanged to delete_k8s_resources.
        gcp_service_account: Forwarded unchanged to delete_k8s_resources.
    """
    # Each rule is a tuple of, in order:
    #   - regex to match
    #   - prefix of the resources
    #   - function to delete the resource
    client_rules = [(f'{prefix}-client-(.*)', prefix, cleanup_client)
                    for prefix in CLIENT_PREFIXES.value]
    server_rules = [(f'{prefix}-server-(.*)', prefix, cleanup_server)
                    for prefix in SERVER_PREFIXES.value]
    # Client rules come first, then server rules.
    k8s_resource_rules = client_rules + server_rules

    # Delete leaked k8s namespaces, those usually mean there are leaked testing
    # client/servers from the gke framework.
    k8s_api_manager = k8s.KubernetesApiManager(xds_k8s_flags.KUBE_CONTEXT.value)
    namespaces = k8s_api_manager.core.list_namespace()
    delete_k8s_resources(dry_run, k8s_resource_rules, project, network,
                         k8s_api_manager, gcp_service_account,
                         namespaces.items)
def main(argv):
if len(argv) > 1:
raise app.UsageError('Too many command-line arguments.')
@ -359,25 +382,8 @@ def main(argv):
delete_leaked_td_resources(dry_run, td_resource_rules, project, network,
leakedInstanceTemplates)
k8s_resource_rules = [
# items in each tuple, in order
# - regex to match
# - prefix of the resources
# - function to delete the resource
]
for prefix in CLIENT_PREFIXES.value:
k8s_resource_rules.append(
(f'{prefix}-client-(.*)', prefix, cleanup_client),)
for prefix in SERVER_PREFIXES.value:
k8s_resource_rules.append(
(f'{prefix}-server-(.*)', prefix, cleanup_server),)
# Delete leaked k8s namespaces, those usually mean there are leaked testing
# client/servers from the gke framework.
k8s_api_manager = k8s.KubernetesApiManager(xds_k8s_flags.KUBE_CONTEXT.value)
nss = k8s_api_manager.core.list_namespace()
delete_k8s_resources(dry_run, k8s_resource_rules, project, network,
k8s_api_manager, gcp_service_account, nss.items)
find_and_remove_leaked_k8s_resources(dry_run, project, network,
gcp_service_account)
if __name__ == '__main__':

@ -0,0 +1,38 @@
# Copyright 2022 gRPC authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Clean up GKE namespaces leaked by the tests."""
from absl import app
from bin.cleanup import cleanup
from framework import xds_flags
from framework import xds_k8s_flags
def main(argv):
    """Run the namespace-only cleanup using values taken from absl flags.

    Args:
        argv: Positional command-line arguments left over after flag parsing;
            anything beyond the program name is rejected.

    Raises:
        app.UsageError: If extra positional arguments were supplied.
    """
    if len(argv) > 1:
        raise app.UsageError('Too many command-line arguments.')

    cleanup.load_keep_config()

    # Keyword arguments keep the flag reads in the same order as before
    # (project, network, service account, dry-run) while still binding to the
    # right parameters of the cleanup helper.
    cleanup.find_and_remove_leaked_k8s_resources(
        project=xds_flags.PROJECT.value,
        network=xds_flags.NETWORK.value,
        gcp_service_account=xds_k8s_flags.GCP_SERVICE_ACCOUNT.value,
        dry_run=cleanup.DRY_RUN.value,
    )


if __name__ == '__main__':
    # Let absl parse the flags before handing control to main().
    app.run(main)
Loading…
Cancel
Save