From b5708790101c9ad207495d3178d9f36684a71e43 Mon Sep 17 00:00:00 2001 From: Sergii Tkachenko Date: Mon, 4 Dec 2023 15:00:29 -0800 Subject: [PATCH] [PSM Interop] Log on debug level when resource deletion failure is 404 (#35131) Removes noise from the cleanup/teardown ops. #### GCP APIs In GCP APIs, change log level for delete operations that failed because the resource doesn't exist (API 404) from `info` to `debug`. Framework's logging philosophy is to only log external operations (e.g. APIs, RPCs). If no error logged, the op is assumed successful. In the deletion case, is still possible to discriminate between whether the op was actually performed by observing the `Waiting %s sec for %s operation id: %s` log message. #### K8s APIs In K8s APIs: - For delete operations that failed because the resource doesn't exist (API 404) the log level is changed from `info` to `debug` - For delete operations that failed for any other reason, the log level is changed from `info` to `warning` - When `wait_for_deletion` is enabled (it's the default) the delete operation will be confirmed with `logger.info(" %s deleted", name)`. Previously it logged at the `debug` level. Closes #35131 COPYBARA_INTEGRATE_REVIEW=https://github.com/grpc/grpc/pull/35131 from sergiitk:psm-interop-debug-log-on-delete-404 f6629e5132f0f3ef91acb51921b694833d88cbf3 PiperOrigin-RevId: 587851692 --- .../xds_k8s_test_driver/bin/run_ping_pong.py | 1 + .../framework/infrastructure/gcp/api.py | 2 +- .../framework/infrastructure/gcp/compute.py | 4 +- .../test_app/runners/k8s/k8s_base_runner.py | 91 ++++++++++++++----- 4 files changed, 71 insertions(+), 27 deletions(-) diff --git a/tools/run_tests/xds_k8s_test_driver/bin/run_ping_pong.py b/tools/run_tests/xds_k8s_test_driver/bin/run_ping_pong.py index c321e389907..d811a16a378 100755 --- a/tools/run_tests/xds_k8s_test_driver/bin/run_ping_pong.py +++ b/tools/run_tests/xds_k8s_test_driver/bin/run_ping_pong.py @@ -79,6 +79,7 @@ def get_client_rpc_stats( def run_ping_pong(test_client: _XdsTestClient, num_rpcs: int): + test_client.wait_for_active_xds_channel() test_client.wait_for_server_channel_ready() lb_stats = get_client_rpc_stats(test_client, num_rpcs) for backend, rpcs_count in lb_stats.rpcs_by_peer.items(): diff --git a/tools/run_tests/xds_k8s_test_driver/framework/infrastructure/gcp/api.py b/tools/run_tests/xds_k8s_test_driver/framework/infrastructure/gcp/api.py index 05bd08ab6f2..33e48d76009 100644 --- a/tools/run_tests/xds_k8s_test_driver/framework/infrastructure/gcp/api.py +++ b/tools/run_tests/xds_k8s_test_driver/framework/infrastructure/gcp/api.py @@ -501,7 +501,7 @@ class GcpStandardCloudApiResource(GcpProjectApiResource, metaclass=abc.ABCMeta): return True except _HttpError as error: if error.resp and error.resp.status == 404: - logger.info("%s not deleted since it does not exist", full_name) + logger.debug("%s not deleted since it doesn't exist", full_name) else: logger.warning("Failed to delete %s, %r", full_name, error) return False diff --git a/tools/run_tests/xds_k8s_test_driver/framework/infrastructure/gcp/compute.py b/tools/run_tests/xds_k8s_test_driver/framework/infrastructure/gcp/compute.py index 93ed60d25fc..b06a1007b4e 100644 --- a/tools/run_tests/xds_k8s_test_driver/framework/infrastructure/gcp/compute.py +++ b/tools/run_tests/xds_k8s_test_driver/framework/infrastructure/gcp/compute.py @@ -565,8 +565,8 @@ class ComputeV1( return True except googleapiclient.errors.HttpError as error: if error.resp and error.resp.status == 404: - logger.info( - 'Resource %s "%s" not deleted since it does not exist', + logger.debug( + "Resource %s %s not deleted since it doesn't exist", resource_type, resource_name, ) diff --git a/tools/run_tests/xds_k8s_test_driver/framework/test_app/runners/k8s/k8s_base_runner.py b/tools/run_tests/xds_k8s_test_driver/framework/test_app/runners/k8s/k8s_base_runner.py index da17961d5f4..3df9bd19a25 100644 --- a/tools/run_tests/xds_k8s_test_driver/framework/test_app/runners/k8s/k8s_base_runner.py +++ b/tools/run_tests/xds_k8s_test_driver/framework/test_app/runners/k8s/k8s_base_runner.py @@ -582,20 +582,32 @@ class KubernetesBaseRunner(base_runner.BaseRunner, metaclass=ABCMeta): logger.info("Deleting HTTPRoute %s", name) try: self.k8s_namespace.delete_gamma_route(name) - except (retryers.RetryError, k8s.NotFound) as e: - logger.info("HTTPRoute %s deletion failed: %s", name, e) + except k8s.NotFound: + logger.debug( + "HTTPRoute %s not deleted since it doesn't exist", name + ) + return + except retryers.RetryError as e: + logger.warning("HTTPRoute %s deletion failed: %s", name, e) return if wait_for_deletion: self.k8s_namespace.wait_for_get_gamma_route_deleted(name) - logger.debug("HTTPRoute %s deleted", name) + logger.info("HTTPRoute %s deleted", name) def _delete_session_affinity_policy(self, name, wait_for_deletion=True): logger.info("Deleting GCPSessionAffinityPolicy %s", name) try: self.k8s_namespace.delete_session_affinity_policy(name) - except (retryers.RetryError, k8s.NotFound) as e: - logger.info( + except k8s.NotFound: + logger.debug( + "GCPSessionAffinityPolicy %s not deleted since it" + " doesn't exist", + name, + ) + return + except retryers.RetryError as e: + logger.warning( "GCPSessionAffinityPolicy %s deletion failed: %s", name, e ) return @@ -604,14 +616,21 @@ class KubernetesBaseRunner(base_runner.BaseRunner, metaclass=ABCMeta): self.k8s_namespace.wait_for_get_session_affinity_policy_deleted( name ) - logger.debug("GCPSessionAffinityPolicy %s deleted", name) + logger.info("GCPSessionAffinityPolicy %s deleted", name) def _delete_session_affinity_filter(self, name, wait_for_deletion=True): logger.info("Deleting GCPSessionAffinityFilter %s", name) try: self.k8s_namespace.delete_session_affinity_filter(name) - except (retryers.RetryError, k8s.NotFound) as e: - logger.info( + except k8s.NotFound: + logger.debug( + "GCPSessionAffinityFilter %s not deleted since it" + " doesn't exist", + name, + ) + return + except retryers.RetryError as e: + logger.warning( "GCPSessionAffinityFilter %s deletion failed: %s", name, e ) return @@ -620,71 +639,95 @@ class KubernetesBaseRunner(base_runner.BaseRunner, metaclass=ABCMeta): self.k8s_namespace.wait_for_get_session_affinity_filter_deleted( name ) - logger.debug("GCPSessionAffinityFilter %s deleted", name) + logger.info("GCPSessionAffinityFilter %s deleted", name) def _delete_backend_policy(self, name, wait_for_deletion=True): logger.info("Deleting GCPBackendPolicy %s", name) try: self.k8s_namespace.delete_backend_policy(name) - except (retryers.RetryError, k8s.NotFound) as e: - logger.info("GGCPBackendPolicy %s deletion failed: %s", name, e) + except k8s.NotFound: + logger.debug( + "GGCPBackendPolicy %s not deleted since it doesn't exist", name + ) + return + except retryers.RetryError as e: + logger.warning("GGCPBackendPolicy %s deletion failed: %s", name, e) return if wait_for_deletion: self.k8s_namespace.wait_for_get_backend_policy_deleted(name) - logger.debug("GCPBackendPolicy %s deleted", name) + logger.info("GCPBackendPolicy %s deleted", name) def _delete_deployment(self, name, wait_for_deletion=True): logger.info("Deleting deployment %s", name) self.stop_pod_dependencies() try: self.k8s_namespace.delete_deployment(name) - except (retryers.RetryError, k8s.NotFound) as e: - logger.info("Deployment %s deletion failed: %s", name, e) + except k8s.NotFound: + logger.debug( + "Deployment %s not deleted since it doesn't exist", name + ) + return + except retryers.RetryError as e: + logger.warning("Deployment %s deletion failed: %s", name, e) return if wait_for_deletion: self.k8s_namespace.wait_for_deployment_deleted(name) - logger.debug("Deployment %s deleted", name) + logger.info("Deployment %s deleted", name) def _delete_service(self, name, wait_for_deletion=True): logger.info("Deleting service %s", name) try: self.k8s_namespace.delete_service(name) - except (retryers.RetryError, k8s.NotFound) as e: - logger.info("Service %s deletion failed: %s", name, e) + except k8s.NotFound: + logger.debug("Service %s not deleted since it doesn't exist", name) + return + except retryers.RetryError as e: + logger.warning("Service %s deletion failed: %s", name, e) return if wait_for_deletion: self.k8s_namespace.wait_for_service_deleted(name) - logger.debug("Service %s deleted", name) + logger.info("Service %s deleted", name) def _delete_service_account(self, name, wait_for_deletion=True): logger.info("Deleting service account %s", name) try: self.k8s_namespace.delete_service_account(name) - except (retryers.RetryError, k8s.NotFound) as e: - logger.info("Service account %s deletion failed: %s", name, e) + except k8s.NotFound: + logger.debug( + "Service account %s not deleted since it doesn't exist", name + ) + return + except retryers.RetryError as e: + logger.warning("Service account %s deletion failed: %s", name, e) return if wait_for_deletion: self.k8s_namespace.wait_for_service_account_deleted(name) - logger.debug("Service account %s deleted", name) + logger.info("Service account %s deleted", name) def delete_namespace(self, wait_for_deletion=True): logger.info("Deleting namespace %s", self.k8s_namespace.name) try: self.k8s_namespace.delete() - except (retryers.RetryError, k8s.NotFound) as e: - logger.info( + except k8s.NotFound: + logger.debug( + "Namespace %s not deleted since it doesn't exist", + self.k8s_namespace.name, + ) + return + except retryers.RetryError as e: + logger.warning( "Namespace %s deletion failed: %s", self.k8s_namespace.name, e ) return if wait_for_deletion: self.k8s_namespace.wait_for_namespace_deleted() - logger.debug("Namespace %s deleted", self.k8s_namespace.name) + logger.info("Namespace %s deleted", self.k8s_namespace.name) def _wait_deployment_with_available_replicas(self, name, count=1, **kwargs): logger.info(