[PSM Interop] Fixes the triager hint - add missing date, turn off the highlighter (#34030)

Addresses some issues of the initial triage hint PR:
https://github.com/grpc/grpc/pull/33898.

1. Print the unhealthy backend name before the health info - previously it
was unclear which backend's health status was being dumped
2. Add missing `retry_err.add_note(note)` calls
3. Turn off the highlighter in triager hints, because highlighted output
isn't rendered properly in the stack trace saved to junit.xml
pull/34018/head
Sergii Tkachenko 1 year ago committed by GitHub
parent 54651a7168
commit b4063a8c3f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 32
      tools/run_tests/xds_k8s_test_driver/framework/infrastructure/gcp/api.py
  2. 13
      tools/run_tests/xds_k8s_test_driver/framework/infrastructure/gcp/compute.py
  3. 49
      tools/run_tests/xds_k8s_test_driver/framework/infrastructure/k8s.py

@ -400,10 +400,36 @@ class GcpProjectApiResource:
except _HttpLib2Error as error: except _HttpLib2Error as error:
raise TransportError(error) raise TransportError(error)
def resource_pretty_format(self, body: dict) -> str: def resource_pretty_format(
self,
resource: Any,
*,
highlight: bool = True,
) -> str:
"""Return a string with pretty-printed resource body.""" """Return a string with pretty-printed resource body."""
yaml_out: str = yaml.dump(body, explicit_start=True, explicit_end=True) yaml_out: str = yaml.dump(
return self._highlighter.highlight(yaml_out) resource,
explicit_start=True,
explicit_end=True,
)
return self._highlighter.highlight(yaml_out) if highlight else yaml_out
def resources_pretty_format(
self,
resources: list[Any],
*,
highlight: bool = True,
) -> str:
out = []
for resource in resources:
if hasattr(resource, "name"):
out.append(f"{resource.name}:")
elif "name" in resource:
out.append(f"{resource['name']}:")
out.append(
self.resource_pretty_format(resource, highlight=highlight)
)
return "\n".join(out)
@staticmethod @staticmethod
def wait_for_operation( def wait_for_operation(

@ -422,15 +422,15 @@ class ComputeV1(
backend_service, backend_service,
backend, backend,
) )
unhealthy.append(health_status) unhealthy.append(
{"name": backend.name, "health_status": health_status}
)
# Override the plain list of unhealthy backend name with # Override the plain list of unhealthy backend name with
# the one showing the latest backend statuses. # the one showing the latest backend statuses.
unhealthy_backends = "\n".join( unhealthy_backends = self.resources_pretty_format(
[ unhealthy,
self.resource_pretty_format(unhealthy_backend) highlight=False,
for unhealthy_backend in unhealthy
]
) )
except Exception as error: # noqa pylint: disable=broad-except except Exception as error: # noqa pylint: disable=broad-except
logger.debug( logger.debug(
@ -452,6 +452,7 @@ class ComputeV1(
), ),
) )
) )
raise raise
def _retry_backends_health( def _retry_backends_health(

@ -416,14 +416,16 @@ class KubernetesNamespace: # pylint: disable=too-many-public-methods
try: try:
retryer(self.get_service, name) retryer(self.get_service, name)
except retryers.RetryError as retry_err: except retryers.RetryError as retry_err:
framework.errors.FrameworkError.note_blanket_error_info_below( result = retry_err.result()
note = framework.errors.FrameworkError.note_blanket_error_info_below(
"A k8s service wasn't assigned a NEG (Network Endpoint Group).", "A k8s service wasn't assigned a NEG (Network Endpoint Group).",
info_below=( info_below=(
f"Timeout {timeout} (h:mm:ss) waiting for service {name}" f"Timeout {timeout} (h:mm:ss) waiting for service {name}"
f" to report NEG status. Last service status:\n" f" to report NEG status. Last service status:\n"
f"{self._pretty_format_status(retry_err.result())}" f"{self._pretty_format_status(result, highlight=False)}"
), ),
) )
retry_err.add_note(note)
raise raise
def get_service_neg( def get_service_neg(
@ -475,15 +477,17 @@ class KubernetesNamespace: # pylint: disable=too-many-public-methods
try: try:
retryer(self.get_deployment, name) retryer(self.get_deployment, name)
except retryers.RetryError as retry_err: except retryers.RetryError as retry_err:
framework.errors.FrameworkError.note_blanket_error_info_below( result = retry_err.result()
note = framework.errors.FrameworkError.note_blanket_error_info_below(
"The deployment didn't report one or several pods available" "The deployment didn't report one or several pods available"
" (ready for at least minReadySeconds).", " (ready for at least minReadySeconds).",
info_below=( info_below=(
f"Timeout {timeout} (h:mm:ss) waiting for deployment {name}" f"Timeout {timeout} (h:mm:ss) waiting for deployment {name}"
f" to report {count} replicas available. Last status:\n" f" to report {count} replicas available. Last status:\n"
f"{self._pretty_format_status(retry_err.result())}" f"{self._pretty_format_status(result, highlight=False)}"
), ),
) )
retry_err.add_note(note)
raise raise
def wait_for_deployment_replica_count( def wait_for_deployment_replica_count(
@ -504,14 +508,15 @@ class KubernetesNamespace: # pylint: disable=too-many-public-methods
retryer(self.list_deployment_pods, deployment) retryer(self.list_deployment_pods, deployment)
except retryers.RetryError as retry_err: except retryers.RetryError as retry_err:
result = retry_err.result(default=[]) result = retry_err.result(default=[])
framework.errors.FrameworkError.note_blanket_error_info_below( note = framework.errors.FrameworkError.note_blanket_error_info_below(
"The deployment was unable to initialize one or several pods.", "The deployment was unable to initialize one or several pods.",
info_below=( info_below=(
f"Timeout {timeout} (h:mm:ss) waiting for pod count" f"Timeout {timeout} (h:mm:ss) waiting for pod count"
f" {count}, got: {len(result)}. Pod statuses:\n" f" {count}, got: {len(result)}. Pod statuses:\n"
f"{self._pretty_format_statuses(result)}" f"{self._pretty_format_statuses(result, highlight=False)}"
), ),
) )
retry_err.add_note(note)
raise raise
def wait_for_deployment_deleted( def wait_for_deployment_deleted(
@ -555,13 +560,14 @@ class KubernetesNamespace: # pylint: disable=too-many-public-methods
try: try:
retryer(self.get_pod, pod_name) retryer(self.get_pod, pod_name)
except retryers.RetryError as retry_err: except retryers.RetryError as retry_err:
result = retry_err.result()
retry_err.add_note( retry_err.add_note(
framework.errors.FrameworkError.note_blanket_error_info_below( framework.errors.FrameworkError.note_blanket_error_info_below(
"The pod didn't start within expected timeout.", "The pod didn't start within expected timeout.",
info_below=( info_below=(
f"Timeout {timeout} (h:mm:ss) waiting for pod" f"Timeout {timeout} (h:mm:ss) waiting for pod"
f" {pod_name} to start. Pod status:\n" f" {pod_name} to start. Pod status:\n"
f"{self._pretty_format_status(retry_err.result())}" f"{self._pretty_format_status(result, highlight=False)}"
), ),
) )
) )
@ -607,13 +613,22 @@ class KubernetesNamespace: # pylint: disable=too-many-public-methods
return pod_log_collector return pod_log_collector
def _pretty_format_statuses( def _pretty_format_statuses(
self, k8s_objects: List[Optional[object]] self,
k8s_objects: List[Optional[object]],
*,
highlight: bool = True,
) -> str: ) -> str:
return "\n".join( return "\n".join(
self._pretty_format_status(k8s_object) for k8s_object in k8s_objects self._pretty_format_status(k8s_object, highlight=highlight)
for k8s_object in k8s_objects
) )
def _pretty_format_status(self, k8s_object: Optional[object]) -> str: def _pretty_format_status(
self,
k8s_object: Optional[object],
*,
highlight: bool = True,
) -> str:
if k8s_object is None: if k8s_object is None:
return "No data" return "No data"
@ -628,7 +643,10 @@ class KubernetesNamespace: # pylint: disable=too-many-public-methods
# Pretty-print the status if present. # Pretty-print the status if present.
if hasattr(k8s_object, "status"): if hasattr(k8s_object, "status"):
try: try:
status = self._pretty_format(k8s_object.status.to_dict()) status = self._pretty_format(
k8s_object.status.to_dict(),
highlight=highlight,
)
except Exception as e: # pylint: disable=broad-except except Exception as e: # pylint: disable=broad-except
# Catching all exceptions because not printing the status # Catching all exceptions because not printing the status
# isn't as important as the system under test. # isn't as important as the system under test.
@ -639,10 +657,15 @@ class KubernetesNamespace: # pylint: disable=too-many-public-methods
# Return the name of k8s object, and its pretty-printed status. # Return the name of k8s object, and its pretty-printed status.
return f"{name}:\n{status}\n" return f"{name}:\n{status}\n"
def _pretty_format(self, data: dict) -> str: def _pretty_format(
self,
data: dict,
*,
highlight: bool = True,
) -> str:
"""Return a string with pretty-printed yaml data from a python dict.""" """Return a string with pretty-printed yaml data from a python dict."""
yaml_out: str = yaml.dump(data, explicit_start=True, explicit_end=True) yaml_out: str = yaml.dump(data, explicit_start=True, explicit_end=True)
return self._highlighter.highlight(yaml_out) return self._highlighter.highlight(yaml_out) if highlight else yaml_out
@classmethod @classmethod
def _check_service_neg_annotation( def _check_service_neg_annotation(

Loading…
Cancel
Save