xds interop: Log operation id (#30407)

Add consistent operation id logs for GCP long-running operations - both old-style (compute) and the new APIs.
At the moment it's a bit more verbose than I'd want; e.g., it doubles the number of log messages during the teardown. We should probably only log failed ops. But to do this reliably, we should probably revisit the issue of improving how tenacity retry failures are reported.
pull/30417/head
Sergii Tkachenko 2 years ago committed by GitHub
parent 91bcb11c66
commit 9077532620
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 16
      tools/run_tests/xds_k8s_test_driver/framework/infrastructure/gcp/api.py
  2. 41
      tools/run_tests/xds_k8s_test_driver/framework/infrastructure/gcp/compute.py

@ -435,19 +435,19 @@ class GcpStandardCloudApiResource(GcpProjectApiResource, metaclass=abc.ABCMeta):
def _execute( # pylint: disable=arguments-differ
self,
request: HttpRequest,
timeout_sec=GcpProjectApiResource._WAIT_FOR_OPERATION_SEC):
timeout_sec: int = GcpProjectApiResource._WAIT_FOR_OPERATION_SEC):
operation = request.execute(num_retries=self._GCP_API_RETRIES)
self._wait(operation, timeout_sec)
logger.debug('Operation %s', operation)
self._wait(operation['name'], timeout_sec)
def _wait(self,
operation,
timeout_sec=GcpProjectApiResource._WAIT_FOR_OPERATION_SEC):
op_name = operation['name']
logger.debug('Waiting for %s operation, timeout %s sec: %s',
self.api_name, timeout_sec, op_name)
operation_id: str,
timeout_sec: int = GcpProjectApiResource._WAIT_FOR_OPERATION_SEC):
logger.info('Waiting %s sec for %s operation id: %s', timeout_sec,
self.api_name, operation_id)
op_request = self.api.projects().locations().operations().get(
name=op_name)
name=operation_id)
operation = self.wait_for_operation(
operation_request=op_request,
test_success_fn=lambda result: result['done'],

@ -430,28 +430,29 @@ class ComputeV1(gcp.api.GcpProjectApiResource): # pylint: disable=too-many-publ
self,
request,
*,
test_success_fn=None,
timeout_sec=_WAIT_FOR_OPERATION_SEC):
operation = request.execute(num_retries=self._GCP_API_RETRIES)
logger.debug('Response %s', operation)
logger.debug('Operation %s', operation)
return self._wait(operation['name'], timeout_sec)
def _wait(self,
operation_id: str,
timeout_sec: int = _WAIT_FOR_OPERATION_SEC) -> dict:
logger.info('Waiting %s sec for compute operation id: %s', timeout_sec,
operation_id)
# TODO(sergiitk) try using wait() here
# https://googleapis.github.io/google-api-python-client/docs/dyn/compute_v1.globalOperations.html#wait
operation_request = self.api.globalOperations().get(
project=self.project, operation=operation['name'])
if test_success_fn is None:
test_success_fn = self._operation_status_done
logger.debug('Waiting for global operation %s, timeout %s sec',
operation['name'], timeout_sec)
response = self.wait_for_operation(operation_request=operation_request,
test_success_fn=test_success_fn,
timeout_sec=timeout_sec)
if 'error' in response:
logger.debug('Waiting for global operation failed, response: %r',
response)
raise Exception(f'Operation {operation["name"]} did not complete '
f'within {timeout_sec}s, error={response["error"]}')
return response
op_request = self.api.globalOperations().get(project=self.project,
operation=operation_id)
operation = self.wait_for_operation(
operation_request=op_request,
test_success_fn=self._operation_status_done,
timeout_sec=timeout_sec)
logger.debug('Completed operation: %s', operation)
if 'error' in operation:
# This shouldn't normally happen: gcp library raises on errors.
raise Exception(f'Compute operation {operation_id} '
f'failed: {operation}')
return operation

Loading…
Cancel
Save