Update related scripts to populate BigQuery fields (#26778)

* Update related scripts to populate BigQuery fields
Wanlin Du 3 years ago committed by GitHub
parent 3cf55bcc58
commit 971e95b906
Changed files (lines changed):
  1. tools/internal_ci/linux/grpc_e2e_performance_v2.sh (5)
  2. tools/run_tests/performance/bq_upload_result.py (69)
  3. tools/run_tests/performance/scenario_result_schema.json (7)

tools/internal_ci/linux/grpc_e2e_performance_v2.sh

@@ -78,6 +78,11 @@ buildConfigs() {
         -s big_query_table="${table}" -s timeout_seconds=900 \
         -s prebuilt_image_prefix="${PREBUILT_IMAGE_PREFIX}" \
         -s prebuilt_image_tag="${UNIQUE_IDENTIFIER}" \
+        -a ci_buildNumber="${KOKORO_BUILD_NUMBER}" \
+        -a ci_buildUrl="https://source.cloud.google.com/results/invocations/${KOKORO_BUILD_NUMBER}" \
+        -a ci_jobName="${KOKORO_JOB_NAME}" \
+        -a ci_gitCommit="${KOKORO_GIT_COMMIT}" \
+        -a ci_gitActualCommit="${ghprbActualCommit}" \
         --prefix="${LOAD_TEST_PREFIX}" -u "${UNIQUE_IDENTIFIER}" -u "${pool}" \
         -a pool="${pool}" --category=scalable \
         --allow_client_language=c++ --allow_server_language=c++ \
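
The five new `-a ci_*` flags attach CI build information to the generated LoadTest resources as Kubernetes annotations; `bq_upload_result.py` below reads those annotations back out of the dumped test metadata file and copies them into the BigQuery `metadata` record.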

tools/run_tests/performance/bq_upload_result.py

@@ -62,14 +62,16 @@ def _upload_netperf_latency_csv_to_bigquery(dataset_id, table_id, result_file):
         sys.exit(1)
 
 
-def _upload_scenario_result_to_bigquery(dataset_id, table_id, result_file):
+def _upload_scenario_result_to_bigquery(dataset_id, table_id, result_file,
+                                        metadata_file):
     with open(result_file, 'r') as f:
         scenario_result = json.loads(f.read())
 
     bq = big_query_utils.create_big_query()
     _create_results_table(bq, dataset_id, table_id)
 
-    if not _insert_result(bq, dataset_id, table_id, scenario_result):
+    if not _insert_scenario_result(bq, dataset_id, table_id, scenario_result,
+                                   metadata_file):
         print('Error uploading result to bigquery.')
         sys.exit(1)
@@ -83,6 +85,20 @@ def _insert_result(bq, dataset_id, table_id, scenario_result, flatten=True):
                                        [row])
 
 
+def _insert_scenario_result(bq,
+                            dataset_id,
+                            table_id,
+                            scenario_result,
+                            test_metadata_file,
+                            flatten=True):
+    if flatten:
+        _flatten_result_inplace(scenario_result)
+    _populate_metadata_from_file(scenario_result, test_metadata_file)
+    row = big_query_utils.make_row(str(uuid.uuid4()), scenario_result)
+    return big_query_utils.insert_rows(bq, _PROJECT_ID, dataset_id, table_id,
+                                       [row])
+
+
 def _create_results_table(bq, dataset_id, table_id):
     with open(os.path.dirname(__file__) + '/scenario_result_schema.json',
               'r') as f:
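
The new `_insert_scenario_result()` mirrors the existing `_insert_result()` but additionally merges the test metadata into the result before building the BigQuery row; the netperf latency CSV upload path is unchanged.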
@@ -158,6 +174,45 @@ def _populate_metadata_inplace(scenario_result):
     scenario_result['metadata'] = metadata
 
 
+def _populate_metadata_from_file(scenario_result, test_metadata_file):
+    utc_timestamp = str(calendar.timegm(time.gmtime()))
+    metadata = {'created': utc_timestamp}
+
+    _annotation_to_bq_metadata_key_map = {
+        'ci_' + key: key for key in (
+            'buildNumber',
+            'buildUrl',
+            'jobName',
+            'gitCommit',
+            'gitActualCommit',
+        )
+    }
+
+    if os.access(test_metadata_file, os.R_OK):
+        with open(test_metadata_file, 'r') as f:
+            test_metadata = json.loads(f.read())
+
+        # Eliminate managedFields from the metadata set.
+        if 'managedFields' in test_metadata:
+            del test_metadata['managedFields']
+
+        annotations = test_metadata.get('annotations', {})
+
+        # If the test was deployed with "kubectl apply", kubectl appends the
+        # last applied configuration to the annotations; delete that field,
+        # since it contains a lot of irrelevant information.
+        if 'kubectl.kubernetes.io/last-applied-configuration' in annotations:
+            del annotations['kubectl.kubernetes.io/last-applied-configuration']
+
+        # Dump all remaining metadata as JSON into the testMetadata field.
+        scenario_result['testMetadata'] = json.dumps(test_metadata)
+
+        for key, value in _annotation_to_bq_metadata_key_map.items():
+            if key in annotations:
+                metadata[value] = annotations[key]
+
+    scenario_result['metadata'] = metadata
 
 
 argp = argparse.ArgumentParser(description='Upload result to big query.')
 argp.add_argument('--bq_result_table',
                   required=True,
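
To make the mapping concrete, here is a minimal sketch of what `_populate_metadata_from_file()` (assumed to be in scope) derives from a metadata file; the resource name, annotation values, and file path below are made up for illustration:

    # Illustration only (not part of the commit): a hypothetical metadata file
    # and the fields _populate_metadata_from_file() extracts from it.
    import json

    test_metadata = {
        'name': 'example-loadtest',  # hypothetical LoadTest resource name
        'annotations': {
            'ci_buildNumber': '1234',
            'ci_jobName': 'grpc-performance-example',  # hypothetical job name
            'ci_gitCommit': '3cf55bcc58',
        },
    }
    with open('metadata.json', 'w') as f:
        json.dump(test_metadata, f)

    scenario_result = {}
    _populate_metadata_from_file(scenario_result, 'metadata.json')
    # scenario_result['testMetadata'] -> the JSON dump of test_metadata
    # scenario_result['metadata']     -> {'created': <utc timestamp>,
    #     'buildNumber': '1234', 'jobName': 'grpc-performance-example',
    #     'gitCommit': '3cf55bcc58'}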
@@ -168,6 +223,10 @@ argp.add_argument('--file_to_upload',
                   default='scenario_result.json',
                   type=str,
                   help='Report file to upload.')
+argp.add_argument('--metadata_file_to_upload',
+                  default='metadata.json',
+                  type=str,
+                  help='Metadata file to upload.')
 argp.add_argument('--file_format',
                   choices=['scenario_result', 'netperf_latency_csv'],
                   default='scenario_result',
@@ -182,5 +241,7 @@ if args.file_format == 'netperf_latency_csv':
                                             args.file_to_upload)
 else:
     _upload_scenario_result_to_bigquery(dataset_id, table_id,
-                                        args.file_to_upload)
-print('Successfully uploaded %s to BigQuery.\n' % args.file_to_upload)
+                                        args.file_to_upload,
+                                        args.metadata_file_to_upload)
+print('Successfully uploaded %s and %s to BigQuery.\n' %
+      (args.file_to_upload, args.metadata_file_to_upload))
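
With the new flag wired through, an invocation might look like `python3 tools/run_tests/performance/bq_upload_result.py --bq_result_table=<dataset>.<table> --file_to_upload=scenario_result.json --metadata_file_to_upload=metadata.json` (table name illustrative; the last two flags can be omitted, since those values are the defaults).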

tools/run_tests/performance/scenario_result_schema.json

@@ -1895,5 +1895,10 @@
     "mode": "NULLABLE",
     "name": "serverCpuUsage",
     "type": "FLOAT"
+  },
+  {
+    "mode": "NULLABLE",
+    "name": "testMetadata",
+    "type": "STRING"
   }
 ]
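
Since the new `testMetadata` column is NULLABLE, the schema change is backward compatible: rows uploaded by older runs simply leave it NULL, and consumers can JSON-parse the STRING payload produced by `json.dumps` above when they need the full test metadata.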
