From efd9803be5dfb367d0649987136a02be0b70ea0b Mon Sep 17 00:00:00 2001 From: Jan Tattermusch Date: Thu, 14 Apr 2016 16:29:24 -0700 Subject: [PATCH 1/4] Uploading results to big query --- tools/run_tests/performance/export_utils.py | 88 +++++++++++++++ .../performance/scenario_result_schema.json | 103 ++++++++++++++++++ 2 files changed, 191 insertions(+) create mode 100644 tools/run_tests/performance/export_utils.py create mode 100644 tools/run_tests/performance/scenario_result_schema.json diff --git a/tools/run_tests/performance/export_utils.py b/tools/run_tests/performance/export_utils.py new file mode 100644 index 00000000000..bdb36a5e822 --- /dev/null +++ b/tools/run_tests/performance/export_utils.py @@ -0,0 +1,88 @@ +# Copyright 2016, Google Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +# utilities for exporting benchmark results + +import json +import os +import sys +import uuid + + +gcp_utils_dir = os.path.abspath(os.path.join( + os.path.dirname(__file__), '../../gcp/utils')) +sys.path.append(gcp_utils_dir) +import big_query_utils + + +_PROJECT_ID='grpc-testing' +_DATASET_ID='test_dataset' +_RESULTS_TABLE_ID='scenario_results' + + +def upload_scenario_result_to_bigquery(result_file): + bq = big_query_utils.create_big_query() + _create_results_table(bq) + + with open(result_file, 'r') as f: + scenario_result = json.loads(f.read()) + _insert_result(bq, scenario_result) + + +def _insert_result(bq, scenario_result): + _flatten_result_inplace(scenario_result) + + # TODO: handle errors... + row = big_query_utils.make_row(str(uuid.uuid4()), scenario_result) + return big_query_utils.insert_rows(bq, + _PROJECT_ID, + _DATASET_ID, + _RESULTS_TABLE_ID, + [row]) + + +def _create_results_table(bq): + with open(os.path.dirname(__file__) + '/scenario_result_schema.json', 'r') as f: + table_schema = json.loads(f.read()) + desc = 'Results of performance benchmarks.' + return big_query_utils.create_table2(bq, _PROJECT_ID, _DATASET_ID, + _RESULTS_TABLE_ID, table_schema, desc) + + +def _flatten_result_inplace(scenario_result): + """Bigquery is not really great for handling deeply nested data + and repeated fields. To maintain values of some fields while keeping + the schema relatively simple, we artificially leave some of the fields + as JSON strings. + """ + scenario_result['scenario']['clientConfig'] = json.dumps(scenario_result['scenario']['clientConfig']) + scenario_result['scenario']['serverConfig'] = json.dumps(scenario_result['scenario']['serverConfig']) + scenario_result['latencies'] = json.dumps(scenario_result['latencies']) + for stats in scenario_result['clientStats']: + stats['latencies'] = json.dumps(stats['latencies']) diff --git a/tools/run_tests/performance/scenario_result_schema.json b/tools/run_tests/performance/scenario_result_schema.json new file mode 100644 index 00000000000..39aba21b0c2 --- /dev/null +++ b/tools/run_tests/performance/scenario_result_schema.json @@ -0,0 +1,103 @@ +[ + { + "name": "scenario", + "type": "RECORD", + "mode": "NULLABLE", + "fields": [ + { + "name": "name", + "type": "STRING", + "mode": "NULLABLE" + }, + { + "name": "clientConfig", + "type": "STRING", + "mode": "NULLABLE" + }, + { + "name": "numClients", + "type": "INTEGER", + "mode": "NULLABLE" + }, + { + "name": "serverConfig", + "type": "STRING", + "mode": "NULLABLE" + }, + { + "name": "numServers", + "type": "INTEGER", + "mode": "NULLABLE" + }, + { + "name": "warmupSeconds", + "type": "INTEGER", + "mode": "NULLABLE" + }, + { + "name": "benchmarkSeconds", + "type": "INTEGER", + "mode": "NULLABLE" + } + ] + }, + { + "name": "latencies", + "type": "STRING", + "mode": "NULLABLE" + }, + { + "name": "clientStats", + "type": "RECORD", + "mode": "REPEATED", + "fields": [ + { + "name": "latencies", + "type": "STRING", + "mode": "NULLABLE" + }, + { + "name": "timeElapsed", + "type": "FLOAT", + "mode": "NULLABLE" + }, + { + "name": "timeUser", + "type": "FLOAT", + "mode": "NULLABLE" + }, + { + "name": "timeSystem", + "type": "FLOAT", + "mode": "NULLABLE" + } + ] + }, + { + "name": "serverStats", + "type": "RECORD", + "mode": "REPEATED", + "fields": [ + { + "name": "timeElapsed", + "type": "FLOAT", + "mode": "NULLABLE" + }, + { + "name": "timeUser", + "type": "FLOAT", + "mode": "NULLABLE" + }, + { + "name": "timeSystem", + "type": "FLOAT", + "mode": "NULLABLE" + } + ] + }, + { + "name": "serverCores", + "type": "INTEGER", + "mode": "REPEATED" + } +] From 7d54db8d490b986bd5b704241d38ff0e28141eac Mon Sep 17 00:00:00 2001 From: Jan Tattermusch Date: Thu, 14 Apr 2016 16:57:45 -0700 Subject: [PATCH 2/4] minor refactoring of biq_query_utils --- tools/gcp/utils/big_query_utils.py | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/tools/gcp/utils/big_query_utils.py b/tools/gcp/utils/big_query_utils.py index c331a679422..913afd059eb 100755 --- a/tools/gcp/utils/big_query_utils.py +++ b/tools/gcp/utils/big_query_utils.py @@ -71,16 +71,22 @@ def create_dataset(biq_query, project_id, dataset_id): def create_table(big_query, project_id, dataset_id, table_id, table_schema, description): + fields = [{'name': field_name, + 'type': field_type, + 'description': field_description + } for (field_name, field_type, field_description) in table_schema] + return create_table2(big_query, project_id, dataset_id, table_id, + fields, description) + + +def create_table2(big_query, project_id, dataset_id, table_id, fields_schema, + description): is_success = True body = { 'description': description, 'schema': { - 'fields': [{ - 'name': field_name, - 'type': field_type, - 'description': field_description - } for (field_name, field_type, field_description) in table_schema] + 'fields': fields_schema }, 'tableReference': { 'datasetId': dataset_id, @@ -112,9 +118,7 @@ def insert_rows(big_query, project_id, dataset_id, table_id, rows_list): datasetId=dataset_id, tableId=table_id, body=body) - print body res = insert_req.execute(num_retries=NUM_RETRIES) - print res except HttpError as http_error: print 'Error in inserting rows in the table %s' % table_id is_success = False From 88cc4e26edc158b95f5aa2d54d59e3ffff1a2fc0 Mon Sep 17 00:00:00 2001 From: Jan Tattermusch Date: Thu, 14 Apr 2016 16:58:50 -0700 Subject: [PATCH 3/4] minor changes to schema --- tools/run_tests/performance/export_utils.py | 1 + .../performance/scenario_result_schema.json | 103 +++++++++++++++++- 2 files changed, 102 insertions(+), 2 deletions(-) diff --git a/tools/run_tests/performance/export_utils.py b/tools/run_tests/performance/export_utils.py index bdb36a5e822..6df64cca1f1 100644 --- a/tools/run_tests/performance/export_utils.py +++ b/tools/run_tests/performance/export_utils.py @@ -86,3 +86,4 @@ def _flatten_result_inplace(scenario_result): scenario_result['latencies'] = json.dumps(scenario_result['latencies']) for stats in scenario_result['clientStats']: stats['latencies'] = json.dumps(stats['latencies']) + scenario_result['serverCores'] = json.dumps(scenario_result['serverCores']) diff --git a/tools/run_tests/performance/scenario_result_schema.json b/tools/run_tests/performance/scenario_result_schema.json index 39aba21b0c2..10d24a25177 100644 --- a/tools/run_tests/performance/scenario_result_schema.json +++ b/tools/run_tests/performance/scenario_result_schema.json @@ -1,4 +1,41 @@ [ + { + "name": "metadata", + "type": "RECORD", + "mode": "NULLABLE", + "fields": [ + { + "name": "buildNumber", + "type": "INTEGER", + "mode": "NULLABLE" + }, + { + "name": "buildUrl", + "type": "STRING", + "mode": "NULLABLE" + }, + { + "name": "jobName", + "type": "STRING", + "mode": "NULLABLE" + }, + { + "name": "gitCommit", + "type": "STRING", + "mode": "NULLABLE" + }, + { + "name": "gitActualCommit", + "type": "STRING", + "mode": "NULLABLE" + }, + { + "name": "created", + "type": "TIMESTAMP", + "mode": "NULLABLE" + } + ] + }, { "name": "scenario", "type": "RECORD", @@ -97,7 +134,69 @@ }, { "name": "serverCores", - "type": "INTEGER", - "mode": "REPEATED" + "type": "STRING", + "mode": "NULLABLE" + }, + { + "name": "summary", + "type": "RECORD", + "mode": "NULLABLE", + "fields": [ + { + "name": "qps", + "type": "FLOAT", + "mode": "NULLABLE" + }, + { + "name": "qps_per_server_core", + "type": "FLOAT", + "mode": "NULLABLE" + }, + { + "name": "server_system_time", + "type": "FLOAT", + "mode": "NULLABLE" + }, + { + "name": "server_user_time", + "type": "FLOAT", + "mode": "NULLABLE" + }, + { + "name": "client_system_time", + "type": "FLOAT", + "mode": "NULLABLE" + }, + { + "name": "client_user_time", + "type": "FLOAT", + "mode": "NULLABLE" + }, + { + "name": "latency_50", + "type": "FLOAT", + "mode": "NULLABLE" + }, + { + "name": "latency_90", + "type": "FLOAT", + "mode": "NULLABLE" + }, + { + "name": "latency_95", + "type": "FLOAT", + "mode": "NULLABLE" + }, + { + "name": "latency_99", + "type": "FLOAT", + "mode": "NULLABLE" + }, + { + "name": "latency_999", + "type": "FLOAT", + "mode": "NULLABLE" + } + ] } ] From 6d7fa5572e507f6d541b3bca035ba19ad3761e42 Mon Sep 17 00:00:00 2001 From: Jan Tattermusch Date: Thu, 14 Apr 2016 17:42:54 -0700 Subject: [PATCH 4/4] result uploading --- .../{export_utils.py => bq_upload_result.py} | 42 ++++++++++++------- tools/run_tests/performance/run_qps_driver.sh | 40 ++++++++++++++++++ tools/run_tests/run_performance_tests.py | 28 +++++++++---- 3 files changed, 87 insertions(+), 23 deletions(-) rename tools/run_tests/performance/{export_utils.py => bq_upload_result.py} (70%) mode change 100644 => 100755 create mode 100755 tools/run_tests/performance/run_qps_driver.sh diff --git a/tools/run_tests/performance/export_utils.py b/tools/run_tests/performance/bq_upload_result.py old mode 100644 new mode 100755 similarity index 70% rename from tools/run_tests/performance/export_utils.py rename to tools/run_tests/performance/bq_upload_result.py index 6df64cca1f1..0f53ba5d02d --- a/tools/run_tests/performance/export_utils.py +++ b/tools/run_tests/performance/bq_upload_result.py @@ -1,3 +1,4 @@ +#!/usr/bin/env python2.7 # Copyright 2016, Google Inc. # All rights reserved. # @@ -27,8 +28,9 @@ # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# utilities for exporting benchmark results +# Uploads performance benchmark result file to bigquery. +import argparse import json import os import sys @@ -42,37 +44,36 @@ import big_query_utils _PROJECT_ID='grpc-testing' -_DATASET_ID='test_dataset' -_RESULTS_TABLE_ID='scenario_results' -def upload_scenario_result_to_bigquery(result_file): +def _upload_scenario_result_to_bigquery(dataset_id, table_id, result_file): bq = big_query_utils.create_big_query() - _create_results_table(bq) + _create_results_table(bq, dataset_id, table_id) with open(result_file, 'r') as f: scenario_result = json.loads(f.read()) - _insert_result(bq, scenario_result) + if not _insert_result(bq, dataset_id, table_id, scenario_result): + print 'Error uploading result to bigquery.' + sys.exit(1) -def _insert_result(bq, scenario_result): - _flatten_result_inplace(scenario_result) - # TODO: handle errors... +def _insert_result(bq, dataset_id, table_id, scenario_result): + _flatten_result_inplace(scenario_result) row = big_query_utils.make_row(str(uuid.uuid4()), scenario_result) return big_query_utils.insert_rows(bq, _PROJECT_ID, - _DATASET_ID, - _RESULTS_TABLE_ID, + dataset_id, + table_id, [row]) -def _create_results_table(bq): +def _create_results_table(bq, dataset_id, table_id): with open(os.path.dirname(__file__) + '/scenario_result_schema.json', 'r') as f: table_schema = json.loads(f.read()) desc = 'Results of performance benchmarks.' - return big_query_utils.create_table2(bq, _PROJECT_ID, _DATASET_ID, - _RESULTS_TABLE_ID, table_schema, desc) + return big_query_utils.create_table2(bq, _PROJECT_ID, dataset_id, + table_id, table_schema, desc) def _flatten_result_inplace(scenario_result): @@ -87,3 +88,16 @@ def _flatten_result_inplace(scenario_result): for stats in scenario_result['clientStats']: stats['latencies'] = json.dumps(stats['latencies']) scenario_result['serverCores'] = json.dumps(scenario_result['serverCores']) + + +argp = argparse.ArgumentParser(description='Upload result to big query.') +argp.add_argument('--bq_result_table', required=True, default=None, type=str, + help='Bigquery "dataset.table" to upload results to.') +argp.add_argument('--file_to_upload', default='scenario_result.json', type=str, + help='Report file to upload.') + +args = argp.parse_args() + +dataset_id, table_id = args.bq_result_table.split('.', 2) +_upload_scenario_result_to_bigquery(dataset_id, table_id, args.file_to_upload) +print 'Successfully uploaded %s to BigQuery.\n' % args.file_to_upload diff --git a/tools/run_tests/performance/run_qps_driver.sh b/tools/run_tests/performance/run_qps_driver.sh new file mode 100755 index 00000000000..c8c6890df9d --- /dev/null +++ b/tools/run_tests/performance/run_qps_driver.sh @@ -0,0 +1,40 @@ +#!/bin/bash +# Copyright 2015, Google Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +set -ex + +cd $(dirname $0)/../../.. + +bins/opt/qps_json_driver "$@" + +if [ "$BQ_RESULT_TABLE" != "" ] +then + tools/run_tests/performance/bq_upload_result.py --bq_result_table="$BQ_RESULT_TABLE" +fi diff --git a/tools/run_tests/run_performance_tests.py b/tools/run_tests/run_performance_tests.py index b62a4287477..beedd819ad0 100755 --- a/tools/run_tests/run_performance_tests.py +++ b/tools/run_tests/run_performance_tests.py @@ -93,15 +93,19 @@ def create_qpsworker_job(language, shortname=None, return QpsWorkerJob(jobspec, language, host_and_port) -def create_scenario_jobspec(scenario_json, workers, remote_host=None): +def create_scenario_jobspec(scenario_json, workers, remote_host=None, + bq_result_table=None): """Runs one scenario using QPS driver.""" # setting QPS_WORKERS env variable here makes sure it works with SSH too. - cmd = 'QPS_WORKERS="%s" bins/opt/qps_json_driver ' % ','.join(workers) - cmd += '--scenarios_json=%s' % pipes.quote(json.dumps({'scenarios': [scenario_json]})) - cmd += ' --scenario_result_file=scenario_result.json' + cmd = 'QPS_WORKERS="%s" ' % ','.join(workers) + if bq_result_table: + cmd += 'BQ_RESULT_TABLE="%s" ' % bq_result_table + cmd += 'tools/run_tests/performance/run_qps_driver.sh ' + cmd += '--scenarios_json=%s ' % pipes.quote(json.dumps({'scenarios': [scenario_json]})) + cmd += '--scenario_result_file=scenario_result.json' if remote_host: user_at_host = '%s@%s' % (_REMOTE_HOST_USERNAME, remote_host) - cmd = 'ssh %s "cd ~/performance_workspace/grpc/ && %s"' % (user_at_host, cmd) + cmd = 'ssh %s "cd ~/performance_workspace/grpc/ && "%s' % (user_at_host, pipes.quote(cmd)) return jobset.JobSpec( cmdline=[cmd], @@ -117,7 +121,7 @@ def create_quit_jobspec(workers, remote_host=None): cmd = 'QPS_WORKERS="%s" bins/opt/qps_driver --quit' % ','.join(workers) if remote_host: user_at_host = '%s@%s' % (_REMOTE_HOST_USERNAME, remote_host) - cmd = 'ssh %s "cd ~/performance_workspace/grpc/ && %s"' % (user_at_host, cmd) + cmd = 'ssh %s "cd ~/performance_workspace/grpc/ && "%s' % (user_at_host, pipes.quote(cmd)) return jobset.JobSpec( cmdline=[cmd], @@ -226,7 +230,8 @@ def start_qpsworkers(languages, worker_hosts): for worker_idx, worker in enumerate(workers)] -def create_scenarios(languages, workers_by_lang, remote_host=None, regex='.*'): +def create_scenarios(languages, workers_by_lang, remote_host=None, regex='.*', + bq_result_table=None): """Create jobspecs for scenarios to run.""" scenarios = [] for language in languages: @@ -248,7 +253,8 @@ def create_scenarios(languages, workers_by_lang, remote_host=None, regex='.*'): workers[idx] = workers_by_lang[custom_server_lang][idx] scenario = create_scenario_jobspec(scenario_json, workers, - remote_host=remote_host) + remote_host=remote_host, + bq_result_table=bq_result_table) scenarios.append(scenario) # the very last scenario requests shutting down the workers. @@ -290,6 +296,8 @@ argp.add_argument('--remote_worker_host', help='Worker hosts where to start QPS workers.') argp.add_argument('-r', '--regex', default='.*', type=str, help='Regex to select scenarios to run.') +argp.add_argument('--bq_result_table', default=None, type=str, + help='Bigquery "dataset.table" to upload results to.') args = argp.parse_args() @@ -298,6 +306,7 @@ languages = set(scenario_config.LANGUAGES[l] scenario_config.LANGUAGES.iterkeys() if x == 'all' else [x] for x in args.language)) + # Put together set of remote hosts where to run and build remote_hosts = set() if args.remote_worker_host: @@ -329,7 +338,8 @@ try: scenarios = create_scenarios(languages, workers_by_lang=worker_addresses, remote_host=args.remote_driver_host, - regex=args.regex) + regex=args.regex, + bq_result_table=args.bq_result_table) if not scenarios: raise Exception('No scenarios to run')