mirror of https://github.com/grpc/grpc.git
[test] Semi-automatic flaky test tracking (#30638)
* [test] Semi-automatic flaky test tracking * fix * Automated change: Fix sanity tests * Update update_flakes.py * fix formatting * Automated change: Fix sanity tests Co-authored-by: ctiller <ctiller@users.noreply.github.com>
pull/30667/head
parent
fedba4654d
commit
f573fd44f9
30 changed files with 543 additions and 507 deletions
@ -0,0 +1,168 @@ |
||||
# Copyright 2022 gRPC authors. |
||||
# |
||||
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||
# you may not use this file except in compliance with the License. |
||||
# You may obtain a copy of the License at |
||||
# |
||||
# http://www.apache.org/licenses/LICENSE-2.0 |
||||
# |
||||
# Unless required by applicable law or agreed to in writing, software |
||||
# distributed under the License is distributed on an "AS IS" BASIS, |
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
# See the License for the specific language governing permissions and |
||||
# limitations under the License. |
||||
"""A list of flaky tests, consumed by generate_tests.bzl to set flaky attrs.""" |
||||
# Each entry has the shape "<test binary>@<test case>"; entries are listed in
# sorted order.
# NOTE(review): this list looks machine-generated (a companion script rewrites
# the whole file), so hand edits are presumably overwritten - confirm.
FLAKY_TESTS = [
    "h2_census_test@bad_ping",
    "h2_census_test@grpc_authz",
    "h2_census_test@retry_per_attempt_recv_timeout",
    "h2_census_test@retry_too_many_attempts",
    "h2_census_test@retry_transparent_mcs",
    "h2_census_test@shutdown_finishes_calls",
    "h2_compress_test@grpc_authz",
    "h2_compress_test@keepalive_timeout",
    "h2_compress_test@max_connection_age",
    "h2_compress_test@max_connection_idle",
    "h2_compress_test@retry_cancellation",
    "h2_fakesec_test@retry_per_attempt_recv_timeout",
    "h2_fakesec_test@retry_server_pushback_disabled",
    "h2_fakesec_test@retry_too_many_attempts",
    "h2_fakesec_test@shutdown_finishes_calls",
    "h2_fd_test@max_connection_age",
    "h2_full+pipe_test@max_connection_idle",
    "h2_full_no_retry_test@bad_ping",
    "h2_full_no_retry_test@grpc_authz",
    "h2_full_no_retry_test@hpack_size",
    "h2_full_no_retry_test@max_connection_age",
    "h2_full_no_retry_test@max_connection_idle",
    "h2_full_test@bad_ping",
    "h2_full_test@keepalive_timeout",
    "h2_full_test@max_connection_age",
    "h2_full_test@max_connection_idle",
    "h2_full_test@retry_per_attempt_recv_timeout",
    "h2_full_test@retry_recv_trailing_metadata_error",
    "h2_full_test@retry_send_initial_metadata_refs",
    "h2_full_test@retry_server_pushback_delay",
    "h2_http_proxy_test@compressed_payload",
    "h2_http_proxy_test@connectivity",
    "h2_http_proxy_test@grpc_authz",
    "h2_http_proxy_test@hpack_size",
    "h2_http_proxy_test@max_connection_age",
    "h2_http_proxy_test@max_connection_idle",
    "h2_http_proxy_test@ping_pong_streaming",
    "h2_http_proxy_test@retry_per_attempt_recv_timeout",
    "h2_insecure_test@bad_ping",
    "h2_insecure_test@grpc_authz",
    "h2_insecure_test@max_connection_age",
    "h2_insecure_test@max_connection_idle",
    "h2_insecure_test@retry",
    "h2_insecure_test@retry_send_initial_metadata_refs",
    "h2_insecure_test@retry_server_pushback_disabled",
    "h2_local_abstract_uds_percent_encoded_test@max_connection_idle",
    "h2_local_ipv4_test@bad_ping",
    "h2_local_ipv4_test@max_connection_idle",
    "h2_local_ipv4_test@retry",
    "h2_local_ipv4_test@retry_per_attempt_recv_timeout",
    "h2_local_ipv4_test@retry_send_initial_metadata_refs",
    "h2_local_ipv4_test@retry_streaming2",
    "h2_local_ipv6_test@bad_ping",
    "h2_local_ipv6_test@cancel_after_accept",
    "h2_local_ipv6_test@max_connection_idle",
    "h2_local_ipv6_test@retry_per_attempt_recv_timeout",
    "h2_local_ipv6_test@retry_server_pushback_disabled",
    "h2_local_ipv6_test@shutdown_finishes_calls",
    "h2_local_uds_percent_encoded_test@bad_ping",
    "h2_local_uds_percent_encoded_test@connectivity",
    "h2_local_uds_percent_encoded_test@max_connection_idle",
    "h2_local_uds_percent_encoded_test@retry_per_attempt_recv_timeout",
    "h2_local_uds_percent_encoded_test@retry_streaming2",
    "h2_local_uds_percent_encoded_test@retry_transparent_mcs",
    "h2_local_uds_test@bad_ping",
    "h2_local_uds_test@grpc_authz",
    "h2_local_uds_test@max_connection_age",
    "h2_local_uds_test@max_connection_idle",
    "h2_local_uds_test@retry_server_pushback_disabled",
    "h2_oauth2_test@bad_ping",
    "h2_oauth2_test@connectivity",
    "h2_oauth2_test@filter_context",
    "h2_oauth2_test@grpc_authz",
    "h2_oauth2_test@max_connection_age",
    "h2_oauth2_test@max_connection_idle",
    "h2_oauth2_test@retry_server_pushback_delay",
    "h2_oauth2_test@retry_server_pushback_disabled",
    "h2_oauth2_test@retry_too_many_attempts",
    "h2_proxy_test@grpc_authz",
    "h2_proxy_test@retry_send_initial_metadata_refs",
    "h2_proxy_test@shutdown_finishes_calls",
    "h2_sockpair_1byte_test@max_connection_age",
    "h2_sockpair_1byte_test@shutdown_finishes_calls",
    "h2_sockpair_test@grpc_authz",
    "h2_sockpair_test@max_connection_age",
    "h2_ssl_cred_reload_test@bad_ping",
    "h2_ssl_cred_reload_test@client_streaming",
    "h2_ssl_cred_reload_test@connectivity",
    "h2_ssl_cred_reload_test@max_connection_idle",
    "h2_ssl_cred_reload_test@retry",
    "h2_ssl_cred_reload_test@retry_per_attempt_recv_timeout",
    "h2_ssl_cred_reload_test@retry_server_pushback_delay",
    "h2_ssl_cred_reload_test@retry_too_many_attempts",
    "h2_ssl_cred_reload_test@simple_delayed_request",
    "h2_ssl_proxy_test@disappearing_server",
    "h2_ssl_proxy_test@retry_cancellation",
    "h2_ssl_proxy_test@retry_server_pushback_delay",
    "h2_ssl_proxy_test@retry_too_many_attempts",
    "h2_ssl_proxy_test@shutdown_finishes_calls",
    "h2_ssl_test@bad_ping",
    "h2_ssl_test@compressed_payload",
    "h2_ssl_test@grpc_authz",
    "h2_ssl_test@max_connection_idle",
    "h2_ssl_test@retry",
    "h2_ssl_test@retry_cancellation",
    "h2_ssl_test@retry_per_attempt_recv_timeout",
    "h2_ssl_test@retry_send_initial_metadata_refs",
    "h2_ssl_test@retry_server_pushback_disabled",
    "h2_ssl_test@retry_too_many_attempts",
    "h2_ssl_test@shutdown_finishes_calls",
    "h2_tls_certwatch_async_tls1_3_test@cancel_after_invoke",
    "h2_tls_certwatch_async_tls1_3_test@connectivity",
    "h2_tls_certwatch_async_tls1_3_test@max_connection_idle",
    "h2_tls_certwatch_async_tls1_3_test@retry_cancellation",
    "h2_tls_certwatch_async_tls1_3_test@retry_streaming",
    "h2_tls_certwatch_async_tls1_3_test@retry_streaming2",
    "h2_tls_certwatch_async_tls1_3_test@simple_delayed_request",
    "h2_tls_certwatch_sync_tls1_2_test@connectivity",
    "h2_tls_certwatch_sync_tls1_2_test@max_connection_idle",
    "h2_tls_certwatch_sync_tls1_2_test@retry_cancellation",
    "h2_tls_certwatch_sync_tls1_2_test@retry_server_pushback_delay",
    "h2_tls_certwatch_sync_tls1_2_test@simple_delayed_request",
    "h2_tls_certwatch_sync_tls1_2_test@write_buffering",
    "h2_tls_simple_test@bad_ping",
    "h2_tls_simple_test@connectivity",
    "h2_tls_simple_test@max_connection_idle",
    "h2_tls_simple_test@retry_per_attempt_recv_timeout",
    "h2_tls_simple_test@simple_delayed_request",
    "h2_tls_static_async_tls1_3_test@connectivity",
    "h2_tls_static_async_tls1_3_test@max_connection_idle",
    "h2_tls_static_async_tls1_3_test@retry_cancellation",
    "h2_tls_static_async_tls1_3_test@retry_per_attempt_recv_timeout",
    "h2_tls_static_async_tls1_3_test@simple_delayed_request",
    "h2_tls_test@bad_ping",
    "h2_tls_test@connectivity",
    "h2_tls_test@grpc_authz",
    "h2_tls_test@hpack_size",
    "h2_tls_test@invoke_large_request",
    "h2_tls_test@keepalive_timeout",
    "h2_tls_test@max_connection_idle",
    "h2_tls_test@resource_quota_server",
    "h2_tls_test@retry",
    "h2_tls_test@retry_cancellation",
    "h2_tls_test@retry_per_attempt_recv_timeout",
    "h2_tls_test@retry_streaming",
    "h2_tls_test@retry_streaming2",
    "h2_tls_test@simple_delayed_request",
    "h2_uds_test@bad_ping",
    "h2_uds_test@max_connection_idle",
    "h2_uds_test@retry_per_attempt_recv_timeout",
    "h2_uds_test@retry_streaming",
    "h2_uds_test@retry_transparent_mcs",
]
@ -0,0 +1,30 @@ |
||||
# Copyright 2022 gRPC authors. |
||||
# |
||||
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||
# you may not use this file except in compliance with the License. |
||||
# You may obtain a copy of the License at |
||||
# |
||||
# http://www.apache.org/licenses/LICENSE-2.0 |
||||
# |
||||
# Unless required by applicable law or agreed to in writing, software |
||||
# distributed under the License is distributed on an "AS IS" BASIS, |
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
# See the License for the specific language governing permissions and |
||||
# limitations under the License. |
||||
|
||||
import subprocess |
||||
import sys |
||||
import tempfile |
||||
|
||||
|
||||
def run_buildozer(buildozer_commands):
    """Run buildozer over a batch of commands passed through a command file.

    Args:
        buildozer_commands: list of buildozer command strings. They are
            joined with newlines, written to a temporary file, and that file
            is handed to `tools/distrib/buildozer.sh -f`.

    Returns:
        None. Returns immediately, without invoking buildozer, when the list
        is empty.

    Raises:
        SystemExit: with code 1 when buildozer exits with a status other than
            0 or 3.
    """
    if not buildozer_commands:
        return
    # buildozer's documentation lists 3 as "success, no changes made", so it
    # is accepted alongside 0.
    ok_statuses = (0, 3)
    # Write through the temporary file's own handle and flush before starting
    # buildozer: the child process must see the full command list on disk,
    # and the `with` block guarantees the file is closed (and removed) even
    # if launching the subprocess raises.
    with tempfile.NamedTemporaryFile(mode='w') as temp:
        temp.write('\n'.join(buildozer_commands))
        temp.flush()
        c = ['tools/distrib/buildozer.sh', '-f', temp.name]
        r = subprocess.call(c)
    if r not in ok_statuses:
        print('{} failed with status {}'.format(c, r))
        sys.exit(1)
@ -0,0 +1,102 @@ |
||||
#!/usr/bin/env python3 |
||||
|
||||
# Copyright 2022 gRPC authors. |
||||
# |
||||
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||
# you may not use this file except in compliance with the License. |
||||
# You may obtain a copy of the License at |
||||
# |
||||
# http://www.apache.org/licenses/LICENSE-2.0 |
||||
# |
||||
# Unless required by applicable law or agreed to in writing, software |
||||
# distributed under the License is distributed on an "AS IS" BASIS, |
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
# See the License for the specific language governing permissions and |
||||
# limitations under the License. |
||||
|
||||
import os |
||||
import re |
||||
import subprocess |
||||
import sys |
||||
|
||||
from google.cloud import bigquery |
||||
import run_buildozer |
||||
import update_flakes_query |
||||
|
||||
# Window passed to the flake query, in hours: 4 weeks x 7 days x 24 hours.
lookback_hours = 4 * 7 * 24
||||
|
||||
|
||||
def include_test(test):
    """Tell whether a bazel test label should be tracked.

    Args:
        test: a test label string.

    Returns:
        False for labels that contain '@' or that start with
        "//test/cpp/qps:"; True for every other label.
    """
    excluded = '@' in test or test.startswith("//test/cpp/qps:")
    return not excluded
||||
|
||||
|
||||
# Directories whose bazel test targets are examined.
TEST_DIRS = ['test/core', 'test/cpp']
# Maps each tracked test label to True once the query reports it as flaky.
tests = {}
# Tracked labels that bazel already reports with flaky=1.
already_flaky = set()

# Pass 1: collect every tracked test label, initially marked not flaky.
for test_dir in TEST_DIRS:
    all_tests_output = subprocess.check_output(
        ['bazel', 'query', 'tests({}/...)'.format(test_dir)])
    for line in all_tests_output.splitlines():
        test = line.strip().decode('utf-8')
        if include_test(test):
            tests[test] = False

# Pass 2: collect the tracked labels currently carrying the flaky attribute.
for test_dir in TEST_DIRS:
    flaky_tests_output = subprocess.check_output(
        ['bazel', 'query', 'attr(flaky, 1, tests({}/...))'.format(test_dir)])
    for line in flaky_tests_output.splitlines():
        test = line.strip().decode('utf-8')
        if include_test(test):
            already_flaky.add(test)

# "<binary>@<case>..." names of end2end tests reported by the query; these are
# written to flaky.bzl instead of being changed through buildozer.
flaky_e2e = set()

client = bigquery.Client()
flake_query = update_flakes_query.QUERY.format(lookback_hours=lookback_hours)
for row in client.query(flake_query).result():
    if row.test_binary in tests:
        print("will mark {} as flaky".format(row.test_binary))
        tests[row.test_binary] = True
        continue
    # Not a tracked bazel label: it is either an end2end test (whose label
    # contains '@', which include_test filters out) or an unknown target.
    m = re.match(r'^//test/core/end2end:([^@]*)@([^@]*)(.*)', row.test_binary)
    if m is None:
        print("skip obsolete test {}".format(row.test_binary))
        continue
    flaky_e2e.add('{}@{}{}'.format(m.group(1), m.group(2), m.group(3)))
    print("will mark end2end test {} as flaky".format(row.test_binary))

# Set the attribute on tests reported flaky; drop it from tests that carry it
# but were not reported.
buildozer_commands = []
for test, flaky in sorted(tests.items()):
    if flaky:
        command = 'set flaky True|{}'
    elif test in already_flaky:
        command = 'remove flaky|{}'
    else:
        continue
    buildozer_commands.append(command.format(test))

with open('test/core/end2end/flaky.bzl', 'w') as f:
    # Copy this script's own license header into the generated file. All
    # three loops advance the same file iterator, so each one resumes where
    # the previous one stopped.
    with open(sys.argv[0]) as my_source:
        # Skip the leading run of '#' lines (the shebang) and the first
        # non-'#' line after it.
        for line in my_source:
            if not line.startswith('#'):
                break
        # Emit the first '#' line that follows.
        for line in my_source:
            if line.startswith('#'):
                print(line.strip(), file=f)
                break
        # Emit the rest of that comment block, stopping at the first line
        # that is not a comment.
        for line in my_source:
            if not line.startswith('#'):
                break
            print(line.strip(), file=f)
    print(
        '"""A list of flaky tests, consumed by generate_tests.bzl to set flaky attrs."""',
        file=f)
    print("FLAKY_TESTS = [", file=f)
    for line in sorted(flaky_e2e):
        print(" \"{}\",".format(line), file=f)
    print("]", file=f)

run_buildozer.run_buildozer(buildozer_commands)
@ -0,0 +1,147 @@ |
||||
# Copyright 2022 gRPC authors. |
||||
# |
||||
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||
# you may not use this file except in compliance with the License. |
||||
# You may obtain a copy of the License at |
||||
# |
||||
# http://www.apache.org/licenses/LICENSE-2.0 |
||||
# |
||||
# Unless required by applicable law or agreed to in writing, software |
||||
# distributed under the License is distributed on an "AS IS" BASIS, |
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
# See the License for the specific language governing permissions and |
||||
# limitations under the License. |
||||
|
||||
QUERY = """ |
||||
#standardSQL |
||||
|
||||
-- See https://console.cloud.google.com/bigquery?sq=830293263384:5a8549832dfb48d9b2c04312a4ae3181 for the original query |
||||
|
||||
WITH |
||||
|
||||
runs AS ( |
||||
SELECT |
||||
RTRIM(LTRIM(REGEXP_REPLACE(test_target, r'(@poller=.+)', ''))) AS test_binary, |
||||
REGEXP_EXTRACT(test_target, r'poller=(\w+)') AS poll_strategy, |
||||
job_name, |
||||
test_target, |
||||
test_class_name, |
||||
CASE |
||||
# in case of timeout / retry / segfault the "test_case" fields will contain weird stuff |
||||
# e.g. "test_shard2_run0_attempt0" or "shard_2/20" |
||||
# when aggregating, we want to display all of these as a single category of problems |
||||
WHEN test_case like 'test_shard_%_run%_attempt%' THEN 'CANNOT_DETERMINE' |
||||
WHEN test_case like '%shard_%/%' THEN 'CANNOT_DETERMINE' |
||||
# when test_case looks suspiciously like test_target |
||||
# its value is probably meaningless and it means that the entire target has failed |
||||
# e.g. test_target="//test/cpp/client:destroy_grpclb_channel_with_active_connect_stress_test" and test_case="test/cpp/client/destroy_grpclb_channel_with_active_connect_stress_test.exe" |
||||
WHEN STRPOS(test_case, REPLACE(SUBSTR(test_target, 3), ":", "/")) != 0 THEN 'CANNOT_DETERMINE' |
||||
ELSE test_case |
||||
END AS test_case, |
||||
result, |
||||
build_id, |
||||
timestamp |
||||
FROM |
||||
`grpc-testing.jenkins_test_results.rbe_test_results` |
||||
WHERE |
||||
DATETIME_DIFF(CURRENT_DATETIME(), |
||||
dateTIME(timestamp), |
||||
HOUR) < {lookback_hours} |
||||
), |
||||
|
||||
results_counts_per_build AS ( |
||||
SELECT |
||||
test_binary, |
||||
#test_target, # aggregate data over all pollers |
||||
test_class_name, |
||||
test_case, |
||||
SUM(SAFE_CAST(result != 'PASSED' |
||||
AND result != 'SKIPPED' AS INT64)) AS runs_failed, |
||||
SUM(SAFE_CAST(result != 'SKIPPED' AS INT64)) AS runs_total, |
||||
job_name, |
||||
build_id |
||||
FROM |
||||
runs |
||||
GROUP BY |
||||
test_binary, |
||||
test_class_name, |
||||
test_case, |
||||
job_name, |
||||
build_id), |
||||
|
||||
builds_with_missing_cannot_determine_testcase_entry AS ( |
||||
SELECT |
||||
test_binary, |
||||
job_name, |
||||
build_id, |
||||
FROM |
||||
results_counts_per_build |
||||
GROUP BY |
||||
test_binary, |
||||
job_name, |
||||
build_id |
||||
HAVING COUNTIF(test_case = 'CANNOT_DETERMINE') = 0 |
||||
), |
||||
|
||||
# for each test target and build, generate a fake entry with "CANNOT_DETERMINE" test_case |
||||
# if not already present. |
||||
# this is because in many builds, there will be no "CANNOT_DETERMINE" entry |
||||
# and we want to avoid skewing the statistics |
||||
results_counts_per_build_with_fake_cannot_determine_test_case_entries AS ( |
||||
(SELECT * FROM results_counts_per_build) |
||||
UNION ALL |
||||
(SELECT |
||||
test_binary, |
||||
'' AS test_class_name, # when test_case is 'CANNOT_DETERMINE', test class is empty string |
||||
'CANNOT_DETERMINE' AS test_case, # see table "runs" |
||||
0 AS runs_failed, |
||||
1 AS runs_total, |
||||
job_name, |
||||
build_id |
||||
FROM |
||||
builds_with_missing_cannot_determine_testcase_entry) |
||||
), |
||||
|
||||
results_counts AS ( |
||||
SELECT |
||||
test_binary, |
||||
test_class_name, |
||||
test_case, |
||||
job_name, |
||||
SUM(runs_failed) AS runs_failed, |
||||
SUM(runs_total) AS runs_total, |
||||
SUM(SAFE_CAST(runs_failed > 0 AS INT64)) AS builds_failed, |
||||
COUNT(build_id) AS builds_total, |
||||
STRING_AGG(CASE |
||||
WHEN runs_failed > 0 THEN 'X' |
||||
ELSE '_' END, '' |
||||
ORDER BY |
||||
build_id ASC) AS build_failure_pattern, |
||||
FORMAT("%T", ARRAY_AGG(build_id |
||||
ORDER BY |
||||
build_id ASC)) AS builds |
||||
FROM |
||||
#results_counts_per_build |
||||
results_counts_per_build_with_fake_cannot_determine_test_case_entries |
||||
GROUP BY |
||||
test_binary, |
||||
test_class_name, |
||||
test_case, |
||||
job_name |
||||
HAVING |
||||
runs_failed > 0) |
||||
|
||||
SELECT |
||||
ROUND(100*builds_failed / builds_total, 2) AS pct_builds_failed, |
||||
ROUND(100*runs_failed / runs_total, 2) AS pct_runs_failed, |
||||
test_binary, |
||||
test_class_name, |
||||
test_case, |
||||
job_name, |
||||
build_failure_pattern |
||||
|
||||
FROM |
||||
results_counts |
||||
ORDER BY |
||||
pct_builds_failed DESC |
||||
""" |
Loading…
Reference in new issue