[experiments] Split experiments into two separate experiment definition and rollout definition files (#33228)

The PR does the following:

* Splits the single experiments.yaml file into two files:
experiments.yaml and rollouts.yaml.
* The experiments.yaml will now only include experiment definitions. The
default values of the experiments must now be specified in rollouts.yaml
* Removes the 'release' default value because it is not used.
* Adds an additional_constraints character string to ExperimentMetadata.
* Introduces a hook in src/core/lib/experiments/config.h to allow
registering arbitrary experiment constraint validation callbacks. These
callbacks would take an ExperimentMetadata object as input and return
the correct value to use for an experiment subject to additional
constraints.
pull/33240/head
Vignesh Babu 2 years ago committed by GitHub
parent 4d0611e4a9
commit 4d85f514cb
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 2
      bazel/experiments.bzl
  2. 1
      bazel/grpc_build_system.bzl
  3. 5
      src/core/BUILD
  4. 24
      src/core/lib/experiments/config.cc
  5. 9
      src/core/lib/experiments/config.h
  6. 79
      src/core/lib/experiments/experiments.cc
  7. 29
      src/core/lib/experiments/experiments.yaml
  8. 62
      src/core/lib/experiments/rollouts.yaml
  9. 63
      tools/codegen/core/gen_experiments.py

@ -62,6 +62,4 @@ EXPERIMENTS = {
},
"on": {
},
"opt": {
},
}

@ -373,7 +373,6 @@ def expand_tests(name, srcs, deps, tags, args, exclude_pollers, uses_polling, us
mode_config = {
# format: <mode>: (enabled_target_tags, disabled_target_tags)
"dbg": (["noopt"], ["nodbg"]),
"opt": (["nodbg"], ["noopt"]),
"on": (None, []),
"off": ([], None),
}

@ -120,7 +120,10 @@ grpc_cc_library(
"//:grpc_experiments_are_final": ["GRPC_EXPERIMENTS_ARE_FINAL"],
"//conditions:default": [],
}),
external_deps = ["absl/strings"],
external_deps = [
"absl/functional:any_invocable",
"absl/strings",
],
language = "c++",
deps = [
"no_destruct",

@ -21,7 +21,9 @@
#include <algorithm>
#include <atomic>
#include <string>
#include <utility>
#include "absl/functional/any_invocable.h"
#include "absl/strings/ascii.h"
#include "absl/strings/str_cat.h"
#include "absl/strings/str_split.h"
@ -50,13 +52,21 @@ ForcedExperiment g_forced_experiments[kNumExperiments];
std::atomic<bool> g_loaded(false);
absl::AnyInvocable<bool(struct ExperimentMetadata)>* g_check_constraints_cb =
nullptr;
GPR_ATTRIBUTE_NOINLINE Experiments LoadExperimentsFromConfigVariable() {
g_loaded.store(true, std::memory_order_relaxed);
// Set defaults from metadata.
Experiments experiments;
for (size_t i = 0; i < kNumExperiments; i++) {
if (!g_forced_experiments[i].forced) {
experiments.enabled[i] = g_experiment_metadata[i].default_value;
if (g_check_constraints_cb != nullptr) {
experiments.enabled[i] =
(*g_check_constraints_cb)(g_experiment_metadata[i]);
} else {
experiments.enabled[i] = g_experiment_metadata[i].default_value;
}
} else {
experiments.enabled[i] = g_forced_experiments[i].value;
}
@ -151,6 +161,13 @@ void ForceEnableExperiment(absl::string_view experiment, bool enable) {
std::string(experiment).c_str(), enable ? "enable" : "disable");
}
// Installs the callback consulted by LoadExperimentsFromConfigVariable() for
// every experiment that has not been forced. The callable is moved into a
// heap-allocated holder that g_check_constraints_cb points at; nothing in this
// function releases a previously installed holder.
void RegisterExperimentConstraintsValidator(
    absl::AnyInvocable<bool(struct ExperimentMetadata)> check_constraints_cb) {
  using ConstraintsCallback =
      absl::AnyInvocable<bool(struct ExperimentMetadata)>;
  auto* installed_cb = new ConstraintsCallback(std::move(check_constraints_cb));
  g_check_constraints_cb = installed_cb;
}
} // namespace grpc_core
#else
namespace grpc_core {
@ -159,5 +176,10 @@ void ForceEnableExperiment(absl::string_view experiment_name, bool) {
Crash(absl::StrCat("ForceEnableExperiment(\"", experiment_name,
"\") called in final build"));
}
// Variant for the other branch of the surrounding #if/#else (the neighbouring
// ForceEnableExperiment crashes with "called in final build" here): the
// validator is accepted for API compatibility and then dropped.
void RegisterExperimentConstraintsValidator(
    absl::AnyInvocable<bool(struct ExperimentMetadata)> check_constraints_cb) {
  // Intentionally unused.
  (void)check_constraints_cb;
}
} // namespace grpc_core
#endif

@ -19,6 +19,7 @@
#include <stddef.h>
#include "absl/functional/any_invocable.h"
#include "absl/strings/string_view.h"
// #define GRPC_EXPERIMENTS_ARE_FINAL
@ -51,10 +52,18 @@ void ForceEnableExperiment(absl::string_view experiment_name, bool enable);
// Static description of a single experiment. Instances are aggregate
// initialized (in field order) by the generated g_experiment_metadata table,
// so fields must not be reordered.
struct ExperimentMetadata {
// Experiment name in snake_case, e.g. "tcp_rcv_lowat".
const char* name;
// Human-readable description of what the experiment does.
const char* description;
// Free-form string describing additional constraints on the experiment; the
// generator currently emits "" for every experiment.
// NOTE(review): the member name is misspelled ("constaints"); renaming it
// would affect any code that refers to the field by name.
const char* additional_constaints;
// Value used for the experiment when it has not been forced and no
// constraints validator has been registered.
bool default_value;
// Generated from the experiment's `allow_in_fuzzing_config` attribute
// (defaults to true); presumably controls whether fuzzers may toggle the
// experiment - TODO confirm against the fuzzing config code.
bool allow_in_fuzzing_config;
};
// Register a function to be called to validate the value an experiment can
// take subject to additional constraints.
// The function will take the ExperimentMetadata as its argument. It will return
// a bool value indicating the actual value the experiment should take.
void RegisterExperimentConstraintsValidator(
absl::AnyInvocable<bool(struct ExperimentMetadata)> check_constraints_cb);
} // namespace grpc_core
#endif // GRPC_SRC_CORE_LIB_EXPERIMENTS_CONFIG_H

@ -25,76 +25,111 @@ const char* const description_tcp_frame_size_tuning =
"would not indicate completion of a read operation until a specified "
"number of bytes have been read over the socket. Buffers are also "
"allocated according to estimated RPC sizes.";
const char* const additional_constraints_tcp_frame_size_tuning = "";
const char* const description_tcp_rcv_lowat =
"Use SO_RCVLOWAT to avoid wakeups on the read path.";
const char* const additional_constraints_tcp_rcv_lowat = "";
const char* const description_peer_state_based_framing =
"If set, the max sizes of frames sent to lower layers is controlled based "
"on the peer's memory pressure which is reflected in its max http2 frame "
"size.";
const char* const additional_constraints_peer_state_based_framing = "";
const char* const description_memory_pressure_controller =
"New memory pressure controller";
const char* const additional_constraints_memory_pressure_controller = "";
const char* const description_unconstrained_max_quota_buffer_size =
"Discard the cap on the max free pool size for one memory allocator";
const char* const additional_constraints_unconstrained_max_quota_buffer_size =
"";
const char* const description_event_engine_client =
"Use EventEngine clients instead of iomgr's grpc_tcp_client";
const char* const additional_constraints_event_engine_client = "";
const char* const description_monitoring_experiment =
"Placeholder experiment to prove/disprove our monitoring is working";
const char* const additional_constraints_monitoring_experiment = "";
const char* const description_promise_based_client_call =
"If set, use the new gRPC promise based call code when it's appropriate "
"(ie when all filters in a stack are promise based)";
const char* const additional_constraints_promise_based_client_call = "";
const char* const description_free_large_allocator =
"If set, return all free bytes from a \042big\042 allocator";
const char* const additional_constraints_free_large_allocator = "";
const char* const description_promise_based_server_call =
"If set, use the new gRPC promise based call code when it's appropriate "
"(ie when all filters in a stack are promise based)";
const char* const additional_constraints_promise_based_server_call = "";
const char* const description_transport_supplies_client_latency =
"If set, use the transport represented value for client latency in "
"opencensus";
const char* const additional_constraints_transport_supplies_client_latency = "";
const char* const description_event_engine_listener =
"Use EventEngine listeners instead of iomgr's grpc_tcp_server";
const char* const additional_constraints_event_engine_listener = "";
const char* const description_schedule_cancellation_over_write =
"Allow cancellation op to be scheduled over a write";
const char* const additional_constraints_schedule_cancellation_over_write = "";
const char* const description_trace_record_callops =
"Enables tracing of call batch initiation and completion.";
const char* const additional_constraints_trace_record_callops = "";
const char* const description_event_engine_dns =
"If set, use EventEngine DNSResolver for client channel resolution";
const char* const additional_constraints_event_engine_dns = "";
const char* const description_work_stealing =
"If set, use a work stealing thread pool implementation in EventEngine";
const char* const additional_constraints_work_stealing = "";
const char* const description_client_privacy = "If set, client privacy";
const char* const additional_constraints_client_privacy = "";
const char* const description_canary_client_privacy =
"If set, canary client privacy";
const char* const additional_constraints_canary_client_privacy = "";
const char* const description_server_privacy = "If set, server privacy";
const char* const additional_constraints_server_privacy = "";
} // namespace
namespace grpc_core {
const ExperimentMetadata g_experiment_metadata[] = {
{"tcp_frame_size_tuning", description_tcp_frame_size_tuning, false, true},
{"tcp_rcv_lowat", description_tcp_rcv_lowat, false, true},
{"peer_state_based_framing", description_peer_state_based_framing, false,
true},
{"tcp_frame_size_tuning", description_tcp_frame_size_tuning,
additional_constraints_tcp_frame_size_tuning, false, true},
{"tcp_rcv_lowat", description_tcp_rcv_lowat,
additional_constraints_tcp_rcv_lowat, false, true},
{"peer_state_based_framing", description_peer_state_based_framing,
additional_constraints_peer_state_based_framing, false, true},
{"memory_pressure_controller", description_memory_pressure_controller,
false, true},
additional_constraints_memory_pressure_controller, false, true},
{"unconstrained_max_quota_buffer_size",
description_unconstrained_max_quota_buffer_size, false, true},
{"event_engine_client", description_event_engine_client, false, true},
{"monitoring_experiment", description_monitoring_experiment, true, true},
{"promise_based_client_call", description_promise_based_client_call, false,
true},
{"free_large_allocator", description_free_large_allocator, false, true},
{"promise_based_server_call", description_promise_based_server_call, false,
true},
description_unconstrained_max_quota_buffer_size,
additional_constraints_unconstrained_max_quota_buffer_size, false, true},
{"event_engine_client", description_event_engine_client,
additional_constraints_event_engine_client, false, true},
{"monitoring_experiment", description_monitoring_experiment,
additional_constraints_monitoring_experiment, true, true},
{"promise_based_client_call", description_promise_based_client_call,
additional_constraints_promise_based_client_call, false, true},
{"free_large_allocator", description_free_large_allocator,
additional_constraints_free_large_allocator, false, true},
{"promise_based_server_call", description_promise_based_server_call,
additional_constraints_promise_based_server_call, false, true},
{"transport_supplies_client_latency",
description_transport_supplies_client_latency, false, true},
{"event_engine_listener", description_event_engine_listener, false, true},
description_transport_supplies_client_latency,
additional_constraints_transport_supplies_client_latency, false, true},
{"event_engine_listener", description_event_engine_listener,
additional_constraints_event_engine_listener, false, true},
{"schedule_cancellation_over_write",
description_schedule_cancellation_over_write, false, true},
{"trace_record_callops", description_trace_record_callops, false, true},
{"event_engine_dns", description_event_engine_dns, false, false},
{"work_stealing", description_work_stealing, false, false},
{"client_privacy", description_client_privacy, false, false},
{"canary_client_privacy", description_canary_client_privacy, false, false},
{"server_privacy", description_server_privacy, false, false},
description_schedule_cancellation_over_write,
additional_constraints_schedule_cancellation_over_write, false, true},
{"trace_record_callops", description_trace_record_callops,
additional_constraints_trace_record_callops, false, true},
{"event_engine_dns", description_event_engine_dns,
additional_constraints_event_engine_dns, false, false},
{"work_stealing", description_work_stealing,
additional_constraints_work_stealing, false, false},
{"client_privacy", description_client_privacy,
additional_constraints_client_privacy, false, false},
{"canary_client_privacy", description_canary_client_privacy,
additional_constraints_canary_client_privacy, false, false},
{"server_privacy", description_server_privacy,
additional_constraints_server_privacy, false, false},
};
} // namespace grpc_core

@ -15,14 +15,6 @@
# Format of each entry:
# name: name of the experiment
# description: description of the experiment
# default: one of:
# - broken - the experiment defaults to off and is not tested on
# - false - the experiment defaults to off
# - debug - the experiment defaults to on in debug,
# off in release
# - release - the experiment defaults to on in release,
# off in debug
# - true - the experiment defaults to on
# expiry: when is the next time this experiment *must* be updated
# (date, YYYY/MM/DD)
# test_tags: a set of bazel tags, that if a test declares them signals
@ -39,19 +31,19 @@
# promise_test: tests around the promise architecture
# resource_quota_test: tests known to exercise resource quota
# This file only defines the experiments. Refer to rollouts.yaml for the rollout
# state of each experiment.
- name: tcp_frame_size_tuning
description:
If set, enables TCP to use RPC size estimation made by higher layers.
TCP would not indicate completion of a read operation until a specified
number of bytes have been read over the socket.
Buffers are also allocated according to estimated RPC sizes.
default: false
expiry: 2023/06/01
owner: vigneshbabu@google.com
test_tags: ["endpoint_test", "flow_control_test"]
- name: tcp_rcv_lowat
description: Use SO_RCVLOWAT to avoid wakeups on the read path.
default: false
expiry: 2023/06/01
owner: vigneshbabu@google.com
test_tags: ["endpoint_test", "flow_control_test"]
@ -60,31 +52,26 @@
If set, the max sizes of frames sent to lower layers is controlled based
on the peer's memory pressure which is reflected in its max http2 frame
size.
default: false
expiry: 2023/06/01
owner: vigneshbabu@google.com
test_tags: ["flow_control_test"]
- name: memory_pressure_controller
description: New memory pressure controller
default: false
expiry: 2023/06/01
owner: ctiller@google.com
test_tags: [resource_quota_test]
- name: unconstrained_max_quota_buffer_size
description: Discard the cap on the max free pool size for one memory allocator
default: false
expiry: 2023/06/01
owner: ctiller@google.com
test_tags: [resource_quota_test]
- name: event_engine_client
description: Use EventEngine clients instead of iomgr's grpc_tcp_client
default: false
expiry: 2023/06/13
owner: hork@google.com
test_tags: ["core_end2end_test", "event_engine_client_test"]
- name: monitoring_experiment
description: Placeholder experiment to prove/disprove our monitoring is working
default: true
expiry: 2023/06/01
owner: ctiller@google.com
test_tags: []
@ -92,13 +79,11 @@
description:
If set, use the new gRPC promise based call code when it's appropriate
(ie when all filters in a stack are promise based)
default: false
expiry: 2023/06/01
owner: ctiller@google.com
test_tags: ["core_end2end_test", "lame_client_test"]
- name: free_large_allocator
description: If set, return all free bytes from a "big" allocator
default: false
expiry: 2023/11/01
owner: alishananda@google.com
test_tags: [resource_quota_test]
@ -106,38 +91,32 @@
description:
If set, use the new gRPC promise based call code when it's appropriate
(ie when all filters in a stack are promise based)
default: false
expiry: 2023/06/01
owner: ctiller@google.com
test_tags: ["core_end2end_test", "cpp_end2end_test", "xds_end2end_test"]
- name: transport_supplies_client_latency
description: If set, use the transport represented value for client latency in opencensus
default: false
expiry: 2023/06/01
owner: ctiller@google.com
test_tags: [census_test]
- name: event_engine_listener
description: Use EventEngine listeners instead of iomgr's grpc_tcp_server
default: false
expiry: 2023/06/13
owner: vigneshbabu@google.com
test_tags: ["core_end2end_test", "event_engine_listener_test"]
- name: schedule_cancellation_over_write
description: Allow cancellation op to be scheduled over a write
default: false
expiry: 2023/07/01
owner: vigneshbabu@google.com
test_tags: []
- name: trace_record_callops
description: Enables tracing of call batch initiation and completion.
default: false
expiry: 2023/07/01
owner: vigneshbabu@google.com
test_tags: []
- name: event_engine_dns
description:
If set, use EventEngine DNSResolver for client channel resolution
default: false
expiry: 2023/06/01
owner: yijiem@google.com
test_tags: []
@ -145,7 +124,6 @@
- name: work_stealing
description:
If set, use a work stealing thread pool implementation in EventEngine
default: false
expiry: 2023/06/01
owner: hork@google.com
test_tags: ["core_end2end_test"]
@ -153,7 +131,6 @@
- name: client_privacy
description:
If set, client privacy
default: false
expiry: 2023/11/01
owner: alishananda@google.com
test_tags: []
@ -161,7 +138,6 @@
- name: canary_client_privacy
description:
If set, canary client privacy
default: false
expiry: 2023/11/01
owner: alishananda@google.com
test_tags: []
@ -169,7 +145,6 @@
- name: server_privacy
description:
If set, server privacy
default: false
expiry: 2023/11/01
owner: alishananda@google.com
test_tags: []

@ -0,0 +1,62 @@
# Copyright 2023 gRPC authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# This file describes the rollout status of each experiment.
# Format of each entry:
# name: name of the experiment
# default: one of:
# - broken - the experiment defaults to off and is not tested
# - false - the experiment defaults to off.
# - debug - the experiment defaults to on in debug builds,
# off in release builds.
# - true - the experiment defaults to on.
- name: tcp_frame_size_tuning
default: false
- name: tcp_rcv_lowat
default: false
- name: peer_state_based_framing
default: false
- name: memory_pressure_controller
default: false
- name: unconstrained_max_quota_buffer_size
default: false
- name: event_engine_client
default: false
- name: monitoring_experiment
default: true
- name: promise_based_client_call
default: false
- name: free_large_allocator
default: false
- name: promise_based_server_call
default: false
- name: transport_supplies_client_latency
default: false
- name: event_engine_listener
default: false
- name: schedule_cancellation_over_write
default: false
- name: trace_record_callops
default: false
- name: event_engine_dns
default: false
- name: work_stealing
default: false
- name: client_privacy
default: false
- name: canary_client_privacy
default: false
- name: server_privacy
default: false

@ -41,12 +41,14 @@ if sys.argv[1:] == ["--check"]:
with open('src/core/lib/experiments/experiments.yaml') as f:
attrs = yaml.safe_load(f.read())
with open('src/core/lib/experiments/rollouts.yaml') as f:
rollouts = yaml.safe_load(f.read())
DEFAULTS = {
'broken': 'false',
False: 'false',
True: 'true',
'debug': 'kDefaultForDebugOnly',
'release': 'kDefaultForReleaseOnly',
}
FINAL_RETURN = {
@ -54,7 +56,6 @@ FINAL_RETURN = {
False: 'return false;',
True: 'return true;',
'debug': '#ifdef NDEBUG\nreturn false;\n#else\nreturn true;\n#endif',
'release': '#ifdef NDEBUG\nreturn true;\n#else\nreturn false;\n#endif',
}
FINAL_DEFINE = {
@ -62,7 +63,6 @@ FINAL_DEFINE = {
False: None,
True: '#define %s',
'debug': '#ifndef NDEBUG\n#define %s\n#endif',
'release': '#ifdef NDEBUG\n#define %s\n#endif',
}
BZL_LIST_FOR_DEFAULTS = {
@ -70,13 +70,24 @@ BZL_LIST_FOR_DEFAULTS = {
False: 'off',
True: 'on',
'debug': 'dbg',
'release': 'opt',
}
error = False
today = datetime.date.today()
two_quarters_from_now = today + datetime.timedelta(days=180)
experiment_annotation = 'gRPC experiments:'
# Validate rollouts.yaml: every entry needs a name and a default that is one of
# the keys of DEFAULTS. Each problem is printed and recorded in `error` so that
# all of them are reported in a single run.
for rollout_attr in rollouts:
    if 'name' not in rollout_attr:
        # Print the offending rollout entry itself, not `attr`, which is the
        # loop variable of the experiments loop that follows.
        print("experiment with no name: %r" % rollout_attr)
        error = True
        continue
    if 'default' not in rollout_attr:
        print("no default for experiment %s" % rollout_attr['name'])
        error = True
    elif rollout_attr['default'] not in DEFAULTS:
        # Only checked when the key exists; otherwise the lookup above would
        # have raised KeyError instead of producing a readable report.
        print("invalid default for experiment %s: %r" %
              (rollout_attr['name'], rollout_attr['default']))
        error = True
for attr in attrs:
if 'name' not in attr:
print("experiment with no name: %r" % attr)
@ -85,13 +96,6 @@ for attr in attrs:
if 'description' not in attr:
print("no description for experiment %s" % attr['name'])
error = True
if 'default' not in attr:
print("no default for experiment %s" % attr['name'])
error = True
if attr['default'] not in DEFAULTS:
print("invalid default for experiment %s: %r" %
(attr['name'], attr['default']))
error = True
if 'owner' not in attr:
print("no owner for experiment %s" % attr['name'])
error = True
@ -165,6 +169,14 @@ def put_copyright(file, prefix):
put_banner([file], [line[2:].rstrip() for line in copyright], prefix)
def get_rollout_attr_for_experiment(name):
    """Return the rollouts.yaml entry for the experiment called `name`.

    Args:
        name: experiment name as it appears in experiments.yaml.

    Returns:
        The matching entry from `rollouts`, or a synthesized disabled entry
        (after printing a warning) when no rollout entry has that name.
    """
    for rollout_attr in rollouts:
        if rollout_attr['name'] == name:
            return rollout_attr
    print('WARNING. experiment: %r has no rollout config. Disabling it.' % name)
    # The default must be the boolean False, not the string 'false': DEFAULTS,
    # FINAL_RETURN, FINAL_DEFINE and BZL_LIST_FOR_DEFAULTS are all keyed by
    # False, so 'false' would raise KeyError at every lookup site.
    return {'name': name, 'default': False}
WTF = """
This file contains the autogenerated parts of the experiments API.
@ -218,13 +230,14 @@ with open('src/core/lib/experiments/experiments.h', 'w') as H:
print(file=H)
print("#ifdef GRPC_EXPERIMENTS_ARE_FINAL", file=H)
for i, attr in enumerate(attrs):
define_fmt = FINAL_DEFINE[attr['default']]
rollout_attr = get_rollout_attr_for_experiment(attr['name'])
define_fmt = FINAL_DEFINE[rollout_attr['default']]
if define_fmt:
print(define_fmt %
("GRPC_EXPERIMENT_IS_INCLUDED_%s" % attr['name'].upper()),
file=H)
print("inline bool Is%sEnabled() { %s }" %
(snake_to_pascal(attr['name']), FINAL_RETURN[attr['default']]),
print("inline bool Is%sEnabled() { %s }" % (snake_to_pascal(
attr['name']), FINAL_RETURN[rollout_attr['default']]),
file=H)
print("#else", file=H)
for i, attr in enumerate(attrs):
@ -261,18 +274,18 @@ with open('src/core/lib/experiments/experiments.cc', 'w') as C:
print("const char* const description_%s = %s;" %
(attr['name'], c_str(attr['description'])),
file=C)
have_defaults = set(DEFAULTS[attr['default']] for attr in attrs)
if 'kDefaultForDebugOnly' in have_defaults or 'kDefaultForReleaseOnly' in have_defaults:
print("const char* const additional_constraints_%s = \"\";" %
attr['name'],
file=C)
have_defaults = set(
DEFAULTS[rollout_attr['default']] for rollout_attr in rollouts)
if 'kDefaultForDebugOnly' in have_defaults:
print("#ifdef NDEBUG", file=C)
if 'kDefaultForDebugOnly' in have_defaults:
print("const bool kDefaultForDebugOnly = false;", file=C)
if 'kDefaultForReleaseOnly' in have_defaults:
print("const bool kDefaultForReleaseOnly = true;", file=C)
print("#else", file=C)
if 'kDefaultForDebugOnly' in have_defaults:
print("const bool kDefaultForDebugOnly = true;", file=C)
if 'kDefaultForReleaseOnly' in have_defaults:
print("const bool kDefaultForReleaseOnly = false;", file=C)
print("#endif", file=C)
print("}", file=C)
print(file=C)
@ -280,9 +293,11 @@ with open('src/core/lib/experiments/experiments.cc', 'w') as C:
print(file=C)
print("const ExperimentMetadata g_experiment_metadata[] = {", file=C)
for attr in attrs:
rollout_attr = get_rollout_attr_for_experiment(attr['name'])
print(
" {%s, description_%s, %s, %s}," %
(c_str(attr['name']), attr['name'], DEFAULTS[attr['default']],
" {%s, description_%s, additional_constraints_%s, %s, %s}," %
(c_str(attr['name']), attr['name'], attr['name'],
DEFAULTS[rollout_attr['default']],
'true' if attr.get('allow_in_fuzzing_config', True) else 'false'),
file=C)
print("};", file=C)
@ -295,8 +310,10 @@ bzl_to_tags_to_experiments = dict((key, collections.defaultdict(list))
if key is not None)
for attr in attrs:
rollout_attr = get_rollout_attr_for_experiment(attr['name'])
for tag in attr['test_tags']:
bzl_to_tags_to_experiments[attr['default']][tag].append(attr['name'])
bzl_to_tags_to_experiments[rollout_attr['default']][tag].append(
attr['name'])
with open('bazel/experiments.bzl', 'w') as B:
put_copyright(B, "#")

Loading…
Cancel
Save