@ -13,6 +13,23 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# Script to extract build metadata from bazel BUILD.
# To avoid having two sources of truth for the build metadata (build
# targets, source files, header files etc.), this script analyzes the contents
# of bazel BUILD files and generates a YAML file (currently called
# build_autogenerated.yaml). The format and semantics of the generated YAML files
# is chosen to match the format of a "build.yaml" file, which used
# to be build the source of truth for gRPC build before bazel became
# the primary build system.
# A good basic overview of the "build.yaml" format is available here:
# https://github.com/grpc/grpc/blob/master/templates/README.md. Note that
# while useful as an overview, the doc does not act as formal spec
# (formal spec does not exist in fact) and the doc can be incomplete,
# inaccurate or slightly out of date.
# TODO(jtattermusch): In the future we want to get rid of the legacy build.yaml
# format entirely or simplify it to a point where it becomes self-explanatory
# and doesn't need any detailed documentation.
import subprocess
import yaml
import xml . etree . ElementTree as ET
@ -32,6 +49,7 @@ def _bazel_query_xml_tree(query):
def _rule_dict_from_xml_node ( rule_xml_node ) :
""" Converts XML node representing a rule (obtained from " bazel query --output xml " ) to a dictionary that contains all the metadata we will need. """
result = {
' class ' : rule_xml_node . attrib . get ( ' class ' ) ,
' name ' : rule_xml_node . attrib . get ( ' name ' ) ,
@ -63,6 +81,7 @@ def _rule_dict_from_xml_node(rule_xml_node):
def _extract_rules_from_bazel_xml ( xml_tree ) :
""" Extract bazel rules from an XML tree node obtained from " bazel query --output xml " command. """
result = { }
for child in xml_tree :
if child . tag == ' rule ' :
@ -133,8 +152,13 @@ def _extract_deps(bazel_rule):
def _create_target_from_bazel_rule ( target_name , bazel_rules ) :
# extract the deps from bazel
""" Create build.yaml-like target definition from bazel metadata """
bazel_rule = bazel_rules [ _get_bazel_label ( target_name ) ]
# Create a template for our target from the bazel rule. Initially we only
# populate some "private" fields with the original info we got from bazel
# and only later we will populate the public fields (once we do some extra
# postprocessing).
result = {
' name ' : target_name ,
' _PUBLIC_HEADERS_BAZEL ' : _extract_public_headers ( bazel_rule ) ,
@ -312,22 +336,37 @@ def _expand_intermediate_deps(target_dict, public_dep_names, bazel_rules):
def _generate_build_metadata ( build_extra_metadata , bazel_rules ) :
""" Generate build metadata in build.yaml-like format bazel build metadata and build.yaml-specific " extra metadata " . """
lib_names = build_extra_metadata . keys ( )
result = { }
for lib_name in lib_names :
lib_dict = _create_target_from_bazel_rule ( lib_name , bazel_rules )
# Figure out the final list of headers and sources for given target.
# While this is mostly based on bazel build metadata, build.yaml does
# not necessarily expose all the targets that are present in bazel build.
# These "intermediate dependencies" might get flattened.
# TODO(jtattermusch): This is done to avoid introducing too many intermediate
# libraries into the build.yaml-based builds (which might in cause issues
# building language-specific artifacts) and also because the libraries
# in build.yaml-based build are generally considered units of distributions
# (= public libraries that are visible to the user and are installable),
# while in bazel builds it is customary to define larger number of smaller
# "sublibraries". The need for elision (and expansion)
# of intermediate libraries can be re-evaluated in the future.
_expand_intermediate_deps ( lib_dict , lib_names , bazel_rules )
# populate extra properties from build metadata
# populate extra properties from the build.yaml-specific "extra metadata"
lib_dict . update ( build_extra_metadata . get ( lib_name , { } ) )
# store to results
result [ lib_name ] = lib_dict
# rename some targets to something else
# this needs to be made after we're done with most of processing logic
# Rename targets marked with "_RENAME" extra metadata.
# This is mostly a cosmetic change to ensure that we end up with build.yaml target
# names we're used to from the past (and also to avoid too long target names).
# The rename step needs to be made after we're done with most of processing logic
# otherwise the already-renamed libraries will have different names than expected
for lib_name in lib_names :
to_name = build_extra_metadata . get ( lib_name , { } ) . get ( ' _RENAME ' , None )
@ -410,8 +449,8 @@ def _extract_cc_tests(bazel_rules):
return list ( sorted ( result ) )
def _filter _cc_tests ( tests ) :
""" Filters out tests that we don ' t want or we cannot build them reasonably """
def _exclude_unwanted _cc_tests ( tests ) :
""" Filters out bazel tests that we don ' t want to run with other build systems or we cannot build them reasonably """
# most qps tests are autogenerated, we are fine without them
tests = list (
@ -478,6 +517,7 @@ def _filter_cc_tests(tests):
def _generate_build_extra_metadata_for_tests ( tests , bazel_rules ) :
""" For given tests, generate the " extra metadata " that we need for our " build.yaml " -like output. The extra metadata is generated from the bazel rule metadata by using a bunch of heuristics. """
test_metadata = { }
for test in tests :
test_dict = { ' build ' : ' test ' , ' _TYPE ' : ' target ' }
@ -573,6 +613,16 @@ def _generate_build_extra_metadata_for_tests(tests, bazel_rules):
return test_metadata
def _detect_and_print_issues ( build_yaml_like ) :
""" Try detecting some unusual situations and warn about them. """
for tgt in build_yaml_like [ ' targets ' ] :
if tgt [ ' build ' ] == ' test ' :
for src in tgt [ ' src ' ] :
if src . startswith ( ' src/ ' ) and not src . endswith ( ' .proto ' ) :
print ( ' source file from under " src/ " tree used in test ' +
tgt [ ' name ' ] + ' : ' + src )
# extra metadata that will be used to construct build.yaml
# there are mostly extra properties that we weren't able to obtain from the bazel build
# _TYPE: whether this is library, target or test
@ -943,31 +993,138 @@ _BAZEL_DEPS_QUERIES = [
' deps( " //src/proto/... " ) ' ,
]
# Step 1: run a bunch of "bazel query --output xml" queries to collect
# the raw build metadata from the bazel build.
# At the end of this step we will have a dictionary of bazel rules
# that are interesting to us (libraries, binaries, etc.) along
# with their most important metadata (sources, headers, dependencies)
#
# Example of a single bazel rule after being populated:
# '//:grpc' : { 'class': 'cc_library',
# 'hdrs': ['//:include/grpc/byte_buffer.h', ... ],
# 'srcs': ['//:src/core/lib/surface/init.cc', ... ],
# 'deps': ['//:grpc_common', ...],
# ... }
bazel_rules = { }
for query in _BAZEL_DEPS_QUERIES :
bazel_rules . update (
_extract_rules_from_bazel_xml ( _bazel_query_xml_tree ( query ) ) )
# Step 1a: Knowing the transitive closure of dependencies will make
# the postprocessing simpler, so compute the info for all our rules.
#
# Example:
# '//:grpc' : { ...,
# 'transitive_deps': ['//:gpr_base', ...] }
_populate_transitive_deps ( bazel_rules )
tests = _filter_cc_tests ( _extract_cc_tests ( bazel_rules ) )
test_metadata = _generate_build_extra_metadata_for_tests ( tests , bazel_rules )
all_metadata = { }
all_metadata . update ( _BUILD_EXTRA_METADATA )
all_metadata . update ( test_metadata )
all_targets_dict = _generate_build_metadata ( all_metadata , bazel_rules )
# Step 2: Extract the known bazel cc_test tests. While most tests
# will be buildable with other build systems just fine, some of these tests
# would be too difficult to build and run with other build systems,
# so we simply exclude the ones we don't want.
# Note that while making tests buildable with other build systems
# than just bazel is extra effort, we still need to do that for these
# reasons:
# - If our cmake build doesn't have any tests at all, it's hard to make
# sure that what it built actually works (we need at least some "smoke tests").
# This is quite important because the build flags between bazel / non-bazel flag might differ
# (sometimes it's for interesting reasons that are not easy to overcome)
# which makes it even more important to have at least some tests for cmake/make
# - Our portability suite actually runs cmake tests and migration of portability
# suite fully towards bazel might be intricate (e.g. it's unclear whether it's
# possible to get a good enough coverage of different compilers / distros etc.
# with bazel)
# - some things that are considered "tests" in build.yaml-based builds are actually binaries
# we'd want to be able to build anyway (qps_json_worker, interop_client, interop_server, grpc_cli)
# so it's unclear how much make/cmake simplification we would gain by removing just some (but not all) test
# TODO(jtattermusch): Investigate feasibility of running portability suite with bazel.
tests = _exclude_unwanted_cc_tests ( _extract_cc_tests ( bazel_rules ) )
# Step 3: Generate the "extra metadata" for all our build targets.
# While the bazel rules give us most of the information we need,
# the legacy "build.yaml" format requires some additional fields that
# we cannot get just from bazel alone (we call that "extra metadata").
# In this step, we basically analyze the build metadata we have from bazel
# and use heuristics to determine (and sometimes guess) the right
# extra metadata to use for each target.
#
# - For some targets (such as the public libraries, helper libraries
# and executables) determining the right extra metadata is hard to do
# automatically. For these targets, the extra metadata is supplied "manually"
# in form of the _BUILD_EXTRA_METADATA dictionary. That allows us to match
# the semantics of the legacy "build.yaml" as closely as possible.
#
# - For test binaries, it is possible to generate the "extra metadata" mostly
# automatically using a rule-based heuristic approach because most tests
# look and behave alike from the build's perspective.
#
# TODO(jtattermusch): Of course neither "_BUILD_EXTRA_METADATA" or
# the heuristic approach used for tests are ideal and they cannot be made
# to cover all possible situations (and are tailored to work with the way
# the grpc build currently works), but the idea was to start with something
# reasonably simple that matches the "build.yaml"-like semantics as closely
# as possible (to avoid changing too many things at once) and gradually get
# rid of the legacy "build.yaml"-specific fields one by one. Once that is done,
# only very little "extra metadata" would be needed and/or it would be trivial
# to generate it automatically.
all_extra_metadata = { }
all_extra_metadata . update ( _BUILD_EXTRA_METADATA )
all_extra_metadata . update (
_generate_build_extra_metadata_for_tests ( tests , bazel_rules ) )
# Step 4: Generate the final metadata for all the targets.
# This is done by combining the bazel build metadata and the "extra metadata"
# we obtained in the previous step.
# In this step, we also perform some interesting massaging of the target metadata
# to end up with a result that is as similar to the legacy build.yaml data
# as possible.
# - Some targets get renamed (to match the legacy build.yaml target names)
# - Some intermediate libraries get elided ("expanded") to better match the set
# of targets provided by the legacy build.yaml build
#
# Originally the target renaming was introduced to address these concerns:
# - avoid changing too many things at the same time and avoid people getting
# confused by some well know targets suddenly being missing
# - Makefile/cmake and also language-specific generators rely on some build
# targets being called exactly the way they they are. Some of our testing
# scrips also invoke executables (e.g. "qps_json_driver") by their name.
# - The autogenerated test name from bazel includes the package path
# (e.g. "test_cpp_TEST_NAME"). Without renaming, the target names would
# end up pretty ugly (e.g. test_cpp_qps_qps_json_driver).
# TODO(jtattermusch): reevaluate the need for target renaming in the future.
#
# Example of a single generated target:
# 'grpc' : { 'language': 'c',
# 'public_headers': ['include/grpc/byte_buffer.h', ... ],
# 'headers': ['src/core/ext/filters/client_channel/client_channel.h', ... ],
# 'src': ['src/core/lib/surface/init.cc', ... ],
# 'deps': ['gpr', 'address_sorting', ...],
# ... }
all_targets_dict = _generate_build_metadata ( all_extra_metadata , bazel_rules )
# Step 5: convert the dictionary with all the targets to a dict that has
# the desired "build.yaml"-like layout.
# TODO(jtattermusch): We use the custom "build.yaml"-like layout because
# currently all other build systems use that format as their source of truth.
# In the future, we can get rid of this custom & legacy format entirely,
# but we would need to update the generators for other build systems
# at the same time.
#
# Layout of the result:
# { 'libs': { TARGET_DICT_FOR_LIB_XYZ, ... },
# 'targets': { TARGET_DICT_FOR_BIN_XYZ, ... },
# 'tests': { TARGET_DICT_FOR_TEST_XYZ, ...} }
build_yaml_like = _convert_to_build_yaml_like ( all_targets_dict )
# if a test uses source files from src/ directly, it's a little bit suspicious
for tgt in build_yaml_like [ ' targets ' ] :
if tgt [ ' build ' ] == ' test ' :
for src in tgt [ ' src ' ] :
if src . startswith ( ' src/ ' ) and not src . endswith ( ' .proto ' ) :
print ( ' source file from under " src/ " tree used in test ' +
tgt [ ' name ' ] + ' : ' + src )
# detect and report some suspicious situations we've seen before
_detect_and_print_issues ( build_yaml_like )
# Step 6: Store the build_autogenerated.yaml in a deterministic (=sorted)
# and cleaned-up form.
# A basic overview of the resulting "build.yaml"-like format is here:
# https://github.com/grpc/grpc/blob/master/templates/README.md
# TODO(jtattermusch): The "cleanup" function is taken from the legacy
# build system (which used build.yaml) and can be eventually removed.
build_yaml_string = build_cleaner . cleaned_build_yaml_dict_as_string (
build_yaml_like )
with open ( ' build_autogenerated.yaml ' , ' w ' ) as file :