diff --git a/tools/buildgen/extract_metadata_from_bazel_xml.py b/tools/buildgen/extract_metadata_from_bazel_xml.py
index 9d0dc2e7c26..cac432ac3c6 100755
--- a/tools/buildgen/extract_metadata_from_bazel_xml.py
+++ b/tools/buildgen/extract_metadata_from_bazel_xml.py
@@ -13,6 +13,15 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+# Script to extract build metadata from bazel BUILD files.
+# To avoid having two sources of truth for the build metadata (build
+# targets, source files, header files etc.), this script analyzes the contents
+# of bazel BUILD files and generates a YAML file (currently called
+# build_autogenerated.yaml). The format and semantics of the generated YAML
+# file are chosen to match the format of a "build.yaml" file, which used
+# to be the source of truth for the gRPC build before bazel became
+# the primary build system.
+
 import subprocess
 import yaml
 import xml.etree.ElementTree as ET
@@ -32,6 +41,7 @@ def _bazel_query_xml_tree(query):
 
 
 def _rule_dict_from_xml_node(rule_xml_node):
+    """Converts an XML node representing a rule (obtained from "bazel query --output xml") to a dictionary with all the metadata we will need."""
     result = {
         'class': rule_xml_node.attrib.get('class'),
         'name': rule_xml_node.attrib.get('name'),
@@ -63,6 +73,7 @@ def _rule_dict_from_xml_node(rule_xml_node):
 
 
 def _extract_rules_from_bazel_xml(xml_tree):
+    """Extracts bazel rules from an XML tree obtained from the "bazel query --output xml" command."""
     result = {}
     for child in xml_tree:
         if child.tag == 'rule':
@@ -133,8 +144,13 @@ def _extract_deps(bazel_rule):
 
 
 def _create_target_from_bazel_rule(target_name, bazel_rules):
-    # extract the deps from bazel
+    """Creates a build.yaml-like target definition from bazel metadata."""
    bazel_rule = bazel_rules[_get_bazel_label(target_name)]
+
+    # Create a template for our target from the bazel rule. Initially we only
+    # populate some "private" fields with the original info we got from bazel
+    # and only later do we populate the public fields (once we do some extra
+    # postprocessing).
     result = {
         'name': target_name,
         '_PUBLIC_HEADERS_BAZEL': _extract_public_headers(bazel_rule),
@@ -312,22 +328,33 @@ def _expand_intermediate_deps(target_dict, public_dep_names, bazel_rules):
 
 
 def _generate_build_metadata(build_extra_metadata, bazel_rules):
+    """Generates build metadata in a build.yaml-like format from the bazel build metadata and the build.yaml-specific "extra metadata"."""
     lib_names = build_extra_metadata.keys()
     result = {}
 
     for lib_name in lib_names:
         lib_dict = _create_target_from_bazel_rule(lib_name, bazel_rules)
 
+        # Figure out the final list of headers and sources for the given target.
+        # While this is mostly based on bazel build metadata, build.yaml does
+        # not necessarily expose all the targets that are present in the bazel
+        # build. These "intermediate dependencies" might get flattened.
+        # TODO(jtattermusch): This is done to avoid introducing too many intermediate
+        # libraries into the build.yaml-based builds (which might in turn cause issues
+        # building language-specific artifacts). The need for elision (and expansion)
+        # of intermediate libraries can be re-evaluated in the future.
         _expand_intermediate_deps(lib_dict, lib_names, bazel_rules)
 
-        # populate extra properties from build metadata
+        # populate extra properties from the build.yaml-specific "extra metadata"
         lib_dict.update(build_extra_metadata.get(lib_name, {}))
 
         # store to results
         result[lib_name] = lib_dict
 
-    # rename some targets to something else
-    # this needs to be made after we're done with most of processing logic
+    # Rename targets marked with "_RENAME" extra metadata.
+    # This is mostly a cosmetic change to ensure we end up with the build.yaml
+    # target names we're used to from the past (and to avoid overly long names).
+    # The rename step needs to happen after most of the processing logic is done,
     # otherwise the already-renamed libraries will have different names than expected
     for lib_name in lib_names:
         to_name = build_extra_metadata.get(lib_name, {}).get('_RENAME', None)
@@ -410,8 +437,8 @@ def _extract_cc_tests(bazel_rules):
     return list(sorted(result))
 
 
-def _filter_cc_tests(tests):
-    """Filters out tests that we don't want or we cannot build them reasonably"""
+def _exclude_unwanted_cc_tests(tests):
+    """Filters out bazel tests that we don't want to run with other build systems or that we cannot reasonably build with them."""
 
     # most qps tests are autogenerated, we are fine without them
     tests = list(
@@ -472,6 +499,7 @@ def _filter_cc_tests(tests):
 
 
 def _generate_build_extra_metadata_for_tests(tests, bazel_rules):
+    """For the given tests, generates the "extra metadata" needed for our "build.yaml"-like output, derived from the bazel rule metadata via a bunch of heuristics."""
     test_metadata = {}
     for test in tests:
         test_dict = {'build': 'test', '_TYPE': 'target'}
@@ -567,6 +595,16 @@ def _generate_build_extra_metadata_for_tests(tests, bazel_rules):
     return test_metadata
 
 
+def _detect_and_print_issues(build_yaml_like):
+    """Tries to detect some unusual situations and warns about them."""
+    for tgt in build_yaml_like['targets']:
+        if tgt['build'] == 'test':
+            for src in tgt['src']:
+                if src.startswith('src/') and not src.endswith('.proto'):
+                    print('source file from under "src/" tree used in test ' +
+                          tgt['name'] + ': ' + src)
+
+
 # extra metadata that will be used to construct build.yaml
 # there are mostly extra properties that we weren't able to obtain from the bazel build
 # _TYPE: whether this is library, target or test
@@ -937,31 +975,86 @@ _BAZEL_DEPS_QUERIES = [
     'deps("//src/proto/...")',
 ]
 
+# Step 1: Run a bunch of "bazel query --output xml" queries to collect
+# the raw build metadata from the bazel build.
+# At the end of this step we will have a dictionary of bazel rules
+# that are interesting to us (libraries, binaries, etc.) along
+# with their most important metadata (sources, headers, dependencies).
 bazel_rules = {}
 for query in _BAZEL_DEPS_QUERIES:
     bazel_rules.update(
         _extract_rules_from_bazel_xml(_bazel_query_xml_tree(query)))
 
+# Step 1a: Knowing the transitive closure of dependencies will make
+# the postprocessing simpler, so compute the info for all our rules.
 _populate_transitive_deps(bazel_rules)
 
-tests = _filter_cc_tests(_extract_cc_tests(bazel_rules))
-test_metadata = _generate_build_extra_metadata_for_tests(tests, bazel_rules)
-
-all_metadata = {}
-all_metadata.update(_BUILD_EXTRA_METADATA)
-all_metadata.update(test_metadata)
-
-all_targets_dict = _generate_build_metadata(all_metadata, bazel_rules)
+# Step 2: Extract the known bazel cc_test tests. While most tests
+# will be buildable with other build systems just fine, some of these tests
+# would be too difficult to build and run with other build systems,
+# so we simply exclude the ones we don't want.
+tests = _exclude_unwanted_cc_tests(_extract_cc_tests(bazel_rules))
+
+# Step 3: Generate the "extra metadata" for all our build targets.
+# While the bazel rules give us most of the information we need,
+# the legacy "build.yaml" format requires some additional fields that
+# we cannot get from bazel alone (we call that "extra metadata").
+# In this step, we basically analyze the build metadata we have from bazel
+# and use heuristics to determine (and sometimes guess) the right
+# extra metadata to use for each target.
+#
+# - For some targets (such as the public libraries, helper libraries
+#   and executables) determining the right extra metadata is hard to do
+#   automatically. For these targets, the extra metadata is supplied "manually"
+#   in the form of the _BUILD_EXTRA_METADATA dictionary. That allows us to match
+#   the semantics of the legacy "build.yaml" as closely as possible.
+#
+# - For test binaries, it is possible to generate the "extra metadata" mostly
+#   automatically using a rule-based heuristic approach because most tests
+#   look and behave alike from the build's perspective.
+#
+# TODO(jtattermusch): Of course neither "_BUILD_EXTRA_METADATA" nor
+# the heuristic approach used for tests is ideal, and they cannot be made
+# to cover all possible situations (and are tailored to work with the way
+# the grpc build currently works), but the idea was to start with something
+# reasonably simple that matches the "build.yaml"-like semantics as closely
+# as possible (to avoid changing too many things at once) and gradually get
+# rid of the legacy "build.yaml"-specific fields one by one. Once that is done,
+# only very little "extra metadata" would be needed and/or it would be trivial
+# to generate it automatically.
+all_extra_metadata = {}
+all_extra_metadata.update(_BUILD_EXTRA_METADATA)
+all_extra_metadata.update(
+    _generate_build_extra_metadata_for_tests(tests, bazel_rules))
+
+# Step 4: Generate the final metadata for all the targets.
+# This is done by combining the bazel build metadata and the "extra metadata"
+# we obtained in the previous step.
+# In this step, we also perform some additional massaging of the target metadata
+# to end up with a result that is as similar to the legacy build.yaml data
+# as possible.
+# - Some targets get renamed (to match the legacy build.yaml target names).
+# - Some intermediate libraries get elided ("expanded") to better match the set
+#   of targets provided by the legacy build.yaml build.
+all_targets_dict = _generate_build_metadata(all_extra_metadata, bazel_rules)
+
+# Step 5: Convert the dictionary with all the targets to a dict that has
+# the desired "build.yaml"-like layout.
+# TODO(jtattermusch): We use the custom "build.yaml"-like layout because
+# currently all other build systems use that format as their source of truth.
+# In the future, we can get rid of this custom & legacy format entirely,
+# but we would need to update the generators for other build systems
+# at the same time.
 build_yaml_like = _convert_to_build_yaml_like(all_targets_dict)
 
-# if a test uses source files from src/ directly, it's a little bit suspicious
-for tgt in build_yaml_like['targets']:
-    if tgt['build'] == 'test':
-        for src in tgt['src']:
-            if src.startswith('src/') and not src.endswith('.proto'):
-                print('source file from under "src/" tree used in test ' +
-                      tgt['name'] + ': ' + src)
+# Detect and report some suspicious situations we've seen before
+# (e.g. tests that use source files directly from under the "src/" tree).
+_detect_and_print_issues(build_yaml_like)
 
+# Step 6: Store the build_autogenerated.yaml in a deterministic (=sorted)
+# and cleaned-up form.
+# TODO(jtattermusch): The "cleanup" function is taken from the legacy
+# build system (which used build.yaml) and can eventually be removed.
 build_yaml_string = build_cleaner.cleaned_build_yaml_dict_as_string(
     build_yaml_like)
 with open('build_autogenerated.yaml', 'w') as file:
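
Note for readers unfamiliar with "bazel query --output xml": the sketch below illustrates the kind of extraction Step 1 performs. It is a simplified, self-contained approximation, not the code from this file; the real _bazel_query_xml_tree and _rule_dict_from_xml_node handle more attributes, and the exact bazel invocation shown here is an assumption for illustration.

# Simplified sketch (illustrative only): query bazel for rule metadata and
# turn the XML output into plain dictionaries, roughly what Step 1 does.
import subprocess
import xml.etree.ElementTree as ET


def query_rules_as_dicts(query):
    """Runs "bazel query --output xml" and returns {rule_name: rule_dict}."""
    xml_output = subprocess.check_output(
        ['bazel', 'query', '--noimplicit_deps', '--output', 'xml', query])
    rules = {}
    for node in ET.fromstring(xml_output):
        if node.tag != 'rule':
            continue  # skip non-rule elements such as <source-file>
        rule = {
            'class': node.attrib.get('class'),  # e.g. "cc_library"
            'name': node.attrib.get('name'),  # e.g. "//:grpc"
        }
        # srcs/hdrs/deps show up as <list name="..."> of <label value="..."/>.
        for child in node:
            if child.tag == 'list' and child.attrib.get('name') in (
                    'srcs', 'hdrs', 'deps'):
                rule[child.attrib['name']] = [
                    label.attrib['value'] for label in child
                ]
        rules[rule['name']] = rule
    return rules


if __name__ == '__main__':
    for name, rule in sorted(query_rules_as_dicts('deps("//:gpr")').items()):
        print(name, rule['class'])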
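
Step 1a calls _populate_transitive_deps, whose body isn't shown in this diff. For intuition, here is one way such a transitive closure can be computed over the collected rule dicts; the 'deps'/'transitive_deps' key names and the recursive approach are assumptions for illustration, not necessarily what the function actually does.

# Hypothetical sketch of computing a transitive dependency closure over the
# rule dictionaries (the real _populate_transitive_deps may differ).
def populate_transitive_deps(bazel_rules):
    cache = {}

    def transitive_deps(rule_name):
        if rule_name not in cache:
            cache[rule_name] = set()  # guards against infinite recursion
            for dep in bazel_rules.get(rule_name, {}).get('deps', []):
                cache[rule_name].add(dep)
                cache[rule_name].update(transitive_deps(dep))
        return cache[rule_name]

    for rule_name, rule in bazel_rules.items():
        rule['transitive_deps'] = sorted(transitive_deps(rule_name))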
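
To make Step 5 concrete: the only structure of the "build.yaml"-like dict that this diff itself confirms is the 'targets' list with 'name'/'build'/'src' entries (see _detect_and_print_issues); the remaining fields below are assumed, illustrative placeholders.

# Assumed shape of the "build.yaml"-like dict produced by
# _convert_to_build_yaml_like (only the 'targets' fields are confirmed
# by this diff; the rest is illustrative).
build_yaml_like = {
    'libs': [],  # assumed: library definitions in build.yaml style
    'targets': [
        {
            'name': 'some_cc_test',  # hypothetical target
            'build': 'test',
            'src': ['test/core/some/some_test.cc'],
        },
    ],
}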