#!/usr/bin/env python3
# Copyright 2020 The gRPC Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Script to extract build metadata from bazel BUILD.
# To avoid having two sources of truth for the build metadata (build
# targets, source files, header files etc.), this script analyzes the contents
# of bazel BUILD files and generates a YAML file (currently called
# build_autogenerated.yaml). The format and semantics of the generated YAML
# file are chosen to match the format of a "build.yaml" file, which used
# to be the source of truth for the gRPC build before bazel became
# the primary build system.
# A good basic overview of the "build.yaml" format is available here:
# https://github.com/grpc/grpc/blob/master/templates/README.md. Note that
# while useful as an overview, the doc does not act as a formal spec
# (a formal spec does not exist, in fact) and the doc can be incomplete,
# inaccurate or slightly out of date.
# TODO(jtattermusch): In the future we want to get rid of the legacy build.yaml
# format entirely or simplify it to a point where it becomes self-explanatory
# and doesn't need any detailed documentation.

import collections
import os
import subprocess
from typing import Any, Dict, Iterable, List, Optional
import xml.etree.ElementTree as ET

import build_cleaner

# Type aliases used in annotations throughout this script.
BuildMetadata = Dict[str, Any]
BuildDict = Dict[str, BuildMetadata]
BuildYaml = Dict[str, Any]


class ExternalProtoLibrary:
    """ExternalProtoLibrary is the struct about an external proto library.

    Fields:
    - destination(str): The relative path where this proto library should be.
        Preferably, it should match the submodule path.
    - proto_prefix(str): The prefix to remove in order to ensure the proto
        import is correct. For more info, see description of
        https://github.com/grpc/grpc/pull/25272.
    - urls(List[str]): Following 3 fields should be filled by build metadata
        from Bazel.
    - hash(str): The hash of the downloaded archive
    - strip_prefix(str): The path to be stripped from the extracted directory,
        see http_archive in Bazel.
    """

    def __init__(
        self,
        destination: str,
        proto_prefix: str,
        urls: Optional[List[str]] = None,
        hash: str = "",
        strip_prefix: str = "",
    ):
        self.destination = destination
        self.proto_prefix = proto_prefix
        # A None default avoids sharing one mutable list between instances.
        self.urls = [] if urls is None else urls
        self.hash = hash
        self.strip_prefix = strip_prefix


# Keyed by repository name (without the leading "@").
EXTERNAL_PROTO_LIBRARIES = {
    "envoy_api": ExternalProtoLibrary(
        destination="third_party/envoy-api",
        proto_prefix="third_party/envoy-api/",
    ),
    "com_google_googleapis": ExternalProtoLibrary(
        destination="third_party/googleapis",
        proto_prefix="third_party/googleapis/",
    ),
    "com_github_cncf_udpa": ExternalProtoLibrary(
        destination="third_party/xds", proto_prefix="third_party/xds/"
    ),
    "opencensus_proto": ExternalProtoLibrary(
        destination="third_party/opencensus-proto/src",
        proto_prefix="third_party/opencensus-proto/src/",
    ),
}

# We want to get a list of source files for some external libraries
# to be able to include them in a non-bazel (e.g. make/cmake) build.
# For that we need mapping from external repo name to a corresponding
# path to a git submodule.
EXTERNAL_SOURCE_PREFIXES = {
    "@utf8_range": "third_party/utf8_range",
    "@com_googlesource_code_re2": "third_party/re2",
    "@com_google_googletest": "third_party/googletest",
    "@com_google_protobuf": "third_party/upb",
    "@zlib": "third_party/zlib",
}


def _bazel_query_xml_tree(query: str) -> ET.Element:
    """Get xml output of bazel query invocation, parsed as XML tree.

    Args:
        query: the bazel query expression (passed as a single argument).

    Returns:
        The root element of the parsed "bazel query --output xml" output.

    Raises:
        subprocess.CalledProcessError: if bazel exits with non-zero status.
    """
    output = subprocess.check_output(
        ["tools/bazel", "query", "--noimplicit_deps", "--output", "xml", query]
    )
    return ET.fromstring(output)


def _rule_dict_from_xml_node(rule_xml_node):
    """Converts XML node representing a rule (obtained from
    "bazel query --output xml") to a dictionary that contains all the metadata
    we will need.

    List attributes default to [], string attributes to None and "flaky" to
    False when the rule node does not provide them.
    """
    list_attrs = (
        "srcs",
        "hdrs",
        "textual_hdrs",
        "deps",
        "data",
        "tags",
        "args",
    )
    string_attrs = ("generator_function", "size")
    result = {
        "class": rule_xml_node.attrib.get("class"),
        "name": rule_xml_node.attrib.get("name"),
        "srcs": [],
        "hdrs": [],
        "textual_hdrs": [],
        "deps": [],
        "data": [],
        "tags": [],
        "args": [],
        "generator_function": None,
        "size": None,
        "flaky": False,
        "actual": None,  # the real target name for aliases
    }
    for child in rule_xml_node:
        tag = child.tag
        if tag == "list":
            # most of the metadata we want is stored under "list" tags
            list_name = child.attrib["name"]
            if list_name in list_attrs:
                result[list_name] += [item.attrib["value"] for item in child]
        elif tag == "string":
            string_name = child.attrib["name"]
            if string_name in string_attrs:
                result[string_name] = child.attrib["value"]
        elif tag == "boolean":
            if child.attrib["name"] == "flaky":
                result["flaky"] = child.attrib["value"] == "true"
        elif tag == "label":
            # extract actual name for alias and bind rules
            if child.attrib["name"] == "actual":
                actual_name = child.attrib.get("value", None)
                if actual_name:
                    result["actual"] = actual_name
                    # HACK: since we do a lot of transitive dependency
                    # scanning, make it seem that the actual name is a
                    # dependency of the alias or bind rule (aliases don't
                    # have dependencies themselves)
                    result["deps"].append(actual_name)
    return result


def _extract_rules_from_bazel_xml(xml_tree):
    """Extract bazel rules from an XML tree node obtained from
    "bazel query --output xml" command.

    Only rules of the classes listed below are kept. The result maps the rule
    name to its rule dictionary.

    Raises:
        Exception: if the same rule name is encountered twice.
    """
    wanted_rule_classes = (
        "cc_library",
        "cc_binary",
        "cc_test",
        "cc_proto_library",
        "cc_proto_gen_validate",
        "proto_library",
        "upb_c_proto_library",
        "upb_proto_reflection_library",
        "alias",
        "bind",
    )
    result = {}
    for child in xml_tree:
        if child.tag != "rule":
            continue
        rule_dict = _rule_dict_from_xml_node(child)
        if rule_dict["class"] not in wanted_rule_classes:
            continue
        rule_name = rule_dict["name"]
        if rule_name in result:
            raise Exception("Rule %s already present" % rule_name)
        result[rule_name] = rule_dict
    return result


def _get_bazel_label(target_name: str) -> str:
    """Converts a target name used by this script into a bazel label.

    Names starting with "@" are returned unchanged, "pkg:name" becomes
    "//pkg:name" and a bare "name" becomes "//:name".
    """
    if target_name.startswith("@"):
        return target_name
    if ":" in target_name:
        return "//%s" % target_name
    return "//:%s" % target_name


def _try_extract_source_file_path(label: str) -> Optional[str]:
    """Gets relative path to source file from bazel deps listing.

    Returns None for labels from an external repository that is listed
    neither in EXTERNAL_SOURCE_PREFIXES nor in EXTERNAL_PROTO_LIBRARIES.
    """
    if label.startswith("@"):
        # This is an external source file. We are only interested in sources
        # for some of the external libraries.
        for lib_name, prefix in EXTERNAL_SOURCE_PREFIXES.items():
            if label.startswith(lib_name + "//"):
                return (
                    label.replace("%s//" % lib_name, prefix + "/")
                    .replace(":", "/")
                    .replace("//", "/")
                )

        # This source file is external, and we need to translate the
        # @REPO_NAME to a valid path prefix. At this stage, we need
        # to check repo name, since the label/path mapping is not
        # available in BUILD files.
        for lib_name, external_proto_lib in EXTERNAL_PROTO_LIBRARIES.items():
            if label.startswith("@" + lib_name + "//"):
                return label.replace(
                    "@%s//" % lib_name,
                    external_proto_lib.proto_prefix,
                ).replace(":", "/")

        # No external library match found
        return None

    if label.startswith("//"):
        label = label[len("//") :]
    # labels in form //:src/core/lib/surface/call_test_only.h
    if label.startswith(":"):
        label = label[len(":") :]
    # labels in form //test/core/util:port.cc
    return label.replace(":", "/")


def _has_header_suffix(label: str) -> bool:
    """Returns True if the label has a suffix that looks like a C/C++ include
    file (.h, .hpp or .inc)."""
    return label.endswith((".h", ".hpp", ".inc"))


def _extract_public_headers(bazel_rule: BuildMetadata) -> List[str]:
    """Gets sorted list of public headers from a bazel rule.

    Public headers are the "hdrs" entries under "//:include/".
    """
    result = []
    for dep in bazel_rule["hdrs"]:
        if dep.startswith("//:include/") and _has_header_suffix(dep):
            source_file_maybe = _try_extract_source_file_path(dep)
            if source_file_maybe:
                result.append(source_file_maybe)
    return sorted(result)


def _extract_nonpublic_headers(bazel_rule: BuildMetadata) -> List[str]:
    """Gets sorted list of non-public headers from a bazel rule.

    Looks at "hdrs", "textual_hdrs" and "srcs" and keeps the header-like
    entries that are not under "//:include/".
    """
    result = []
    candidates = (
        bazel_rule["hdrs"] + bazel_rule["textual_hdrs"] + bazel_rule["srcs"]
    )
    for dep in candidates:
        if not dep.startswith("//:include/") and _has_header_suffix(dep):
            source_file_maybe = _try_extract_source_file_path(dep)
            if source_file_maybe:
                result.append(source_file_maybe)
    return sorted(result)


def _extract_sources(bazel_rule: BuildMetadata) -> List[str]:
    """Gets sorted list of source files (.cc, .c, .proto) from a bazel rule."""
    result = []
    for src in bazel_rule["srcs"]:
        # Skip .proto files from the protobuf repo
        if src.startswith("@com_google_protobuf//") and src.endswith(".proto"):
            continue
        if src.endswith((".cc", ".c", ".proto")):
            source_file_maybe = _try_extract_source_file_path(src)
            if source_file_maybe:
                result.append(source_file_maybe)
    return sorted(result)


def _extract_deps(
    bazel_rule: BuildMetadata, bazel_rules: BuildDict
) -> List[str]:
    """Gets sorted, de-duplicated list of deps from a bazel rule.

    Besides the "deps" attribute, this also includes every "srcs" entry that
    is not a .cc/.c/.proto file and names another rule in `bazel_rules`.
    """
    deps = set(bazel_rule["deps"])
    for src in bazel_rule["srcs"]:
        if not src.endswith((".cc", ".c", ".proto")) and src in bazel_rules:
            # This label doesn't point to a source file, but another Bazel
            # target. This is required for :pkg_cc_proto_validate targets,
            # and it's generally allowed by Bazel.
            deps.add(src)
    return sorted(deps)


def _create_target_from_bazel_rule(
    target_name: str, bazel_rules: BuildDict
) -> BuildMetadata:
    """Create build.yaml-like target definition from bazel metadata.

    The rule must already carry the _COLLAPSED_* fields written by
    _compute_transitive_metadata(), otherwise a KeyError is raised.
    """
    bazel_rule = bazel_rules[_get_bazel_label(target_name)]

    # Create a template for our target from the bazel rule. Initially we only
    # populate some "private" fields with the original info we got from bazel
    # and only later we will populate the public fields (once we do some extra
    # postprocessing).
    return {
        "name": target_name,
        "_PUBLIC_HEADERS_BAZEL": _extract_public_headers(bazel_rule),
        "_HEADERS_BAZEL": _extract_nonpublic_headers(bazel_rule),
        "_SRC_BAZEL": _extract_sources(bazel_rule),
        "_DEPS_BAZEL": _extract_deps(bazel_rule, bazel_rules),
        "public_headers": bazel_rule["_COLLAPSED_PUBLIC_HEADERS"],
        "headers": bazel_rule["_COLLAPSED_HEADERS"],
        "src": bazel_rule["_COLLAPSED_SRCS"],
        "deps": bazel_rule["_COLLAPSED_DEPS"],
    }


def _external_dep_name_from_bazel_dependency(bazel_dep: str) -> Optional[str]:
    """Returns name of dependency if external bazel dependency is provided or
    None"""
    if bazel_dep.startswith("@com_google_absl//"):
        # special case for add dependency on one of the absl libraries (there
        # is not just one absl library)
        prefixlen = len("@com_google_absl//")
        return bazel_dep[prefixlen:]
    if bazel_dep == "@com_github_google_benchmark//:benchmark":
        return "benchmark"
    if bazel_dep == "@boringssl//:ssl":
        return "libssl"
    if bazel_dep == "@com_github_cares_cares//:ares":
        return "cares"
    if bazel_dep in (
        "@com_google_protobuf//:protobuf",
        "@com_google_protobuf//:protobuf_headers",
    ):
        return "protobuf"
    if bazel_dep == "@com_google_protobuf//:protoc_lib":
        return "protoc"
    # Two options here:
    # * either this is not external dependency at all (which is fine, we will
    #   treat it as internal library)
    # * this is external dependency, but we don't want to make the dependency
    #   explicit in the build metadata for other build systems.
    return None


def _compute_transitive_metadata(
    rule_name: str, bazel_rules: Any, bazel_label_to_dep_name: Dict[str, str]
) -> None:
    """Computes the final build metadata for Bazel target with rule_name.

    The dependencies that will appear on the deps list are:

    * Public build targets including binaries and tests;
    * External targets, like absl, re2.

    All other intermediate dependencies will be merged, which means their
    source file, headers, etc. will be collected into one build target. This
    step of processing will greatly reduce the complexity of the generated
    build specifications for other build systems, like CMake, Make, setuptools.

    The final build metadata are:
    * _TRANSITIVE_DEPS: all the transitive dependencies including intermediate
                        targets;
    * _COLLAPSED_DEPS:  dependencies that fits our requirement above, and it
                        will remove duplicated items and produce the shortest
                        possible dependency list in alphabetical order;
    * _COLLAPSED_SRCS:  the merged source files;
    * _COLLAPSED_PUBLIC_HEADERS: the merged public headers;
    * _COLLAPSED_HEADERS: the merged non-public headers;
    * _EXCLUDE_DEPS: intermediate targets to exclude when performing collapsing
      of sources and dependencies.

    For the collapsed_deps, the algorithm improved cases like:

    The result in the past:
        end2end_tests -> [grpc_test_util, grpc, gpr, address_sorting, upb]
        grpc_test_util -> [grpc, gpr, address_sorting, upb, ...]
        grpc -> [gpr, address_sorting, upb, ...]

    The result of the algorithm:
        end2end_tests -> [grpc_test_util]
        grpc_test_util -> [grpc]
        grpc -> [gpr, address_sorting, upb, ...]

    The results are stored in place on bazel_rules[rule_name]; nothing is
    returned. Dependencies that were not processed yet are processed
    recursively.
    """
    bazel_rule = bazel_rules[rule_name]
    direct_deps = _extract_deps(bazel_rule, bazel_rules)
    transitive_deps = set()
    collapsed_deps = set()
    exclude_deps = set()
    collapsed_srcs = set(_extract_sources(bazel_rule))
    collapsed_public_headers = set(_extract_public_headers(bazel_rule))
    collapsed_headers = set(_extract_nonpublic_headers(bazel_rule))

    for dep in direct_deps:
        external_dep_name_maybe = _external_dep_name_from_bazel_dependency(dep)

        if dep in bazel_rules:
            # Descend recursively, but no need to do that for external deps
            if external_dep_name_maybe is None:
                dep_rule = bazel_rules[dep]
                if "_PROCESSING_DONE" not in dep_rule:
                    # This item is not processed before, compute now
                    _compute_transitive_metadata(
                        dep, bazel_rules, bazel_label_to_dep_name
                    )
                transitive_deps.update(dep_rule.get("_TRANSITIVE_DEPS", []))
                collapsed_deps.update(dep_rule.get("_COLLAPSED_DEPS", []))
                exclude_deps.update(dep_rule.get("_EXCLUDE_DEPS", []))

        # This dep is a public target, add it as a dependency
        if dep in bazel_label_to_dep_name:
            public_dep_name = bazel_label_to_dep_name[dep]
            transitive_deps.add(public_dep_name)
            collapsed_deps.add(public_dep_name)
            # Add all the transitive deps of our every public dep to exclude
            # list since we want to avoid building sources that are already
            # built by our dependencies
            exclude_deps.update(bazel_rules[dep]["_TRANSITIVE_DEPS"])
            continue

        # This dep is an external target, add it as a dependency
        if external_dep_name_maybe is not None:
            transitive_deps.add(external_dep_name_maybe)
            collapsed_deps.add(external_dep_name_maybe)
            continue

    # Direct dependencies are part of transitive dependencies
    transitive_deps.update(direct_deps)

    # Calculate transitive public deps (needed for collapsing sources)
    transitive_public_deps = {
        x for x in transitive_deps if x in bazel_label_to_dep_name
    }

    # Remove intermediate targets that our public dependencies already depend
    # on. This is the step that further shorten the deps list.
    collapsed_deps = {x for x in collapsed_deps if x not in exclude_deps}

    # Compute the final source files and headers for this build target whose
    # name is `rule_name` (input argument of this function).
    #
    # Imagine a public target PX has transitive deps [IA, IB, PY, IC, PZ]. PX,
    # PY and PZ are public build targets. And IA, IB, IC are intermediate
    # targets. In addition, PY depends on IC.
    #
    # Translate the condition into dependency graph:
    #   PX -> [IA, IB, PY, IC, PZ]
    #   PY -> [IC]
    #   Public targets: [PX, PY, PZ]
    #
    # The collapsed dependencies of PX: [PY, PZ].
    # The excluded dependencies of PX: [PY, IC, PZ].
    # (IC is excluded as a dependency of PX. It is already included in PY, hence
    # it would be redundant to include it again.)
    #
    # Target PX should include source files and headers of [PX, IA, IB] as final
    # build metadata.
    for dep in transitive_deps:
        if dep in exclude_deps or dep in transitive_public_deps:
            continue
        if dep in bazel_rules:
            dep_rule = bazel_rules[dep]
            collapsed_srcs.update(_extract_sources(dep_rule))
            collapsed_public_headers.update(_extract_public_headers(dep_rule))
            collapsed_headers.update(_extract_nonpublic_headers(dep_rule))

    # This item is a "visited" flag
    bazel_rule["_PROCESSING_DONE"] = True
    # Following items are described in the docstring.
    bazel_rule["_TRANSITIVE_DEPS"] = sorted(transitive_deps)
    bazel_rule["_COLLAPSED_DEPS"] = sorted(collapsed_deps)
    bazel_rule["_COLLAPSED_SRCS"] = sorted(collapsed_srcs)
    bazel_rule["_COLLAPSED_PUBLIC_HEADERS"] = sorted(collapsed_public_headers)
    bazel_rule["_COLLAPSED_HEADERS"] = sorted(collapsed_headers)
    bazel_rule["_EXCLUDE_DEPS"] = sorted(exclude_deps)


# TODO(jtattermusch): deduplicate with transitive_dependencies.py (which has a
# slightly different logic)
# TODO(jtattermusch): This is done to avoid introducing too many intermediate
# libraries into the build.yaml-based builds (which might in turn cause issues
# building language-specific artifacts) and also because the libraries in
# build.yaml-based build are generally considered units of distributions (=
# public libraries that are visible to the user and are installable), while in
# bazel builds it is customary to define larger number of smaller
# "sublibraries". The need for elision (and expansion) of intermediate libraries
# can be re-evaluated in the future.
def _populate_transitive_metadata( bazel_rules: Any, public_dep_names: Iterable[str] ) -> None: """Add 'transitive_deps' field for each of the rules""" # Create the map between Bazel label and public dependency name bazel_label_to_dep_name = {} for dep_name in public_dep_names: bazel_label_to_dep_name[_get_bazel_label(dep_name)] = dep_name # Make sure we reached all the Bazel rules # TODO(lidiz) potentially we could only update a subset of rules for rule_name in bazel_rules: if "_PROCESSING_DONE" not in bazel_rules[rule_name]: _compute_transitive_metadata( rule_name, bazel_rules, bazel_label_to_dep_name ) def update_test_metadata_with_transitive_metadata( all_extra_metadata: BuildDict, bazel_rules: BuildDict ) -> None: """Patches test build metadata with transitive metadata.""" for lib_name, lib_dict in list(all_extra_metadata.items()): # Skip if it isn't not an test if lib_dict.get("build") != "test" or lib_dict.get("_TYPE") != "target": continue bazel_rule = bazel_rules[_get_bazel_label(lib_name)] if "//external:benchmark" in bazel_rule["_TRANSITIVE_DEPS"]: lib_dict["benchmark"] = True lib_dict["defaults"] = "benchmark" if "//external:gtest" in bazel_rule["_TRANSITIVE_DEPS"]: # run_tests.py checks the "gtest" property to see if test should be run via gtest. lib_dict["gtest"] = True # TODO: this might be incorrect categorization of the test... lib_dict["language"] = "c++" def _get_transitive_protos(bazel_rules, t): que = [ t, ] visited = set() ret = [] while que: name = que.pop(0) rule = bazel_rules.get(name, None) if rule: for dep in rule["deps"]: if dep not in visited: visited.add(dep) que.append(dep) for src in rule["srcs"]: if src.endswith(".proto"): ret.append(src) return list(set(ret)) def _expand_upb_proto_library_rules(bazel_rules): # Expand the .proto files from UPB proto library rules into the pre-generated # upb files. 
GEN_UPB_ROOT = "//:src/core/ext/upb-gen/" GEN_UPBDEFS_ROOT = "//:src/core/ext/upbdefs-gen/" EXTERNAL_LINKS = [ ("@com_google_protobuf//", "src/"), ("@com_google_googleapis//", ""), ("@com_github_cncf_udpa//", ""), ("@com_envoyproxy_protoc_gen_validate//", ""), ("@envoy_api//", ""), ("@opencensus_proto//", ""), ] for name, bazel_rule in bazel_rules.items(): gen_func = bazel_rule.get("generator_function", None) if gen_func in ( "grpc_upb_proto_library", "grpc_upb_proto_reflection_library", ): # get proto dependency deps = bazel_rule["deps"] if len(deps) != 1: raise Exception( 'upb rule "{0}" should have 1 proto dependency but has' ' "{1}"'.format(name, deps) ) # deps is not properly fetched from bazel query for upb_c_proto_library target # so add the upb dependency manually bazel_rule["deps"] = [ "//external:upb_lib", "//external:upb_lib_descriptor", "//external:upb_generated_code_support__only_for_generated_code_do_not_use__i_give_permission_to_break_me", ] # populate the upb_c_proto_library rule with pre-generated upb headers # and sources using proto_rule protos = _get_transitive_protos(bazel_rules, deps[0]) if len(protos) == 0: raise Exception( 'upb rule "{0}" should have at least one proto file.'.format( name ) ) srcs = [] hdrs = [] for proto_src in protos: for external_link in EXTERNAL_LINKS: if proto_src.startswith(external_link[0]): prefix_to_strip = external_link[0] + external_link[1] if not proto_src.startswith(prefix_to_strip): raise Exception( 'Source file "{0}" in upb rule {1} does not' ' have the expected prefix "{2}"'.format( proto_src, name, prefix_to_strip ) ) proto_src = proto_src[len(prefix_to_strip) :] break if proto_src.startswith("@"): raise Exception('"{0}" is unknown workspace.'.format(name)) proto_src_file = _try_extract_source_file_path(proto_src) if not proto_src_file: raise Exception( 'Failed to get source file for "{0}" in upb rule "{1}".'.format( proto_src, name ) ) extensions = ( # There is no longer a .upb.c extension. 
[".upb.h", ".upb_minitable.h", ".upb_minitable.c"] if gen_func == "grpc_upb_proto_library" else [".upbdefs.h", ".upbdefs.c"] ) root = ( GEN_UPB_ROOT if gen_func == "grpc_upb_proto_library" else GEN_UPBDEFS_ROOT ) for ext in extensions: srcs.append(root + proto_src_file.replace(".proto", ext)) hdrs.append(root + proto_src_file.replace(".proto", ext)) bazel_rule["srcs"] = srcs bazel_rule["hdrs"] = hdrs def _patch_grpc_proto_library_rules(bazel_rules): for name, bazel_rule in bazel_rules.items(): contains_proto = any( src.endswith(".proto") for src in bazel_rule.get("srcs", []) ) generator_func = bazel_rule.get("generator_function", None) if ( name.startswith("//") and contains_proto and generator_func == "grpc_proto_library" ): # Add explicit protobuf dependency for internal c++ proto targets. bazel_rule["deps"].append("//external:protobuf") def _patch_descriptor_upb_proto_library(bazel_rules): # The upb's descriptor_upb_proto library doesn't reference the generated descriptor.proto # sources explicitly, so we add them manually. bazel_rule = bazel_rules.get( "@com_google_protobuf//upb:descriptor_upb_proto", None ) if bazel_rule: bazel_rule["srcs"].append( ":src/core/ext/upb-gen/google/protobuf/descriptor.upb_minitable.c" ) bazel_rule["hdrs"].append( ":src/core/ext/upb-gen/google/protobuf/descriptor.upb.h" ) def _generate_build_metadata( build_extra_metadata: BuildDict, bazel_rules: BuildDict ) -> BuildDict: """Generate build metadata in build.yaml-like format bazel build metadata and build.yaml-specific "extra metadata".""" lib_names = list(build_extra_metadata.keys()) result = {} for lib_name in lib_names: lib_dict = _create_target_from_bazel_rule(lib_name, bazel_rules) # populate extra properties from the build.yaml-specific "extra metadata" lib_dict.update(build_extra_metadata.get(lib_name, {})) # store to results result[lib_name] = lib_dict # Rename targets marked with "_RENAME" extra metadata. 
# This is mostly a cosmetic change to ensure that we end up with build.yaml target # names we're used to from the past (and also to avoid too long target names). # The rename step needs to be made after we're done with most of processing logic # otherwise the already-renamed libraries will have different names than expected for lib_name in lib_names: to_name = build_extra_metadata.get(lib_name, {}).get("_RENAME", None) if to_name: # store lib under the new name and also change its 'name' property if to_name in result: raise Exception( "Cannot rename target " + str(lib_name) + ", " + str(to_name) + " already exists." ) lib_dict = result.pop(lib_name) lib_dict["name"] = to_name result[to_name] = lib_dict # dep names need to be updated as well for lib_dict_to_update in list(result.values()): lib_dict_to_update["deps"] = list( [ to_name if dep == lib_name else dep for dep in lib_dict_to_update["deps"] ] ) return result def _convert_to_build_yaml_like(lib_dict: BuildMetadata) -> BuildYaml: lib_names = [ lib_name for lib_name in list(lib_dict.keys()) if lib_dict[lib_name].get("_TYPE", "library") == "library" ] target_names = [ lib_name for lib_name in list(lib_dict.keys()) if lib_dict[lib_name].get("_TYPE", "library") == "target" ] test_names = [ lib_name for lib_name in list(lib_dict.keys()) if lib_dict[lib_name].get("_TYPE", "library") == "test" ] # list libraries and targets in predefined order lib_list = [lib_dict[lib_name] for lib_name in lib_names] target_list = [lib_dict[lib_name] for lib_name in target_names] test_list = [lib_dict[lib_name] for lib_name in test_names] # get rid of temporary private fields prefixed with "_" and some other useless fields for lib in lib_list: for field_to_remove in [ k for k in list(lib.keys()) if k.startswith("_") ]: lib.pop(field_to_remove, None) for target in target_list: for field_to_remove in [ k for k in list(target.keys()) if k.startswith("_") ]: target.pop(field_to_remove, None) target.pop( "public_headers", None ) # public 
headers make no sense for targets for test in test_list: for field_to_remove in [ k for k in list(test.keys()) if k.startswith("_") ]: test.pop(field_to_remove, None) test.pop( "public_headers", None ) # public headers make no sense for tests build_yaml_like = { "libs": lib_list, "filegroups": [], "targets": target_list, "tests": test_list, } return build_yaml_like def _extract_cc_tests(bazel_rules: BuildDict) -> List[str]: """Gets list of cc_test tests from bazel rules""" result = [] for bazel_rule in list(bazel_rules.values()): if bazel_rule["class"] == "cc_test": test_name = bazel_rule["name"] if test_name.startswith("//"): prefixlen = len("//") result.append(test_name[prefixlen:]) return list(sorted(result)) def _exclude_unwanted_cc_tests(tests: List[str]) -> List[str]: """Filters out bazel tests that we don't want to run with other build systems or we cannot build them reasonably""" # most qps tests are autogenerated, we are fine without them tests = [test for test in tests if not test.startswith("test/cpp/qps:")] # microbenchmarks aren't needed for checking correctness tests = [ test for test in tests if not test.startswith("test/cpp/microbenchmarks:") ] tests = [ test for test in tests if not test.startswith("test/core/promise/benchmark:") ] # we have trouble with census dependency outside of bazel tests = [ test for test in tests if not test.startswith("test/cpp/ext/filters/census:") and not test.startswith("test/core/xds:xds_channel_stack_modifier_test") and not test.startswith("test/cpp/ext/gcp:") and not test.startswith("test/cpp/ext/filters/logging:") and not test.startswith("test/cpp/interop:observability_interop") ] # we have not added otel dependency outside of bazel tests = [ test for test in tests if not test.startswith("test/cpp/ext/otel:") and not test.startswith("test/cpp/ext/csm:") and not test.startswith("test/cpp/interop:xds_interop") ] # missing opencensus/stats/stats.h tests = [ test for test in tests if not test.startswith( 
"test/cpp/end2end:server_load_reporting_end2end_test" ) ] tests = [ test for test in tests if not test.startswith( "test/cpp/server/load_reporter:lb_load_reporter_test" ) ] # The test uses --running_under_bazel cmdline argument # To avoid the trouble needing to adjust it, we just skip the test tests = [ test for test in tests if not test.startswith( "test/cpp/naming:resolver_component_tests_runner_invoker" ) ] # the test requires 'client_crash_test_server' to be built tests = [ test for test in tests if not test.startswith("test/cpp/end2end:time_change_test") ] # the test requires 'client_crash_test_server' to be built tests = [ test for test in tests if not test.startswith("test/cpp/end2end:client_crash_test") ] # the test requires 'server_crash_test_client' to be built tests = [ test for test in tests if not test.startswith("test/cpp/end2end:server_crash_test") ] # test never existed under build.yaml and it fails -> skip it tests = [ test for test in tests if not test.startswith("test/core/tsi:ssl_session_cache_test") ] # the binary of this test does not get built with cmake tests = [ test for test in tests if not test.startswith("test/cpp/util:channelz_sampler_test") ] # we don't need to generate fuzzers outside of bazel tests = [test for test in tests if not test.endswith("_fuzzer")] return tests def _generate_build_extra_metadata_for_tests( tests: List[str], bazel_rules: BuildDict ) -> BuildDict: """For given tests, generate the "extra metadata" that we need for our "build.yaml"-like output. 
The extra metadata is generated from the bazel rule metadata by using a bunch of heuristics.""" test_metadata = {} for test in tests: test_dict = {"build": "test", "_TYPE": "target"} bazel_rule = bazel_rules[_get_bazel_label(test)] bazel_tags = bazel_rule["tags"] if "manual" in bazel_tags: # don't run the tests marked as "manual" test_dict["run"] = False if bazel_rule["flaky"]: # don't run tests that are marked as "flaky" under bazel # because that would only add noise for the run_tests.py tests # and seeing more failures for tests that we already know are flaky # doesn't really help anything test_dict["run"] = False if "no_uses_polling" in bazel_tags: test_dict["uses_polling"] = False if "grpc_fuzzer" == bazel_rule["generator_function"]: # currently we hand-list fuzzers instead of generating them automatically # because there's no way to obtain maxlen property from bazel BUILD file. print(("skipping fuzzer " + test)) continue if "bazel_only" in bazel_tags: continue # if any tags that restrict platform compatibility are present, # generate the "platforms" field accordingly # TODO(jtattermusch): there is also a "no_linux" tag, but we cannot take # it into account as it is applied by grpc_cc_test when poller expansion # is made (for tests where uses_polling=True). So for now, we just # assume all tests are compatible with linux and ignore the "no_linux" tag # completely. 
def _parse_http_archives(xml_tree: ET.Element) -> "List[ExternalProtoLibrary]":
    """Parse Bazel http_archive rules into ExternalProtoLibrary objects.

    Only direct children of ``xml_tree`` with tag "rule" and class
    "http_archive" are inspected, and only those whose "name" attribute is a
    key of EXTERNAL_PROTO_LIBRARIES produce a result.

    Args:
        xml_tree: Element whose children are scanned (the caller in this file
            passes the result of a "kind(http_archive, //external:*)" query).

    Returns:
        The matching EXTERNAL_PROTO_LIBRARIES entries, in the order they
        appear in the tree. The registry objects themselves are updated in
        place (urls, hash, strip_prefix); they are not copied.
    """
    result = []
    for xml_http_archive in xml_tree:
        if (
            xml_http_archive.tag != "rule"
            or xml_http_archive.attrib.get("class") != "http_archive"
        ):
            continue
        # A distilled Python representation of Bazel http_archive
        http_archive = {}
        for xml_node in xml_http_archive:
            attr_name = xml_node.attrib.get("name")
            if attr_name == "name":
                http_archive["name"] = xml_node.attrib["value"]
            elif attr_name == "urls":
                http_archive["urls"] = [
                    url_node.attrib["value"] for url_node in xml_node
                ]
            elif attr_name == "url":
                http_archive["urls"] = [xml_node.attrib["value"]]
            elif attr_name == "sha256":
                http_archive["hash"] = xml_node.attrib["value"]
            elif attr_name == "strip_prefix":
                http_archive["strip_prefix"] = xml_node.attrib["value"]
        archive_name = http_archive.get("name")
        if archive_name not in EXTERNAL_PROTO_LIBRARIES:
            # If this http archive is not one of the external proto libraries,
            # we don't want to include it as a CMake target
            continue
        lib = EXTERNAL_PROTO_LIBRARIES[archive_name]
        # "sha256" and "strip_prefix" (and, in malformed rules, the URLs) may
        # be absent; fall back to the same defaults ExternalProtoLibrary uses
        # instead of failing with a KeyError.
        lib.urls = http_archive.get("urls", [])
        lib.hash = http_archive.get("hash", "")
        lib.strip_prefix = http_archive.get("strip_prefix", "")
        result.append(lib)
    return result


def _generate_external_proto_libraries() -> List[Dict[str, Any]]:
    """Generate the build metadata for external proto libraries.

    Runs the "kind(http_archive, //external:*)" bazel query, parses the
    result with _parse_http_archives and orders the libraries by their
    ``destination``.

    Returns:
        One dict per library, holding that library's instance attributes.
        Each dict is the object's own ``__dict__`` (not a copy).
    """
    xml_tree = _bazel_query_xml_tree("kind(http_archive, //external:*)")
    libraries = _parse_http_archives(xml_tree)
    libraries.sort(key=lambda lib: lib.destination)
    return [vars(lib) for lib in libraries]


def _detect_and_print_issues(build_yaml_like: BuildYaml) -> None:
    """Try detecting some unusual situations and warn about them.

    Currently a single check: for every entry of build_yaml_like["targets"]
    whose "build" is "test", print a warning for each source file that lives
    under "src/" and is not a .proto file. Nothing is modified or raised.
    """
    for tgt in build_yaml_like["targets"]:
        if tgt["build"] != "test":
            continue
        for src in tgt["src"]:
            if src.startswith("src/") and not src.endswith(".proto"):
                print(
                    'source file from under "src/" tree used in test %s: %s'
                    % (tgt["name"], src)
                )


# extra metadata that will be used to construct build.yaml
# these are mostly extra properties that we weren't able to obtain from the bazel build
# _TYPE: whether this is library, target or test
# _RENAME: whether this target should be renamed to a different name (to match expectations of make and cmake builds)
_BUILD_EXTRA_METADATA = {
    "third_party/address_sorting:address_sorting": {
        "language": "c",
        "build": "all",
        "_RENAME": "address_sorting",
    },
    "@com_google_protobuf//upb:upb": {
        "language": "c",
        "build": "all",
        "_RENAME": "upb",
    },
    "@com_google_protobuf//upb/collections:collections": {
        "language": "c",
        "build": "all",
        "_RENAME": "upb_collections_lib",
    },
    "@com_google_protobuf//upb/json:json": {
        "language": "c",
        "build": "all",
        "_RENAME": "upb_json_lib",
    },
    "@com_google_protobuf//upb/text:text": {
        "language": "c",
        "build": "all",
        "_RENAME": "upb_textformat_lib",
    },
    "@utf8_range//:utf8_range": {
        "language": "c",
        "build": "all",
        # rename to utf8_range_lib is necessary for now to avoid clash with utf8_range target in protobuf's cmake
        "_RENAME": "utf8_range_lib",
    },
    "@com_googlesource_code_re2//:re2": {
        "language": "c",
        "build": "all",
        "_RENAME": "re2",
    },
    "@com_google_googletest//:gtest": {
        "language": "c",
        "build": "private",
        "_RENAME": "gtest",
    },
    "@zlib//:zlib": {
        "language": "c",
        "zlib": True,
        "build": "private",
        "defaults": "zlib",
        "_RENAME": "z",
    },
    "gpr": {
        "language": "c",
        "build": "all",
    },
    "grpc": {
        "language": "c",
        "build": "all",
        "baselib": True,
        "generate_plugin_registry": True,
    },
    "grpc++": {
        "language": "c++",
        "build": "all",
        "baselib": True,
    },
    "grpc++_alts": {"language": "c++", "build": "all", "baselib": True},
    "grpc++_error_details": {"language": "c++", "build": "all"},
    "grpc++_reflection": {"language": "c++", "build": "all"},
    "grpc_authorization_provider": {"language": "c++", "build": "all"},
    "grpc++_unsecure": {
        "language": "c++",
        "build": "all",
        "baselib": True,
    },
    "grpc_unsecure": {
        "language": "c",
        "build": "all",
        "baselib": True,
        "generate_plugin_registry": True,
    },
    "grpcpp_channelz": {"language": "c++", "build": "all"},
    "grpc++_test": {
        "language": "c++",
        "build": "private",
    },
    "src/compiler:grpc_plugin_support": {
        "language": "c++",
        "build": "protoc",
        "_RENAME": "grpc_plugin_support",
    },
    "src/compiler:grpc_cpp_plugin": {
        "language": "c++",
        "build": "protoc",
        "_TYPE": "target",
        "_RENAME": "grpc_cpp_plugin",
    },
    "src/compiler:grpc_csharp_plugin": {
        "language": "c++",
        "build": "protoc",
        "_TYPE": "target",
        "_RENAME": "grpc_csharp_plugin",
    },
    "src/compiler:grpc_node_plugin": {
        "language": "c++",
        "build": "protoc",
        "_TYPE": "target",
        "_RENAME": "grpc_node_plugin",
    },
    "src/compiler:grpc_objective_c_plugin": {
        "language": "c++",
        "build": "protoc",
        "_TYPE": "target",
        "_RENAME": "grpc_objective_c_plugin",
    },
    "src/compiler:grpc_php_plugin": {
        "language": "c++",
        "build": "protoc",
        "_TYPE": "target",
        "_RENAME": "grpc_php_plugin",
    },
    "src/compiler:grpc_python_plugin": {
        "language": "c++",
        "build": "protoc",
        "_TYPE": "target",
        "_RENAME": "grpc_python_plugin",
    },
    "src/compiler:grpc_ruby_plugin": {
        "language": "c++",
        "build": "protoc",
        "_TYPE": "target",
        "_RENAME": "grpc_ruby_plugin",
    },
    # TODO(jtattermusch): consider adding grpc++_core_stats
    # test support libraries
    "test/core/util:grpc_test_util": {
        "language": "c",
        "build": "private",
        "_RENAME": "grpc_test_util",
    },
    "test/core/util:grpc_test_util_unsecure": {
        "language": "c",
        "build": "private",
        "_RENAME": "grpc_test_util_unsecure",
    },
    # TODO(jtattermusch): consider adding grpc++_test_util_unsecure - it doesn't seem to be used by bazel build (don't forget to set secure: False)
    "test/cpp/util:test_config": {
        "language": "c++",
        "build": "private",
        "_RENAME": "grpc++_test_config",
    },
    "test/cpp/util:test_util": {
        "language": "c++",
        "build": "private",
        "_RENAME": "grpc++_test_util",
    },
    # benchmark support libraries
    "test/cpp/microbenchmarks:helpers": {
        "language": "c++",
        "build": "test",
        "defaults": "benchmark",
        "_RENAME": "benchmark_helpers",
    },
    "test/cpp/interop:interop_client": {
        "language": "c++",
        "build": "test",
        "run": False,
        "_TYPE": "target",
        "_RENAME": "interop_client",
    },
    "test/cpp/interop:interop_server": {
        "language": "c++",
        "build": "test",
        "run": False,
        "_TYPE": "target",
        "_RENAME": "interop_server",
    },
    # TODO(stanleycheung): re-enable this after cmake support for otel is added
    # "test/cpp/interop:xds_interop_client": {
    #     "language": "c++",
    #     "build": "test",
    #     "run": False,
    #     "_TYPE": "target",
    #     "_RENAME": "xds_interop_client",
    # },
    # "test/cpp/interop:xds_interop_server": {
    #     "language": "c++",
    #     "build": "test",
    #     "run": False,
    #     "_TYPE": "target",
    #     "_RENAME": "xds_interop_server",
    # },
    "test/cpp/interop:http2_client": {
        "language": "c++",
        "build": "test",
        "run": False,
        "_TYPE": "target",
        "_RENAME": "http2_client",
    },
    "test/cpp/qps:qps_json_driver": {
        "language": "c++",
        "build": "test",
        "run": False,
        "_TYPE": "target",
        "_RENAME": "qps_json_driver",
    },
    "test/cpp/qps:qps_worker": {
        "language": "c++",
        "build": "test",
        "run": False,
        "_TYPE": "target",
        "_RENAME": "qps_worker",
    },
    "test/cpp/util:grpc_cli": {
        "language": "c++",
        "build": "test",
        "run": False,
        "_TYPE": "target",
        "_RENAME": "grpc_cli",
    },
    # TODO(jtattermusch): create_jwt and verify_jwt breaks distribtests because it depends on grpc_test_utils and thus requires tests to be built
    # For now it's ok to disable them as these binaries aren't very useful anyway.
    # 'test/core/security:create_jwt': { 'language': 'c', 'build': 'tool', '_TYPE': 'target', '_RENAME': 'grpc_create_jwt' },
    # 'test/core/security:verify_jwt': { 'language': 'c', 'build': 'tool', '_TYPE': 'target', '_RENAME': 'grpc_verify_jwt' },
    # TODO(jtattermusch): add remaining tools such as grpc_print_google_default_creds_token (they are not used by bazel build)
    # TODO(jtattermusch): these fuzzers had no build.yaml equivalent
    # test/core/compression:message_compress_fuzzer
    # test/core/compression:message_decompress_fuzzer
    # test/core/compression:stream_compression_fuzzer
    # test/core/compression:stream_decompression_fuzzer
    # test/core/slice:b64_decode_fuzzer
    # test/core/slice:b64_encode_fuzzer
}

# We need a complete picture of all the targets and dependencies we're interested in
# so we run multiple bazel queries and merge the results.
_BAZEL_DEPS_QUERIES = [
    'deps("//test/...")',
    'deps("//:all")',
    'deps("//src/compiler/...")',
    # allow resolving bind() workspace rules to the actual targets they point to
    'kind(bind, "//external:*")',
    # The ^ is needed to differentiate proto_library from go_proto_library
    'deps(kind("^proto_library", @envoy_api//envoy/...))',
    # Make sure we have source info for all the targets that _expand_upb_proto_library_rules artificially adds
    # as upb_c_proto_library dependencies.
    'deps("//external:upb_generated_code_support__only_for_generated_code_do_not_use__i_give_permission_to_break_me")',
]

# Step 1: run a bunch of "bazel query --output xml" queries to collect
# the raw build metadata from the bazel build.
# At the end of this step we will have a dictionary of bazel rules
# that are interesting to us (libraries, binaries, etc.) along
# with their most important metadata (sources, headers, dependencies)
#
# Example of a single bazel rule after being populated:
# '//:grpc' : { 'class': 'cc_library',
#               'hdrs': ['//:include/grpc/byte_buffer.h', ... ],
#               'srcs': ['//:src/core/lib/surface/init.cc', ... ],
#               'deps': ['//:grpc_common', ...],
#               ... }
bazel_rules = {}
for query in _BAZEL_DEPS_QUERIES:
    # Later queries overwrite rules with the same key from earlier ones.
    bazel_rules.update(
        _extract_rules_from_bazel_xml(_bazel_query_xml_tree(query))
    )

# Step 1.5: The sources for UPB protos are pre-generated, so we want
# to expand the UPB proto library bazel rules into the generated
# .upb.h and .upb.c files.
_expand_upb_proto_library_rules(bazel_rules)

# Step 1.6: Add explicit protobuf dependency to grpc_proto_library rules
_patch_grpc_proto_library_rules(bazel_rules)

# Step 1.7: Make sure upb descriptor.proto library uses the pre-generated sources.
_patch_descriptor_upb_proto_library(bazel_rules)

# Step 2: Extract the known bazel cc_test tests. While most tests
# will be buildable with other build systems just fine, some of these tests
# would be too difficult to build and run with other build systems,
# so we simply exclude the ones we don't want.
# Note that while making tests buildable with other build systems
# than just bazel is extra effort, we still need to do that for these
# reasons:
# - If our cmake build doesn't have any tests at all, it's hard to make
#   sure that what it built actually works (we need at least some "smoke tests").
#   This is quite important because the build flags between bazel / non-bazel builds might differ
#   (sometimes it's for interesting reasons that are not easy to overcome)
#   which makes it even more important to have at least some tests for cmake/make
# - Our portability suite actually runs cmake tests and migration of portability
#   suite fully towards bazel might be intricate (e.g. it's unclear whether it's
#   possible to get a good enough coverage of different compilers / distros etc.
#   with bazel)
# - some things that are considered "tests" in build.yaml-based builds are actually binaries
#   we'd want to be able to build anyway (qps_json_worker, interop_client, interop_server, grpc_cli)
#   so it's unclear how much make/cmake simplification we would gain by removing just some (but not all) test
# TODO(jtattermusch): Investigate feasibility of running portability suite with bazel.
tests = _exclude_unwanted_cc_tests(_extract_cc_tests(bazel_rules))

# Step 3: Generate the "extra metadata" for all our build targets.
# While the bazel rules give us most of the information we need,
# the legacy "build.yaml" format requires some additional fields that
# we cannot get just from bazel alone (we call that "extra metadata").
# In this step, we basically analyze the build metadata we have from bazel
# and use heuristics to determine (and sometimes guess) the right
# extra metadata to use for each target.
#
# - For some targets (such as the public libraries, helper libraries
#   and executables) determining the right extra metadata is hard to do
#   automatically. For these targets, the extra metadata is supplied "manually"
#   in form of the _BUILD_EXTRA_METADATA dictionary. That allows us to match
#   the semantics of the legacy "build.yaml" as closely as possible.
#
# - For test binaries, it is possible to generate the "extra metadata" mostly
#   automatically using a rule-based heuristic approach because most tests
#   look and behave alike from the build's perspective.
#
# TODO(jtattermusch): Of course neither "_BUILD_EXTRA_METADATA" nor
# the heuristic approach used for tests are ideal and they cannot be made
# to cover all possible situations (and are tailored to work with the way
# the grpc build currently works), but the idea was to start with something
# reasonably simple that matches the "build.yaml"-like semantics as closely
# as possible (to avoid changing too many things at once) and gradually get
# rid of the legacy "build.yaml"-specific fields one by one. Once that is done,
# only very little "extra metadata" would be needed and/or it would be trivial
# to generate it automatically.
# Start from a shallow copy so the manual table itself gains no test entries;
# on a key clash the generated test metadata wins.
all_extra_metadata = dict(_BUILD_EXTRA_METADATA)
all_extra_metadata.update(
    _generate_build_extra_metadata_for_tests(tests, bazel_rules)
)

# Step 4: Compute the build metadata that will be used in the final build.yaml.
# The final build metadata includes transitive dependencies, and sources/headers
# expanded without intermediate dependencies.
# Example:
# '//:grpc' : { ...,
#               '_TRANSITIVE_DEPS': ['//:gpr_base', ...],
#               '_COLLAPSED_DEPS': ['gpr', ...],
#               '_COLLAPSED_SRCS': [...],
#               '_COLLAPSED_PUBLIC_HEADERS': [...],
#               '_COLLAPSED_HEADERS': [...]
#             }
_populate_transitive_metadata(bazel_rules, list(all_extra_metadata))

# Step 4a: Update the existing test metadata with the updated build metadata.
# Certain build metadata of certain test targets depend on the transitive
# metadata that wasn't available earlier.
update_test_metadata_with_transitive_metadata(all_extra_metadata, bazel_rules)

# Step 5: Generate the final metadata for all the targets.
# This is done by combining the bazel build metadata and the "extra metadata"
# we obtained in the previous step.
# In this step, we also perform some interesting massaging of the target metadata
# to end up with a result that is as similar to the legacy build.yaml data
# as possible.
# - Some targets get renamed (to match the legacy build.yaml target names)
# - Some intermediate libraries get elided ("expanded") to better match the set
#   of targets provided by the legacy build.yaml build
#
# Originally the target renaming was introduced to address these concerns:
# - avoid changing too many things at the same time and avoid people getting
#   confused by some well known targets suddenly being missing
# - Makefile/cmake and also language-specific generators rely on some build
#   targets being called exactly the way they are. Some of our testing
#   scripts also invoke executables (e.g. "qps_json_driver") by their name.
# - The autogenerated test name from bazel includes the package path
#   (e.g. "test_cpp_TEST_NAME"). Without renaming, the target names would
#   end up pretty ugly (e.g. test_cpp_qps_qps_json_driver).
# TODO(jtattermusch): reevaluate the need for target renaming in the future.
#
# Example of a single generated target:
# 'grpc' : { 'language': 'c',
#            'public_headers': ['include/grpc/byte_buffer.h', ... ],
#            'headers': ['src/core/ext/filters/client_channel/client_channel.h', ... ],
#            'src': ['src/core/lib/surface/init.cc', ... ],
#            'deps': ['gpr', 'address_sorting', ...],
#            ... }
all_targets_dict = _generate_build_metadata(all_extra_metadata, bazel_rules)

# Step 6: convert the dictionary with all the targets to a dict that has
# the desired "build.yaml"-like layout.
# TODO(jtattermusch): We use the custom "build.yaml"-like layout because
# currently all other build systems use that format as their source of truth.
# In the future, we can get rid of this custom & legacy format entirely,
# but we would need to update the generators for other build systems
# at the same time.
#
# Layout of the result:
# { 'libs': { TARGET_DICT_FOR_LIB_XYZ, ... },
#   'targets': { TARGET_DICT_FOR_BIN_XYZ, ... },
#   'tests': { TARGET_DICT_FOR_TEST_XYZ, ...} }
build_yaml_like = _convert_to_build_yaml_like(all_targets_dict)

# Step 7: generate build metadata for external ProtoBuf libraries.
# We only want the ProtoBuf sources from these ProtoBuf dependencies, which may
# not be present in our release source tarballs. These rules will be used in CMake
# to download these libraries if they do not exist. Even if the download fails, it
# will be a soft error that doesn't block existing targets from being built
# successfully.
build_yaml_like["external_proto_libraries"] = (
    _generate_external_proto_libraries()
)

# detect and report some suspicious situations we've seen before
_detect_and_print_issues(build_yaml_like)

# Step 8: Store the build_autogenerated.yaml in a deterministic (=sorted)
# and cleaned-up form.
# A basic overview of the resulting "build.yaml"-like format is here:
# https://github.com/grpc/grpc/blob/master/templates/README.md
# TODO(jtattermusch): The "cleanup" function is taken from the legacy
# build system (which used build.yaml) and can be eventually removed.
build_yaml_string = build_cleaner.cleaned_build_yaml_dict_as_string(
    build_yaml_like
)
# Write with an explicit encoding so the generated file does not depend on
# the locale of the machine running this script.
with open("build_autogenerated.yaml", "w", encoding="utf-8") as yaml_file:
    yaml_file.write(build_yaml_string)