From 34920fcf8eaec8d881aae60c8301f318d415a19c Mon Sep 17 00:00:00 2001 From: Craig Tiller Date: Tue, 24 May 2022 15:32:08 -0700 Subject: [PATCH] [build] Better dependency fixer (#29776) * [build] Better dependency fixer Separate metrics for dependency selection into three: avoidness (to strongly steer things away based on tags), a selectable metric, and score (as an unbiasing heuristic) Add a way to select targets with the command line to help iterating Three selectable metrics are implemented: 1. edit distance, so that we tend to select things that don't change the BUILD file 2. list size, as a way of minimizing dependency lists 3. best overall heuristic score (1) is useful to avoid changing things that folks have carefully engineered (2, 3) help in optimizing the graph * Automated change: Fix sanity tests * ??? * Update fix_build_deps.py Co-authored-by: ctiller --- tools/distrib/fix_build_deps.py | 136 +++++++++++++++++++++++++------- 1 file changed, 109 insertions(+), 27 deletions(-) diff --git a/tools/distrib/fix_build_deps.py b/tools/distrib/fix_build_deps.py index 8e6c36ec900..7df86858e5f 100755 --- a/tools/distrib/fix_build_deps.py +++ b/tools/distrib/fix_build_deps.py @@ -14,6 +14,7 @@ # See the License for the specific language governing permissions and # limitations under the License. +import argparse import collections from doctest import SKIP import os @@ -26,12 +27,15 @@ import tempfile ROOT = os.path.abspath(os.path.join(os.path.dirname(sys.argv[0]), '../..')) os.chdir(ROOT) -Vendor = collections.namedtuple('Vendor', ['name', 'score']) vendors = collections.defaultdict(list) +scores = collections.defaultdict(int) +avoidness = collections.defaultdict(int) consumes = {} no_update = set() buildozer_commands = [] needs_codegen_base_src = set() +original_deps = {} +original_external_deps = {} # TODO(ctiller): ideally we wouldn't hardcode a bunch of paths here. # We can likely parse out BUILD files from dependencies to generate this index. @@ -235,20 +239,24 @@ def grpc_cc_library(name, srcs=[], select_deps=None, tags=[], + deps=[], + external_deps=[], **kwargs): if select_deps or 'nofixdeps' in tags or 'grpc-autodeps' not in tags: no_update.add(name) - score = len(public_hdrs + hdrs) + scores[name] = len(public_hdrs + hdrs) # avoid_dep is the internal way of saying prefer something else # we add grpc_avoid_dep to allow internal grpc-only stuff to avoid each # other, whilst not biasing dependent projects if 'avoid_dep' in tags or 'grpc_avoid_dep' in tags: - score += 1000000 + avoidness[name] += 10 if 'nofixdeps' in tags: - score += 1000 + avoidness[name] += 1 for hdr in hdrs + public_hdrs: - vendors[hdr].append(Vendor(name, score)) + vendors[hdr].append(name) inc = set() + original_deps[name] = frozenset(deps) + original_external_deps[name] = frozenset(external_deps) for src in hdrs + public_hdrs + srcs: for line in open(src): m = re.search(r'#include <(.*)>', line) @@ -262,16 +270,6 @@ def grpc_cc_library(name, consumes[name] = list(inc) -def choose_vendor(library, vendors): - best = None - for vendor in vendors: - if vendor.name == library: - return None - if best is None or vendor.score < best.score: - best = vendor - return best.name - - def buildozer(cmd, target): buildozer_commands.append('%s|%s' % (cmd, target)) @@ -288,6 +286,54 @@ def buildozer_set_list(name, values, target, via=""): buildozer('rename %s %s' % (via, name), target) +def score_edit_distance(proposed, existing): + """Score a proposed change primarily by edit distance""" + sum = 0 + for p in proposed: + if p not in existing: + sum += 1 + for e in existing: + if e not in proposed: + sum += 1 + return sum + + +def total_score(proposal): + return sum(scores[dep] for dep in proposal) + + +def total_avoidness(proposal): + return sum(avoidness[dep] for dep in proposal) + + +def score_list_size(proposed, existing): + """Score a proposed change primarily by number of dependencies""" + return len(proposed) + + +def score_best(proposed, existing): + """Score a proposed change primarily by dependency score""" + return 0 + + +SCORERS = { + 'edit_distance': score_edit_distance, + 'list_size': score_list_size, + 'best': score_best, +} + +parser = argparse.ArgumentParser(description='Fix build dependencies') +parser.add_argument('targets', + nargs='*', + default=[], + help='targets to fix (empty => all)') +parser.add_argument('--score', + type=str, + default='edit_distance', + help='scoring function to use: one of ' + + ', '.join(SCORERS.keys())) +args = parser.parse_args() + exec( open('BUILD', 'r').read(), { 'load': lambda filename, *args: None, @@ -305,13 +351,51 @@ exec( 'filegroup': lambda name, **kwargs: None, }, {}) + +# Keeps track of all possible sets of dependencies that could satify the +# problem. (models the list monad in Haskell!) +class Choices: + + def __init__(self): + self.choices = set() + self.choices.add(frozenset()) + + def add_one_of(self, choices): + if not choices: + return + new_choices = set() + for append_choice in choices: + for choice in self.choices: + new_choices.add(choice.union([append_choice])) + self.choices = new_choices + + def add(self, choice): + self.add_one_of([choice]) + + def remove(self, remove): + new_choices = set() + for choice in self.choices: + new_choices.add(choice.difference([remove])) + self.choices = new_choices + + def best(self, scorer): + best = None + final_scorer = lambda x: (total_avoidness(x), scorer(x), total_score(x)) + for choice in self.choices: + if best is None or final_scorer(choice) < final_scorer(best): + best = choice + return best + + error = False for library in sorted(consumes.keys()): if library in no_update: continue + if args.targets and library not in args.targets: + continue hdrs = sorted(consumes[library]) - deps = set() - external_deps = set() + deps = Choices() + external_deps = Choices() for hdr in hdrs: if hdr == 'src/core/lib/profiling/stap_probes.h': continue @@ -321,15 +405,11 @@ for library in sorted(consumes.keys()): continue if hdr in vendors: - vendor = choose_vendor(library, vendors[hdr]) - if vendor: - deps.add(vendor) + deps.add_one_of(vendors[hdr]) continue if 'include/' + hdr in vendors: - vendor = choose_vendor(library, vendors['include/' + hdr]) - if vendor: - deps.add(vendor) + deps.add_one_of(vendors['include/' + hdr]) continue if '.' not in hdr: @@ -389,11 +469,13 @@ for library in sorted(consumes.keys()): if library in needs_codegen_base_src: deps.add('grpc++_codegen_base_src') - if library in deps: - deps.remove(library) + deps.remove(library) - deps = sorted(deps) - external_deps = sorted(external_deps) + deps = sorted( + deps.best(lambda x: SCORERS[args.score](x, original_deps[library]))) + external_deps = sorted( + external_deps.best(lambda x: SCORERS[args.score] + (x, original_external_deps[library]))) target = ':' + library buildozer_set_list('external_deps', external_deps, target, via='deps') buildozer_set_list('deps', deps, target)