From 032ab3606d7bbb4b40da7871f6193be0dee126a8 Mon Sep 17 00:00:00 2001 From: Marcel Hollerbach Date: Tue, 12 May 2020 10:26:21 +0200 Subject: [PATCH] CompilerArgs: refactor __iadd__ The previous optimizations from 4524088d386d2e2315d8fef6ffedc11d8e9a394a were not really good, and not really scalable, since only the lookup was improved. However, the really heavy calls to remove have not been improved. With this commit we are refactoring CompilerArgs into a data structure which does not use remove at all. This works by building a pre and a post list, which get flushed into __container at some point. However, we build pre and post by deduplicating forward. Later on, when we are flushing pre and post into __container, we are deduplicating the list backwards, so we are not changing behaviour here. Overall this cuts 10s off the efl configuration time. Furthermore, this improves configure times on arm devices a lot more, since remove seems to be a lot slower there. In general this results in __iadd__ no longer being within the top 5 of costly functions in generate_single_compile. 
--- mesonbuild/compilers/compilers.py | 82 ++++++++++++++++++++++++------- 1 file changed, 64 insertions(+), 18 deletions(-) diff --git a/mesonbuild/compilers/compilers.py b/mesonbuild/compilers/compilers.py index 07b07d963..6b6df940a 100644 --- a/mesonbuild/compilers/compilers.py +++ b/mesonbuild/compilers/compilers.py @@ -14,8 +14,10 @@ import contextlib, os.path, re, tempfile import collections.abc +from collections import deque import itertools import typing as T +from functools import lru_cache from ..linkers import ( GnuLikeDynamicLinkerMixin, LinkerEnvVarsMixin, SolarisDynamicLinker, @@ -465,6 +467,47 @@ class CompilerArgs(collections.abc.MutableSequence): iterable: T.Optional[T.Iterable[str]] = None): self.compiler = compiler self.__container = list(iterable) if iterable is not None else [] # type: T.List[str] + self.pre = deque() + self.post = deque() + + # Flush the saved pre and post list into the __container list + # + # This correctly deduplicates the entries after _can_dedup definition + # Note: This function is designed to work without delete operations, as deletions are worsening the performance a lot. 
+ def flush_pre_post(self): + pre_flush = deque() + pre_flush_set = set() + post_flush = deque() + post_flush_set = set() + + #The two lists are here walked from the front to the back, in order to not need removals for deduplication + for a in reversed(self.pre): + dedup = self._can_dedup(a) + if a not in pre_flush_set: + pre_flush.appendleft(a) + if dedup == 2: + pre_flush_set.add(a) + for a in reversed(self.post): + dedup = self._can_dedup(a) + if a not in post_flush_set: + post_flush.appendleft(a) + if dedup == 2: + post_flush_set.add(a) + + #pre and post will overwrite every element that is in the container + #only copy over args that are in __container but not in the post flush or pre flush set + + for a in self.__container: + if a not in post_flush_set and a not in pre_flush_set: + pre_flush.append(a) + + self.__container = list(pre_flush) + list(post_flush) + self.pre.clear() + self.post.clear() + + def __iter__(self): + self.flush_pre_post() + return iter(self.__container); @T.overload # noqa: F811 def __getitem__(self, index: int) -> str: # noqa: F811 @@ -475,6 +518,7 @@ class CompilerArgs(collections.abc.MutableSequence): pass def __getitem__(self, index): # noqa: F811 + self.flush_pre_post() return self.__container[index] @T.overload # noqa: F811 @@ -486,21 +530,26 @@ class CompilerArgs(collections.abc.MutableSequence): pass def __setitem__(self, index, value) -> None: # noqa: F811 + self.flush_pre_post() self.__container[index] = value def __delitem__(self, index: T.Union[int, slice]) -> None: + self.flush_pre_post() del self.__container[index] def __len__(self) -> int: - return len(self.__container) + return len(self.__container) + len(self.pre) + len(self.post) def insert(self, index: int, value: str) -> None: + self.flush_pre_post() self.__container.insert(index, value) def copy(self) -> 'CompilerArgs': + self.flush_pre_post() return CompilerArgs(self.compiler, self.__container.copy()) @classmethod + @lru_cache(maxsize=None) def _can_dedup(cls, arg): 
''' Returns whether the argument can be safely de-duped. This is dependent @@ -555,6 +604,7 @@ class CompilerArgs(collections.abc.MutableSequence): return 0 @classmethod + @lru_cache(maxsize=None) def _should_prepend(cls, arg): if arg.startswith(cls.prepend_prefixes): return True @@ -568,6 +618,7 @@ class CompilerArgs(collections.abc.MutableSequence): # between static libraries, and for recursively searching for symbols # needed by static libraries that are provided by object files or # shared libraries. + self.flush_pre_post() if copy: new = self.copy() else: @@ -627,6 +678,7 @@ class CompilerArgs(collections.abc.MutableSequence): for absolute paths to libraries, etc, which can always be de-duped safely. ''' + self.flush_pre_post() if os.path.isabs(arg): self.append(arg) else: @@ -638,6 +690,7 @@ class CompilerArgs(collections.abc.MutableSequence): reordering or de-dup except for absolute paths where the order of include search directories is not relevant ''' + self.flush_pre_post() for elem in iterable: self.append_direct(elem) @@ -653,6 +706,7 @@ class CompilerArgs(collections.abc.MutableSequence): self.extend_direct(lflags) def __add__(self, args: T.Iterable[str]) -> 'CompilerArgs': + self.flush_pre_post() new = self.copy() new += args return new @@ -662,8 +716,7 @@ class CompilerArgs(collections.abc.MutableSequence): Add two CompilerArgs while taking into account overriding of arguments and while preserving the order of arguments as much as possible ''' - pre = [] # type: T.List[str] - post = [] # type: T.List[str] + tmp_pre = deque() if not isinstance(args, collections.abc.Iterable): raise TypeError('can only concatenate Iterable[str] (not "{}") to CompilerArgs'.format(args)) for arg in args: @@ -673,32 +726,24 @@ class CompilerArgs(collections.abc.MutableSequence): dedup = self._can_dedup(arg) if dedup == 1: # Argument already exists and adding a new instance is useless - if arg in self or arg in pre or arg in post: + if arg in self.__container or arg in 
self.pre or arg in self.post: continue - if dedup == 2: - # Remove all previous occurrences of the arg and add it anew - if arg in self: - self.remove(arg) - if arg in pre: - pre.remove(arg) - if arg in post: - post.remove(arg) if self._should_prepend(arg): - pre.append(arg) + tmp_pre.appendleft(arg) else: - post.append(arg) - # Insert at the beginning - self[:0] = pre - # Append to the end - self.__container += post + self.post.append(arg) + self.pre.extendleft(tmp_pre) + #pre and post is going to be merged later before a iter call return self def __radd__(self, args: T.Iterable[str]): + self.flush_pre_post() new = CompilerArgs(self.compiler, args) new += self return new def __eq__(self, other: T.Any) -> T.Union[bool, type(NotImplemented)]: + self.flush_pre_post() # Only allow equality checks against other CompilerArgs and lists instances if isinstance(other, CompilerArgs): return self.compiler == other.compiler and self.__container == other.__container @@ -713,6 +758,7 @@ class CompilerArgs(collections.abc.MutableSequence): self.__iadd__(args) def __repr__(self) -> str: + self.flush_pre_post() return 'CompilerArgs({!r}, {!r})'.format(self.compiler, self.__container) class Compiler: