Skip to content

Commit

Permalink
Restore dict based typesafe caching (#4898)
Browse files Browse the repository at this point in the history
This also restores `isfile_case` caching.
  • Loading branch information
ethanhs authored and JukkaL committed Apr 13, 2018
1 parent e95f7b5 commit 2db1dd0
Show file tree
Hide file tree
Showing 2 changed files with 44 additions and 23 deletions.
33 changes: 23 additions & 10 deletions mypy/build.py
Original file line number Diff line number Diff line change
Expand Up @@ -825,18 +825,26 @@ class FindModuleCache:

def __init__(self, fscache: Optional[FileSystemMetaCache] = None) -> None:
self.fscache = fscache or FileSystemMetaCache()
self.find_lib_path_dirs = functools.lru_cache(maxsize=None)(self._find_lib_path_dirs)
self.find_module = functools.lru_cache(maxsize=None)(self._find_module)
# Cache find_lib_path_dirs: (dir_chain, lib_path) -> list of matching directories.
self.dirs = {} # type: Dict[Tuple[str, Tuple[str, ...]], List[str]]
# Cache find_module: (id, lib_path, python_executable) -> result.
self.results = {} # type: Dict[Tuple[str, Tuple[str, ...], Optional[str]], Optional[str]]

def clear(self) -> None:
self.find_module.cache_clear()
self.find_lib_path_dirs.cache_clear()
self.results.clear()
self.dirs.clear()

def _find_lib_path_dirs(self, dir_chain: str, lib_path: Tuple[str, ...]) -> List[str]:
def find_lib_path_dirs(self, dir_chain: str, lib_path: Tuple[str, ...]) -> List[str]:
# Cache some repeated work within distinct find_module calls: finding which
# elements of lib_path have even the subdirectory they'd need for the module
# to exist. This is shared among different module ids when they differ only
# in the last component.
key = (dir_chain, lib_path)
if key not in self.dirs:
self.dirs[key] = self._find_lib_path_dirs(dir_chain, lib_path)
return self.dirs[key]

def _find_lib_path_dirs(self, dir_chain: str, lib_path: Tuple[str, ...]) -> List[str]:
dirs = []
for pathitem in lib_path:
# e.g., '/usr/lib/python3.4/foo/bar'
Expand All @@ -845,9 +853,16 @@ def _find_lib_path_dirs(self, dir_chain: str, lib_path: Tuple[str, ...]) -> List
dirs.append(dir)
return dirs

def find_module(self, id: str, lib_path: Tuple[str, ...],
python_executable: Optional[str]) -> Optional[str]:
"""Return the path of the module source file, or None if not found."""
key = (id, lib_path, python_executable)
if key not in self.results:
self.results[key] = self._find_module(id, lib_path, python_executable)
return self.results[key]

def _find_module(self, id: str, lib_path: Tuple[str, ...],
python_executable: Optional[str]) -> Optional[str]:
"""Return the path of the module source file, or None if not found."""
fscache = self.fscache

# If we're looking for a module like 'foo.bar.baz', it's likely that most of the
Expand Down Expand Up @@ -2167,14 +2182,12 @@ def dispatch(sources: List[BuildSource], manager: BuildManager) -> Graph:
graph = load_graph(sources, manager)

t1 = time.time()
fm_cache_size = manager.find_module_cache.find_module.cache_info().currsize
fm_dir_cache_size = manager.find_module_cache.find_lib_path_dirs.cache_info().currsize
manager.add_stats(graph_size=len(graph),
stubs_found=sum(g.path is not None and g.path.endswith('.pyi')
for g in graph.values()),
graph_load_time=(t1 - t0),
fm_cache_size=fm_cache_size,
fm_dir_cache_size=fm_dir_cache_size,
fm_cache_size=len(manager.find_module_cache.results),
fm_dir_cache_size=len(manager.find_module_cache.dirs),
)
if not graph:
print("Nothing to do?!")
Expand Down
34 changes: 21 additions & 13 deletions mypy/fscache.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,36 +37,41 @@

class FileSystemMetaCache:
def __init__(self) -> None:
self.stat = functools.lru_cache(maxsize=None)(self._stat)
self.listdir = functools.lru_cache(maxsize=None)(self._listdir)
# lru_cache doesn't handle exceptions, so we need special caches for them.
self.stat_error_cache = {} # type: Dict[str, Exception]
self.listdir_error_cache = {} # type: Dict[str, Exception]
self.flush()

def flush(self) -> None:
"""Start another transaction and empty all caches."""
self.stat.cache_clear()
self.listdir.cache_clear()
self.stat_error_cache.clear()
self.listdir_error_cache.clear()
self.stat_cache = {} # type: Dict[str, os.stat_result]
self.stat_error_cache = {} # type: Dict[str, Exception]
self.listdir_cache = {} # type: Dict[str, List[str]]
self.listdir_error_cache = {} # type: Dict[str, Exception]
self.isfile_case_cache = {} # type: Dict[str, bool]

def _stat(self, path: str) -> os.stat_result:
def stat(self, path: str) -> os.stat_result:
if path in self.stat_cache:
return self.stat_cache[path]
if path in self.stat_error_cache:
raise self.stat_error_cache[path]
try:
return os.stat(path)
st = os.stat(path)
except Exception as err:
self.stat_error_cache[path] = err
raise
self.stat_cache[path] = st
return st

def _listdir(self, path: str) -> List[str]:
def listdir(self, path: str) -> List[str]:
if path in self.listdir_cache:
return self.listdir_cache[path]
if path in self.listdir_error_cache:
raise self.listdir_error_cache[path]
try:
return os.listdir(path)
results = os.listdir(path)
except Exception as err:
self.listdir_error_cache[path] = err
raise err
self.listdir_cache[path] = results
return results

def isfile(self, path: str) -> bool:
try:
Expand All @@ -84,6 +89,8 @@ def isfile_case(self, path: str) -> bool:
TODO: We should maybe check the case for some directory components also,
to avoid permitting wrongly-cased *packages*.
"""
if path in self.isfile_case_cache:
return self.isfile_case_cache[path]
head, tail = os.path.split(path)
if not tail:
res = False
Expand All @@ -93,6 +100,7 @@ def isfile_case(self, path: str) -> bool:
res = tail in names and self.isfile(path)
except OSError:
res = False
self.isfile_case_cache[path] = res
return res

def isdir(self, path: str) -> bool:
Expand Down

0 comments on commit 2db1dd0

Please sign in to comment.