From ed770ba4dd50c419148a0fca2b43937a7447e1f9 Mon Sep 17 00:00:00 2001 From: Jelle Zijlstra Date: Fri, 26 Jan 2024 11:54:49 -0800 Subject: [PATCH] Fix cache file length (#4176) - Ensure total file length stays under 96 - Hash the path only if it's too long - Proceed normally (with a warning) if the cache can't be read Fixes #4172 --- CHANGES.md | 3 +++ src/black/cache.py | 9 ++++++++- src/black/mode.py | 21 +++++++++++++++++---- tests/test_black.py | 25 +++++++++++++++++++++++++ 4 files changed, 53 insertions(+), 5 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index 9a9be4bbeae..e4240eacfca 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -18,6 +18,9 @@ +- Shorten the length of the name of the cache file to fix crashes on file systems that + do not support long paths (#4176) + ### Packaging diff --git a/src/black/cache.py b/src/black/cache.py index cfdbc21e92a..35bddb573d2 100644 --- a/src/black/cache.py +++ b/src/black/cache.py @@ -13,6 +13,7 @@ from _black_version import version as __version__ from black.mode import Mode +from black.output import err if sys.version_info >= (3, 11): from typing import Self @@ -64,7 +65,13 @@ def read(cls, mode: Mode) -> Self: resolve the issue. """ cache_file = get_cache_file(mode) - if not cache_file.exists(): + try: + exists = cache_file.exists() + except OSError as e: + # Likely file too long; see #4172 and #4174 + err(f"Unable to read cache file {cache_file} due to {e}") + return cls(mode, cache_file) + if not exists: return cls(mode, cache_file) with cache_file.open("rb") as fobj: diff --git a/src/black/mode.py b/src/black/mode.py index 68919fb4901..128d2b9f108 100644 --- a/src/black/mode.py +++ b/src/black/mode.py @@ -192,6 +192,9 @@ class Deprecated(UserWarning): """Visible deprecation warning.""" +_MAX_CACHE_KEY_PART_LENGTH: Final = 32 + + @dataclass class Mode: target_versions: Set[TargetVersion] = field(default_factory=set) @@ -228,6 +231,19 @@ def get_cache_key(self) -> str: ) else: version_str = "-" + if len(version_str) > _MAX_CACHE_KEY_PART_LENGTH: + version_str = sha256(version_str.encode()).hexdigest()[ + :_MAX_CACHE_KEY_PART_LENGTH + ] + features_and_magics = ( + ",".join(sorted(f.name for f in self.enabled_features)) + + "@" + + ",".join(sorted(self.python_cell_magics)) + ) + if len(features_and_magics) > _MAX_CACHE_KEY_PART_LENGTH: + features_and_magics = sha256(features_and_magics.encode()).hexdigest()[ + :_MAX_CACHE_KEY_PART_LENGTH + ] parts = [ version_str, str(self.line_length), @@ -236,10 +252,7 @@ def get_cache_key(self) -> str: str(int(self.is_ipynb)), str(int(self.skip_source_first_line)), str(int(self.magic_trailing_comma)), - sha256( - (",".join(sorted(f.name for f in self.enabled_features))).encode() - ).hexdigest(), str(int(self.preview)), - sha256((",".join(sorted(self.python_cell_magics))).encode()).hexdigest(), + features_and_magics, ] return ".".join(parts) diff --git a/tests/test_black.py b/tests/test_black.py index 6dbe25a90b6..123ea0bb88a 100644 --- a/tests/test_black.py +++ b/tests/test_black.py @@ -44,6 +44,7 @@ from black import re_compile_maybe_verbose as compile_pattern from black.cache import FileData, get_cache_dir, get_cache_file from black.debug import DebugVisitor +from black.mode import Mode, Preview from black.output import color_diff, diff from black.report import Report @@ -2065,6 +2066,30 @@ def test_get_cache_dir( monkeypatch.setenv("BLACK_CACHE_DIR", str(workspace2)) assert get_cache_dir().parent == workspace2 + def test_cache_file_length(self) -> None: + cases = [ + DEFAULT_MODE, + # all of the target versions + Mode(target_versions=set(TargetVersion)), + # all of the features + Mode(enabled_features=set(Preview)), + # all of the magics + Mode(python_cell_magics={f"magic{i}" for i in range(500)}), + # all of the things + Mode( + target_versions=set(TargetVersion), + enabled_features=set(Preview), + python_cell_magics={f"magic{i}" for i in range(500)}, + ), + ] + for case in cases: + cache_file = get_cache_file(case) + # Some common file systems enforce a maximum path length + # of 143 (issue #4174). We can't do anything if the directory + # path is too long, but ensure the name of the cache file itself + # doesn't get too crazy. + assert len(cache_file.name) <= 96 + def test_cache_broken_file(self) -> None: mode = DEFAULT_MODE with cache_dir() as workspace: