From 5f14d48038d81910288c1d2b01fac7f61497e558 Mon Sep 17 00:00:00 2001 From: Thad House Date: Wed, 10 Jan 2024 21:44:35 -0800 Subject: [PATCH] Add option to limit the number of workers https://github.com/python/cpython/issues/89240 Any use of mp.Pool with a number of workers greater than 60 fails on Windows machines. Twofold solution is introduced: - add option for users to explicitly change number of workers through -w/--workers, this is similar to solution used in black - add limitation in the code for Windows machines Co-Authored-By: Blank Spruce <32396809+BlankSpruce@users.noreply.github.com> --- .gersemirc.example | 1 + CHANGELOG.md | 4 ++++ README.md | 5 ++++- gersemi/__main__.py | 13 ++++++++++- gersemi/__version__.py | 2 +- gersemi/configuration.py | 36 ++++++++++++++++++++++++++++--- gersemi/configuration.schema.json | 6 ++++++ gersemi/runner.py | 10 +++++---- tests/test_configuration.py | 10 +++++++-- 9 files changed, 75 insertions(+), 12 deletions(-) diff --git a/.gersemirc.example b/.gersemirc.example index cecb8d4..435daee 100644 --- a/.gersemirc.example +++ b/.gersemirc.example @@ -6,3 +6,4 @@ line_length: 80 list_expansion: favour-inlining quiet: false unsafe: false +workers: 8 \ No newline at end of file diff --git a/CHANGELOG.md b/CHANGELOG.md index 1f77f98..90d8c37 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,9 @@ # Changelog +## [0.11.0] 2024-10-11 +### Added +- Number of workers spawned for formatting multiple files can be changed with `-w/--workers`. By default it will be number of CPUs available in the system but limited to 60 for Windows machines due to [this](https://github.com/python/cpython/issues/89240). + ## [0.10.0] 2023-12-22 ### Added - configuration schema that can be used with yaml LSP server, see: [JSON Schema](https://json-schema.org/) and #12 diff --git a/README.md b/README.md index f895e3f..b0fcd28 100644 --- a/README.md +++ b/README.md @@ -62,6 +62,9 @@ configuration: entities will be formatted in such way that sublists will be completely expanded once expansion becomes necessary at all. [default: favour-inlining] + -w INTEGER, --workers INTEGER + Number of workers used to format multiple files in parallel. + [default: number of CPUs on this system] ``` ### [pre-commit](https://pre-commit.com/) hook @@ -71,7 +74,7 @@ You can use gersemi with a pre-commit hook by adding the following to `.pre-comm ```yaml repos: - repo: https://github.com/BlankSpruce/gersemi - rev: 0.10.0 + rev: 0.11.0 hooks: - id: gersemi ``` diff --git a/gersemi/__main__.py b/gersemi/__main__.py index 2739957..ae21a7e 100644 --- a/gersemi/__main__.py +++ b/gersemi/__main__.py @@ -132,6 +132,17 @@ def create_argparser(): [default: {Configuration.list_expansion.value}] """, ) + configuration_group.add_argument( + "-w", + "--workers", + metavar="INTEGER", + dest="workers", + type=int, + help=f""" + {conf_doc['workers']} + [default: number of CPUs on this system] + """, + ) parser.add_argument( dest="sources", @@ -171,7 +182,7 @@ def main(): configuration = make_configuration(args) mode = get_mode(args) - sys.exit(run(mode, configuration, args.sources)) + sys.exit(run(mode, configuration, args.workers, args.sources)) if __name__ == "__main__": diff --git a/gersemi/__version__.py b/gersemi/__version__.py index 888513b..8894bdf 100644 --- a/gersemi/__version__.py +++ b/gersemi/__version__.py @@ -4,4 +4,4 @@ __license__ = "MPL 2.0" __title__ = "gersemi" __url__ = "https://github.com/BlankSpruce/gersemi" -__version__ = "0.10.0" +__version__ = "0.11.0" diff --git a/gersemi/configuration.py b/gersemi/configuration.py index 2d39981..e64dd41 100644 --- a/gersemi/configuration.py +++ b/gersemi/configuration.py @@ -3,13 +3,24 @@ from enum import Enum from hashlib import sha1 from itertools import chain +import multiprocessing import os from pathlib import Path +import sys import textwrap from typing import Iterable, Optional import yaml +def number_of_workers(): + result = multiprocessing.cpu_count() + if sys.platform == "win32": + # https://bugs.python.org/issue26903 + # https://github.com/python/cpython/issues/89240 + return min(result, 60) + return result + + def doc(text: str) -> str: return " ".join(textwrap.dedent(text).splitlines()).strip() @@ -111,15 +122,22 @@ class Configuration: ), ) + workers: int = field( + default=number_of_workers(), + metadata=dict( + title="Workers", + description="Number of workers used to format multiple files in parallel.", + ), + ) + def summary(self): hasher = sha1() hasher.update(repr(self).encode("utf-8")) return hasher.hexdigest() -def make_default_configuration_file(): - default_configuration = Configuration() - d = asdict(default_configuration) +def make_configuration_file(configuration): + d = asdict(configuration) d["list_expansion"] = d["list_expansion"].value result = f""" # yaml-language-server: $schema=https://raw.githubusercontent.com/BlankSpruce/gersemi/master/gersemi/configuration.schema.json @@ -129,6 +147,10 @@ def make_default_configuration_file(): return result.strip() +def make_default_configuration_file(): + return make_configuration_file(Configuration()) + + def find_common_parent_path(paths: Iterable[Path]) -> Path: return Path(os.path.commonpath([path.absolute() for path in paths])) @@ -168,6 +190,10 @@ def sanitize_list_expansion(list_expansion): ) +def sanitize_workers(workers): + return max(1, workers) + + def load_configuration_from_file(configuration_file_path: Path) -> Configuration: with enter_directory(configuration_file_path.parent): with open(configuration_file_path, "r", encoding="utf-8") as f: @@ -178,6 +204,8 @@ def load_configuration_from_file(configuration_file_path: Path) -> Configuration config["list_expansion"] = sanitize_list_expansion( config["list_expansion"] ) + if "workers" in config: + config["workers"] = sanitize_workers(config["workers"]) return Configuration(**config) @@ -193,6 +221,8 @@ def override_configuration_with_args( value = normalize_definitions(value) if param == "list_expansion": value = sanitize_list_expansion(value) + if param == "workers": + value = sanitize_workers(value) setattr(configuration, param, value) return configuration diff --git a/gersemi/configuration.schema.json b/gersemi/configuration.schema.json index 4335ff3..046beae 100644 --- a/gersemi/configuration.schema.json +++ b/gersemi/configuration.schema.json @@ -54,6 +54,12 @@ "description": "Skip default sanity checks.", "title": "Unsafe", "type": "boolean" + }, + "workers": { + "minimum": 1, + "description": "Number of workers used to format multiple files in parallel.", + "title": "Workers", + "type": "integer" } }, "title": "Configuration", diff --git a/gersemi/runner.py b/gersemi/runner.py index 919df25..555ee08 100644 --- a/gersemi/runner.py +++ b/gersemi/runner.py @@ -137,10 +137,10 @@ def consume_task_result(task_result: TaskResult) -> Tuple[Path, int]: return path, return_code -def create_pool(is_stdin_in_sources): +def create_pool(is_stdin_in_sources, num_workers): if is_stdin_in_sources: return mp_dummy.Pool - return partial(mp.Pool, processes=mp.cpu_count()) + return partial(mp.Pool, processes=num_workers) def filter_already_formatted_files( @@ -163,12 +163,14 @@ def store_files_in_cache( cache.store_files(configuration_summary, files) -def run(mode: Mode, configuration: Configuration, sources: Iterable[Path]): +def run( + mode: Mode, configuration: Configuration, num_workers: int, sources: Iterable[Path] +): configuration_summary = configuration.summary() requested_files = get_files(sources) task = select_task(mode, configuration) - pool_cm = create_pool(Path("-") in requested_files) + pool_cm = create_pool(Path("-") in requested_files, num_workers) with create_cache() as cache, pool_cm() as pool: files_to_format = list( filter_already_formatted_files( diff --git a/tests/test_configuration.py b/tests/test_configuration.py index b446fa5..9d39a3c 100644 --- a/tests/test_configuration.py +++ b/tests/test_configuration.py @@ -6,7 +6,7 @@ from gersemi.configuration import ( Configuration, ListExpansion, - make_default_configuration_file, + make_configuration_file, ) try: @@ -44,6 +44,10 @@ def generate(self, schema, mode=None): } result["properties"]["list_expansion"]["$ref"] = "#/$defs/ListExpansion" del result["properties"]["list_expansion"]["allOf"] + + del result["properties"]["workers"]["default"] + result["properties"]["workers"]["minimum"] = 1 + return result @@ -65,4 +69,6 @@ def test_example_file_in_repository_is_consistent_with_configuration_definition( with open(example_path, "r", encoding="utf-8") as f: example = f.read().strip() - assert example == make_default_configuration_file() + configuration = Configuration() + configuration.workers = 8 + assert example == make_configuration_file(configuration)