Skip to content

Commit

Permalink
Add option to limit the number of workers
Browse files Browse the repository at this point in the history
python/cpython#89240
Any use of mp.Pool with a number of workers greater than 60 fails
on Windows machines.

Twofold solution is introduced:
- add option for users to explicitly change number of workers
through -w/--workers, this is similar to solution used in black
- add limitation in the code for Windows machines

Co-Authored-By: Blank Spruce <32396809+BlankSpruce@users.noreply.github.com>
  • Loading branch information
ThadHouse and BlankSpruce committed Jan 11, 2024
1 parent 96e25a1 commit 5f14d48
Show file tree
Hide file tree
Showing 9 changed files with 75 additions and 12 deletions.
1 change: 1 addition & 0 deletions .gersemirc.example
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,4 @@ line_length: 80
list_expansion: favour-inlining
quiet: false
unsafe: false
workers: 8
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
# Changelog

## [0.11.0] 2024-10-11
### Added
- Number of workers spawned for formatting multiple files can be changed with `-w/--workers`. By default it will be number of CPUs available in the system but limited to 60 for Windows machines due to [this](https://github.com/python/cpython/issues/89240).

## [0.10.0] 2023-12-22
### Added
- configuration schema that can be used with yaml LSP server, see: [JSON Schema](https://json-schema.org/) and #12
Expand Down
5 changes: 4 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,9 @@ configuration:
entities will be formatted in such way that sublists will be
completely expanded once expansion becomes necessary at all.
[default: favour-inlining]
-w INTEGER, --workers INTEGER
Number of workers used to format multiple files in parallel.
[default: number of CPUs on this system]
```

### [pre-commit](https://pre-commit.com/) hook
Expand All @@ -71,7 +74,7 @@ You can use gersemi with a pre-commit hook by adding the following to `.pre-comm
```yaml
repos:
- repo: https://github.com/BlankSpruce/gersemi
rev: 0.10.0
rev: 0.11.0
hooks:
- id: gersemi
```
Expand Down
13 changes: 12 additions & 1 deletion gersemi/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,17 @@ def create_argparser():
[default: {Configuration.list_expansion.value}]
""",
)
configuration_group.add_argument(
"-w",
"--workers",
metavar="INTEGER",
dest="workers",
type=int,
help=f"""
{conf_doc['workers']}
[default: number of CPUs on this system]
""",
)

parser.add_argument(
dest="sources",
Expand Down Expand Up @@ -171,7 +182,7 @@ def main():
configuration = make_configuration(args)
mode = get_mode(args)

sys.exit(run(mode, configuration, args.sources))
sys.exit(run(mode, configuration, args.workers, args.sources))


if __name__ == "__main__":
Expand Down
2 changes: 1 addition & 1 deletion gersemi/__version__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,4 @@
__license__ = "MPL 2.0"
__title__ = "gersemi"
__url__ = "https://github.com/BlankSpruce/gersemi"
__version__ = "0.10.0"
__version__ = "0.11.0"
36 changes: 33 additions & 3 deletions gersemi/configuration.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,24 @@
from enum import Enum
from hashlib import sha1
from itertools import chain
import multiprocessing
import os
from pathlib import Path
import sys
import textwrap
from typing import Iterable, Optional
import yaml


def number_of_workers():
result = multiprocessing.cpu_count()
if sys.platform == "win32":
# https://bugs.python.org/issue26903
# https://github.com/python/cpython/issues/89240
return min(result, 60)
return result


def doc(text: str) -> str:
return " ".join(textwrap.dedent(text).splitlines()).strip()

Expand Down Expand Up @@ -111,15 +122,22 @@ class Configuration:
),
)

workers: int = field(
default=number_of_workers(),
metadata=dict(
title="Workers",
description="Number of workers used to format multiple files in parallel.",
),
)

def summary(self):
hasher = sha1()
hasher.update(repr(self).encode("utf-8"))
return hasher.hexdigest()


def make_default_configuration_file():
default_configuration = Configuration()
d = asdict(default_configuration)
def make_configuration_file(configuration):
d = asdict(configuration)
d["list_expansion"] = d["list_expansion"].value
result = f"""
# yaml-language-server: $schema=https://raw.githubusercontent.com/BlankSpruce/gersemi/master/gersemi/configuration.schema.json
Expand All @@ -129,6 +147,10 @@ def make_default_configuration_file():
return result.strip()


def make_default_configuration_file():
return make_configuration_file(Configuration())


def find_common_parent_path(paths: Iterable[Path]) -> Path:
return Path(os.path.commonpath([path.absolute() for path in paths]))

Expand Down Expand Up @@ -168,6 +190,10 @@ def sanitize_list_expansion(list_expansion):
)


def sanitize_workers(workers):
return max(1, workers)


def load_configuration_from_file(configuration_file_path: Path) -> Configuration:
with enter_directory(configuration_file_path.parent):
with open(configuration_file_path, "r", encoding="utf-8") as f:
Expand All @@ -178,6 +204,8 @@ def load_configuration_from_file(configuration_file_path: Path) -> Configuration
config["list_expansion"] = sanitize_list_expansion(
config["list_expansion"]
)
if "workers" in config:
config["workers"] = sanitize_workers(config["workers"])
return Configuration(**config)


Expand All @@ -193,6 +221,8 @@ def override_configuration_with_args(
value = normalize_definitions(value)
if param == "list_expansion":
value = sanitize_list_expansion(value)
if param == "workers":
value = sanitize_workers(value)
setattr(configuration, param, value)
return configuration

Expand Down
6 changes: 6 additions & 0 deletions gersemi/configuration.schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,12 @@
"description": "Skip default sanity checks.",
"title": "Unsafe",
"type": "boolean"
},
"workers": {
"minimum": 1,
"description": "Number of workers used to format multiple files in parallel.",
"title": "Workers",
"type": "integer"
}
},
"title": "Configuration",
Expand Down
10 changes: 6 additions & 4 deletions gersemi/runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,10 +137,10 @@ def consume_task_result(task_result: TaskResult) -> Tuple[Path, int]:
return path, return_code


def create_pool(is_stdin_in_sources):
def create_pool(is_stdin_in_sources, num_workers):
if is_stdin_in_sources:
return mp_dummy.Pool
return partial(mp.Pool, processes=mp.cpu_count())
return partial(mp.Pool, processes=num_workers)


def filter_already_formatted_files(
Expand All @@ -163,12 +163,14 @@ def store_files_in_cache(
cache.store_files(configuration_summary, files)


def run(mode: Mode, configuration: Configuration, sources: Iterable[Path]):
def run(
mode: Mode, configuration: Configuration, num_workers: int, sources: Iterable[Path]
):
configuration_summary = configuration.summary()
requested_files = get_files(sources)
task = select_task(mode, configuration)

pool_cm = create_pool(Path("-") in requested_files)
pool_cm = create_pool(Path("-") in requested_files, num_workers)
with create_cache() as cache, pool_cm() as pool:
files_to_format = list(
filter_already_formatted_files(
Expand Down
10 changes: 8 additions & 2 deletions tests/test_configuration.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from gersemi.configuration import (
Configuration,
ListExpansion,
make_default_configuration_file,
make_configuration_file,
)

try:
Expand Down Expand Up @@ -44,6 +44,10 @@ def generate(self, schema, mode=None):
}
result["properties"]["list_expansion"]["$ref"] = "#/$defs/ListExpansion"
del result["properties"]["list_expansion"]["allOf"]

del result["properties"]["workers"]["default"]
result["properties"]["workers"]["minimum"] = 1

return result


Expand All @@ -65,4 +69,6 @@ def test_example_file_in_repository_is_consistent_with_configuration_definition(
with open(example_path, "r", encoding="utf-8") as f:
example = f.read().strip()

assert example == make_default_configuration_file()
configuration = Configuration()
configuration.workers = 8
assert example == make_configuration_file(configuration)

0 comments on commit 5f14d48

Please sign in to comment.