Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add option to limit the number of mp workers #14

Merged
merged 1 commit into from
Jan 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gersemirc.example
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,4 @@ line_length: 80
list_expansion: favour-inlining
quiet: false
unsafe: false
workers: 8
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
# Changelog

## [0.11.0] 2024-10-11
### Added
- Number of workers spawned for formatting multiple files can be changed with `-w/--workers`. By default it will be number of CPUs available in the system but limited to 60 for Windows machines due to [this](https://github.com/python/cpython/issues/89240).

## [0.10.0] 2023-12-22
### Added
- configuration schema that can be used with yaml LSP server, see: [JSON Schema](https://json-schema.org/) and #12
Expand Down
5 changes: 4 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,9 @@ configuration:
entities will be formatted in such way that sublists will be
completely expanded once expansion becomes necessary at all.
[default: favour-inlining]
-w INTEGER, --workers INTEGER
Number of workers used to format multiple files in parallel.
[default: number of CPUs on this system]
```

### [pre-commit](https://pre-commit.com/) hook
Expand All @@ -71,7 +74,7 @@ You can use gersemi with a pre-commit hook by adding the following to `.pre-comm
```yaml
repos:
- repo: https://github.com/BlankSpruce/gersemi
rev: 0.10.0
rev: 0.11.0
hooks:
- id: gersemi
```
Expand Down
13 changes: 12 additions & 1 deletion gersemi/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,17 @@ def create_argparser():
[default: {Configuration.list_expansion.value}]
""",
)
configuration_group.add_argument(
"-w",
"--workers",
metavar="INTEGER",
dest="workers",
type=int,
help=f"""
{conf_doc['workers']}
[default: number of CPUs on this system]
""",
)

parser.add_argument(
dest="sources",
Expand Down Expand Up @@ -171,7 +182,7 @@ def main():
configuration = make_configuration(args)
mode = get_mode(args)

sys.exit(run(mode, configuration, args.sources))
sys.exit(run(mode, configuration, args.workers, args.sources))


if __name__ == "__main__":
Expand Down
2 changes: 1 addition & 1 deletion gersemi/__version__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,4 @@
__license__ = "MPL 2.0"
__title__ = "gersemi"
__url__ = "https://github.com/BlankSpruce/gersemi"
__version__ = "0.10.0"
__version__ = "0.11.0"
36 changes: 33 additions & 3 deletions gersemi/configuration.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,24 @@
from enum import Enum
from hashlib import sha1
from itertools import chain
import multiprocessing
import os
from pathlib import Path
import sys
import textwrap
from typing import Iterable, Optional
import yaml


def number_of_workers():
result = multiprocessing.cpu_count()
if sys.platform == "win32":
# https://bugs.python.org/issue26903
# https://github.com/python/cpython/issues/89240
return min(result, 60)
return result


def doc(text: str) -> str:
return " ".join(textwrap.dedent(text).splitlines()).strip()

Expand Down Expand Up @@ -111,15 +122,22 @@ class Configuration:
),
)

workers: int = field(
default=number_of_workers(),
metadata=dict(
title="Workers",
description="Number of workers used to format multiple files in parallel.",
),
)

def summary(self):
hasher = sha1()
hasher.update(repr(self).encode("utf-8"))
return hasher.hexdigest()


def make_default_configuration_file():
default_configuration = Configuration()
d = asdict(default_configuration)
def make_configuration_file(configuration):
d = asdict(configuration)
d["list_expansion"] = d["list_expansion"].value
result = f"""
# yaml-language-server: $schema=https://raw.githubusercontent.com/BlankSpruce/gersemi/master/gersemi/configuration.schema.json
Expand All @@ -129,6 +147,10 @@ def make_default_configuration_file():
return result.strip()


def make_default_configuration_file():
return make_configuration_file(Configuration())


def find_common_parent_path(paths: Iterable[Path]) -> Path:
return Path(os.path.commonpath([path.absolute() for path in paths]))

Expand Down Expand Up @@ -168,6 +190,10 @@ def sanitize_list_expansion(list_expansion):
)


def sanitize_workers(workers):
return max(1, workers)


def load_configuration_from_file(configuration_file_path: Path) -> Configuration:
with enter_directory(configuration_file_path.parent):
with open(configuration_file_path, "r", encoding="utf-8") as f:
Expand All @@ -178,6 +204,8 @@ def load_configuration_from_file(configuration_file_path: Path) -> Configuration
config["list_expansion"] = sanitize_list_expansion(
config["list_expansion"]
)
if "workers" in config:
config["workers"] = sanitize_workers(config["workers"])
return Configuration(**config)


Expand All @@ -193,6 +221,8 @@ def override_configuration_with_args(
value = normalize_definitions(value)
if param == "list_expansion":
value = sanitize_list_expansion(value)
if param == "workers":
value = sanitize_workers(value)
setattr(configuration, param, value)
return configuration

Expand Down
6 changes: 6 additions & 0 deletions gersemi/configuration.schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,12 @@
"description": "Skip default sanity checks.",
"title": "Unsafe",
"type": "boolean"
},
"workers": {
"minimum": 1,
"description": "Number of workers used to format multiple files in parallel.",
"title": "Workers",
"type": "integer"
}
},
"title": "Configuration",
Expand Down
10 changes: 6 additions & 4 deletions gersemi/runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,10 +137,10 @@ def consume_task_result(task_result: TaskResult) -> Tuple[Path, int]:
return path, return_code


def create_pool(is_stdin_in_sources):
def create_pool(is_stdin_in_sources, num_workers):
if is_stdin_in_sources:
return mp_dummy.Pool
return partial(mp.Pool, processes=mp.cpu_count())
return partial(mp.Pool, processes=num_workers)


def filter_already_formatted_files(
Expand All @@ -163,12 +163,14 @@ def store_files_in_cache(
cache.store_files(configuration_summary, files)


def run(mode: Mode, configuration: Configuration, sources: Iterable[Path]):
def run(
mode: Mode, configuration: Configuration, num_workers: int, sources: Iterable[Path]
):
configuration_summary = configuration.summary()
requested_files = get_files(sources)
task = select_task(mode, configuration)

pool_cm = create_pool(Path("-") in requested_files)
pool_cm = create_pool(Path("-") in requested_files, num_workers)
with create_cache() as cache, pool_cm() as pool:
files_to_format = list(
filter_already_formatted_files(
Expand Down
10 changes: 8 additions & 2 deletions tests/test_configuration.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from gersemi.configuration import (
Configuration,
ListExpansion,
make_default_configuration_file,
make_configuration_file,
)

try:
Expand Down Expand Up @@ -44,6 +44,10 @@ def generate(self, schema, mode=None):
}
result["properties"]["list_expansion"]["$ref"] = "#/$defs/ListExpansion"
del result["properties"]["list_expansion"]["allOf"]

del result["properties"]["workers"]["default"]
result["properties"]["workers"]["minimum"] = 1

return result


Expand All @@ -65,4 +69,6 @@ def test_example_file_in_repository_is_consistent_with_configuration_definition(
with open(example_path, "r", encoding="utf-8") as f:
example = f.read().strip()

assert example == make_default_configuration_file()
configuration = Configuration()
configuration.workers = 8
assert example == make_configuration_file(configuration)
Loading