diff --git a/datalad_remake/__init__.py b/datalad_remake/__init__.py index 45fc1e0..cb44161 100644 --- a/datalad_remake/__init__.py +++ b/datalad_remake/__init__.py @@ -15,7 +15,7 @@ # to be found by datalad command_suite = ( # description of the command suite, displayed in cmdline help - "DataLad remake command suite", + 'DataLad remake command suite', [ # specification of a command, any number of commands can be defined ( @@ -26,7 +26,7 @@ # optional name of the command in the cmdline API 'make', # optional name of the command in the Python API - 'make' + 'make', ), ( # importable module that contains the command implementation @@ -36,9 +36,9 @@ # optional name of the command in the cmdline API 'provision', # optional name of the command in the Python API - 'provision' + 'provision', ), - ] + ], ) diff --git a/datalad_remake/annexremotes/remake_remote.py b/datalad_remake/annexremotes/remake_remote.py index edf6c5f..5b0417f 100644 --- a/datalad_remake/annexremotes/remake_remote.py +++ b/datalad_remake/annexremotes/remake_remote.py @@ -39,7 +39,6 @@ class RemakeRemote(SpecialRemote): - def __init__(self, annex: Master): super().__init__(annex) @@ -86,10 +85,7 @@ def get_url_for_key(self, key: str) -> str: self.annex.debug(f'get_url_for_key: key: {key!r}, urls: {urls!r}') return urls[0] - def get_compute_info(self, - key: str - ) -> tuple[dict[str, Any], Dataset]: - + def get_compute_info(self, key: str) -> tuple[dict[str, Any], Dataset]: def get_assigned_value(assignment: str) -> str: return assignment.split('=', 1)[1] @@ -106,10 +102,7 @@ def get_assigned_value(assignment: str) -> str: return { 'root_version': root_version, 'this': this, - **{ - name: spec[name] - for name in ['method', 'input', 'output', 'parameter'] - } + **{name: spec[name] for name in ['method', 'input', 'output', 'parameter']}, }, dataset def transfer_retrieve(self, key: str, file_name: str) -> None: @@ -122,16 +115,25 @@ def transfer_retrieve(self, key: str, file_name: str) -> None: lgr.debug('Starting provision') self.annex.debug('Starting provision') with provide_context( - dataset, - compute_info['root_version'], - compute_info['input'] + dataset, compute_info['root_version'], compute_info['input'] ) as worktree: lgr.debug('Starting execution') self.annex.debug('Starting execution') - execute(worktree, compute_info['method'], compute_info['parameter'], compute_info['output']) + execute( + worktree, + compute_info['method'], + compute_info['parameter'], + compute_info['output'], + ) lgr.debug('Starting collection') self.annex.debug('Starting collection') - self._collect(worktree, dataset, compute_info['output'], compute_info['this'], file_name) + self._collect( + worktree, + dataset, + compute_info['output'], + compute_info['this'], + file_name, + ) lgr.debug('Leaving provision context') self.annex.debug('Leaving provision context') @@ -139,9 +141,7 @@ def checkpresent(self, key: str) -> bool: # See if at least one URL with the remake url-scheme is present return self.annex.geturls(key, f'{url_scheme}:') != [] - def _find_dataset(self, - commit: str - ) -> Dataset: + def _find_dataset(self, commit: str) -> Dataset: """Find the first enclosing dataset with the given commit""" # TODO: get version override from configuration start_dir = Path(self.annex.getgitdir()).parent.absolute() @@ -150,23 +150,27 @@ def _find_dataset(self, result = subprocess.run( ['git', 'cat-file', '-t', commit], # noqa: S607 stdout=subprocess.PIPE, - cwd=current_dir, check=False) + cwd=current_dir, + check=False, + ) if result.returncode == 0 and result.stdout.strip() == b'commit': return Dataset(current_dir) current_dir = current_dir.parent msg = ( f'Could not find dataset with commit {commit!r}, starting from ' - f'{start_dir}') + f'{start_dir}' + ) raise RemoteError(msg) - def _collect(self, - worktree: Path, - dataset: Dataset, - output_patterns: Iterable[str], - this: str, - this_destination: str, - ) -> None: - """Collect computation results for `this` (and all other outputs) """ + def _collect( + self, + worktree: Path, + dataset: Dataset, + output_patterns: Iterable[str], + this: str, + this_destination: str, + ) -> None: + """Collect computation results for `this` (and all other outputs)""" # Get all outputs that were created during computation outputs = resolve_patterns(root_dir=worktree, patterns=output_patterns) @@ -180,13 +184,17 @@ def _collect(self, is_annexed = call_git_success( ['annex', 'whereis', str(file_path)], cwd=dataset_path, - capture_output=True) + capture_output=True, + ) if is_annexed: - self.annex.debug(f'_collect: reinject: {worktree / output} -> {dataset_path}:{file_path}') + self.annex.debug( + f'_collect: reinject: {worktree / output} -> {dataset_path}:{file_path}' + ) call_git_success( ['annex', 'reinject', str(worktree / output), str(file_path)], cwd=dataset_path, - capture_output=True) + capture_output=True, + ) # Collect `this` file. It has to be copied to the destination given # by git-annex. Git-annex will check its integrity. diff --git a/datalad_remake/annexremotes/tests/test_hierarchies.py b/datalad_remake/annexremotes/tests/test_hierarchies.py index c87b160..a09060c 100644 --- a/datalad_remake/annexremotes/tests/test_hierarchies.py +++ b/datalad_remake/annexremotes/tests/test_hierarchies.py @@ -32,10 +32,18 @@ output_pattern_static = [ - 'a.txt', 'b.txt', 'new.txt', - 'd2_subds0/a0.txt', 'd2_subds0/b0.txt', 'd2_subds0/new.txt', - 'd2_subds0/d2_subds1/a1.txt', 'd2_subds0/d2_subds1/b1.txt', 'd2_subds0/d2_subds1/new.txt', - 'd2_subds0/d2_subds1/d2_subds2/a2.txt', 'd2_subds0/d2_subds1/d2_subds2/b2.txt', 'd2_subds0/d2_subds1/d2_subds2/new.txt', + 'a.txt', + 'b.txt', + 'new.txt', + 'd2_subds0/a0.txt', + 'd2_subds0/b0.txt', + 'd2_subds0/new.txt', + 'd2_subds0/d2_subds1/a1.txt', + 'd2_subds0/d2_subds1/b1.txt', + 'd2_subds0/d2_subds1/new.txt', + 'd2_subds0/d2_subds1/d2_subds2/a2.txt', + 'd2_subds0/d2_subds1/d2_subds2/b2.txt', + 'd2_subds0/d2_subds1/d2_subds2/new.txt', ] @@ -47,31 +55,29 @@ ] -test_file_content = list(zip( - output_pattern_static, - ['content: first\n', 'content: second\n', 'content: third\n'] * 4, strict=False) +test_file_content = list( + zip( + output_pattern_static, + ['content: first\n', 'content: second\n', 'content: third\n'] * 4, + strict=False, + ) ) -def _drop_files(dataset: Dataset, - files: Iterable[str]): +def _drop_files(dataset: Dataset, files: Iterable[str]): for file in files: dataset.drop(file, reckless='availability', result_renderer='disabled') assert not (dataset.pathobj / file).exists() -def _check_content(dataset, - file_content: Iterable[tuple[str, str]] - ): +def _check_content(dataset, file_content: Iterable[tuple[str, str]]): for file, content in file_content: assert (dataset.pathobj / file).read_text() == content @pytest.mark.parametrize('output_pattern', [output_pattern_static, output_pattern_glob]) def test_end_to_end(tmp_path, monkeypatch, output_pattern): - - root_dataset = create_simple_computation_dataset( - tmp_path, 'd2', 3, test_method) + root_dataset = create_simple_computation_dataset(tmp_path, 'd2', 3, test_method) # run `make` command results = root_dataset.make( @@ -82,11 +88,13 @@ def test_end_to_end(tmp_path, monkeypatch, output_pattern): 'third=third', ], output=output_pattern, - result_renderer='disabled') + result_renderer='disabled', + ) collected_output = [ str(Path(result['path']).relative_to(root_dataset.pathobj)) - for result in results] + for result in results + ] assert set(collected_output) == set(output_pattern_static) # check computation success diff --git a/datalad_remake/annexremotes/tests/test_remake_remote.py b/datalad_remake/annexremotes/tests/test_remake_remote.py index 69408c5..004e6e4 100644 --- a/datalad_remake/annexremotes/tests/test_remake_remote.py +++ b/datalad_remake/annexremotes/tests/test_remake_remote.py @@ -64,7 +64,6 @@ def send(self, value): def test_compute_remote_main(tmp_path, monkeypatch): - dataset = create_ds_hierarchy(tmp_path, 'ds1', 0)[0][2] monkeypatch.chdir(dataset.path) @@ -79,15 +78,15 @@ def test_compute_remote_main(tmp_path, monkeypatch): subprocess.run( ['git', 'annex', 'info', 'a.txt'], # noqa: S607 stdout=subprocess.PIPE, - check=True).stdout.splitlines())).split(b': ')[1] + check=True, + ).stdout.splitlines(), + ) + ).split(b': ')[1] (dataset.pathobj / specification_dir).mkdir(parents=True) (dataset.pathobj / specification_dir / '000001111122222').write_text( - build_json( - 'echo', - [], - ['a.txt'], - {'content': 'some_string'})) + build_json('echo', [], ['a.txt'], {'content': 'some_string'}) + ) input_ = MockedInput() diff --git a/datalad_remake/commands/make_cmd.py b/datalad_remake/commands/make_cmd.py index f64797d..3f98114 100644 --- a/datalad_remake/commands/make_cmd.py +++ b/datalad_remake/commands/make_cmd.py @@ -57,122 +57,148 @@ class Make(ValidatedInterface): # first docstring line is used a short description in the cmdline help # the rest is put in the verbose help and manpage - """Specify a computation and optionally execute it - """ - - _validator_ = EnsureCommandParameterization({ - 'dataset': EnsureDataset(installed=True), - 'input': EnsureListOf(EnsureStr(min_len=1)), - 'input_list': EnsureStr(min_len=1), - 'output': EnsureListOf(EnsureStr(min_len=1), min_len=1), - 'output_list': EnsureStr(min_len=1), - 'parameter': EnsureListOf(EnsureStr(min_len=3)), - 'parameter_list': EnsureStr(min_len=1), - }) + """Specify a computation and optionally execute it""" + + _validator_ = EnsureCommandParameterization( + { + 'dataset': EnsureDataset(installed=True), + 'input': EnsureListOf(EnsureStr(min_len=1)), + 'input_list': EnsureStr(min_len=1), + 'output': EnsureListOf(EnsureStr(min_len=1), min_len=1), + 'output_list': EnsureStr(min_len=1), + 'parameter': EnsureListOf(EnsureStr(min_len=3)), + 'parameter_list': EnsureStr(min_len=1), + } + ) # parameters of the command, must be exhaustive _params_: ClassVar[dict[int, Parameter]] = { 'dataset': Parameter( args=('-d', '--dataset'), - doc="Dataset to be used as a configuration source. Beyond " - "reading configuration items, this command does not interact with " - "the dataset."), + doc='Dataset to be used as a configuration source. Beyond ' + 'reading configuration items, this command does not interact with ' + 'the dataset.', + ), 'url_only': Parameter( args=('-u', '--url-only'), - action="store_true", + action='store_true', doc="Don't perform the computation, register an URL-key " - "instead. A `git annex get ` will trigger the computation"), + 'instead. A `git annex get ` will trigger the computation', + ), 'template': Parameter( args=('template',), - doc="Name of the computing template (template should be present " - "in $DATASET/.datalad/remake/methods)"), + doc='Name of the computing template (template should be present ' + 'in $DATASET/.datalad/remake/methods)', + ), 'branch': Parameter( - args=('-b', '--branch',), - doc="Branch (or commit) that should be used for computation, if " - "not specified HEAD will be used"), + args=( + '-b', + '--branch', + ), + doc='Branch (or commit) that should be used for computation, if ' + 'not specified HEAD will be used', + ), 'input': Parameter( - args=('-i', '--input',), + args=( + '-i', + '--input', + ), action='append', - doc="An input file pattern (repeat for multiple inputs, " - "file pattern support python globbing, globbing is expanded " - "in the source dataset)"), + doc='An input file pattern (repeat for multiple inputs, ' + 'file pattern support python globbing, globbing is expanded ' + 'in the source dataset)', + ), 'input_list': Parameter( - args=('-I', '--input-list',), - doc="Name of a file that contains a list of input file patterns. " - "Format is one file per line, relative path from `dataset`. " - "Empty lines, i.e. lines that contain only newlines, and lines " - "that start with '#' are ignored. Line content is stripped " - "before used. This is useful if a large number of input file " - "patterns should be provided."), + args=( + '-I', + '--input-list', + ), + doc='Name of a file that contains a list of input file patterns. ' + 'Format is one file per line, relative path from `dataset`. ' + 'Empty lines, i.e. lines that contain only newlines, and lines ' + "that start with '#' are ignored. Line content is stripped " + 'before used. This is useful if a large number of input file ' + 'patterns should be provided.', + ), 'output': Parameter( - args=('-o', '--output',), + args=( + '-o', + '--output', + ), action='append', - doc="An output file pattern (repeat for multiple outputs)" - "file pattern support python globbing, globbing is expanded " - "in the worktree)"), + doc='An output file pattern (repeat for multiple outputs)' + 'file pattern support python globbing, globbing is expanded ' + 'in the worktree)', + ), 'output_list': Parameter( - args=('-O', '--output-list',), - doc="Name of a file that contains a list of output patterns. Format " - "is one file per line, relative path from `dataset`. Empty " - "lines, i.e. lines that contain only newlines, arg ignored. " - "This is useful if a large number of output files should be " - "provided."), + args=( + '-O', + '--output-list', + ), + doc='Name of a file that contains a list of output patterns. Format ' + 'is one file per line, relative path from `dataset`. Empty ' + 'lines, i.e. lines that contain only newlines, arg ignored. ' + 'This is useful if a large number of output files should be ' + 'provided.', + ), 'parameter': Parameter( - args=('-p', '--parameter',), + args=( + '-p', + '--parameter', + ), action='append', - doc="Input parameter in the form = (repeat for " - "multiple parameters)"), + doc='Input parameter in the form = (repeat for ' + 'multiple parameters)', + ), 'parameter_list': Parameter( - args=('-P', '--parameter-list',), - doc="Name of a file that contains a list of parameters. Format " - "is one `=` string per line. " - "Empty lines, i.e. lines that contain only newlines, and lines " - "that start with '#' are ignored. Line content is stripped " - "before used. This is useful if a large number of parameters " - "should be provided."), + args=( + '-P', + '--parameter-list', + ), + doc='Name of a file that contains a list of parameters. Format ' + 'is one `=` string per line. ' + 'Empty lines, i.e. lines that contain only newlines, and lines ' + "that start with '#' are ignored. Line content is stripped " + 'before used. This is useful if a large number of parameters ' + 'should be provided.', + ), } @staticmethod @datasetmethod(name='make') @eval_results - def __call__(dataset=None, - *, - url_only=False, - template=None, - branch=None, - input_=None, - input_list=None, - output=None, - output_list=None, - parameter=None, - parameter_list=None, - ): - - dataset : Dataset = dataset.ds if dataset else Dataset('.') + def __call__( + dataset=None, + *, + url_only=False, + template=None, + branch=None, + input_=None, + input_list=None, + output=None, + output_list=None, + parameter=None, + parameter_list=None, + ): + dataset: Dataset = dataset.ds if dataset else Dataset('.') input_pattern = (input_ or []) + read_list(input_list) output_pattern = (output or []) + read_list(output_list) parameter = (parameter or []) + read_list(parameter_list) - parameter_dict = { - p.split('=', 1)[0]: p.split('=', 1)[1] - for p in parameter} + parameter_dict = {p.split('=', 1)[0]: p.split('=', 1)[1] for p in parameter} # We have to get the URL first, because saving the specification to # the dataset will change the version. url_base, reset_commit = get_url( - dataset, - branch, - template, - parameter_dict, - input_pattern, - output_pattern) + dataset, branch, template, parameter_dict, input_pattern, output_pattern + ) if not url_only: with provide_context( - dataset, - branch, - input_pattern, + dataset, + branch, + input_pattern, ) as worktree: execute(worktree, template, parameter_dict, output_pattern) output = collect(worktree, dataset, output_pattern) @@ -180,42 +206,43 @@ def __call__(dataset=None, for out in output: url = add_url(dataset, out, url_base, url_only=url_only) yield get_status_dict( - action='make', - path=str(dataset.pathobj / out), - status='ok', - message=f'added url: {url!r} to {out!r} in {dataset.pathobj}',) + action='make', + path=str(dataset.pathobj / out), + status='ok', + message=f'added url: {url!r} to {out!r} in {dataset.pathobj}', + ) def read_list(list_file: str | Path | None) -> list[str]: if list_file is None: return [] - return list(filter( - lambda s: s != '' and not s.startswith('#'), - [ - line.strip() - for line in Path(list_file).read_text().splitlines(keepends=False) - ])) - - -def get_url(dataset: Dataset, - branch: str | None, - template_name: str, - parameters: dict[str, str], - input_pattern: list[str], - output_pattern: list[str], - ) -> tuple[str, str]: + return list( + filter( + lambda s: s != '' and not s.startswith('#'), + [ + line.strip() + for line in Path(list_file).read_text().splitlines(keepends=False) + ], + ) + ) + +def get_url( + dataset: Dataset, + branch: str | None, + template_name: str, + parameters: dict[str, str], + input_pattern: list[str], + output_pattern: list[str], +) -> tuple[str, str]: # If something goes wrong after the make specification was saved, # the dataset state should be reset to `branch` reset_branch = branch or dataset.repo.get_hexsha() # Write the specification to a file in the dataset digest = write_spec( - dataset, - template_name, - input_pattern, - output_pattern, - parameters) + dataset, template_name, input_pattern, output_pattern, parameters + ) return ( f'{url_scheme}:///' @@ -224,13 +251,13 @@ def get_url(dataset: Dataset, ), reset_branch -def write_spec(dataset: Dataset, - method: str, - input_pattern: list[str], - output_pattern: list[str], - parameters: dict[str, str] - ) -> str: - +def write_spec( + dataset: Dataset, + method: str, + input_pattern: list[str], + output_pattern: list[str], + parameters: dict[str, str], +) -> str: # create the specification and hash it spec = build_json(method, input_pattern, output_pattern, parameters) hasher = hashlib.sha256() @@ -242,38 +269,28 @@ def write_spec(dataset: Dataset, spec_dir.mkdir(exist_ok=True) spec_file = spec_dir / digest with contextlib.chdir(dataset.pathobj): - call_git_success( - ['annex', 'unlock', str(spec_file)], - capture_output=True) + call_git_success(['annex', 'unlock', str(spec_file)], capture_output=True) spec_file.write_text(spec) dataset.save( message=f'[DATALAD] saving computation spec\n\nfile name: {digest}', - recursive=True, result_renderer='disabled') + recursive=True, + result_renderer='disabled', + ) return digest -def build_json(method: str, - inputs: list[str], - outputs: list[str], - parameters: dict[str, str] - ) -> str: - return json.dumps({ - 'method': method, - 'input': inputs, - 'output': outputs, - 'parameter': parameters}) - +def build_json( + method: str, inputs: list[str], outputs: list[str], parameters: dict[str, str] +) -> str: + return json.dumps( + {'method': method, 'input': inputs, 'output': outputs, 'parameter': parameters} + ) -def add_url(dataset: Dataset, - file_path: str, - url_base: str, - *, - url_only: bool - ) -> str: +def add_url(dataset: Dataset, file_path: str, url_base: str, *, url_only: bool) -> str: lgr.debug( - 'add_url: %s %s %s %s', - str(dataset), str(file_path), url_base, repr(url_only)) + 'add_url: %s %s %s %s', str(dataset), str(file_path), url_base, repr(url_only) + ) # Build the file-specific URL and store it in the annex url = url_base + f'&this={quote(file_path)}' @@ -288,7 +305,8 @@ def add_url(dataset: Dataset, can_add = call_git_success( ['annex', 'whereis', str(file_path)], cwd=file_dataset_path, - capture_output=True) + capture_output=True, + ) # Add the URL if can_add: @@ -296,51 +314,47 @@ def add_url(dataset: Dataset, ['annex', 'addurl', url, '--file', file_path] + (['--relaxed'] if url_only else []), cwd=file_dataset_path, - capture_output=True) + capture_output=True, + ) if not success: msg = ( f'\naddurl failed:\nfile_dataset_path: {file_dataset_path}\n' - f'url: {url!r}\nfile_path: {file_path!r}') + f'url: {url!r}\nfile_path: {file_path!r}' + ) raise RuntimeError(msg) return url def get_file_dataset(file: Path) -> tuple[Path, Path]: - """ Get dataset of file and relative path of file from the dataset + """Get dataset of file and relative path of file from the dataset Determine the path of the dataset that contains the file and the relative path of the file in this dataset.""" - top_level = Path(call_git_oneline( - ['rev-parse', '--show-toplevel'], - cwd=file.parent)) - return ( - Path(top_level), - file.absolute().relative_to(top_level)) - + top_level = Path( + call_git_oneline(['rev-parse', '--show-toplevel'], cwd=file.parent) + ) + return (Path(top_level), file.absolute().relative_to(top_level)) -def provide(dataset: Dataset, - branch: str | None, - input_patterns: list[str], - ) -> Path: +def provide( + dataset: Dataset, + branch: str | None, + input_patterns: list[str], +) -> Path: lgr.debug('provide: %s %s %s', dataset, branch, input_patterns) result = dataset.provision( - input=input_patterns, - branch=branch, - result_renderer='disabled') + input=input_patterns, branch=branch, result_renderer='disabled' + ) return Path(result[0]['path']) @contextlib.contextmanager -def provide_context(dataset: Dataset, - branch: str | None, - input_patterns: list[str], - ) -> Generator: - - worktree = provide( - dataset, - branch=branch, - input_patterns=input_patterns) +def provide_context( + dataset: Dataset, + branch: str | None, + input_patterns: list[str], +) -> Generator: + worktree = provide(dataset, branch=branch, input_patterns=input_patterns) try: yield worktree finally: @@ -348,22 +362,24 @@ def provide_context(dataset: Dataset, dataset.provision(delete=worktree, result_renderer='disabled') -def execute(worktree: Path, - template_name: str, - parameter: dict[str, str], - output_pattern: list[str], - ) -> None: - +def execute( + worktree: Path, + template_name: str, + parameter: dict[str, str], + output_pattern: list[str], +) -> None: lgr.debug( - 'execute: %s %s %s %s', str(worktree), - template_name, repr(parameter), repr(output_pattern)) + 'execute: %s %s %s %s', + str(worktree), + template_name, + repr(parameter), + repr(output_pattern), + ) worktree_ds = Dataset(worktree) # Determine which outputs already exist - existing_outputs = resolve_patterns( - root_dir=worktree, - patterns=output_pattern) + existing_outputs = resolve_patterns(root_dir=worktree, patterns=output_pattern) # Get the subdatasets, directories, and files of the existing output space create_output_space(worktree_ds, existing_outputs) @@ -377,11 +393,11 @@ def execute(worktree: Path, compute(worktree, worktree / template_path, parameter) -def collect(worktree: Path, - dataset: Dataset, - output_pattern: Iterable[str], - ) -> set[str]: - +def collect( + worktree: Path, + dataset: Dataset, + output_pattern: Iterable[str], +) -> set[str]: output = resolve_patterns(root_dir=worktree, patterns=output_pattern) # Unlock output files in the dataset-directory and copy the result @@ -397,9 +413,7 @@ def collect(worktree: Path, return output -def unlock_files(dataset: Dataset, - files: Iterable[str] - ) -> None: +def unlock_files(dataset: Dataset, files: Iterable[str]) -> None: """Use datalad to resolve subdatasets and unlock files in the dataset.""" # TODO: for some reason `dataset unlock` does not operate in the # context of `dataset.pathobj`, so we need to change the working @@ -417,9 +431,7 @@ def unlock_files(dataset: Dataset, dataset.unlock(file, result_renderer='disabled') -def create_output_space(dataset: Dataset, - files: Iterable[str] - ) -> None: +def create_output_space(dataset: Dataset, files: Iterable[str]) -> None: """Get all files that are part of the output space.""" for f in files: with contextlib.suppress(IncompleteResultsError): diff --git a/datalad_remake/commands/provision_cmd.py b/datalad_remake/commands/provision_cmd.py index 19e8505..3bca2fd 100644 --- a/datalad_remake/commands/provision_cmd.py +++ b/datalad_remake/commands/provision_cmd.py @@ -3,6 +3,7 @@ Data is provisioned in a temporary worktree. All subdatasets are currently also provisioned. """ + from __future__ import annotations import logging @@ -60,79 +61,99 @@ class Provision(ValidatedInterface): environment for `make` commands. """ - _validator_ = EnsureCommandParameterization({ - 'dataset': EnsureDataset(installed=True), - 'input': EnsureListOf(EnsureStr(min_len=1)), - 'input_list': EnsureStr(min_len=1), - 'tmp_dir': EnsurePath(is_mode=stat.S_ISDIR), - 'delete': EnsureDataset(installed=True), - 'no_globbing': EnsureBool(), - }) + _validator_ = EnsureCommandParameterization( + { + 'dataset': EnsureDataset(installed=True), + 'input': EnsureListOf(EnsureStr(min_len=1)), + 'input_list': EnsureStr(min_len=1), + 'tmp_dir': EnsurePath(is_mode=stat.S_ISDIR), + 'delete': EnsureDataset(installed=True), + 'no_globbing': EnsureBool(), + } + ) # parameters of the command, must be exhaustive _params_: ClassVar[dict[str, Parameter]] = { 'dataset': Parameter( args=('-d', '--dataset'), - doc="Dataset to be used as a configuration source. Beyond " - "reading configuration items, this command does not interact with " - "the dataset."), + doc='Dataset to be used as a configuration source. Beyond ' + 'reading configuration items, this command does not interact with ' + 'the dataset.', + ), 'branch': Parameter( - args=('-b', '--branch',), - doc="Branch (or commit) that should be provisioned, if " - "not specified HEAD will be used"), + args=( + '-b', + '--branch', + ), + doc='Branch (or commit) that should be provisioned, if ' + 'not specified HEAD will be used', + ), 'delete': Parameter( args=('--delete',), - doc="Delete the temporary worktree WORKTREE that belongs the the " - "dataset (cannot be used with `-b`, `--branch`, `-i`," - "`--input`, `-I`, or `--input-list`)."), + doc='Delete the temporary worktree WORKTREE that belongs the the ' + 'dataset (cannot be used with `-b`, `--branch`, `-i`,' + '`--input`, `-I`, or `--input-list`).', + ), 'input': Parameter( - args=('-i', '--input',), + args=( + '-i', + '--input', + ), action='append', - doc="An input file pattern (repeat for multiple inputs, " - "file pattern support python globbing, globbing is done in the " - "worktree and through all matching subdatasets, installing " - "if necessary)."), + doc='An input file pattern (repeat for multiple inputs, ' + 'file pattern support python globbing, globbing is done in the ' + 'worktree and through all matching subdatasets, installing ' + 'if necessary).', + ), 'input_list': Parameter( - args=('-I', '--input-list',), - doc="Name of a file that contains a list of input file patterns. " - "Format is one file per line, relative path from `dataset`. " - "Empty lines, i.e. lines that contain only newlines, and lines " - "that start with '#' are ignored. Line content is stripped " - "before used. This is useful if a large number of input file " - "patterns should be provided."), + args=( + '-I', + '--input-list', + ), + doc='Name of a file that contains a list of input file patterns. ' + 'Format is one file per line, relative path from `dataset`. ' + 'Empty lines, i.e. lines that contain only newlines, and lines ' + "that start with '#' are ignored. Line content is stripped " + 'before used. This is useful if a large number of input file ' + 'patterns should be provided.', + ), 'worktree_dir': Parameter( - args=('-w', '--worktree-dir',), - doc="Path of the directory that should become the temporary " - "worktree, defaults to `tempfile.TemporaryDirectory().name`."), + args=( + '-w', + '--worktree-dir', + ), + doc='Path of the directory that should become the temporary ' + 'worktree, defaults to `tempfile.TemporaryDirectory().name`.', + ), } @staticmethod @datasetmethod(name='provision') @eval_results - def __call__(dataset=None, - branch=None, - delete=None, - input_=None, - input_list=None, - worktree_dir=None, - ): - - dataset : Dataset = dataset.ds if dataset else Dataset('.') + def __call__( + dataset=None, + branch=None, + delete=None, + input_=None, + input_list=None, + worktree_dir=None, + ): + dataset: Dataset = dataset.ds if dataset else Dataset('.') if delete: if branch or input_: msg = ( 'Cannot use `-d`, `--delete` with `-b`, `--branch`,' ' `-i`, or `--input`' ) - raise ValueError( - msg) + raise ValueError(msg) remove(dataset, delete.ds) yield get_status_dict( action='provision [delete]', path=delete.ds.path, status='ok', - message=f'delete workspace: {delete.ds.path!r} from dataset {dataset}') + message=f'delete workspace: {delete.ds.path!r} from dataset {dataset}', + ) return worktree_dir: Path = Path(worktree_dir or TemporaryDirectory().name) @@ -140,29 +161,24 @@ def __call__(dataset=None, yield from provide(dataset, worktree_dir, inputs, branch) -def remove(dataset: Dataset, - worktree: Dataset - ) -> None: +def remove(dataset: Dataset, worktree: Dataset) -> None: worktree.drop( - what='all', - reckless='kill', - recursive=True, - result_renderer='disabled') + what='all', reckless='kill', recursive=True, result_renderer='disabled' + ) prune_worktrees(dataset) - call_git_success( - ['branch', '-d', worktree.pathobj.name], - cwd=dataset.pathobj) + call_git_success(['branch', '-d', worktree.pathobj.name], cwd=dataset.pathobj) def prune_worktrees(dataset: Dataset) -> None: call_git_lines(['worktree', 'prune'], cwd=dataset.pathobj) -def provide(dataset: Dataset, - worktree_dir: Path, - input_patterns: list[str], - source_branch: str | None = None, - ) -> Generator: +def provide( + dataset: Dataset, + worktree_dir: Path, + input_patterns: list[str], + source_branch: str | None = None, +) -> Generator: """Provide paths defined by input_patterns in a temporary worktree Parameters @@ -186,10 +202,10 @@ def provide(dataset: Dataset, worktree_dir.mkdir(parents=True, exist_ok=True) # Create a worktree - args = ['worktree', 'add'] + [str(worktree_dir)] + ( - [source_branch] - if source_branch - else [] + args = ( + ['worktree', 'add'] + + [str(worktree_dir)] + + ([source_branch] if source_branch else []) ) call_git_lines(args, cwd=dataset.pathobj) @@ -201,7 +217,8 @@ def provide(dataset: Dataset, path=element['path'], status='error', state=element['state'], - message=f'cannot provision {element["state"]} input: {element["path"]!r} from dataset {dataset}') + message=f'cannot provision {element["state"]} input: {element["path"]!r} from dataset {dataset}', + ) if is_dirty: return @@ -216,13 +233,13 @@ def provide(dataset: Dataset, action='provision', path=str(worktree_dir), status='ok', - message=f'provisioned dataset: {dataset} in workspace: {worktree_dir!r}',) + message=f'provisioned dataset: {dataset} in workspace: {worktree_dir!r}', + ) -def resolve_patterns(dataset: Dataset, - worktree: Dataset, - pattern_list: list[str] - ) -> set[Path]: +def resolve_patterns( + dataset: Dataset, worktree: Dataset, pattern_list: list[str] +) -> set[Path]: """Resolve file patterns in the dataset This method will resolve relative path-patterns in the dataset. It will @@ -258,7 +275,9 @@ def resolve_patterns(dataset: Dataset, Path(), pattern_parts, get_uninstalled_subdatasets(worktree), - get_installed_subdatasets(dataset))) + get_installed_subdatasets(dataset), + ) + ) return matches @@ -267,15 +286,17 @@ def get_uninstalled_subdatasets(dataset: Dataset) -> set[Path]: return { Path(result['path']).relative_to(dataset.pathobj) for result in dataset.subdatasets(recursive=True, result_renderer='disabled') - if result['state'] == 'absent'} + if result['state'] == 'absent' + } -def glob_pattern(root: Dataset, - position: Path, - pattern: list[str], - uninstalled_subdatasets: set[Path], - locally_available_subdatasets: Iterable[tuple[Path, Path, Path]], - ) -> set[Path]: +def glob_pattern( + root: Dataset, + position: Path, + pattern: list[str], + uninstalled_subdatasets: set[Path], + locally_available_subdatasets: Iterable[tuple[Path, Path, Path]], +) -> set[Path]: """Glob a pattern in a dataset installing subdatasets if necessary Parameters @@ -311,15 +332,15 @@ def glob_pattern(root: Dataset, position, pattern[1:], uninstalled_subdatasets, - locally_available_subdatasets) + locally_available_subdatasets, + ) else: result = set() # Match all elements at the current position with the first part of the # pattern. for rec_match in glob( - '*' if pattern[0] == '**' else pattern[0], - root_dir=root.pathobj / position + '*' if pattern[0] == '**' else pattern[0], root_dir=root.pathobj / position ): match = position / rec_match @@ -329,10 +350,8 @@ def glob_pattern(root: Dataset, if match.is_dir() and match in uninstalled_subdatasets: lgr.info('Installing subdataset %s to glob input', match) install_subdataset( - root, - match, - uninstalled_subdatasets, - locally_available_subdatasets) + root, match, uninstalled_subdatasets, locally_available_subdatasets + ) # We have a match, try to match the remainder of the pattern. submatch_pattern = pattern if pattern[0] == '**' else pattern[1:] @@ -342,7 +361,9 @@ def glob_pattern(root: Dataset, match, submatch_pattern, uninstalled_subdatasets, - locally_available_subdatasets)) + locally_available_subdatasets, + ) + ) return result @@ -354,43 +375,47 @@ def get_dirty_elements(dataset: Dataset) -> Generator: yield result -def install_subdataset(worktree: Dataset, - subdataset_path: Path, - uninstalled_subdatasets: set[Path], - locally_available_datasets: Iterable[tuple[Path, Path, Path]], - ) -> None: +def install_subdataset( + worktree: Dataset, + subdataset_path: Path, + uninstalled_subdatasets: set[Path], + locally_available_datasets: Iterable[tuple[Path, Path, Path]], +) -> None: """Install a subdataset, prefer locally available subdatasets""" - local_subdataset = ([ - dataset - for dataset in locally_available_datasets - if dataset[2] == subdataset_path] or [None])[0] + local_subdataset = ( + [ + dataset + for dataset in locally_available_datasets + if dataset[2] == subdataset_path + ] + or [None] + )[0] if local_subdataset: absolute_path, parent_ds_path, path_from_root = local_subdataset # Set the URL to the full source path - args = ['-C', str(worktree.pathobj / parent_ds_path), - 'submodule', 'set-url', '--', - str(path_from_root.relative_to(parent_ds_path)), - 'file://' + str(absolute_path)] + args = [ + '-C', + str(worktree.pathobj / parent_ds_path), + 'submodule', + 'set-url', + '--', + str(path_from_root.relative_to(parent_ds_path)), + 'file://' + str(absolute_path), + ] call_git_lines(args) - worktree.get( - str(subdataset_path), - get_data=False, - result_renderer='disabled') + worktree.get(str(subdataset_path), get_data=False, result_renderer='disabled') uninstalled_subdatasets.remove(subdataset_path) uninstalled_subdatasets.update(get_uninstalled_subdatasets(worktree)) -def get_installed_subdatasets(dataset: Dataset - ) -> Iterable[tuple[Path, Path, Path]]: - results = dataset.subdatasets( - recursive=True, - result_renderer='disabled') +def get_installed_subdatasets(dataset: Dataset) -> Iterable[tuple[Path, Path, Path]]: + results = dataset.subdatasets(recursive=True, result_renderer='disabled') return [ ( Path(result['path']), Path(result['parentds']).relative_to(dataset.pathobj), - Path(result['path']).relative_to(dataset.pathobj) + Path(result['path']).relative_to(dataset.pathobj), ) for result in results if result['state'] == 'present' diff --git a/datalad_remake/commands/tests/create_datasets.py b/datalad_remake/commands/tests/create_datasets.py index ca2c545..06e6a0b 100644 --- a/datalad_remake/commands/tests/create_datasets.py +++ b/datalad_remake/commands/tests/create_datasets.py @@ -15,23 +15,29 @@ def update_config_for_remake(dataset: Dataset): scope='local', recursive=True, spec=[('annex.security.allow-unverified-downloads', 'ACKTHPPT')], - result_renderer='disabled') + result_renderer='disabled', + ) def add_remake_remote(dataset: Dataset): - call_git_success([ - '-C', dataset.path, - 'annex', 'initremote', 'remake', - 'type=external', 'externaltype=datalad-remake', - 'encryption=none'], - capture_output=True) - - -def create_ds_hierarchy(tmp_path: Path, - name: str, - subdataset_levels: int = 2 - ) -> list[tuple[str, Path, Dataset]]: - + call_git_success( + [ + '-C', + dataset.path, + 'annex', + 'initremote', + 'remake', + 'type=external', + 'externaltype=datalad-remake', + 'encryption=none', + ], + capture_output=True, + ) + + +def create_ds_hierarchy( + tmp_path: Path, name: str, subdataset_levels: int = 2 +) -> list[tuple[str, Path, Dataset]]: # Create root dataset root_dataset = Dataset(tmp_path / name) root_dataset.create(force=True, result_renderer='disabled') @@ -52,7 +58,7 @@ def create_ds_hierarchy(tmp_path: Path, # Link the datasets for index in range(len(datasets) - 2, -1, -1): - dataset, subdataset = datasets[index:index+2] + dataset, subdataset = datasets[index : index + 2] dataset[2].install( path=subdataset[0], source='file://' + subdataset[2].path, @@ -73,12 +79,12 @@ def create_ds_hierarchy(tmp_path: Path, return datasets -def create_simple_computation_dataset(tmp_path: Path, - dataset_name: str, - subdataset_levels: int, - test_method: str, - ) -> Dataset: - +def create_simple_computation_dataset( + tmp_path: Path, + dataset_name: str, + subdataset_levels: int, + test_method: str, +) -> Dataset: datasets = create_ds_hierarchy(tmp_path, dataset_name, subdataset_levels) root_dataset = datasets[0][2] diff --git a/datalad_remake/commands/tests/test_collection.py b/datalad_remake/commands/tests/test_collection.py index 371e625..e0c1590 100644 --- a/datalad_remake/commands/tests/test_collection.py +++ b/datalad_remake/commands/tests/test_collection.py @@ -6,14 +6,11 @@ def test_collect(tmp_path): - dataset = create_ds_hierarchy(tmp_path, 'ds1', 1)[0][2] worktree_dir = tmp_path / 'ds1_worktree' worktree_dir.mkdir(parents=True, exist_ok=False) - worktree = dataset.provision( - worktree_dir=worktree_dir, - result_renderer='disabled') + worktree = dataset.provision(worktree_dir=worktree_dir, result_renderer='disabled') result_dir = worktree_dir / 'results' / 'sub-01' result_dir.mkdir(parents=True) @@ -23,7 +20,10 @@ def test_collect(tmp_path): result = collect( worktree=Path(worktree[0]['path']), dataset=dataset, - output_pattern=['results/**'] + output_pattern=['results/**'], ) assert result == {'results/sub-01/a.txt', 'results/sub-01/b.txt'} - assert set(get_file_list(dataset.pathobj / 'results')) == {'sub-01/a.txt', 'sub-01/b.txt'} + assert set(get_file_list(dataset.pathobj / 'results')) == { + 'sub-01/a.txt', + 'sub-01/b.txt', + } diff --git a/datalad_remake/commands/tests/test_compute.py b/datalad_remake/commands/tests/test_compute.py index 4e51fbb..6b763da 100644 --- a/datalad_remake/commands/tests/test_compute.py +++ b/datalad_remake/commands/tests/test_compute.py @@ -15,9 +15,7 @@ def test_duplicated_computation(tmp_path): - - root_dataset = create_simple_computation_dataset( - tmp_path, 'ds1', 0, test_method) + root_dataset = create_simple_computation_dataset(tmp_path, 'ds1', 0, test_method) # run the same command twice _run_simple_computation(root_dataset) @@ -25,20 +23,21 @@ def test_duplicated_computation(tmp_path): def test_speculative_computation(tmp_path, datalad_cfg): - - root_dataset = create_simple_computation_dataset( - tmp_path, 'ds1', 0, test_method) + root_dataset = create_simple_computation_dataset(tmp_path, 'ds1', 0, test_method) root_dataset.make( template='test_method', parameter=['name=Robert', 'file=spec.txt'], output=['spec.txt'], url_only=True, - result_renderer='disabled') + result_renderer='disabled', + ) # set annex security related variables to allow datalad-remake-URLs # in speculative make commands - datalad_cfg.set('annex.security.allow-unverified-downloads', 'ACKTHPPT', scope='global') + datalad_cfg.set( + 'annex.security.allow-unverified-downloads', 'ACKTHPPT', scope='global' + ) # Perform the speculative computation root_dataset.get('spec.txt') @@ -50,7 +49,8 @@ def _run_simple_computation(root_dataset: Dataset): template='test_method', parameter=['name=Robert', 'file=a.txt'], output=['a.txt'], - result_renderer='disabled') + result_renderer='disabled', + ) # check that the output is correct assert (root_dataset.pathobj / 'a.txt').read_text() == 'Hello Robert\n' diff --git a/datalad_remake/commands/tests/test_listhandling.py b/datalad_remake/commands/tests/test_listhandling.py index a1864f0..dd18095 100644 --- a/datalad_remake/commands/tests/test_listhandling.py +++ b/datalad_remake/commands/tests/test_listhandling.py @@ -27,17 +27,19 @@ def test_list_reading_strip(tmp_path: Path): assert read_list(str(list_file)) == ['a', 'b', 'c'] -def _test_wordlist(tmp_path: Path, - word_list: list[str], - ) -> None: +def _test_wordlist( + tmp_path: Path, + word_list: list[str], +) -> None: list_file = _write_list(tmp_path, word_list) assert read_list(str(list_file)) == word_list assert read_list(list_file) == word_list -def _write_list(tmp_path: Path, - word_list: list[str], - ) -> Path: +def _write_list( + tmp_path: Path, + word_list: list[str], +) -> Path: list_file = tmp_path / 'list.txt' list_file.write_text('\n'.join(word_list)) return list_file diff --git a/datalad_remake/commands/tests/test_provision.py b/datalad_remake/commands/tests/test_provision.py index cf4cc71..3fcb8f7 100644 --- a/datalad_remake/commands/tests/test_provision.py +++ b/datalad_remake/commands/tests/test_provision.py @@ -23,33 +23,29 @@ all_paths = [ - template.format(file=f) - for template in file_path_templates - for f in ['a', 'b'] + template.format(file=f) for template in file_path_templates for f in ['a', 'b'] ] -a_paths = [ - path.format(file='a') - for path in file_path_templates -] +a_paths = [path.format(file='a') for path in file_path_templates] -b_paths = [ - path.format(file='b') - for path in file_path_templates -] +b_paths = [path.format(file='b') for path in file_path_templates] def test_worktree_basic(tmp_path): dataset = create_ds_hierarchy(tmp_path, 'ds1', 3)[0][2] inputs = [ - 'a.txt', 'b.txt', - 'ds1_subds0/a0.txt', 'ds1_subds0/b0.txt', - 'ds1_subds0/ds1_subds1/a1.txt', 'ds1_subds0/ds1_subds1/b1.txt' + 'a.txt', + 'b.txt', + 'ds1_subds0/a0.txt', + 'ds1_subds0/b0.txt', + 'ds1_subds0/ds1_subds1/a1.txt', + 'ds1_subds0/ds1_subds1/b1.txt', ] provision_result = dataset.provision( worktree_dir=tmp_path / 'ds1_worktree1', input=inputs, - result_renderer='disabled')[0] + result_renderer='disabled', + )[0] worktree = Dataset(provision_result['path']) # Check input availability @@ -67,10 +63,8 @@ def check_deleted_worktrees(ds: Dataset): check_deleted_worktrees(dataset) dataset.drop( - what='all', - reckless='kill', - recursive=True, - result_renderer='disabled') + what='all', reckless='kill', recursive=True, result_renderer='disabled' + ) def test_worktree_globbing(tmp_path): @@ -88,10 +82,7 @@ def test_worktree_globbing(tmp_path): worktree = Path(result['path']) worktree_set = set(get_file_list(worktree)) - assert worktree_set == { - path.format(ds_name='ds1') - for path in all_paths - } + assert worktree_set == {path.format(ds_name='ds1') for path in all_paths} dataset.provision(delete=worktree, result_renderer='disabled') result = dataset.provision( @@ -107,23 +98,17 @@ def test_worktree_globbing(tmp_path): worktree = Path(result['path']) worktree_set = set(get_file_list(worktree)) - assert { - path.format(ds_name='ds1') - for path in b_paths - }.issubset(worktree_set) + assert {path.format(ds_name='ds1') for path in b_paths}.issubset(worktree_set) dataset.provision(delete=worktree, result_renderer='disabled') dataset.drop( - what='all', - reckless='kill', - recursive=True, - result_renderer='disabled') + what='all', reckless='kill', recursive=True, result_renderer='disabled' + ) -def get_file_list(root: Path, - path: Path|None = None, - prefix: Path|None = None - ) -> Iterable[str]: +def get_file_list( + root: Path, path: Path | None = None, prefix: Path | None = None +) -> Iterable[str]: prefix = prefix or Path('') path = path or root for child in path.iterdir(): @@ -156,31 +141,31 @@ def test_unclean_dataset(tmp_path): input=input_pattern, worktree_dir=tmp_path / 'ds1_worktree1', on_failure='ignore', - result_renderer='disabled') - assert {(result['status'], result['state']) for result in results} == \ - {('error', 'modified'), ('error', 'untracked')} + result_renderer='disabled', + ) + assert {(result['status'], result['state']) for result in results} == { + ('error', 'modified'), + ('error', 'untracked'), + } # Check that a saved dataset can be provisioned dataset.save() dataset.provision( input=input_pattern, worktree_dir=tmp_path / 'ds1_worktree2', - result_renderer='disabled') + result_renderer='disabled', + ) def test_branch_deletion_after_provision(tmp_path): dataset = create_ds_hierarchy(tmp_path, 'ds1', 3)[0][2] with provide_context( - dataset=dataset, - branch=None, - input_patterns=['a.txt'] + dataset=dataset, branch=None, input_patterns=['a.txt'] ) as worktree: assert worktree.exists() assert not worktree.exists() with contextlib.chdir(dataset.path): - branches = [ - line.strip() - for line in call_git_lines(['branch'])] + branches = [line.strip() for line in call_git_lines(['branch'])] assert worktree.name not in branches @@ -188,32 +173,37 @@ def test_not_present_local_datasets(tmp_path): root_ds = Dataset(tmp_path / 'ds1') root_ds.create(cfg_proc='text2git', result_renderer='disabled') root_ds.clone( - 'https://github.com/OpenNeuroDatasets/ds000102', - result_renderer='disabled') + 'https://github.com/OpenNeuroDatasets/ds000102', result_renderer='disabled' + ) provisioned_dataset = Dataset( - root_ds.provision( - input=['ds000102/README'], - result_renderer='disabled')[0]['path']) + root_ds.provision(input=['ds000102/README'], result_renderer='disabled')[0][ + 'path' + ] + ) url = _get_submodule_url(provisioned_dataset, 'ds000102') assert url.startswith(f'file://{root_ds.path}') root_ds.drop( - 'ds000102', - what='all', - reckless='availability', - result_renderer='disabled') + 'ds000102', what='all', reckless='availability', result_renderer='disabled' + ) provisioned_dataset_2 = Dataset( root_ds.provision( - input=['ds000102/README'], - on_failure='ignore', - result_renderer='disabled')[0]['path']) + input=['ds000102/README'], on_failure='ignore', result_renderer='disabled' + )[0]['path'] + ) url_2 = _get_submodule_url(provisioned_dataset_2, 'ds000102') assert url_2 == 'https://github.com/OpenNeuroDatasets/ds000102' def _get_submodule_url(dataset: Dataset, submodule_path: str) -> str: x = call_git_lines( - ['config', '-f', str(dataset.pathobj / '.gitmodules'), '--get', - f'submodule.{submodule_path}.url']) + [ + 'config', + '-f', + str(dataset.pathobj / '.gitmodules'), + '--get', + f'submodule.{submodule_path}.url', + ] + ) return x[0].strip() diff --git a/datalad_remake/tests/test_register.py b/datalad_remake/tests/test_register.py index a8781a3..89f3a7f 100644 --- a/datalad_remake/tests/test_register.py +++ b/datalad_remake/tests/test_register.py @@ -1,6 +1,5 @@ - - def test_register(): import datalad.api as da + assert hasattr(da, 'make') assert hasattr(da, 'provision') diff --git a/datalad_remake/utils/compute.py b/datalad_remake/utils/compute.py index 3f816ee..01a9f50 100644 --- a/datalad_remake/utils/compute.py +++ b/datalad_remake/utils/compute.py @@ -12,9 +12,10 @@ lgr = logging.getLogger('datalad.compute') -def substitute_string(format_str: str, - replacements: dict[str, str], - ) -> str: +def substitute_string( + format_str: str, + replacements: dict[str, str], +) -> str: for variable_name, replacement in replacements.items(): place_holder = '{' + variable_name + '}' if place_holder in format_str: @@ -22,21 +23,21 @@ def substitute_string(format_str: str, return format_str -def substitute_arguments(spec: dict[str, Any], - replacements: dict[str, str], - format_list_id: str, - ) -> list[str]: - +def substitute_arguments( + spec: dict[str, Any], + replacements: dict[str, str], + format_list_id: str, +) -> list[str]: return [ substitute_string(str(format_str), replacements) for format_str in spec[format_list_id] ] -def get_substitutions(template: dict[str, Any], - arguments: dict[str, str], - ) -> dict[str, str]: - +def get_substitutions( + template: dict[str, Any], + arguments: dict[str, str], +) -> dict[str, str]: # Check the user specified inputs inputs = template['inputs'] if len(inputs) != len(arguments.keys()): @@ -45,24 +46,22 @@ def get_substitutions(template: dict[str, Any], if not all(input_name in arguments for input_name in inputs): msg = ( f'Template inputs and arguments have different names: ' - f'inputs: {inputs}, arguments: {arguments}') + f'inputs: {inputs}, arguments: {arguments}' + ) raise ValueError(msg) if len(inputs) != len(set(inputs)): msg = 'Template inputs contain duplicates' raise ValueError(msg) - return { - input_name: arguments[input_name] - for input_name in inputs - } - + return {input_name: arguments[input_name] for input_name in inputs} -def compute(root_directory: Path, - template_path: Path, - compute_arguments: dict[str, str], - ) -> None: +def compute( + root_directory: Path, + template_path: Path, + compute_arguments: dict[str, str], +) -> None: with template_path.open('rb') as f: template = tomllib.load(f) @@ -70,16 +69,20 @@ def compute(root_directory: Path, substitutions['root_directory'] = str(root_directory) substituted_executable = substitute_string(template['executable'], substitutions) - substituted_arguments = substitute_arguments( - template, - substitutions, - 'arguments' - ) + substituted_arguments = substitute_arguments(template, substitutions, 'arguments') with contextlib.chdir(root_directory): if template.get('use_shell', 'false') == 'true': - lgr.debug(f'compute: RUNNING: with shell=True: {" ".join([substituted_executable, *substituted_arguments])}') - subprocess.run(' '.join([substituted_executable, *substituted_arguments]), shell=True, check=True) # noqa: S602 + lgr.debug( + f'compute: RUNNING: with shell=True: {" ".join([substituted_executable, *substituted_arguments])}' + ) + subprocess.run( + ' '.join([substituted_executable, *substituted_arguments]), + shell=True, + check=True, + ) # noqa: S602 else: - lgr.debug(f'compute: RUNNING: {[substituted_executable, *substituted_arguments]}') + lgr.debug( + f'compute: RUNNING: {[substituted_executable, *substituted_arguments]}' + ) subprocess.run([substituted_executable, *substituted_arguments], check=True) diff --git a/datalad_remake/utils/glob.py b/datalad_remake/utils/glob.py index 607954e..3ffbdbb 100644 --- a/datalad_remake/utils/glob.py +++ b/datalad_remake/utils/glob.py @@ -10,12 +10,13 @@ # Resolve input file patterns in the original dataset -def resolve_patterns(root_dir: str | Path, - patterns: Iterable[str] - ) -> set[str]: +def resolve_patterns(root_dir: str | Path, patterns: Iterable[str]) -> set[str]: return set( filter( lambda p: not (Path(root_dir) / p).is_dir(), chain.from_iterable( glob(pattern, root_dir=str(root_dir), recursive=True) - for pattern in patterns))) + for pattern in patterns + ), + ) + ) diff --git a/datalad_remake/utils/tests/test_substitution.py b/datalad_remake/utils/tests/test_substitution.py index 2ee2480..86d4ff9 100644 --- a/datalad_remake/utils/tests/test_substitution.py +++ b/datalad_remake/utils/tests/test_substitution.py @@ -1,5 +1,3 @@ - - from ..compute import ( substitute_arguments, substitute_string, @@ -7,10 +5,13 @@ def test_multiple_substitutions(): - assert substitute_string( - 'This is a {test} with {multiple} substitutions', - {'test': 'string', 'multiple': 'multiple'}, - ) == 'This is a string with multiple substitutions' + assert ( + substitute_string( + 'This is a {test} with {multiple} substitutions', + {'test': 'string', 'multiple': 'multiple'}, + ) + == 'This is a string with multiple substitutions' + ) def test_argument_substitution(): @@ -20,11 +21,12 @@ def test_argument_substitution(): ] s = substitute_arguments( {'arguments': arguments}, - {'root_directory': '/path/to/root', + { + 'root_directory': '/path/to/root', 'input_dir': 'input', 'output_dir': 'output', }, - 'arguments' + 'arguments', ) assert s == [ '/path/to/root/input',