Commit 96a1909
[rf] reformat code
christian-monch committed Oct 21, 2024
1 parent e591531 commit 96a1909
Showing 15 changed files with 548 additions and 493 deletions.
8 changes: 4 additions & 4 deletions datalad_remake/__init__.py
@@ -15,7 +15,7 @@
 # to be found by datalad
 command_suite = (
     # description of the command suite, displayed in cmdline help
-    "DataLad remake command suite",
+    'DataLad remake command suite',
     [
         # specification of a command, any number of commands can be defined
         (
@@ -26,7 +26,7 @@
             # optional name of the command in the cmdline API
             'make',
             # optional name of the command in the Python API
-            'make'
+            'make',
         ),
         (
             # importable module that contains the command implementation
@@ -36,9 +36,9 @@
             # optional name of the command in the cmdline API
             'provision',
             # optional name of the command in the Python API
-            'provision'
+            'provision',
         ),
-    ]
+    ],
 )
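Every hunk in this commit applies the same mechanical style: double-quoted strings become single-quoted, and the final element of a multi-line tuple or list gains a trailing comma. The trailing comma is what keeps later diffs minimal; a generic sketch (not code from this repository):

    # Appending to the first list later touches two lines in a diff,
    # because the old last element must gain a comma; appending to the
    # second touches only the new line. Both are equivalent at runtime.
    without_comma = [
        'make',
        'provision'
    ]
    with_comma = [
        'make',
        'provision',
    ]
    assert without_comma == with_comma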


68 changes: 38 additions & 30 deletions datalad_remake/annexremotes/remake_remote.py
@@ -39,7 +39,6 @@


 class RemakeRemote(SpecialRemote):
-
     def __init__(self, annex: Master):
         super().__init__(annex)

@@ -86,10 +85,7 @@ def get_url_for_key(self, key: str) -> str:
         self.annex.debug(f'get_url_for_key: key: {key!r}, urls: {urls!r}')
         return urls[0]

-    def get_compute_info(self,
-                         key: str
-                         ) -> tuple[dict[str, Any], Dataset]:
-
+    def get_compute_info(self, key: str) -> tuple[dict[str, Any], Dataset]:
         def get_assigned_value(assignment: str) -> str:
             return assignment.split('=', 1)[1]

@@ -106,10 +102,7 @@ def get_assigned_value(assignment: str) -> str:
         return {
             'root_version': root_version,
             'this': this,
-            **{
-                name: spec[name]
-                for name in ['method', 'input', 'output', 'parameter']
-            }
+            **{name: spec[name] for name in ['method', 'input', 'output', 'parameter']},
         }, dataset

     def transfer_retrieve(self, key: str, file_name: str) -> None:
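The reformatted return value above merges fixed keys with a comprehension-built dict via `**` unpacking; the same pattern in a standalone sketch (values invented for illustration):

    spec = {'method': 'echo', 'input': [], 'output': ['a.txt'], 'parameter': {}}
    compute_info = {
        'root_version': '96a1909',  # hypothetical commit id
        'this': 'a.txt',
        # unpack the selected spec entries into the enclosing literal
        **{name: spec[name] for name in ['method', 'input', 'output', 'parameter']},
    }
    assert compute_info['method'] == 'echo'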
@@ -122,26 +115,33 @@ def transfer_retrieve(self, key: str, file_name: str) -> None:
         lgr.debug('Starting provision')
         self.annex.debug('Starting provision')
         with provide_context(
-            dataset,
-            compute_info['root_version'],
-            compute_info['input']
+            dataset, compute_info['root_version'], compute_info['input']
         ) as worktree:
             lgr.debug('Starting execution')
             self.annex.debug('Starting execution')
-            execute(worktree, compute_info['method'], compute_info['parameter'], compute_info['output'])
+            execute(
+                worktree,
+                compute_info['method'],
+                compute_info['parameter'],
+                compute_info['output'],
+            )
             lgr.debug('Starting collection')
             self.annex.debug('Starting collection')
-            self._collect(worktree, dataset, compute_info['output'], compute_info['this'], file_name)
+            self._collect(
+                worktree,
+                dataset,
+                compute_info['output'],
+                compute_info['this'],
+                file_name,
+            )
             lgr.debug('Leaving provision context')
             self.annex.debug('Leaving provision context')

     def checkpresent(self, key: str) -> bool:
         # See if at least one URL with the remake url-scheme is present
         return self.annex.geturls(key, f'{url_scheme}:') != []

-    def _find_dataset(self,
-                      commit: str
-                      ) -> Dataset:
+    def _find_dataset(self, commit: str) -> Dataset:
         """Find the first enclosing dataset with the given commit"""
         # TODO: get version override from configuration
         start_dir = Path(self.annex.getgitdir()).parent.absolute()
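For orientation: `transfer_retrieve` and `checkpresent` are callbacks of the git-annex special-remote protocol, which the `annexremote` package (the source of `SpecialRemote` and `Master` here) wraps. A minimal skeleton of such a remote, assuming the standard `annexremote` entry points rather than the remake remote's actual wiring:

    from annexremote import Master, RemoteError, SpecialRemote

    class MinimalRemote(SpecialRemote):
        """Read-only remote without content -- protocol demo only."""

        def initremote(self) -> None:
            pass

        def prepare(self) -> None:
            pass

        def transfer_store(self, key: str, file_name: str) -> None:
            raise RemoteError('read-only remote')

        def transfer_retrieve(self, key: str, file_name: str) -> None:
            raise RemoteError(f'no content for {key}')

        def checkpresent(self, key: str) -> bool:
            return False

        def remove(self, key: str) -> None:
            raise RemoteError('read-only remote')

    def main() -> None:
        master = Master()
        master.LinkRemote(MinimalRemote(master))
        master.Listen()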
@@ -150,23 +150,27 @@ def _find_dataset(self,
             result = subprocess.run(
                 ['git', 'cat-file', '-t', commit],  # noqa: S607
                 stdout=subprocess.PIPE,
-                cwd=current_dir, check=False)
+                cwd=current_dir,
+                check=False,
+            )
             if result.returncode == 0 and result.stdout.strip() == b'commit':
                 return Dataset(current_dir)
             current_dir = current_dir.parent
         msg = (
             f'Could not find dataset with commit {commit!r}, starting from '
-            f'{start_dir}')
+            f'{start_dir}'
+        )
         raise RemoteError(msg)

-    def _collect(self,
-                 worktree: Path,
-                 dataset: Dataset,
-                 output_patterns: Iterable[str],
-                 this: str,
-                 this_destination: str,
-                 ) -> None:
-        """Collect computation results for `this` (and all other outputs) """
+    def _collect(
+        self,
+        worktree: Path,
+        dataset: Dataset,
+        output_patterns: Iterable[str],
+        this: str,
+        this_destination: str,
+    ) -> None:
+        """Collect computation results for `this` (and all other outputs)"""

         # Get all outputs that were created during computation
         outputs = resolve_patterns(root_dir=worktree, patterns=output_patterns)
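`_find_dataset` walks upward from the annex's git directory and probes each level with `git cat-file -t`, which exits non-zero for unknown objects; the probe extracted as a standalone sketch:

    import subprocess
    from pathlib import Path

    def repo_has_commit(repo_dir: Path, commit: str) -> bool:
        # 'git cat-file -t <oid>' prints the object type on success and
        # fails if the object does not exist in this repository.
        result = subprocess.run(
            ['git', 'cat-file', '-t', commit],
            stdout=subprocess.PIPE,
            cwd=repo_dir,
            check=False,
        )
        return result.returncode == 0 and result.stdout.strip() == b'commit'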
@@ -180,13 +184,17 @@ def _collect(self,
             is_annexed = call_git_success(
                 ['annex', 'whereis', str(file_path)],
                 cwd=dataset_path,
-                capture_output=True)
+                capture_output=True,
+            )
             if is_annexed:
-                self.annex.debug(f'_collect: reinject: {worktree / output} -> {dataset_path}:{file_path}')
+                self.annex.debug(
+                    f'_collect: reinject: {worktree / output} -> {dataset_path}:{file_path}'
+                )
                 call_git_success(
                     ['annex', 'reinject', str(worktree / output), str(file_path)],
                     cwd=dataset_path,
-                    capture_output=True)
+                    capture_output=True,
+                )

         # Collect `this` file. It has to be copied to the destination given
         # by git-annex. Git-annex will check its integrity.
42 changes: 25 additions & 17 deletions datalad_remake/annexremotes/tests/test_hierarchies.py
@@ -32,10 +32,18 @@


 output_pattern_static = [
-    'a.txt', 'b.txt', 'new.txt',
-    'd2_subds0/a0.txt', 'd2_subds0/b0.txt', 'd2_subds0/new.txt',
-    'd2_subds0/d2_subds1/a1.txt', 'd2_subds0/d2_subds1/b1.txt', 'd2_subds0/d2_subds1/new.txt',
-    'd2_subds0/d2_subds1/d2_subds2/a2.txt', 'd2_subds0/d2_subds1/d2_subds2/b2.txt', 'd2_subds0/d2_subds1/d2_subds2/new.txt',
+    'a.txt',
+    'b.txt',
+    'new.txt',
+    'd2_subds0/a0.txt',
+    'd2_subds0/b0.txt',
+    'd2_subds0/new.txt',
+    'd2_subds0/d2_subds1/a1.txt',
+    'd2_subds0/d2_subds1/b1.txt',
+    'd2_subds0/d2_subds1/new.txt',
+    'd2_subds0/d2_subds1/d2_subds2/a2.txt',
+    'd2_subds0/d2_subds1/d2_subds2/b2.txt',
+    'd2_subds0/d2_subds1/d2_subds2/new.txt',
 ]


@@ -47,31 +55,29 @@
 ]


-test_file_content = list(zip(
-    output_pattern_static,
-    ['content: first\n', 'content: second\n', 'content: third\n'] * 4, strict=False)
+test_file_content = list(
+    zip(
+        output_pattern_static,
+        ['content: first\n', 'content: second\n', 'content: third\n'] * 4,
+        strict=False,
+    )
 )


-def _drop_files(dataset: Dataset,
-                files: Iterable[str]):
+def _drop_files(dataset: Dataset, files: Iterable[str]):
     for file in files:
         dataset.drop(file, reckless='availability', result_renderer='disabled')
         assert not (dataset.pathobj / file).exists()


-def _check_content(dataset,
-                   file_content: Iterable[tuple[str, str]]
-                   ):
+def _check_content(dataset, file_content: Iterable[tuple[str, str]]):
     for file, content in file_content:
         assert (dataset.pathobj / file).read_text() == content


 @pytest.mark.parametrize('output_pattern', [output_pattern_static, output_pattern_glob])
 def test_end_to_end(tmp_path, monkeypatch, output_pattern):
-
-    root_dataset = create_simple_computation_dataset(
-        tmp_path, 'd2', 3, test_method)
+    root_dataset = create_simple_computation_dataset(tmp_path, 'd2', 3, test_method)

     # run `make` command
     results = root_dataset.make(
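The `test_file_content` pairing reformatted earlier in this file cycles three content strings across the twelve static output paths; its behavior in isolation (first four paths shown):

    paths = ['a.txt', 'b.txt', 'new.txt', 'd2_subds0/a0.txt']
    contents = ['content: first\n', 'content: second\n', 'content: third\n'] * 4
    # strict=False (Python 3.10+) tolerates unequal lengths: zip simply
    # stops at the shorter input instead of raising ValueError.
    pairs = list(zip(paths, contents, strict=False))
    assert pairs[3] == ('d2_subds0/a0.txt', 'content: first\n')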
@@ -82,11 +88,13 @@ def test_end_to_end(tmp_path, monkeypatch, output_pattern):
             'third=third',
         ],
         output=output_pattern,
-        result_renderer='disabled')
+        result_renderer='disabled',
+    )

     collected_output = [
         str(Path(result['path']).relative_to(root_dataset.pathobj))
-        for result in results]
+        for result in results
+    ]
     assert set(collected_output) == set(output_pattern_static)

# check computation success
13 changes: 6 additions & 7 deletions datalad_remake/annexremotes/tests/test_remake_remote.py
@@ -64,7 +64,6 @@ def send(self, value):


 def test_compute_remote_main(tmp_path, monkeypatch):
-
     dataset = create_ds_hierarchy(tmp_path, 'ds1', 0)[0][2]
     monkeypatch.chdir(dataset.path)

@@ -79,15 +78,15 @@
             subprocess.run(
                 ['git', 'annex', 'info', 'a.txt'],  # noqa: S607
                 stdout=subprocess.PIPE,
-                check=True).stdout.splitlines())).split(b': ')[1]
+                check=True,
+            ).stdout.splitlines(),
+        )
+    ).split(b': ')[1]

     (dataset.pathobj / specification_dir).mkdir(parents=True)
     (dataset.pathobj / specification_dir / '000001111122222').write_text(
-        build_json(
-            'echo',
-            [],
-            ['a.txt'],
-            {'content': 'some_string'}))
+        build_json('echo', [], ['a.txt'], {'content': 'some_string'})
+    )

     input_ = MockedInput()
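The pipeline reformatted above pulls the annex key for a.txt out of `git annex info a.txt` output (the enclosing filter over the output lines is collapsed in this view); the final `.split(b': ')[1]` step on such a line, with a made-up key value:

    line = b'key: MD5E-s21--7f34bd234feed77acae272adbad4d47c.txt'
    key = line.split(b': ')[1]
    assert key == b'MD5E-s21--7f34bd234feed77acae272adbad4d47c.txt'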
