Skip to content

Commit

Permalink
feat: add label to compute instruction URL
Browse files Browse the repository at this point in the history
  • Loading branch information
christian-monch committed Nov 18, 2024
1 parent 0e725dc commit be229c2
Show file tree
Hide file tree
Showing 4 changed files with 68 additions and 14 deletions.
4 changes: 2 additions & 2 deletions datalad_remake/annexremotes/remake_remote.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ def getcost(self) -> int:
return 100

def get_url_encoded_info(self, url: str) -> list[str]:
parts = urlparse(url).query.split('&', 5)
parts = urlparse(url).query.split('&', 3)
self.annex.debug(f'get_url_encoded_info: url: {url!r}, parts: {parts!r}')
return parts

Expand All @@ -100,7 +100,7 @@ def get_compute_info(
def get_assigned_value(assignment: str) -> str:
return assignment.split('=', 1)[1]

root_version, spec_name, this = (
label, root_version, spec_name, this = (
unquote(get_assigned_value(expr))
for expr in self.get_url_encoded_info(self.get_url_for_key(key))
)
Expand Down
13 changes: 8 additions & 5 deletions datalad_remake/annexremotes/tests/test_remake_remote.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,12 +126,15 @@ def test_compute_remote_main(tmp_path, datalad_cfg, monkeypatch, trusted):
input_.send(f'TRANSFER RETRIEVE {key.decode()} {tmp_path / "remade.txt"!s}\n')
# The next line is the answer to `GETCONFIG allow_untrusted_execution`
input_.send(f'VALUE {"false" if trusted else "true"}\n')
url = (
'datalad-make:///?'
f'root_version={dataset.repo.get_hexsha()}'
'&specification=000001111122222'
'&this=a.txt'
url = 'datalad-make:///?' + '&'.join(
[
'label=test1',
f'root_version={dataset.repo.get_hexsha()}',
'specification=000001111122222',
'this=a.txt',
]
)

# The next line is the answer to
# `GETURLS MD5E-s2--60b725f10c9c85c70d97880dfe8191b3.txt datalad-remake:`
input_.send(f'VALUE {url}\n')
Expand Down
40 changes: 33 additions & 7 deletions datalad_remake/commands/make_cmd.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ class Make(ValidatedInterface):
{
'dataset': EnsureDataset(installed=True),
'template': EnsureStr(min_len=1),
'label': EnsureStr(),
'input': EnsureListOf(EnsureStr(min_len=1)),
'input_list': EnsurePath(),
'output': EnsureListOf(EnsureStr(min_len=1), min_len=1),
Expand Down Expand Up @@ -102,6 +103,20 @@ class Make(ValidatedInterface):
doc='Name of the computing template (template should be present '
'in $DATASET/.datalad/remake/methods)',
),
'label': Parameter(
args=(
'-l',
'--label',
),
doc='Label of the computation. This is a user defined name that '
'is used to identify and prioritize computations, if more than one '
'computation is registered for a file. If no label is provided, the'
'template name will be used. (Prioritization is done by '
'reading `datalad.make.priority` configuration items. If those do '
'not exist, the file `<$dataset root>.datalad/make/priority` is '
'read, if that does not exist either, a random computation is '
'chosen.)',
),
'branch': Parameter(
args=(
'-b',
Expand All @@ -117,8 +132,10 @@ class Make(ValidatedInterface):
),
action='append',
doc='An input file pattern (repeat for multiple inputs, '
'file pattern support python globbing, globbing is performed in '
'the source dataset).',
'file pattern support python globbing, globbing is performed by '
'installing all possibly matching subdatasets and performing '
'globbing in those, recursively. That means expressions like `**` '
'might pull in a huge number of datasets).',
),
'input_list': Parameter(
args=(
Expand All @@ -139,8 +156,8 @@ class Make(ValidatedInterface):
),
action='append',
doc='An output file pattern (repeat for multiple outputs)'
'file pattern support python globbing, globbing is performed in '
'the worktree).',
'file pattern support python globbing, output globbing is performed '
'in the worktree after the computation).',
),
'output_list': Parameter(
args=(
Expand All @@ -160,7 +177,7 @@ class Make(ValidatedInterface):
),
action='append',
doc='Input parameter in the form <name>=<value> (repeat for '
'multiple parameters)',
'multiple parameters).',
),
'parameter_list': Parameter(
args=(
Expand Down Expand Up @@ -196,6 +213,7 @@ def __call__(
dataset: DatasetParameter | None = None,
*,
template: str = '',
label: str = '',
prospective_execution: bool = False,
branch: str | None = None,
input: list[str] | None = None, # noqa: A002
Expand All @@ -217,7 +235,13 @@ def __call__(
# We have to get the URL first, because saving the specification to
# the dataset will change the version.
url_base, reset_commit = get_url(
ds, branch, template, parameter_dict, input_pattern, output_pattern
ds,
branch,
template,
parameter_dict,
input_pattern,
output_pattern,
label or template,
)

if not prospective_execution:
Expand Down Expand Up @@ -268,6 +292,7 @@ def get_url(
parameters: dict[str, str],
input_pattern: list[str],
output_pattern: list[str],
label: str,
) -> tuple[str, str]:
# If something goes wrong after the compute specification was saved,
# the dataset state should be reset to `branch`
Expand All @@ -280,7 +305,8 @@ def get_url(

return (
f'{url_scheme}:///'
f'?root_version={quote(dataset.repo.get_hexsha())}'
f'?label={quote(label)}'
f'&root_version={quote(dataset.repo.get_hexsha())}'
f'&specification={quote(digest)}'
), reset_branch

Expand Down
25 changes: 25 additions & 0 deletions datalad_remake/commands/tests/test_make.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,11 @@
from unittest.mock import MagicMock
from urllib.parse import urlparse

from datalad_next.datasets import Dataset
from datalad_next.tests import skip_if_on_windows

import datalad_remake.commands.make_cmd
from datalad_remake.commands.make_cmd import get_url
from datalad_remake.commands.tests.create_datasets import (
create_simple_computation_dataset,
)
Expand Down Expand Up @@ -49,6 +54,7 @@ def test_speculative_computation(tmp_path, datalad_cfg):
def _run_simple_computation(root_dataset: Dataset):
root_dataset.make(
template='test_method',
label='simple',
parameter=['name=Robert', 'file=a.txt'],
output=['a.txt'],
result_renderer='disabled',
Expand All @@ -57,3 +63,22 @@ def _run_simple_computation(root_dataset: Dataset):

# check that the output is correct
assert (root_dataset.pathobj / 'a.txt').read_text() == 'Hello Robert\n'


def test_label_url(monkeypatch):
    """Check that the label handed to `get_url` shows up in the URL query."""
    # Replace `write_spec` in the make_cmd module with a stub that returns
    # a fixed digest.
    monkeypatch.setattr(
        datalad_remake.commands.make_cmd, 'write_spec', lambda *_: '4567'
    )

    # Stand-in dataset whose repo reports a fixed version.
    fake_dataset = MagicMock()
    fake_dataset.repo.get_hexsha = lambda: b'1234'

    url, _ = get_url(
        dataset=fake_dataset,
        branch=None,
        template_name=test_method,
        parameters={'name': 'Robert', 'file': 'a.txt'},
        input_pattern=['a.txt'],
        output_pattern=['b.txt'],
        label='label1',
    )

    # The query is a list of `key=value` items joined by `&`.
    query_items = urlparse(url).query.split('&')
    assert 'label=label1' in query_items

0 comments on commit be229c2

Please sign in to comment.