feat: add label to compute instruction URL

datalad · Nov 18, 2024 · be229c2 · be229c2
1 parent 0e725dc
commit be229c2
Show file tree

Hide file tree

Showing 4 changed files with 68 additions and 14 deletions.
diff --git a/datalad_remake/annexremotes/remake_remote.py b/datalad_remake/annexremotes/remake_remote.py
@@ -83,7 +83,7 @@ def getcost(self) -> int:
         return 100
 
     def get_url_encoded_info(self, url: str) -> list[str]:
-        parts = urlparse(url).query.split('&', 5)
+        parts = urlparse(url).query.split('&', 3)
         self.annex.debug(f'get_url_encoded_info: url: {url!r}, parts: {parts!r}')
         return parts
 
@@ -100,7 +100,7 @@ def get_compute_info(
         def get_assigned_value(assignment: str) -> str:
             return assignment.split('=', 1)[1]
 
-        root_version, spec_name, this = (
+        label, root_version, spec_name, this = (
             unquote(get_assigned_value(expr))
             for expr in self.get_url_encoded_info(self.get_url_for_key(key))
         )

diff --git a/datalad_remake/annexremotes/tests/test_remake_remote.py b/datalad_remake/annexremotes/tests/test_remake_remote.py
@@ -126,12 +126,15 @@ def test_compute_remote_main(tmp_path, datalad_cfg, monkeypatch, trusted):
     input_.send(f'TRANSFER RETRIEVE {key.decode()} {tmp_path / "remade.txt"!s}\n')
     # The next line is the answer to `GETCONFIG allow_untrusted_execution`
     input_.send(f'VALUE {"false" if trusted else "true"}\n')
-    url = (
-        'datalad-make:///?'
-        f'root_version={dataset.repo.get_hexsha()}'
-        '&specification=000001111122222'
-        '&this=a.txt'
+    url = 'datalad-make:///?' + '&'.join(
+        [
+            'label=test1',
+            f'root_version={dataset.repo.get_hexsha()}',
+            'specification=000001111122222',
+            'this=a.txt',
+        ]
     )
+
     # The next line is the answer to
     # `GETURLS MD5E-s2--60b725f10c9c85c70d97880dfe8191b3.txt datalad-remake:`
     input_.send(f'VALUE {url}\n')

diff --git a/datalad_remake/commands/make_cmd.py b/datalad_remake/commands/make_cmd.py
@@ -68,6 +68,7 @@ class Make(ValidatedInterface):
         {
             'dataset': EnsureDataset(installed=True),
             'template': EnsureStr(min_len=1),
+            'label': EnsureStr(),
             'input': EnsureListOf(EnsureStr(min_len=1)),
             'input_list': EnsurePath(),
             'output': EnsureListOf(EnsureStr(min_len=1), min_len=1),
@@ -102,6 +103,20 @@ class Make(ValidatedInterface):
             doc='Name of the computing template (template should be present '
             'in $DATASET/.datalad/remake/methods)',
         ),
+        'label': Parameter(
+            args=(
+                '-l',
+                '--label',
+            ),
+            doc='Label of the computation. This is a user defined name that '
+            'is used to identify and prioritize computations, if more than one '
+            'computation is registered for a file. If no label is provided, the '
+            'template name will be used. (Prioritization is done by '
+            'reading `datalad.make.priority` configuration items. If those do '
+            'not exist, the file `<$dataset root>.datalad/make/priority` is '
+            'read, if that does not exist either, a random computation is '
+            'chosen.)',
+        ),
         'branch': Parameter(
             args=(
                 '-b',
@@ -117,8 +132,10 @@ class Make(ValidatedInterface):
             ),
             action='append',
             doc='An input file pattern (repeat for multiple inputs, '
-            'file pattern support python globbing, globbing is performed in '
-            'the source dataset).',
+            'file pattern support python globbing, globbing is performed by '
+            'installing all possibly matching subdatasets and performing '
+            'globbing in those, recursively. That means expressions like `**` '
+            'might pull in a huge number of datasets).',
         ),
         'input_list': Parameter(
             args=(
@@ -139,8 +156,8 @@ class Make(ValidatedInterface):
             ),
             action='append',
             doc='An output file pattern (repeat for multiple outputs)'
-            'file pattern support python globbing, globbing is performed in '
-            'the worktree).',
+            'file pattern support python globbing, output globbing is performed '
+            'in the worktree after the computation).',
         ),
         'output_list': Parameter(
             args=(
@@ -160,7 +177,7 @@ class Make(ValidatedInterface):
             ),
             action='append',
             doc='Input parameter in the form <name>=<value> (repeat for '
-            'multiple parameters)',
+            'multiple parameters).',
         ),
         'parameter_list': Parameter(
             args=(
@@ -196,6 +213,7 @@ def __call__(
         dataset: DatasetParameter | None = None,
         *,
         template: str = '',
+        label: str = '',
         prospective_execution: bool = False,
         branch: str | None = None,
         input: list[str] | None = None,  # noqa: A002
@@ -217,7 +235,13 @@ def __call__(
         # We have to get the URL first, because saving the specification to
         # the dataset will change the version.
         url_base, reset_commit = get_url(
-            ds, branch, template, parameter_dict, input_pattern, output_pattern
+            ds,
+            branch,
+            template,
+            parameter_dict,
+            input_pattern,
+            output_pattern,
+            label or template,
         )
 
         if not prospective_execution:
@@ -268,6 +292,7 @@ def get_url(
     parameters: dict[str, str],
     input_pattern: list[str],
     output_pattern: list[str],
+    label: str,
 ) -> tuple[str, str]:
     # If something goes wrong after the compute specification was saved,
     # the dataset state should be reset to `branch`
@@ -280,7 +305,8 @@ def get_url(
 
     return (
         f'{url_scheme}:///'
-        f'?root_version={quote(dataset.repo.get_hexsha())}'
+        f'?label={quote(label)}'
+        f'&root_version={quote(dataset.repo.get_hexsha())}'
         f'&specification={quote(digest)}'
     ), reset_branch
 

diff --git a/datalad_remake/commands/tests/test_make.py b/datalad_remake/commands/tests/test_make.py
@@ -1,6 +1,11 @@
+from unittest.mock import MagicMock
+from urllib.parse import urlparse
+
 from datalad_next.datasets import Dataset
 from datalad_next.tests import skip_if_on_windows
 
+import datalad_remake.commands.make_cmd
+from datalad_remake.commands.make_cmd import get_url
 from datalad_remake.commands.tests.create_datasets import (
     create_simple_computation_dataset,
 )
@@ -49,6 +54,7 @@ def test_speculative_computation(tmp_path, datalad_cfg):
 def _run_simple_computation(root_dataset: Dataset):
     root_dataset.make(
         template='test_method',
+        label='simple',
         parameter=['name=Robert', 'file=a.txt'],
         output=['a.txt'],
         result_renderer='disabled',
@@ -57,3 +63,22 @@ def _run_simple_computation(root_dataset: Dataset):
 
     # check that the output is correct
     assert (root_dataset.pathobj / 'a.txt').read_text() == 'Hello Robert\n'
+
+
+def test_label_url(monkeypatch):  # check that `get_url` emits the given label
+    root_dataset = MagicMock()
+    root_dataset.repo.get_hexsha = lambda: b'1234'
+    monkeypatch.setattr(  # swap `write_spec` in make_cmd for a fixed-value stub
+        datalad_remake.commands.make_cmd, 'write_spec', lambda *_: '4567'
+    )
+    url, _ = get_url(
+        dataset=root_dataset,
+        branch=None,
+        template_name='test_method',
+        parameters={'name': 'Robert', 'file': 'a.txt'},
+        input_pattern=['a.txt'],
+        output_pattern=['b.txt'],
+        label='label1',
+    )
+    parts = urlparse(url).query.split('&')  # individual `name=value` items
+    assert 'label=label1' in parts