diff --git a/changelog.d/pr-190.md b/changelog.d/pr-190.md new file mode 100644 index 00000000..05e291b6 --- /dev/null +++ b/changelog.d/pr-190.md @@ -0,0 +1,3 @@ +### 🚀 Enhancements and New Features + +- Add `--extra-input` to `containers-add`. Fixes [#189](https://github.com/datalad/datalad-container/issues/189) via [PR #190](https://github.com/datalad/datalad-container/pull/190) (by [@nobodyinperson](https://github.com/nobodyinperson)) diff --git a/datalad_container/containers_add.py b/datalad_container/containers_add.py index fbba420f..2c2b69b9 100644 --- a/datalad_container/containers_add.py +++ b/datalad_container/containers_add.py @@ -140,11 +140,29 @@ class ContainersAdd(Interface): this container, e.g. "singularity exec {img} {cmd}". Where '{img}' is a placeholder for the path to the container image and '{cmd}' is replaced with the desired command. Additional placeholders: - '{img_dspath}' is relative path to the dataset containing the image. + '{img_dspath}' is relative path to the dataset containing the image, + '{img_dirpath}' is the directory containing the '{img}'. """, metavar="FORMAT", constraints=EnsureStr() | EnsureNone(), ), + extra_input=Parameter( + args=("--extra-input",), + doc="""Additional file the container invocation depends on (e.g. + overlays used in --call-fmt). Can be specified multiple times. + Similar to --call-fmt, the placeholders {img_dspath} and + {img_dirpath} are available. Will be stored in the dataset config and + later added alongside the container image to the `extra_inputs` + field in the run-record and thus automatically be fetched when + needed. + """, + action="append", + default=[], + metavar="FILE", + # Can't use EnsureListOf(str) yet as it handles strings as iterables... + # See this PR: https://github.com/datalad/datalad/pull/7267 + # constraints=EnsureListOf(str) | EnsureNone(), + ), image=Parameter( args=("-i", "--image"), doc="""Relative path of the container image within the dataset. 
If not @@ -168,7 +186,7 @@ class ContainersAdd(Interface): @datasetmethod(name='containers_add') @eval_results def __call__(name, url=None, dataset=None, call_fmt=None, image=None, - update=False): + update=False, extra_input=None): if not name: raise InsufficientArgumentsError("`name` argument is required") @@ -321,6 +339,28 @@ def __call__(name, url=None, dataset=None, call_fmt=None, image=None, "{}.cmdexec".format(cfgbasevar), call_fmt, force=True) + # --extra-input sanity check + # TODO: might also want to do that for --call-fmt above? + extra_input_placeholders = dict(img_dirpath="", img_dspath="") + for xi in (extra_input or []): + try: + xi.format(**extra_input_placeholders) + except KeyError as exc: + yield get_status_dict( + action="containers_add", ds=ds, logger=lgr, + status="error", + message=("--extra-input %r contains unknown placeholder %s. " + "Available placeholders: %s", + xi, exc, ', '.join(extra_input_placeholders))) + return + + # actually setting --extra-input config + cfgextravar = "{}.extra-input".format(cfgbasevar) + if ds.config.get(cfgextravar) is not None: + ds.config.unset(cfgextravar) + for xi in (extra_input or []): + ds.config.add(cfgextravar, xi) + # store changes to_save.append(op.join(".datalad", "config")) for r in ds.save( diff --git a/datalad_container/containers_run.py b/datalad_container/containers_run.py index 7c45f621..50de78db 100644 --- a/datalad_container/containers_run.py +++ b/datalad_container/containers_run.py @@ -11,6 +11,7 @@ from datalad.distribution.dataset import datasetmethod from datalad.distribution.dataset import require_dataset from datalad.interface.base import eval_results +from datalad.utils import ensure_iter from datalad.interface.results import get_status_dict from datalad.core.local.run import ( @@ -114,6 +115,7 @@ def __call__(cmd, container_name=None, dataset=None, img=image_path, cmd=cmd, img_dspath=image_dspath, + img_dirpath=op.dirname(image_path) or ".", ) cmd = 
callspec.format(**cmd_kwargs) except KeyError as exc: @@ -131,6 +133,28 @@ def __call__(cmd, container_name=None, dataset=None, # just prepend and pray cmd = container['path'] + ' ' + cmd + extra_inputs = [] + for extra_input in ensure_iter(container.get("extra-input",[]), set): + try: + xi_kwargs = dict( + img_dspath=image_dspath, + img_dirpath=op.dirname(image_path) or ".", + ) + extra_inputs.append(extra_input.format(**xi_kwargs)) + except KeyError as exc: + yield get_status_dict( + 'run', + ds=ds, + status='error', + message=( + 'Unrecognized extra_input placeholder: %s. ' + 'See containers-add for information on known ones: %s', + exc, + ", ".join(xi_kwargs))) + return + + lgr.debug("extra_inputs = %r", extra_inputs) + with patch.dict('os.environ', {CONTAINER_NAME_ENVVAR: container['name']}): # fire! @@ -138,7 +162,7 @@ def __call__(cmd, container_name=None, dataset=None, cmd=cmd, dataset=dataset or (ds if ds.path == pwd else None), inputs=inputs, - extra_inputs=[image_path], + extra_inputs=[image_path] + extra_inputs, outputs=outputs, message=message, expand=expand, diff --git a/datalad_container/tests/test_containers.py b/datalad_container/tests/test_containers.py index 49ffca7c..16cb58dc 100644 --- a/datalad_container/tests/test_containers.py +++ b/datalad_container/tests/test_containers.py @@ -132,6 +132,46 @@ def test_container_files(ds_path=None, local_file=None, url=None): assert(not op.lexists(target_path)) +@with_tree(tree={ + "container.img": "container", + "overlay1.img": "overlay 1", + "overlay2.img": "overlay 2", +}) +def test_extra_inputs(ds_path=None): + container_file = 'container.img' + overlay1_file = 'overlay1.img' + overlay2_file = 'overlay2.img' + + # prepare dataset: + ds = Dataset(ds_path).create(force=True) + ds.save() + + ds.containers_add( + name="container", + image=container_file, + call_fmt="apptainer exec {img} {cmd}", + ) + ds.containers_add( + name="container-with-overlay", + image=container_file, + call_fmt="apptainer exec 
--overlay {img_dirpath}/overlay1.img {img} {cmd}", + extra_input=[overlay1_file] + ) + ds.containers_add( + name="container-with-two-overlays", + image=container_file, + call_fmt="apptainer exec --overlay {img_dirpath}/overlay1.img --overlay {img_dirpath}/overlay2.img:ro {img} {cmd}", + extra_input=[overlay1_file, overlay2_file] + ) + + res = ds.containers_list(**RAW_KWDS) + assert_result_count(res, 3) + + assert_equal(ds.config.get("datalad.containers.container.extra-input"), None) + assert_equal(ds.config.get("datalad.containers.container-with-overlay.extra-input",get_all=True), "overlay1.img") + assert_equal(ds.config.get("datalad.containers.container-with-two-overlays.extra-input",get_all=True), ("overlay1.img", "overlay2.img")) + + @with_tempfile @with_tree(tree={'foo.img': "foo", 'bar.img': "bar"}) diff --git a/datalad_container/tests/test_run.py b/datalad_container/tests/test_run.py index 184c96fd..4ff3248b 100644 --- a/datalad_container/tests/test_run.py +++ b/datalad_container/tests/test_run.py @@ -9,6 +9,7 @@ containers_run, create, ) +from datalad.local.rerun import get_run_info from datalad.cmd import ( StdOutCapture, WitlessRunner, @@ -191,6 +192,47 @@ def test_custom_call_fmt(path=None, local_file=None): assert_in('image=../sub/righthere cmd=XXX img_dspath=../sub', out['stdout']) +@with_tree( + tree={ + "overlay1.img": "overlay1", + "sub": { + "containers": {"container.img": "image file"}, + "overlays": {"overlay2.img": "overlay2", "overlay3.img": "overlay3"}, + }, + } +) +def test_extra_inputs(path=None): + ds = Dataset(path).create(force=True) + subds = ds.create("sub", force=True) + subds.containers_add( + "mycontainer", + image="containers/container.img", + call_fmt="echo image={img} cmd={cmd} img_dspath={img_dspath} img_dirpath={img_dirpath} > out.log", + extra_input=[ + "overlay1.img", + "{img_dirpath}/../overlays/overlay2.img", + "{img_dspath}/overlays/overlay3.img", + ], + ) + ds.save(recursive=True) # record the entire tree of files etc + 
ds.containers_run("XXX", container_name="sub/mycontainer") + ok_file_has_content( + os.path.join(ds.repo.path, "out.log"), + "image=sub/containers/container.img", + re_=True, + ) + commit_msg = ds.repo.call_git(["show", "--format=%B"]) + cmd, runinfo = get_run_info(ds, commit_msg) + assert set( + [ + "sub/containers/container.img", + "overlay1.img", + "sub/containers/../overlays/overlay2.img", + "sub/overlays/overlay3.img", + ] + ) == set(runinfo.get("extra_inputs", set())) + + @skip_if_no_network @with_tree(tree={"subdir": {"in": "innards"}}) def test_run_no_explicit_dataset(path=None): diff --git a/tools/ci/install-singularity.sh b/tools/ci/install-singularity.sh index f031295c..430a9ce4 100755 --- a/tools/ci/install-singularity.sh +++ b/tools/ci/install-singularity.sh @@ -4,5 +4,7 @@ release="$(curl -fsSL https://api.github.com/repos/sylabs/singularity/releases/l codename="$(lsb_release -cs)" arch="$(dpkg --print-architecture)" wget -O /tmp/singularity-ce.deb https://github.com/sylabs/singularity/releases/download/$release/singularity-ce_${release#v}-${codename}_$arch.deb +set -x +sudo apt-get install uidmap sudo dpkg -i /tmp/singularity-ce.deb sudo apt-get install -f