Skip to content

Commit

Permalink
test: add trusted computation test
Browse files Browse the repository at this point in the history
  • Loading branch information
christian-monch committed Oct 26, 2024
1 parent 979c0ef commit 5b35399
Show file tree
Hide file tree
Showing 5 changed files with 121 additions and 49 deletions.
25 changes: 10 additions & 15 deletions datalad_remake/annexremotes/remake_remote.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,10 +43,9 @@ class RemakeRemote(SpecialRemote):
def __init__(self, annex: Master):
super().__init__(annex)
self.configs = {
'allow_untrusted_execution':
'Allow execution of untrusted code with untrusted parameters. '
'set to "true" to enable. THIS IS DANGEROUS and might lead to '
'remote code execution.',
'allow_untrusted_execution': 'Allow execution of untrusted code with untrusted parameters. '
'set to "true" to enable. THIS IS DANGEROUS and might lead to '
'remote code execution.',
}

def __del__(self):
Expand Down Expand Up @@ -92,12 +91,9 @@ def get_url_for_key(self, key: str) -> str:
self.annex.debug(f'get_url_for_key: key: {key!r}, urls: {urls!r}')
return urls[0]

def get_compute_info(self,
key: str,
*,
allow_untrusted_specs: bool = False
) -> tuple[dict[str, Any], Dataset]:

def get_compute_info(
self, key: str, *, allow_untrusted_specs: bool = False
) -> tuple[dict[str, Any], Dataset]:
def get_assigned_value(assignment: str) -> str:
return assignment.split('=', 1)[1]

Expand All @@ -122,13 +118,12 @@ def get_assigned_value(assignment: str) -> str:
def transfer_retrieve(self, key: str, file_name: str) -> None:
self.annex.debug(f'TRANSFER RETRIEVE key: {key!r}, file_name: {file_name!r}')

allow_untrusted_execution = self.annex.getconfig(
'allow_untrusted_execution'
) == 'true'
allow_untrusted_execution = (
self.annex.getconfig('allow_untrusted_execution') == 'true'
)

compute_info, dataset = self.get_compute_info(
key,
allow_untrusted_specs=allow_untrusted_execution
key, allow_untrusted_specs=allow_untrusted_execution
)
self.annex.debug(f'TRANSFER RETRIEVE compute_info: {compute_info!r}')

Expand Down
86 changes: 79 additions & 7 deletions datalad_remake/annexremotes/tests/test_remake_remote.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,20 @@
import re
import subprocess
from io import TextIOBase
from pathlib import Path
from queue import Queue
from typing import cast

import pytest
from annexremote import Master
from datalad_next.tests import skip_if_on_windows

from datalad_remake.commands.tests.create_datasets import create_ds_hierarchy

from ... import specification_dir
from ... import (
specification_dir,
template_dir,
)
from ...commands.make_cmd import build_json
from ..remake_remote import RemakeRemote

Expand Down Expand Up @@ -64,11 +70,24 @@ def send(self, value):


@skip_if_on_windows
def test_compute_remote_main(tmp_path, monkeypatch):
dataset = create_ds_hierarchy(tmp_path, 'ds1', 0)[0][2]
@pytest.mark.parametrize('trusted', [True, False])
def test_compute_remote_main(tmp_path, monkeypatch, trusted):
if trusted:
gpg_homedir = tmp_path / 'tmp_gpg_dir'

# make sure that the users keystore is not overwritten
monkeypatch.setenv('HOME', '/dev/null')
monkeypatch.setenv('GNUPGHOME', str(gpg_homedir))

# Generate a keypair
keyid = _create_keypair(gpg_homedir)
else:
keyid = None

dataset = create_ds_hierarchy(tmp_path, 'ds1', 0, keyid)[0][2]
monkeypatch.chdir(dataset.path)

template_path = dataset.pathobj / '.datalad' / 'make' / 'methods'
template_path = dataset.pathobj / template_dir
template_path.mkdir(parents=True)
(template_path / 'echo').write_text(template)
dataset.save()
Expand All @@ -84,10 +103,13 @@ def test_compute_remote_main(tmp_path, monkeypatch):
)
).split(b': ')[1]

(dataset.pathobj / specification_dir).mkdir(parents=True, exist_ok=True)
(dataset.pathobj / specification_dir / '000001111122222').write_text(
specification_path = dataset.pathobj / specification_dir
spec_name = '000001111122222'
specification_path.mkdir(parents=True, exist_ok=True)
(specification_path / spec_name).write_text(
build_json('echo', [], ['a.txt'], {'content': 'some_string'})
)
dataset.save()

input_ = MockedInput()

Expand All @@ -97,7 +119,7 @@ def test_compute_remote_main(tmp_path, monkeypatch):
input_.send('PREPARE\n')
input_.send(f'TRANSFER RETRIEVE {key.decode()} {tmp_path / "remade.txt"!s}\n')
# The next line is the answer to `GETCONFIG allow_untrusted_execution`
input_.send('VALUE true\n')
input_.send(f'VALUE {"false" if trusted else "true"}\n')
url = (
'datalad-make:///?'
f'root_version={dataset.repo.get_hexsha()}'
Expand All @@ -121,3 +143,53 @@ def test_compute_remote_main(tmp_path, monkeypatch):
# At this point the datalad-remake remote should have executed the
# computation and written the result.
assert (tmp_path / 'remade.txt').read_text().strip() == 'content: some_string'


def _create_keypair(gpg_dir: Path):
gpg_dir.mkdir(parents=True)
gpg_dir.chmod(0o700)
private_keys_dir = gpg_dir / 'private-keys-v1.d'
private_keys_dir.mkdir()
private_keys_dir.chmod(0o700)
script = b"""
Key-Type: RSA
Key-Length: 4096
Subkey-Type: RSA
Subkey-Length: 4096
Name-Real: Test User
Name-Email: test@example.com
Expire-Date: 0
%no-protection
#%transient-key
%commit
"""
environment = {'HOME': '/dev/null'}
result = subprocess.run(
[ # noqa: S607
'gpg',
'--batch',
'--homedir',
str(gpg_dir),
'--gen-key',
'--keyid-format',
'long',
],
input=script,
capture_output=True,
check=True,
env=environment,
)
result = subprocess.run(
[ # noqa: S607
'gpg',
'--homedir',
str(gpg_dir),
'--list-secret-keys',
'--keyid-format',
'long',
],
capture_output=True,
check=True,
env=environment,
)
return re.findall('sec.*rsa4096/([A-Z0-9]+)', result.stdout.decode())[0]
19 changes: 8 additions & 11 deletions datalad_remake/commands/make_cmd.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,6 @@
call_git_oneline,
call_git_success,
)
from hypothesis.reporting import default

from datalad_remake import (
specification_dir,
Expand Down Expand Up @@ -168,19 +167,17 @@ class Make(ValidatedInterface):
'should be provided.',
),
'allow_untrusted_code': Parameter(
args=(
'--allow-untrusted-code',
),
args=('--allow-untrusted-code',),
action='store_true',
default=False,
doc='Skip commit signature verification before executing code. This '
'should only be used in a strictly controlled environment with '
'fully trusted datasets. Trusted dataset means: every commit '
'stems from a trusted entity. '
'DO NOT USE THIS OPTION, unless you are sure to understand the '
'consequences. One of which is that arbitrary parties can '
'execute arbitrary code under your account on your '
'infrastructure.',
'should only be used in a strictly controlled environment with '
'fully trusted datasets. Trusted dataset means: every commit '
'stems from a trusted entity. '
'DO NOT USE THIS OPTION, unless you are sure to understand the '
'consequences. One of which is that arbitrary parties can '
'execute arbitrary code under your account on your '
'infrastructure.',
),
}

Expand Down
25 changes: 20 additions & 5 deletions datalad_remake/commands/tests/create_datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,8 @@ def update_config_for_remake(dataset: Dataset):
)


def add_remake_remote(dataset: Dataset):
def add_remake_remote(dataset: Dataset, signing_key: str | None = None):
aue = 'false' if signing_key else 'true'
call_git_success(
[
'-C',
Expand All @@ -30,20 +31,24 @@ def add_remake_remote(dataset: Dataset):
'type=external',
'externaltype=datalad-remake',
'encryption=none',
'allow_untrusted_execution=true',
f'allow_untrusted_execution={aue}',
],
capture_output=True,
)


def create_ds_hierarchy(
tmp_path: Path, name: str, subdataset_levels: int = 2
tmp_path: Path,
name: str,
subdataset_levels: int = 2,
signing_key: str | None = None,
) -> list[tuple[str, Path, Dataset]]:
# Create root dataset
root_dataset = Dataset(tmp_path / name)
root_dataset.create(force=True, result_renderer='disabled')
(root_dataset.pathobj / 'a.txt').write_text('a\n')
(root_dataset.pathobj / 'b.txt').write_text('b\n')
_enable_signing(root_dataset, signing_key)
root_dataset.save(result_renderer='disabled')
datasets = [(name, tmp_path / name, root_dataset)]

Expand All @@ -55,6 +60,7 @@ def create_ds_hierarchy(
(subdataset.pathobj / f'a{level}.txt').write_text(f'a{level}\n')
(subdataset.pathobj / f'b{level}.txt').write_text(f'b{level}\n')
subdataset.save(result_renderer='disabled')
_enable_signing(subdataset, signing_key)
datasets.append((f'{name}_subds{level}', subdataset_path, subdataset))

# Link the datasets
Expand All @@ -71,15 +77,24 @@ def create_ds_hierarchy(
update_config_for_remake(root_dataset)

# Add datalad-remake remotes to the root dataset and all subdatasets
add_remake_remote(root_dataset)
add_remake_remote(root_dataset, signing_key)
subdataset_path = Path()
for index in range(subdataset_levels):
subdataset_path /= f'{name}_subds{index}'
add_remake_remote(Dataset(root_dataset.pathobj / subdataset_path))
add_remake_remote(
Dataset(root_dataset.pathobj / subdataset_path),
signing_key,
)

return datasets


def _enable_signing(dataset: Dataset, key: str | None):
if key is not None:
dataset.config.set('commit.gpgsign', 'true', scope='local')
dataset.config.set('user.signingkey', key, scope='local')


def create_simple_computation_dataset(
tmp_path: Path,
dataset_name: str,
Expand Down
15 changes: 4 additions & 11 deletions datalad_remake/utils/verify.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,19 +8,12 @@

def verify_file(root_directory: Path, file: Path):
# Get the latest commit of `file`
commit = call_git_oneline([
'-C', str(root_directory),
'log', '-1', '--follow',
'--pretty=%H',
str(file)
])
commit = call_git_oneline(
['-C', str(root_directory), 'log', '-1', '--follow', '--pretty=%H', str(file)]
)

# Let git do the verification of the commit
result = call_git_success([
'-C', str(root_directory),
'verify-commit',
commit
])
result = call_git_success(['-C', str(root_directory), 'verify-commit', commit])
if not result:
msg = f'Signature validation of {file} failed'
raise ValueError(msg)

0 comments on commit 5b35399

Please sign in to comment.