Skip to content

Commit

Permalink
CalcJob: allow nested target paths for local_copy_list (#4373)
Browse files Browse the repository at this point in the history
If a `CalcJob` specified a `local_copy_list` containing an entry
where the target remote path contains nested subdirectories, the
`upload_calculation` would raise an exception unless all subdirectories
already existed. To solve this, one could have added a transport call that
would create the directories if the target path is nested. However, this
would risk being very inefficient if there are many local copy list
instructions with nested relative paths, where each would incur a command
over the transport.

Instead, we change the design and simply apply the local copy list
instructions to the sandbox folder on the local file system. At the same
time, this allows us to get rid of the inefficient workaround of
writing the file to a temporary file, which was only needed because the
transport interface doesn't accept filelike objects and the file
repository does not expose filepaths on the local file system.

The only additional thing to take care of is to make sure the files from
the local copy list do not end up in the repository of the node, which
was the whole point of the `local_copy_list`'s existence in the first
place. This is solved by simply adding each file that is added to
the sandbox to the `provenance_exclude_list` as well.
  • Loading branch information
sphuber authored Sep 17, 2020
1 parent ff7b9e6 commit 9dfad2e
Show file tree
Hide file tree
Showing 2 changed files with 36 additions and 36 deletions.
66 changes: 32 additions & 34 deletions aiida/engine/daemon/execmanager.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
plugin-specific operations.
"""
import os
import shutil

from aiida.common import AIIDA_LOGGER, exceptions
from aiida.common.folders import SandboxFolder
Expand Down Expand Up @@ -153,17 +154,12 @@ def upload_calculation(node, transport, calc_info, folder, inputs=None, dry_run=
transport.put(handle.name, filename)
transport.chmod(code.get_local_executable(), 0o755) # rwxr-xr-x

# In a dry_run, the working directory is the raw input folder, which will already contain these resources
if not dry_run:
for filename in folder.get_content_list():
logger.debug('[submission of calculation {}] copying file/folder {}...'.format(node.pk, filename))
transport.put(folder.get_abs_path(filename), filename)

# local_copy_list is a list of tuples, each with (uuid, filename, dest_rel_path)
# NOTE: validation of these lists is done inside calculation.presubmit()
local_copy_list = calc_info.local_copy_list or []
remote_copy_list = calc_info.remote_copy_list or []
remote_symlink_list = calc_info.remote_symlink_list or []
provenance_exclude_list = calc_info.provenance_exclude_list or []

for uuid, filename, target in local_copy_list:
logger.debug('[submission of calculation {}] copying local file/folder to {}'.format(node.uuid, target))
Expand Down Expand Up @@ -196,34 +192,19 @@ def find_data_node(inputs, uuid):
if data_node is None:
logger.warning('failed to load Node<{}> specified in the `local_copy_list`'.format(uuid))
else:
# Note, once #2579 is implemented, use the `node.open` method instead of the named temporary file in
# combination with the new `Transport.put_object_from_filelike`
# Since the content of the node could potentially be binary, we read the raw bytes and pass them on
with NamedTemporaryFile(mode='wb+') as handle:
handle.write(data_node.get_object_content(filename, mode='rb'))
handle.flush()
transport.put(handle.name, target)

if dry_run:
if remote_copy_list:
with open(os.path.join(workdir, '_aiida_remote_copy_list.txt'), 'w') as handle:
for remote_computer_uuid, remote_abs_path, dest_rel_path in remote_copy_list:
handle.write(
'would have copied {} to {} in working directory on remote {}'.format(
remote_abs_path, dest_rel_path, computer.label
)
)

if remote_symlink_list:
with open(os.path.join(workdir, '_aiida_remote_symlink_list.txt'), 'w') as handle:
for remote_computer_uuid, remote_abs_path, dest_rel_path in remote_symlink_list:
handle.write(
'would have created symlinks from {} to {} in working directory on remote {}'.format(
remote_abs_path, dest_rel_path, computer.label
)
)
dirname = os.path.dirname(target)
if dirname:
os.makedirs(os.path.join(folder.abspath, dirname), exist_ok=True)
with folder.open(target, 'wb') as handle:
with data_node.open(filename, 'rb') as source:
shutil.copyfileobj(source, handle)
provenance_exclude_list.append(target)

else:
# In a dry_run, the working directory is the raw input folder, which will already contain these resources
if not dry_run:
for filename in folder.get_content_list():
logger.debug('[submission of calculation {}] copying file/folder {}...'.format(node.pk, filename))
transport.put(folder.get_abs_path(filename), filename)

for (remote_computer_uuid, remote_abs_path, dest_rel_path) in remote_copy_list:
if remote_computer_uuid == computer.uuid:
Expand Down Expand Up @@ -266,8 +247,25 @@ def find_data_node(inputs, uuid):
'It is not possible to create a symlink between two different machines for '
'calculation {}'.format(node.pk)
)
else:

provenance_exclude_list = calc_info.provenance_exclude_list or []
if remote_copy_list:
with open(os.path.join(workdir, '_aiida_remote_copy_list.txt'), 'w') as handle:
for remote_computer_uuid, remote_abs_path, dest_rel_path in remote_copy_list:
handle.write(
'would have copied {} to {} in working directory on remote {}'.format(
remote_abs_path, dest_rel_path, computer.label
)
)

if remote_symlink_list:
with open(os.path.join(workdir, '_aiida_remote_symlink_list.txt'), 'w') as handle:
for remote_computer_uuid, remote_abs_path, dest_rel_path in remote_symlink_list:
handle.write(
'would have created symlinks from {} to {} in working directory on remote {}'.format(
remote_abs_path, dest_rel_path, computer.label
)
)

# Loop recursively over content of the sandbox folder copying all that are not in `provenance_exclude_list`. Note
# that directories are not created explicitly. The `node.put_object_from_filelike` call will create intermediate
Expand Down
6 changes: 4 additions & 2 deletions tests/engine/test_launch.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,9 @@ def define(cls, spec):
def prepare_for_submission(self, folder):
from aiida.common.datastructures import CalcInfo, CodeInfo

local_copy_list = [(self.inputs.single_file.uuid, self.inputs.single_file.filename, 'single_file')]
# Use nested path for the target filename, where the directory does not exist, to check that the engine will
# create intermediate directories as needed. Regression test for #4350
local_copy_list = [(self.inputs.single_file.uuid, self.inputs.single_file.filename, 'path/single_file')]

for name, node in self.inputs.files.items():
local_copy_list.append((node.uuid, node.filename, name))
Expand Down Expand Up @@ -286,5 +288,5 @@ def test_calcjob_dry_run_no_provenance(self):

_, node = launch.run_get_node(FileCalcJob, **inputs)
self.assertIn('folder', node.dry_run_info)
for filename in ['single_file', 'file_one', 'file_two']:
for filename in ['path', 'file_one', 'file_two']:
self.assertIn(filename, os.listdir(node.dry_run_info['folder']))

0 comments on commit 9dfad2e

Please sign in to comment.