Skip to content

Commit

Permalink
Config: make writing to disk as atomic as possible
Browse files Browse the repository at this point in the history
The original implementation of the `Config.store` method, that writes
the configuration from memory to disk, was using `shutil.copy` which is
not an atomic operation. It is therefore possible that if the copy
operation is interrupted the configuration is not written entirely and
is left either empty or in a corrupted state.

Making this an atomic operation in 100% of the cases is not trivial and
even on the application level there are no guarantees as even on the
file system level operations that should be atomic can be interrupted
under very rare circumstances.

However, we can make a significant improvement by replacing the use of
`shutil.copy` with `os.rename`, which because it is a file move
operation should be atomic and therefore less prone to being
interrupted. The new algorithm for storing the config is as follows:

 1. Create temporary file in same directory as target file
 2. Write content of in memory config to the temporary file and flush
 3. Use `os.rename` to move the temporary file to target destination

This should prevent most common cases that could corrupt the existing
configuration file.

Finally, in `Config.from_method` now only `FileNotFoundError` is caught
when trying to read the config file from disk. Originally both `IOError`
and `OSError` were being caught, which can also be raised if an existing
file could not be read. Simply overwriting in this case with a default
config is not desirable and so in this case we simply let the exception
bubble up.
  • Loading branch information
sphuber committed Dec 2, 2020
1 parent d604916 commit 4f2ab82
Show file tree
Hide file tree
Showing 2 changed files with 47 additions and 23 deletions.
42 changes: 28 additions & 14 deletions aiida/manage/configuration/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,9 @@
# For further information please visit http://www.aiida.net #
###########################################################################
"""Module that defines the configuration file of an AiiDA instance and functions to create and load it."""

import os
import shutil
import tempfile

from aiida.common import json

Expand Down Expand Up @@ -47,7 +47,7 @@ def from_file(cls, filepath):
try:
with open(filepath, 'r', encoding='utf8') as handle:
config = json.load(handle)
except (IOError, OSError):
except FileNotFoundError:
config = Config(filepath, check_and_migrate_config({}))
config.store()
else:
Expand Down Expand Up @@ -378,38 +378,52 @@ def store(self):
:return: self
"""
import tempfile
from aiida.common.files import md5_from_filelike, md5_file
from .settings import DEFAULT_CONFIG_INDENT_SIZE

# If the filepath of this configuration does not yet exist, simply write it.
if not os.path.isfile(self.filepath):
with open(self.filepath, 'wb') as handle:
self._write(handle)
self._atomic_write()
return self

# Otherwise, we write the content to a temporary file and compare its md5 checksum with the current config on
# disk. When the checksums differ, we first create a backup and only then overwrite the existing file.
with tempfile.NamedTemporaryFile() as handle:
self._write(handle)
json.dump(self.dictionary, handle, indent=DEFAULT_CONFIG_INDENT_SIZE)
handle.seek(0)

if md5_from_filelike(handle) != md5_file(self.filepath):
self._backup(self.filepath)

shutil.copy(handle.name, self.filepath)
self._atomic_write()

return self

def _write(self, filelike):
"""Write the contents of `self.dictionary` to the given file handle.
def _atomic_write(self, filepath=None):
"""Write the config as it is in memory, i.e. the contents of ``self.dictionary``, to disk.
.. note:: this command will write the config from memory to a temporary file in the same directory as the
target file ``filepath``. It will then use ``os.rename`` to move the temporary file to ``filepath`` which
will be overwritten if it already exists. The ``os.rename`` is the operation that gives the best guarantee
of being atomic within the limitations of the application.
:param filelike: the filelike object to write the current configuration to
:param filepath: optional filepath to write the contents to, if not specified, the default filename is used.
"""
from .settings import DEFAULT_UMASK, DEFAULT_CONFIG_INDENT_SIZE

umask = os.umask(DEFAULT_UMASK)

try:
json.dump(self.dictionary, filelike, indent=DEFAULT_CONFIG_INDENT_SIZE)
finally:
os.umask(umask)
if filepath is None:
filepath = self.filepath

# Create a temporary file in the same directory as the target filepath, which guarantees that the temporary
# file is on the same filesystem, which is necessary to be able to use ``os.rename``. Since we are moving the
# temporary file, we should also tell the tempfile to not be automatically deleted as that will raise.
with tempfile.NamedTemporaryFile(dir=os.path.dirname(filepath), delete=False) as handle:
try:
json.dump(self.dictionary, handle, indent=DEFAULT_CONFIG_INDENT_SIZE)
finally:
os.umask(umask)

handle.flush()
os.rename(handle.name, self.filepath)
28 changes: 19 additions & 9 deletions tests/manage/configuration/test_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
# For further information please visit http://www.aiida.net #
###########################################################################
"""Tests for the Config class."""

import os
import shutil
import tempfile
Expand Down Expand Up @@ -185,20 +184,32 @@ def compare_config_in_memory_and_on_disk(self, config, filepath):
:param filepath: absolute filepath to a configuration file
:raises AssertionError: if content of `config` is not equal to that of file on disk
"""
from io import BytesIO
from aiida.manage.configuration.settings import DEFAULT_CONFIG_INDENT_SIZE

# Write the content of the `config` in memory to a byte stream
stream = BytesIO()
config._write(stream) # pylint: disable=protected-access
in_memory = stream.getvalue()
in_memory = json.dumps(config.dictionary, indent=DEFAULT_CONFIG_INDENT_SIZE)

# Read the content stored on disk
with open(filepath, 'rb') as handle:
with open(filepath, 'r') as handle:
on_disk = handle.read()

# Compare content of in memory config and the one on disk
self.assertEqual(in_memory, on_disk)

def test_from_file_no_migrate(self):
"""Test that ``Config.from_file`` does not overwrite if the content was not migrated."""
from time import sleep

# Construct the ``Config`` instance and write it to disk
Config(self.config_filepath, self.config_dictionary).store()

timestamp = os.path.getmtime(self.config_filepath)
Config.from_file(self.config_filepath)

# Sleep a second, because for some operating systems the time resolution is of the order of a second
sleep(1)

assert os.path.getmtime(self.config_filepath) == timestamp

def test_from_file(self):
"""Test the `Config.from_file` class method.
Expand All @@ -210,8 +221,7 @@ def test_from_file(self):
config = Config.from_file(filepath_nonexisting)

# Make sure that the migrated config is written to disk, by loading it from disk and comparing to the content
# of the in memory config object. This we do by calling `Config._write` passing a simple stream to have the
# class write the file to memory instead of overwriting the file on disk.
# of the in memory config object.
self.compare_config_in_memory_and_on_disk(config, filepath_nonexisting)

# Now repeat the test for an existing file. The previous filepath now *does* exist and is migrated
Expand Down

0 comments on commit 4f2ab82

Please sign in to comment.