From 4f2ab82bb6ab8b7dec4c7ba2ac2f27da1512ac94 Mon Sep 17 00:00:00 2001 From: Sebastiaan Huber Date: Wed, 2 Dec 2020 10:35:04 +0100 Subject: [PATCH] Config: make writing to disk as atomic as possible The original implementation of the `Config.store` method, that writes the configuration from memory to disk, was using `shutil.copy` which is not an atomic operation. It is therefore possible that if the copy operation is interrupted the configuration is not written entirely and is left either empty or in a corrupted state. Making this an atomic operation in 100% of the cases is not trivial and even on the application level there are no guarantees as even on the file system level operations that should be atomic can be interrupted under very rare circumstances. However, we can make a significant improvement by replacing the use of `shutil.copy` with `os.rename`, which because it is a file move operation should be atomic and therefore less prone to being interrupted. The new algorithm for storing the config is as follows: 1. Create temporary file in same directory as target file 2. Write content of in memory config to the temporary file and flush 3. Use `os.rename` to move the temporary file to target destination This should prevent most common cases that could corrupt the existing configuration file. Finally, in `Config.from_method` now only `FileNotFoundError` is caught when trying to read the config file from disk. Originally both `IOError` and `OSError` were being caught, which can also be raised if an existing file could not be read. Simply overwriting in this case with a default config is not desirable and so in this case we simply let the exception bubble up. --- aiida/manage/configuration/config.py | 42 +++++++++++++++-------- tests/manage/configuration/test_config.py | 28 ++++++++++----- 2 files changed, 47 insertions(+), 23 deletions(-) diff --git a/aiida/manage/configuration/config.py b/aiida/manage/configuration/config.py index f20ed85855..f8ac82fc1c 100644 --- a/aiida/manage/configuration/config.py +++ b/aiida/manage/configuration/config.py @@ -8,9 +8,9 @@ # For further information please visit http://www.aiida.net # ########################################################################### """Module that defines the configuration file of an AiiDA instance and functions to create and load it.""" - import os import shutil +import tempfile from aiida.common import json @@ -47,7 +47,7 @@ def from_file(cls, filepath): try: with open(filepath, 'r', encoding='utf8') as handle: config = json.load(handle) - except (IOError, OSError): + except FileNotFoundError: config = Config(filepath, check_and_migrate_config({})) config.store() else: @@ -378,38 +378,52 @@ def store(self): :return: self """ - import tempfile from aiida.common.files import md5_from_filelike, md5_file + from .settings import DEFAULT_CONFIG_INDENT_SIZE # If the filepath of this configuration does not yet exist, simply write it. if not os.path.isfile(self.filepath): - with open(self.filepath, 'wb') as handle: - self._write(handle) + self._atomic_write() return self # Otherwise, we write the content to a temporary file and compare its md5 checksum with the current config on # disk. When the checksums differ, we first create a backup and only then overwrite the existing file. with tempfile.NamedTemporaryFile() as handle: - self._write(handle) + json.dump(self.dictionary, handle, indent=DEFAULT_CONFIG_INDENT_SIZE) handle.seek(0) if md5_from_filelike(handle) != md5_file(self.filepath): self._backup(self.filepath) - shutil.copy(handle.name, self.filepath) + self._atomic_write() return self - def _write(self, filelike): - """Write the contents of `self.dictionary` to the given file handle. + def _atomic_write(self, filepath=None): + """Write the config as it is in memory, i.e. the contents of ``self.dictionary``, to disk. + + .. note:: this command will write the config from memory to a temporary file in the same directory as the + target file ``filepath``. It will then use ``os.rename`` to move the temporary file to ``filepath`` which + will be overwritten if it already exists. The ``os.rename`` is the operation that gives the best guarantee + of being atomic within the limitations of the application. - :param filelike: the filelike object to write the current configuration to + :param filepath: optional filepath to write the contents to, if not specified, the default filename is used. """ from .settings import DEFAULT_UMASK, DEFAULT_CONFIG_INDENT_SIZE umask = os.umask(DEFAULT_UMASK) - try: - json.dump(self.dictionary, filelike, indent=DEFAULT_CONFIG_INDENT_SIZE) - finally: - os.umask(umask) + if filepath is None: + filepath = self.filepath + + # Create a temporary file in the same directory as the target filepath, which guarantees that the temporary + # file is on the same filesystem, which is necessary to be able to use ``os.rename``. Since we are moving the + # temporary file, we should also tell the tempfile to not be automatically deleted as that will raise. + with tempfile.NamedTemporaryFile(dir=os.path.dirname(filepath), delete=False) as handle: + try: + json.dump(self.dictionary, handle, indent=DEFAULT_CONFIG_INDENT_SIZE) + finally: + os.umask(umask) + + handle.flush() + os.rename(handle.name, self.filepath) diff --git a/tests/manage/configuration/test_config.py b/tests/manage/configuration/test_config.py index a4e47eadb9..71c02e1528 100644 --- a/tests/manage/configuration/test_config.py +++ b/tests/manage/configuration/test_config.py @@ -8,7 +8,6 @@ # For further information please visit http://www.aiida.net # ########################################################################### """Tests for the Config class.""" - import os import shutil import tempfile @@ -185,20 +184,32 @@ def compare_config_in_memory_and_on_disk(self, config, filepath): :param filepath: absolute filepath to a configuration file :raises AssertionError: if content of `config` is not equal to that of file on disk """ - from io import BytesIO + from aiida.manage.configuration.settings import DEFAULT_CONFIG_INDENT_SIZE - # Write the content of the `config` in memory to a byte stream - stream = BytesIO() - config._write(stream) # pylint: disable=protected-access - in_memory = stream.getvalue() + in_memory = json.dumps(config.dictionary, indent=DEFAULT_CONFIG_INDENT_SIZE) # Read the content stored on disk - with open(filepath, 'rb') as handle: + with open(filepath, 'r') as handle: on_disk = handle.read() # Compare content of in memory config and the one on disk self.assertEqual(in_memory, on_disk) + def test_from_file_no_migrate(self): + """Test that ``Config.from_file`` does not overwrite if the content was not migrated.""" + from time import sleep + + # Construct the ``Config`` instance and write it to disk + Config(self.config_filepath, self.config_dictionary).store() + + timestamp = os.path.getmtime(self.config_filepath) + Config.from_file(self.config_filepath) + + # Sleep a second, because for some operating systems the time resolution is of the order of a second + sleep(1) + + assert os.path.getmtime(self.config_filepath) == timestamp + def test_from_file(self): """Test the `Config.from_file` class method. @@ -210,8 +221,7 @@ def test_from_file(self): config = Config.from_file(filepath_nonexisting) # Make sure that the migrated config is written to disk, by loading it from disk and comparing to the content - # of the in memory config object. This we do by calling `Config._write` passing a simple stream to have the - # class write the file to memory instead of overwriting the file on disk. + # of the in memory config object. self.compare_config_in_memory_and_on_disk(config, filepath_nonexisting) # Now repeat the test for an existing file. The previous filepath now *does* exist and is migrated