From 9180b4a6f6867b58ef4e64320e4cccc367c10356 Mon Sep 17 00:00:00 2001 From: Sebastiaan Huber Date: Fri, 13 Oct 2023 23:51:54 +0200 Subject: [PATCH] `FolderData`: Expose repository API on top-level namespace In 8293e453789d0bad9cf631ecfc08542dd9ad892d, the `Node` interface was refactored to move the API of the repository to the `base.repository` namespace. The original methods would be forwarded with a deprecation message being printed. Although this made sense for most node types, in an effort to clean up the node interface which was overpopulated, for the `FolderData` the repository interface is the main interface and it doesn't make sense to force the users to go all the way down to the nested `base.repository` namespace to access it. Therefore the public API of the repository is restored on the top-level namespace of the `FolderData` class. --- aiida/orm/nodes/data/folder.py | 154 ++++++++++++++++++ aiida/orm/nodes/repository.py | 48 +++--- .../parsers/plugins/diff_tutorial/parsers.py | 4 +- tests/orm/nodes/data/test_folder.py | 39 +++++ 4 files changed, 219 insertions(+), 26 deletions(-) diff --git a/aiida/orm/nodes/data/folder.py b/aiida/orm/nodes/data/folder.py index 38c679d9ef..e9bf9171e7 100644 --- a/aiida/orm/nodes/data/folder.py +++ b/aiida/orm/nodes/data/folder.py @@ -8,11 +8,21 @@ # For further information please visit http://www.aiida.net # ########################################################################### """`Data` sub class to represent a folder on a file system.""" +from __future__ import annotations + +import contextlib +import io +import pathlib +from typing import BinaryIO, Iterable, Iterator, Optional, TextIO, Union + +from aiida.repository import File from .data import Data __all__ = ('FolderData',) +FilePath = Union[str, pathlib.PurePosixPath] + class FolderData(Data): """`Data` sub class to represent a folder on a file system.""" @@ -38,3 +48,147 @@ def __init__(self, **kwargs): super().__init__(**kwargs) if tree: self.base.repository.put_object_from_tree(tree) + + def list_objects(self, path: str | None = None) -> list[File]: + """Return a list of the objects contained in this repository sorted by name, optionally in given sub directory. + + :param path: optional relative path inside the repository whose objects to list. + :return: a list of `File` named tuples representing the objects present in directory with the given key. + :raises TypeError: if the path is not a string and relative path. + :raises FileNotFoundError: if no object exists for the given path. + :raises NotADirectoryError: if the object at the given path is not a directory. + """ + return self.base.repository.list_objects(path) + + def list_object_names(self, path: str | None = None) -> list[str]: + """Return a sorted list of the object names contained in this repository, optionally in the given sub directory. + + :param path: optional relative path inside the repository whose objects to list. + :return: a list of `File` named tuples representing the objects present in directory with the given key. + :raises TypeError: if the path is not a string and relative path. + :raises FileNotFoundError: if no object exists for the given path. + :raises NotADirectoryError: if the object at the given path is not a directory. + """ + return self.base.repository.list_object_names(path) + + @contextlib.contextmanager + def open(self, path: str, mode='r') -> Iterator[BinaryIO | TextIO]: + """Open a file handle to an object stored under the given key. + + .. note:: this should only be used to open a handle to read an existing file. To write a new file use the method + ``put_object_from_filelike`` instead. + + :param path: the relative path of the object within the repository. + :return: yield a byte stream object. + :raises TypeError: if the path is not a string and relative path. + :raises FileNotFoundError: if the file does not exist. + :raises IsADirectoryError: if the object is a directory and not a file. + :raises OSError: if the file could not be opened. + """ + with self.base.repository.open(path, mode) as handle: + yield handle + + def get_object(self, path: FilePath | None = None) -> File: + """Return the object at the given path. + + :param path: the relative path of the object within the repository. + :return: the `File` representing the object located at the given relative path. + :raises TypeError: if the path is not a string or ``Path``, or is an absolute path. + :raises FileNotFoundError: if no object exists for the given path. + """ + return self.base.repository.get_object(path) + + def get_object_content(self, path: str, mode='r') -> str | bytes: + """Return the content of a object identified by key. + + :param path: the relative path of the object within the repository. + :raises TypeError: if the path is not a string and relative path. + :raises FileNotFoundError: if the file does not exist. + :raises IsADirectoryError: if the object is a directory and not a file. + :raises OSError: if the file could not be opened. + """ + return self.base.repository.get_object_content(path, mode) + + def put_object_from_bytes(self, content: bytes, path: str) -> None: + """Store the given content in the repository at the given path. + + :param path: the relative path where to store the object in the repository. + :param content: the content to store. + :raises TypeError: if the path is not a string and relative path. + :raises FileExistsError: if an object already exists at the given path. + """ + return self.base.repository.put_object_from_bytes(content, path) + + def put_object_from_filelike(self, handle: io.BufferedReader, path: str) -> None: + """Store the byte contents of a file in the repository. + + :param handle: filelike object with the byte content to be stored. + :param path: the relative path where to store the object in the repository. + :raises TypeError: if the path is not a string and relative path. + :raises `~aiida.common.exceptions.ModificationNotAllowed`: when the node is stored and therefore immutable. + """ + return self.base.repository.put_object_from_filelike(handle, path) + + def put_object_from_file(self, filepath: str, path: str) -> None: + """Store a new object under `path` with contents of the file located at `filepath` on the local file system. + + :param filepath: absolute path of file whose contents to copy to the repository + :param path: the relative path where to store the object in the repository. + :raises TypeError: if the path is not a string and relative path, or the handle is not a byte stream. + :raises `~aiida.common.exceptions.ModificationNotAllowed`: when the node is stored and therefore immutable. + """ + return self.base.repository.put_object_from_file(filepath, path) + + def put_object_from_tree(self, filepath: str, path: Optional[str] = None) -> None: + """Store the entire contents of `filepath` on the local file system in the repository with under given `path`. + + :param filepath: absolute path of the directory whose contents to copy to the repository. + :param path: the relative path where to store the objects in the repository. + :raises TypeError: if the path is not a string and relative path. + :raises `~aiida.common.exceptions.ModificationNotAllowed`: when the node is stored and therefore immutable. + """ + return self.base.repository.put_object_from_tree(filepath, path) + + def walk(self, path: Optional[FilePath] = None) -> Iterable[tuple[pathlib.PurePosixPath, list[str], list[str]]]: + """Walk over the directories and files contained within this repository. + + .. note:: the order of the dirname and filename lists that are returned is not necessarily sorted. This is in + line with the ``os.walk`` implementation where the order depends on the underlying file system used. + + :param path: the relative path of the directory within the repository whose contents to walk. + :return: tuples of root, dirnames and filenames just like ``os.walk``, with the exception that the root path is + always relative with respect to the repository root, instead of an absolute path and it is an instance of + ``pathlib.PurePosixPath`` instead of a normal string + """ + yield from self.base.repository.walk(path) + + def glob(self) -> Iterable[pathlib.PurePosixPath]: + """Yield a recursive list of all paths (files and directories).""" + yield from self.base.repository.glob() + + def copy_tree(self, target: str | pathlib.Path, path: Optional[FilePath] = None) -> None: + """Copy the contents of the entire node repository to another location on the local file system. + + :param target: absolute path of the directory where to copy the contents to. + :param path: optional relative path whose contents to copy. + """ + self.base.repository.copy_tree(target, path) + + def delete_object(self, path: str) -> None: + """Delete the object from the repository. + + :param key: fully qualified identifier for the object within the repository. + :raises TypeError: if the path is not a string and relative path. + :raises FileNotFoundError: if the file does not exist. + :raises IsADirectoryError: if the object is a directory and not a file. + :raises OSError: if the file could not be deleted. + :raises `~aiida.common.exceptions.ModificationNotAllowed`: when the node is stored and therefore immutable. + """ + self.base.repository.delete_object(path) + + def erase(self) -> None: + """Delete all objects from the repository. + + :raises `~aiida.common.exceptions.ModificationNotAllowed`: when the node is stored and therefore immutable. + """ + self.base.repository.erase() diff --git a/aiida/orm/nodes/repository.py b/aiida/orm/nodes/repository.py index f64cf8395f..dc7a507a3f 100644 --- a/aiida/orm/nodes/repository.py +++ b/aiida/orm/nodes/repository.py @@ -1,11 +1,13 @@ # -*- coding: utf-8 -*- """Interface to the file repository of a node instance.""" +from __future__ import annotations + import contextlib import copy import io import pathlib import tempfile -from typing import TYPE_CHECKING, Any, BinaryIO, Dict, Iterable, Iterator, List, Optional, TextIO, Tuple, Union +from typing import TYPE_CHECKING, Any, BinaryIO, Iterable, Iterator, TextIO, Union from aiida.common import exceptions from aiida.manage import get_config_option @@ -28,25 +30,23 @@ class NodeRepository: exception if the node is stored. Otherwise the operation is just forwarded to the repository instance. The repository instance keeps an internal mapping of the file hierarchy that it maintains, starting from an empty - hierarchy if the instance was constructed normally, or from a specific hierarchy if reconstructred through the + hierarchy if the instance was constructed normally, or from a specific hierarchy if reconstructed through the ``Repository.from_serialized`` classmethod. This is only the case for stored nodes, because unstored nodes do not have any files yet when they are constructed. Once the node get's stored, the repository is asked to serialize its - metadata contents which is then stored in the ``repository_metadata`` field of the backend node. - This layer explicitly does not update the metadata of the node on a mutation action. - The reason is that for stored nodes these actions are anyway forbidden and for unstored nodes, - the final metadata will be stored in one go, once the node is stored, - so there is no need to keep updating the node metadata intermediately. - Note that this does mean that ``repository_metadata`` does not give accurate information, - as long as the node is not yet stored. + metadata contents which is then stored in the ``repository_metadata`` field of the backend node. This layer + explicitly does not update the metadata of the node on a mutation action. The reason is that for stored nodes these + actions are anyway forbidden and for unstored nodes, the final metadata will be stored in one go, once the node is + stored, so there is no need to keep updating the node metadata intermediately. Note that this does mean that + ``repository_metadata`` does not give accurate information, as long as the node is not yet stored. """ def __init__(self, node: 'Node') -> None: """Construct a new instance of the repository interface.""" self._node: 'Node' = node - self._repository_instance: Optional[Repository] = None + self._repository_instance: Repository | None = None @property - def metadata(self) -> Dict[str, Any]: + def metadata(self) -> dict[str, Any]: """Return the repository metadata, representing the virtual file hierarchy. Note, this is only accurate if the node is stored. @@ -127,7 +127,7 @@ def _clone(self, repo: 'NodeRepository') -> None: """ self._repository.clone(repo._repository) # pylint: disable=protected-access - def serialize(self) -> Dict: + def serialize(self) -> dict: """Serialize the metadata of the repository content into a JSON-serializable format. :return: dictionary with the content metadata. @@ -141,10 +141,10 @@ def hash(self) -> str: """ return self._repository.hash() - def list_objects(self, path: Optional[str] = None) -> List[File]: + def list_objects(self, path: str | None = None) -> list[File]: """Return a list of the objects contained in this repository sorted by name, optionally in given sub directory. - :param path: the relative path where to store the object in the repository. + :param path: optional relative path inside the repository whose objects to list. :return: a list of `File` named tuples representing the objects present in directory with the given key. :raises TypeError: if the path is not a string and relative path. :raises FileNotFoundError: if no object exists for the given path. @@ -152,10 +152,10 @@ def list_objects(self, path: Optional[str] = None) -> List[File]: """ return self._repository.list_objects(path) - def list_object_names(self, path: Optional[str] = None) -> List[str]: + def list_object_names(self, path: str | None = None) -> list[str]: """Return a sorted list of the object names contained in this repository, optionally in the given sub directory. - :param path: the relative path where to store the object in the repository. + :param path: optional relative path inside the repository whose objects to list. :return: a list of `File` named tuples representing the objects present in directory with the given key. :raises TypeError: if the path is not a string and relative path. :raises FileNotFoundError: if no object exists for the given path. @@ -164,7 +164,7 @@ def list_object_names(self, path: Optional[str] = None) -> List[str]: return self._repository.list_object_names(path) @contextlib.contextmanager - def open(self, path: str, mode='r') -> Iterator[Union[BinaryIO, TextIO]]: + def open(self, path: str, mode='r') -> Iterator[BinaryIO | TextIO]: """Open a file handle to an object stored under the given key. .. note:: this should only be used to open a handle to read an existing file. To write a new file use the method @@ -186,20 +186,20 @@ def open(self, path: str, mode='r') -> Iterator[Union[BinaryIO, TextIO]]: else: yield handle - def get_object(self, path: Optional[FilePath] = None) -> File: + def get_object(self, path: FilePath | None = None) -> File: """Return the object at the given path. - :param path: the relative path where to store the object in the repository. + :param path: the relative path of the object within the repository. :return: the `File` representing the object located at the given relative path. :raises TypeError: if the path is not a string or ``Path``, or is an absolute path. :raises FileNotFoundError: if no object exists for the given path. """ return self._repository.get_object(path) - def get_object_content(self, path: str, mode='r') -> Union[str, bytes]: + def get_object_content(self, path: str, mode='r') -> str | bytes: """Return the content of a object identified by key. - :param key: fully qualified identifier for the object within the repository. + :param path: the relative path of the object within the repository. :raises TypeError: if the path is not a string and relative path. :raises FileNotFoundError: if the file does not exist. :raises IsADirectoryError: if the object is a directory and not a file. @@ -259,7 +259,7 @@ def put_object_from_file(self, filepath: str, path: str): self._repository.put_object_from_file(filepath, path) self._update_repository_metadata() - def put_object_from_tree(self, filepath: str, path: Optional[str] = None): + def put_object_from_tree(self, filepath: str, path: str | None = None): """Store the entire contents of `filepath` on the local file system in the repository with under given `path`. :param filepath: absolute path of the directory whose contents to copy to the repository. @@ -271,7 +271,7 @@ def put_object_from_tree(self, filepath: str, path: Optional[str] = None): self._repository.put_object_from_tree(filepath, path) self._update_repository_metadata() - def walk(self, path: Optional[FilePath] = None) -> Iterable[Tuple[pathlib.PurePosixPath, List[str], List[str]]]: + def walk(self, path: FilePath | None = None) -> Iterable[tuple[pathlib.PurePosixPath, list[str], list[str]]]: """Walk over the directories and files contained within this repository. .. note:: the order of the dirname and filename lists that are returned is not necessarily sorted. This is in @@ -292,7 +292,7 @@ def glob(self) -> Iterable[pathlib.PurePosixPath]: for filename in filenames: yield dirpath / filename - def copy_tree(self, target: Union[str, pathlib.Path], path: Optional[FilePath] = None) -> None: + def copy_tree(self, target: str | pathlib.Path, path: FilePath | None = None) -> None: """Copy the contents of the entire node repository to another location on the local file system. :param target: absolute path of the directory where to copy the contents to. diff --git a/aiida/parsers/plugins/diff_tutorial/parsers.py b/aiida/parsers/plugins/diff_tutorial/parsers.py index d5120f6c80..cba01284d3 100644 --- a/aiida/parsers/plugins/diff_tutorial/parsers.py +++ b/aiida/parsers/plugins/diff_tutorial/parsers.py @@ -38,7 +38,7 @@ def parse(self, **kwargs): # add output file self.logger.info(f"Parsing '{output_filename}'") - with self.retrieved.open(output_filename, 'rb') as handle: + with self.retrieved.open(output_filename, 'rb') as handle: # type: ignore[arg-type] output_node = SinglefileData(file=handle) self.out('diff', output_node) @@ -59,7 +59,7 @@ def parse(self, **kwargs): # add output file self.logger.info(f"Parsing '{output_filename}'") - with self.retrieved.open(output_filename, 'rb') as handle: + with self.retrieved.open(output_filename, 'rb') as handle: # type: ignore[arg-type] output_node = SinglefileData(file=handle) self.out('diff', output_node) diff --git a/tests/orm/nodes/data/test_folder.py b/tests/orm/nodes/data/test_folder.py index fb3a531a9d..dbb8a34116 100644 --- a/tests/orm/nodes/data/test_folder.py +++ b/tests/orm/nodes/data/test_folder.py @@ -8,6 +8,8 @@ # For further information please visit http://www.aiida.net # ########################################################################### """Tests for the `FolderData` class.""" +import pytest + from aiida.orm import FolderData @@ -21,3 +23,40 @@ def test_constructor_tree(tmp_path): tmp_path.joinpath(filename).write_text(content, encoding='utf8') node = FolderData(tree=str(tmp_path)) assert sorted(node.base.repository.list_object_names()) == sorted(tree.keys()) + + +@pytest.mark.parametrize( + 'method', ( + 'list_objects', + 'list_object_names', + 'open', + 'get_object', + 'get_object_content', + 'put_object_from_bytes', + 'put_object_from_filelike', + 'put_object_from_file', + 'put_object_from_tree', + 'walk', + 'glob', + 'copy_tree', + 'delete_object', + 'erase', + ) +) +def test_api(method, recwarn): + """Test the direct interface can be called without deprecation warnings. + + During the reorganization of the ``Node`` interface, the repository methods were moved to the ``base.repository`` + namespace and deprecation warnings would be printed when the repository interface would be called directly from the + top-level namespace. The change was corrected for the ``FolderData`` since for that data type the repository API + _should_ be the direct interface, and users should not have to go down the ``.base.repository`` namespace. Here we + test that no deprecation warnings are emitted for the public API. Once the deprecation warnings are removed in + AiiDA v3.0, this test can also be removed. + """ + node = FolderData() + + try: + getattr(node, method)() + except Exception: # pylint: disable=broad-except + pass + assert len(recwarn) == 0