diff --git a/aiida/storage/psql_dos/models/authinfo.py b/aiida/storage/psql_dos/models/authinfo.py index dbf9c850b8..6dae1b4916 100644 --- a/aiida/storage/psql_dos/models/authinfo.py +++ b/aiida/storage/psql_dos/models/authinfo.py @@ -19,7 +19,7 @@ class DbAuthInfo(Base): - """Database model to keep computer authentication data, per user. + """Database model to store data for :py:class:`aiida.orm.AuthInfo`, and keep computer authentication data, per user. Specifications are user-specific of how to submit jobs in the computer. The model also has an ``enabled`` logical switch that indicates whether the device is available for use or not. diff --git a/aiida/storage/psql_dos/models/comment.py b/aiida/storage/psql_dos/models/comment.py index 1b128b79b0..2147bc9d94 100644 --- a/aiida/storage/psql_dos/models/comment.py +++ b/aiida/storage/psql_dos/models/comment.py @@ -20,7 +20,10 @@ class DbComment(Base): - """Database model to store comments, relating to a node.""" + """Database model to store data for :py:class:`aiida.orm.Comment`. + + Comments can be attached to the nodes by the users. + """ __tablename__ = 'db_dbcomment' diff --git a/aiida/storage/psql_dos/models/computer.py b/aiida/storage/psql_dos/models/computer.py index 2b0549a1eb..7468c1c676 100644 --- a/aiida/storage/psql_dos/models/computer.py +++ b/aiida/storage/psql_dos/models/computer.py @@ -19,7 +19,10 @@ class DbComputer(Base): - """Database model to store computers. + """Database model to store data for :py:class:`aiida.orm.Computer`. + + Computers represent (and contain the information of) the physical hardware resources available. + Nodes can be associated with computers if they are remote codes, remote folders, or processes that had run remotely. Computers are identified within AiiDA by their ``label`` (and thus it must be unique for each one in the database), whereas the ``hostname`` is the label that identifies the computer within the network from which one can access it. 
diff --git a/aiida/storage/psql_dos/models/group.py b/aiida/storage/psql_dos/models/group.py index dacf12d004..b09aff3698 100644 --- a/aiida/storage/psql_dos/models/group.py +++ b/aiida/storage/psql_dos/models/group.py @@ -39,7 +39,9 @@ class DbGroupNode(Base): class DbGroup(Base): - """Database model to store groups of nodes. + """Database model to store :py:class:`aiida.orm.Group` data. + + A group may contain many different nodes, but also each node can be included in different groups. Users will typically identify and handle groups by using their ``label`` (which, unlike the ``labels`` in other models, must be unique). diff --git a/aiida/storage/psql_dos/models/log.py b/aiida/storage/psql_dos/models/log.py index 0097f5b37c..adad5f9bb4 100644 --- a/aiida/storage/psql_dos/models/log.py +++ b/aiida/storage/psql_dos/models/log.py @@ -21,7 +21,7 @@ class DbLog(Base): - """Database model to store log levels and messages relating to a process node.""" + """Database model to store :py:class:`aiida.orm.Log` data, corresponding to :py:class:`aiida.orm.ProcessNode`.""" __tablename__ = 'db_dblog' id = Column(Integer, primary_key=True) # pylint: disable=invalid-name diff --git a/aiida/storage/psql_dos/models/node.py b/aiida/storage/psql_dos/models/node.py index 1141e3629b..ffd45fc401 100644 --- a/aiida/storage/psql_dos/models/node.py +++ b/aiida/storage/psql_dos/models/node.py @@ -21,7 +21,7 @@ class DbNode(Base): - """Database model to store nodes. + """Database model to store data for :py:class:`aiida.orm.Node`. Each node can be categorized according to its ``node_type``, which indicates what kind of data or process node it is. @@ -170,7 +170,7 @@ def __str__(self): class DbLink(Base): - """Database model to store links between nodes. + """Database model to store links between :py:class:`aiida.orm.Node` instances. Each entry in this table contains not only the ``id`` information of the two nodes that are linked, but also some extra properties of the link themselves. 
diff --git a/aiida/storage/psql_dos/models/user.py b/aiida/storage/psql_dos/models/user.py index c38867c5ba..f4266806fc 100644 --- a/aiida/storage/psql_dos/models/user.py +++ b/aiida/storage/psql_dos/models/user.py @@ -18,7 +18,9 @@ class DbUser(Base): - """Database model to store users. + """Database model to store data for :py:class:`aiida.orm.User`. + + Every node that is created has a single user as its author. The user information consists of the most basic personal contact details. """ diff --git a/aiida/storage/sqlite_zip/backend.py b/aiida/storage/sqlite_zip/backend.py index 2fa5b6947c..7e05909edc 100644 --- a/aiida/storage/sqlite_zip/backend.py +++ b/aiida/storage/sqlite_zip/backend.py @@ -288,8 +288,8 @@ def get_info(self, detailed: bool = False, **kwargs) -> dict: class ZipfileBackendRepository(_RoBackendRepository): """A read-only backend for a zip file. - The zip file should contain repository files with the key format: ``/``, - i.e. files named by the sha256 hash of the file contents, inside a ```` directory. + The zip file should contain repository files with the key format: ``repo/``, + i.e. files named by the sha256 hash of the file contents, inside a ``repo`` directory. 
""" def __init__(self, path: str | Path): diff --git a/aiida/storage/sqlite_zip/models.py b/aiida/storage/sqlite_zip/models.py index 7e637e4bb1..7677b92917 100644 --- a/aiida/storage/sqlite_zip/models.py +++ b/aiida/storage/sqlite_zip/models.py @@ -100,6 +100,7 @@ def create_orm_cls(klass: base.Base) -> SqliteBase: klass.__name__, (SqliteBase,), { + '__doc__': klass.__doc__, '__tablename__': tbl.name, '__table__': tbl, **{col.name if col.name != 'metadata' else '_metadata': col for col in tbl.columns}, diff --git a/docs/source/conf.py b/docs/source/conf.py index beadcb8c76..52e690d318 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -260,9 +260,13 @@ def setup(app: Sphinx): 'aiida.engine.Process': 'aiida.engine.processes.process.Process', 'aiida.engine.WorkChain': 'aiida.engine.processes.workchains.workchain.WorkChain', 'aiida.engine.WorkChainSpec': 'aiida.engine.processes.workchains.workchain.WorkChainSpec', + 'aiida.orm.QueryBuilder': 'aiida.orm.querybuilder.QueryBuilder', 'aiida.orm.ArrayData': 'aiida.orm.nodes.data.array.array.ArrayData', + 'aiida.orm.AuthInfo': 'aiida.orm.authinfos.AuthInfo', 'aiida.orm.Computer': 'aiida.orm.computers.Computer', + 'aiida.orm.Comment': 'aiida.orm.comments.Comment', 'aiida.orm.Group': 'aiida.orm.groups.Group', + 'aiida.orm.Log': 'aiida.orm.logs.Log', 'aiida.orm.Node': 'aiida.orm.nodes.node.Node', 'aiida.orm.User': 'aiida.orm.users.User', 'aiida.orm.CalculationNode': 'aiida.orm.nodes.process.calculation.calculation.CalculationNode', diff --git a/docs/source/internals/database.rst b/docs/source/internals/database.rst deleted file mode 100644 index 598b0bc375..0000000000 --- a/docs/source/internals/database.rst +++ /dev/null @@ -1,138 +0,0 @@ -.. _internal_architecture:database: - -****************** -Database structure -****************** - -The database is the main tool that AiiDA uses to keep track of the provenance. 
-It directly stores the most critical data and contains the access information for everything that gets stored in the repository. -Its content is organized into different tables, and although the exact structure will depend on the backend used (django or sqlalchemy), most of it is the same for both possibilities. - -In the following section, we will first go through the main tables that are related to the AiiDA entities and their relationships. -These tables also have the property of being the same for both backends. -We will give a general overview and explanation of how they work, and provide a more exhaustive technical description of their internal structure. -After that, we will introduce the remaining tables that either serve a more auxiliary purpose or are backend specific. - - -The AiiDA entities and their tables -=================================== - -There are 7 entities that are stored in the database, each within its own table: - - - **db_dbnode:** the `nodes` are the most important entities of AiiDA. - The very provenance graph is made up of interconected data and process nodes. - - - **db_dbgroup:** `groups` are containers for organizing nodes. - A group may contain many different nodes, but also each node can be included in different groups. - - - **db_dbuser:** `users` represent (and contain the information of) the real life individuals working with the program. - Every node that is created has a single user as its author. - - - **db_dbcomputer:** `computers` represent (and contain the information of) the physical hardware resources available. - Nodes can be associated with computers if they are remote codes, remote folders, or processes that had run remotely. - - - **db_dbauthinfo:** `authinfos` contain the specific user configurations for accessing a given computer. - - - **db_dbcomment:** `comments` can be attach to the nodes by the users. 
- - - **db_dblog:** `logs` may be attached to nodes by AiiDA to provide further information of relevant events that transpired during its creation (for example, warning an errors during the execution of processes). - - -In all of the tables in the database (not just the ones mentioned above), the primary key that uniquely identifies each of their members is a positive integer number called ``id`` (sometimes also ``pk``). -However, this number is only unique within the table, and thus there can be a user with an ``id`` of 2 and a node with an ``id`` of 2 in the same database (or, more trivially, two different nodes both with an ``id`` of 2, each in a different database). - -What most of the entities also have (all the aforementioned except for users and authinfos) is a ``uuid`` value. -The ``uuid`` is meant to serve as an identifier that is unique within all tables of all AiiDA databases in the world. -This is a 32-position hexadecimal sequence that is stored as a string with some dash separated sections (for example: ``479a312d-e9b6-4bbb-93b4-f0a7174ccbf4``). - -When going over the descriptions for the entities before, you may have noticed that all of them have some kind of "interaction" or "relationship" with at least one other entity in some way. -Some of these relationships can be tracked inside of one of the related entity's tables, whilst others require the creation of a whole new table with the only purpose of keeping track of them. - - -The many-to-one relationship ----------------------------- - -You can see an example of a many-to-one relationship between users and nodes: each node will have one and only one user that has created it, while a single user may have created many nodes. -Although in that case the relationship is "mandatory", this doesn't need to be the case: for example, not all nodes will have a computer associated with them, but the ones that do will have only one and no more. 
- -The following entities have a many-to-one relationship: - - * Many `nodes` can be created by the same `user`. - * Many `nodes` can point to the same `computer`. - * Many `groups` can be created by the same `user`. - * Many `authinfos` can be set for the same `user`. - * Many `authinfos` can be set for the same `computer`. - * Many `comments` can be created by the same `user`. - * Many `comments` can be attached to the same `node`. - * Many `logs` can be attached to the same `node`. - -The way to keep track of these relationships is by inserting a `foreign key` column in the table of the "many" entity that points to the corresponding id value of the "one" entity they are related to. -For example, there is a ``user_id`` foreign key column in the **db_dbnode** table that stores the id of the user that created each node. - - -The many-to-many relationship ------------------------------ - -This type of relationship is a bit more difficult to track, since now both members can be related to more than one element. -Recording this in the same table as one of the entities would imply storing a list of values in a column (which is often discouraged and not well supported). -Therefore, it is more convenient to use an extra table in which each of the connections has its corresponding entry indicating which are the specific elements that are related. - -There are only two many-to-many relationships in AiiDA: - - - **Between groups and nodes:** - as specified before, many nodes can be inside the same group and a single node can belong to many different groups. - This relationship is tracked in the **db_dbgroup_dbnodes** table. - - - **Between nodes themselves (Links):** - nodes have what is known as a "self-referencing relationship", meaning that they can be connected among themselves. - Indeed, this is one of the core principles of how the provenance graph works. - This relationship is tracked in the **db_dblinks** table. 
- - -Table schema -============ - -The following section provides a complete schema for each of the tables of the SQLAlchemy backend. - -``*`` indicates columns with a unique constraint, ``→`` indicate foreign keys, and ``?`` indicate value types that are nullable. - -.. sqla-model:: ~aiida.storage.psql_dos.models.node.DbNode - -.. sqla-model:: ~aiida.storage.psql_dos.models.node.DbLink - -.. sqla-model:: ~aiida.storage.psql_dos.models.group.DbGroup - -.. sqla-model:: ~aiida.storage.psql_dos.models.group.DbGroupNode - -.. sqla-model:: ~aiida.storage.psql_dos.models.user.DbUser - -.. sqla-model:: ~aiida.storage.psql_dos.models.computer.DbComputer - -.. sqla-model:: ~aiida.storage.psql_dos.models.authinfo.DbAuthInfo - -.. sqla-model:: ~aiida.storage.psql_dos.models.comment.DbComment - -.. sqla-model:: ~aiida.storage.psql_dos.models.log.DbLog - -.. sqla-model:: ~aiida.storage.psql_dos.models.settings.DbSetting - - -Sequence tables ---------------- - -These are necessary to keep track of the id primary key for each main table (including the backend-specific ones). -They end in ``_id_seq`` (for example, **db_dbnode_id_seq**, **db_dbgroup_id_seq**, **db_dblink_id_seq**). - - -Backend specific tables ------------------------ - - - **auth_group** (django) - - **auth_group_permissions** (django) - - **auth_permission** (django) - - **django_content_type** (django) - - **django_migrations** (django) - - **alembic_version** (sqlalchemy) - - -.. todo:: Database migrations (#4035) diff --git a/docs/source/internals/index.rst b/docs/source/internals/index.rst index bd72fb22c9..6e9d475b6e 100644 --- a/docs/source/internals/index.rst +++ b/docs/source/internals/index.rst @@ -3,11 +3,8 @@ Internal architecture ===================== .. toctree:: - :maxdepth: 1 - database - repository - archive_format + storage/index plugin_system engine rest_api @@ -15,4 +12,3 @@ Internal architecture .. 
todo:: global_design - orm diff --git a/docs/source/internals/orm.rst b/docs/source/internals/orm.rst deleted file mode 100644 index 9b3079ff9a..0000000000 --- a/docs/source/internals/orm.rst +++ /dev/null @@ -1,32 +0,0 @@ -.. todo:: - - .. _internal_architecture:orm: - - *** - ORM - *** - - .. _internal_architecture:orm:entities: - - Entities & nodes - ================ - - `#4040`_ - - .. _internal_architecture:orm:querybuilder: - - Querybuilder - ============ - - `#4041`_ - - .. _internal_architecture:orm:ontology: - - AiiDA ontology - ============== - - `#4042`_ - -.. _#4040: https://github.com/aiidateam/aiida-core/issues/4040 -.. _#4041: https://github.com/aiidateam/aiida-core/issues/4041 -.. _#4042: https://github.com/aiidateam/aiida-core/issues/4042 diff --git a/docs/source/internals/storage/architecture.rst b/docs/source/internals/storage/architecture.rst new file mode 100644 index 0000000000..74b101f43a --- /dev/null +++ b/docs/source/internals/storage/architecture.rst @@ -0,0 +1,72 @@ +.. _internal_architecture:storage:architecture: + +General architecture +==================== + +The storage of data is an important aspect of the AiiDA system. +The design for this subsystem is illustrated below. + +.. figure:: static/storage-uml.svg + :width: 80% + :align: center + + UML diagram of the storage architecture. + + Blue indicates frontend classes, red indicates backend classes, and green indicates singletons. + +Separate data is stored per ``Profile``, forming a single provenance graph. +A :py:class:`~aiida.manage.configuration.profile.Profile` instance represents a dictionary that includes the configuration details for accessing the storage for that profile, such as a database URI, etc. +Multiple ``Profile`` can be stored in a :py:class:`~aiida.manage.configuration.config.Config` instance, which is stored in the configuration file (``config.json``). 
+ +Within a single Python process, a single :py:class:`~aiida.manage.manager.Manager` instance can be loaded, to manage access to a globally loaded ``Profile`` and its :py:class:`~aiida.orm.implementation.storage_backend.StorageBackend` instance. + +The storage API subsystem is based on an Object Relational Mapper (ORM) and is divided into two main parts: the frontend and the backend. +The frontend is responsible for the user interface, and is agnostic of any particular storage technologies, +and the backend is responsible for implementing interfaces with specific technologies (such as SQL databases). + +.. _internal_architecture:storage:architecture:frontend: + +Frontend ORM +------------ + +The frontend ORM comprises a number of :py:class:`~aiida.orm.entities.Collection` and :py:class:`~aiida.orm.entities.Entity` subclasses, representing access to a single ORM type. + +:py:class:`~aiida.orm.User` + Represents the author of a particular entity. +:py:class:`~aiida.orm.Node` + Represents a node in a provenance graph, containing data for a particular process (:py:class:`~aiida.orm.ProcessNode`) or process input/output (:py:class:`~aiida.orm.Data`). + Nodes are connected by links that form an acyclic graph. + Nodes also have a :py:class:`~aiida.repository.repository.Repository` instance, which is used to store binary data of the node (see also :ref:`internal-architecture:repository`). +:py:class:`~aiida.orm.Comment` + Represents a comment on a node, by a particular user. +:py:class:`~aiida.orm.Log` + Represents a log message on a :py:class:`~aiida.orm.ProcessNode`, by a particular user. +:py:class:`~aiida.orm.Group` + Represents a group of nodes. + A group can contain multiple nodes, and a single node can be part of multiple groups (i.e. a many-to-many relationship). +:py:class:`~aiida.orm.Computer` + Represents a compute resource on which a process is executed. + A single computer can be attached to multiple :py:class:`~aiida.orm.ProcessNode` (i.e. a one-to-many relationship). 
+:py:class:`~aiida.orm.AuthInfo` + Represents authentication information for a particular computer and user. + +The :py:class:`~aiida.orm.QueryBuilder` allows for querying of specific entities and their associated data. + +Backend Implementations +----------------------- + +Backend implementations must implement the classes outlined in :py:mod:`aiida.orm.implementation`. + +There are currently two core backend implementations: + +- ``psql_dos`` is implemented as the primary storage backend, see :ref:`internal_architecture:storage:psql_dos`. +- ``sqlite_zip`` is implemented as a storage backend for the AiiDA archive, see :ref:`internal_architecture:storage:sqlite_zip`. + +Storage maintenance and profile locking +--------------------------------------- + +The :py:meth:`~aiida.orm.implementation.storage_backend.StorageBackend.maintain` method allows for maintenance operations on the storage (for example, to optimise memory usage), and is called by ``verdi storage maintain``. + +During "full" maintenance, to guarantee the safety of its procedures, it may be necessary that the storage is not accessed by other processes. +The :py:class:`~aiida.manage.profile_access.ProfileAccessManager` allows for profile access requests, and locking of profiles during such procedures. +:py:meth:`~aiida.manage.profile_access.ProfileAccessManager.request_access` is called within :py:meth:`~aiida.manage.manager.Manager.get_profile_storage`. diff --git a/docs/source/internals/storage/index.rst b/docs/source/internals/storage/index.rst new file mode 100644 index 0000000000..839e9604ab --- /dev/null +++ b/docs/source/internals/storage/index.rst @@ -0,0 +1,9 @@ +Storage +======= + +.. toctree:: + + architecture + repository + psql_dos + sqlite_zip diff --git a/docs/source/internals/storage/psql_dos.rst b/docs/source/internals/storage/psql_dos.rst new file mode 100644 index 0000000000..c880abfb22 --- /dev/null +++ b/docs/source/internals/storage/psql_dos.rst @@ -0,0 +1,108 @@ +.. 
_internal_architecture:storage:psql_dos: + +``psql_dos`` format +******************* + +The :py:class:`~aiida.storage.psql_dos.backend.PsqlDosBackend` is the primary format for storing provenance data. +It stores data in two places: + +1. A `PostgreSQL <https://www.postgresql.org>`_ database. +2. A disk-objectstore repository (see :ref:`internal-architecture:repository:dostore`). + +The database stores all "JSONable" entity data, organized into different tables (closely related to AiiDA ORM entities) and columns/fields. +Larger binary data (such as input/output file content), required for nodes, are stored in the disk-objectstore, and referenced by ``db_dbnode.repository_metadata`` as a virtual file-system. + +Interfacing with the database is achieved using the `sqlalchemy <https://www.sqlalchemy.org>`_ ORM API. + + +The PostgreSQL database schema +============================== + +The following section provides a complete schema for the PostgreSQL database. + +Tables +------ + +In all tables, the primary key that uniquely identifies each of their members is a positive integer number in the ``id`` field. +However, this number is only unique within the table, and thus there can be a user with an ``id`` of 2 and a node with an ``id`` of 2 in the same database (or, more trivially, two different nodes both with an ``id`` of 2, each in a different database). + +Most of the entities also have a ``uuid`` value. +The ``uuid`` is meant to serve as an identifier that is unique within all tables of all AiiDA databases in the world. +This is a 32-position hexadecimal sequence that is stored as a string with some dash separated sections (for example: ``479a312d-e9b6-4bbb-93b4-f0a7174ccbf4``). + +.. note:: + + - ``*`` indicates columns with a unique constraint + - ``→`` indicates foreign keys + - ``?`` indicates value types that are nullable. + +.. sqla-model:: ~aiida.storage.psql_dos.models.user.DbUser + +.. sqla-model:: ~aiida.storage.psql_dos.models.node.DbNode + +.. sqla-model:: ~aiida.storage.psql_dos.models.node.DbLink + +.. 
sqla-model:: ~aiida.storage.psql_dos.models.group.DbGroup + +.. sqla-model:: ~aiida.storage.psql_dos.models.group.DbGroupNode + +.. sqla-model:: ~aiida.storage.psql_dos.models.computer.DbComputer + +.. sqla-model:: ~aiida.storage.psql_dos.models.authinfo.DbAuthInfo + +.. sqla-model:: ~aiida.storage.psql_dos.models.comment.DbComment + +.. sqla-model:: ~aiida.storage.psql_dos.models.log.DbLog + +.. sqla-model:: ~aiida.storage.psql_dos.models.settings.DbSetting + + +The many-to-one relationship +---------------------------- + +You can see an example of a many-to-one relationship between users and nodes: each node will have one and only one user that has created it, while a single user may have created many nodes. +Although in that case the relationship is "mandatory", this doesn't need to be the case: for example, not all nodes will have a computer associated with them, but the ones that do will have only one and no more. + +The following entities have a many-to-one relationship: + + * Many `nodes` can be created by the same `user`. + * Many `nodes` can point to the same `computer`. + * Many `groups` can be created by the same `user`. + * Many `authinfos` can be set for the same `user`. + * Many `authinfos` can be set for the same `computer`. + * Many `comments` can be created by the same `user`. + * Many `comments` can be attached to the same `node`. + * Many `logs` can be attached to the same `node`. + +The way to keep track of these relationships is by inserting a `foreign key` column in the table of the "many" entity that points to the corresponding id value of the "one" entity they are related to. +For example, there is a ``user_id`` foreign key column in the **db_dbnode** table that stores the id of the user that created each node. + + +The many-to-many relationship +----------------------------- + +This type of relationship is a bit more difficult to track, since now both members can be related to more than one element. 
+Recording this in the same table as one of the entities would imply storing a list of values in a column (which is often discouraged and not well supported). +Therefore, it is more convenient to use an extra table in which each of the connections has its corresponding entry indicating which are the specific elements that are related. + +There are only two many-to-many relationships in AiiDA: + +Between groups and nodes + as specified before, many nodes can be inside the same group and a single node can belong to many different groups. + This relationship is tracked in the **db_dbgroup_dbnodes** table. + +Between nodes themselves (Links) + nodes have what is known as a "self-referencing relationship", meaning that they can be connected among themselves. + Indeed, this is one of the core principles of how the provenance graph works. + This relationship is tracked in the **db_dblink** table. + +Storage schema migrations +========================= + +Migrations of the storage schema, to bring it in line with updates to the ``aiida-core`` API, are implemented by :py:class:`~aiida.storage.psql_dos.migrator.PsqlDostoreMigrator`, using `alembic <https://alembic.sqlalchemy.org>`_. + +Legacy schema +------------- + +The ``psql_dos`` storage format originates from the merging of the ``django`` and ``sqlalchemy`` backends, present in ``aiida-core`` version 1. +Both backends had very similar PostgreSQL database schemas, and there are now two separate migration branches to merge these into a single schema. 
diff --git a/docs/source/internals/repository.rst b/docs/source/internals/storage/repository.rst similarity index 98% rename from docs/source/internals/repository.rst rename to docs/source/internals/storage/repository.rst index 93704a5386..214c656d36 100644 --- a/docs/source/internals/repository.rst +++ b/docs/source/internals/storage/repository.rst @@ -31,7 +31,7 @@ The frontend interface therefore needs to allow users to store and address files With that guarantee, the backend implementation is free to store the files in any way imaginable in order to meet the requirements specified above. .. _fig:internal-architecture:repository:design-node-repository: -.. figure:: include/images/repository/schematic_design_node_repo.png +.. figure:: static/repository/schematic_design_node_repo.png :align: center :width: 450px @@ -42,6 +42,8 @@ With that guarantee, the backend implementation is free to store the files in an To satisfy the requirements of the frontend interface and the actual data store at the same time, the file repository solution in AiiDA is divided into two components: a *backend* and a *frontend*. In the following, the current backend implementation, the disk object store, is described. +.. _internal-architecture:repository:dostore: + The disk object store --------------------- @@ -55,7 +57,7 @@ The *loose* directory applies one level of sharding based on the first two chara A schematic overview of the folder structure of a disk object store *container* is shown in :numref:`fig:internal-architecture:repository:design-dos`. .. _fig:internal-architecture:repository:design-dos: -.. figure:: include/images/repository/schematic_design_dos.png +.. figure:: static/repository/schematic_design_dos.png :align: center :width: 550px @@ -98,7 +100,7 @@ In a clear separation of responsibilities, the backend is solely tasked with sto For simplicity, the repository backend only deals with raw byte streams and does not maintain any sort of file hierarchy. 
The interface that any backend file repository should implement is defined by the :class:`~aiida.repository.backend.abstract.AbstractRepositoryBackend` abstract class. -.. literalinclude:: ../../../aiida/repository/backend/abstract.py +.. literalinclude:: ../../../../aiida/repository/backend/abstract.py :language: python :pyobject: AbstractRepositoryBackend @@ -113,7 +115,7 @@ The latter implementation simply implements the interface using a temporary scra File objects are stored in a flat manner where the filename, that functions as the unique key, is based on a randomly generated UUID, as shown in :numref:`fig:internal-architecture:repository:design-sandbox`. .. _fig:internal-architecture:repository:design-sandbox: -.. figure:: include/images/repository/schematic_design_sandbox.png +.. figure:: static/repository/schematic_design_sandbox.png :align: center :width: 550px @@ -138,7 +140,7 @@ The file repository frontend To understand how the file repository frontend integrates the ORM and the file repository backend, consider the following class diagram: .. _fig:internal-architecture:repository:class-hierarchy: -.. figure:: include/images/repository/schematic_design_class_hierarchy.png +.. figure:: static/repository/schematic_design_class_hierarchy.png :align: center :width: 550px @@ -212,7 +214,7 @@ Starting from the third level, however, the file hierarchy would once again be f A schematic overview of the resulting file hierarchy is shown in :numref:`fig:internal-architecture:repository:design-original`. .. _fig:internal-architecture:repository:design-original: -.. figure:: include/images/repository/schematic_design_original.png +.. 
figure:: static/repository/schematic_design_original.png :align: center :width: 550px diff --git a/docs/source/internals/archive_format.rst b/docs/source/internals/storage/sqlite_zip.rst similarity index 58% rename from docs/source/internals/archive_format.rst rename to docs/source/internals/storage/sqlite_zip.rst index c08eba34a3..39034b385a 100644 --- a/docs/source/internals/archive_format.rst +++ b/docs/source/internals/storage/sqlite_zip.rst @@ -1,9 +1,9 @@ -.. _internal_architecture:orm:archive: +.. _internal_architecture:storage:sqlite_zip: -******************** -AiiDA archive format -******************** +``sqlite_zip`` (archive) format +******************************* +The :py:class:`~aiida.storage.sqlite_zip.backend.SqliteZipBackend` is the storage format used for the AiiDA archive. An AiiDA archive is a single file format (with canonical extension ``.aiida``), for long term storage of an AiiDA provenance graph. It provides a data storage backend, integrating a database and file repository. @@ -13,27 +13,30 @@ The standard format is a ZIP archive, containing the following files: * ``db.sqlite3`` file containing the AiiDA database. * ``repo/`` directory containing the AiiDA file repository. -.. image:: include/images/archive-file-structure.* +.. figure:: static/archive-file-structure.* :width: 60% :align: center + ``sqlite_zip`` zip file format. + The central directory is written with the metadata and database records at the top of the file. Zip files are read first from the bottom, which contains the byte position of the start of the central directory, then scanning down the central directory to extract records for each file. When extracting the metadata/database only, one can simply scan for that record, then break and directly decompress the byte array for that file. 
In this way, we do not have to scan through all the records of the repository files +As opposed to the :ref:`internal_architecture:storage:psql_dos`, this format is "read-only", since zip files cannot be modified once created. -.. _internal_architecture:orm:archive:metadata: +.. _internal_architecture:storage:sqlite_zip:metadata: -metadata --------- +metadata schema +--------------- This file contains important information, and it is necessary for the correct interpretation of ``db.sqlite3```. This is used to avoid any incompatibilities among different versions of AiiDA. -Hre is an example ``metadata.json``: +Here is an example ``metadata.json``: -.. literalinclude :: includes/metadata.json +.. literalinclude :: static/metadata.json :language: json At the beginning of the file, we see the version of the archive file (under ``export_version``) and the version of the AiiDA code. @@ -43,13 +46,12 @@ New archive versions are introduced for several different reasons; this may gene * the database and/or archive schemes are updated or changed, * or standardized exported property values are updated in AiiDA. -.. note:: +.. important:: For archives of version 0.3 and older it is advisable that you manually try to convince yourself that the migration was completely successful. While all migrations are tested, trying to include reasonable edge-cases, the migrations involved from version 0.3 to 0.4 are intricate and the possibility of a missing edge-case test is quite real. It is worth noting that if you ever have an issue, please report it on `GitHub `_, join the `AiiDA mailing list `_, or use the `contact form `_. .. note:: - If you have migrated an archive file to the newest version, there may be an extra entry in ``metadata.json``. This simply states from which archive version the file was migrated. 
@@ -57,15 +59,45 @@ New archive versions are introduced for several different reasons; this may gene If you supply an old archive file that the current AiiDA code does not support, ``verdi archive import`` will automatically try to migrate the archive by calling ``verdi archive migrate``. -.. _internal_architecture:orm:archive:data-json: +.. _internal_architecture:storage:sqlite_zip:data-json: + +repository format +----------------- + +The repository is read by the :py:class:`~aiida.storage.sqlite_zip.backend.ZipfileBackendRepository`. + +The zip file should contain repository files with the key format: ``repo/<sha256 hash>``, i.e. files named by the sha256 hash of the file contents, inside a ``repo`` directory. + + +database schema +--------------- + +The database schema is intended to directly mirror that of the :ref:`internal_architecture:storage:psql_dos`. +The only differences are in the handling of certain data types by SQLite versus PostgreSQL: + +- ``UUID`` -> ``CHAR(32)`` +- ``DateTime`` -> ``TZDateTime`` +- ``JSONB`` -> ``JSON`` + +Also, ``varchar_pattern_ops`` indexes are not possible in SQLite. + +Tables +...... + +.. sqla-model:: ~aiida.storage.sqlite_zip.models.DbUser + +.. sqla-model:: ~aiida.storage.sqlite_zip.models.DbNode + +.. sqla-model:: ~aiida.storage.sqlite_zip.models.DbLink + +.. sqla-model:: ~aiida.storage.sqlite_zip.models.DbGroup -database --------- +.. sqla-model:: ~aiida.storage.sqlite_zip.models.DbGroupNodes -The database is in sqlite format. +.. sqla-model:: ~aiida.storage.sqlite_zip.models.DbComputer -The schema is dynamically generated from the SQLAlchemy ORM classes for the "main" database (converting `JSONB` -> `JSON`, and `UUID` -> `String`). +.. sqla-model:: ~aiida.storage.sqlite_zip.models.DbAuthInfo -.. seealso:: +.. sqla-model:: ~aiida.storage.sqlite_zip.models.DbComment - :ref:`internal_architecture:database` +..
sqla-model:: ~aiida.storage.sqlite_zip.models.DbLog diff --git a/docs/source/internals/include/images/archive-file-structure.pdf b/docs/source/internals/storage/static/archive-file-structure.pdf similarity index 100% rename from docs/source/internals/include/images/archive-file-structure.pdf rename to docs/source/internals/storage/static/archive-file-structure.pdf diff --git a/docs/source/internals/include/images/archive-file-structure.svg b/docs/source/internals/storage/static/archive-file-structure.svg similarity index 100% rename from docs/source/internals/include/images/archive-file-structure.svg rename to docs/source/internals/storage/static/archive-file-structure.svg diff --git a/docs/source/internals/includes/metadata.json b/docs/source/internals/storage/static/metadata.json similarity index 100% rename from docs/source/internals/includes/metadata.json rename to docs/source/internals/storage/static/metadata.json diff --git a/docs/source/internals/include/images/repository/schematic_design_class_hierarchy.png b/docs/source/internals/storage/static/repository/schematic_design_class_hierarchy.png similarity index 100% rename from docs/source/internals/include/images/repository/schematic_design_class_hierarchy.png rename to docs/source/internals/storage/static/repository/schematic_design_class_hierarchy.png diff --git a/docs/source/internals/include/images/repository/schematic_design_class_hierarchy.svg b/docs/source/internals/storage/static/repository/schematic_design_class_hierarchy.svg similarity index 100% rename from docs/source/internals/include/images/repository/schematic_design_class_hierarchy.svg rename to docs/source/internals/storage/static/repository/schematic_design_class_hierarchy.svg diff --git a/docs/source/internals/include/images/repository/schematic_design_dos.png b/docs/source/internals/storage/static/repository/schematic_design_dos.png similarity index 100% rename from docs/source/internals/include/images/repository/schematic_design_dos.png 
rename to docs/source/internals/storage/static/repository/schematic_design_dos.png diff --git a/docs/source/internals/include/images/repository/schematic_design_dos.svg b/docs/source/internals/storage/static/repository/schematic_design_dos.svg similarity index 100% rename from docs/source/internals/include/images/repository/schematic_design_dos.svg rename to docs/source/internals/storage/static/repository/schematic_design_dos.svg diff --git a/docs/source/internals/include/images/repository/schematic_design_node_repo.png b/docs/source/internals/storage/static/repository/schematic_design_node_repo.png similarity index 100% rename from docs/source/internals/include/images/repository/schematic_design_node_repo.png rename to docs/source/internals/storage/static/repository/schematic_design_node_repo.png diff --git a/docs/source/internals/include/images/repository/schematic_design_node_repo.svg b/docs/source/internals/storage/static/repository/schematic_design_node_repo.svg similarity index 100% rename from docs/source/internals/include/images/repository/schematic_design_node_repo.svg rename to docs/source/internals/storage/static/repository/schematic_design_node_repo.svg diff --git a/docs/source/internals/include/images/repository/schematic_design_original.png b/docs/source/internals/storage/static/repository/schematic_design_original.png similarity index 100% rename from docs/source/internals/include/images/repository/schematic_design_original.png rename to docs/source/internals/storage/static/repository/schematic_design_original.png diff --git a/docs/source/internals/include/images/repository/schematic_design_original.svg b/docs/source/internals/storage/static/repository/schematic_design_original.svg similarity index 100% rename from docs/source/internals/include/images/repository/schematic_design_original.svg rename to docs/source/internals/storage/static/repository/schematic_design_original.svg diff --git 
a/docs/source/internals/include/images/repository/schematic_design_sandbox.png b/docs/source/internals/storage/static/repository/schematic_design_sandbox.png similarity index 100% rename from docs/source/internals/include/images/repository/schematic_design_sandbox.png rename to docs/source/internals/storage/static/repository/schematic_design_sandbox.png diff --git a/docs/source/internals/include/images/repository/schematic_design_sandbox.svg b/docs/source/internals/storage/static/repository/schematic_design_sandbox.svg similarity index 100% rename from docs/source/internals/include/images/repository/schematic_design_sandbox.svg rename to docs/source/internals/storage/static/repository/schematic_design_sandbox.svg diff --git a/docs/source/internals/storage/static/storage-uml.svg b/docs/source/internals/storage/static/storage-uml.svg new file mode 100644 index 0000000000..ef3945e6a3 --- /dev/null +++ b/docs/source/internals/storage/static/storage-uml.svg @@ -0,0 +1,108 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/docs/source/redirects.txt b/docs/source/redirects.txt index c52b65b898..855d62ec73 100644 --- a/docs/source/redirects.txt +++ b/docs/source/redirects.txt @@ -5,8 +5,8 @@ get_started/computers.rst howto/run_codes.rst get_started/codes.rst howto/run_codes.rst howto/plugins.rst howto/plugins_develop.rst howto/exploring.rst howto/query.rst -import_export/main.rst internals/archive_format.rst -internals/data_storage.rst internals/archive_format.rst +import_export/main.rst internals/storage/sqlite_zip.rst +internals/data_storage.rst internals/storage/sqlite_zip.rst install/quick_installation.rst intro/get_started.rst install/prerequisites.rst intro/get_started.rst install/installation.rst intro/get_started.rst