Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

✨ NEW: Add Backend bulk methods #5171

Merged
merged 17 commits into from
Oct 16, 2021
Merged
Show file tree
Hide file tree
Changes from 13 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,10 @@ repos:
aiida/manage/manager.py|
aiida/manage/database/delete/nodes.py|
aiida/orm/querybuilder.py|
aiida/orm/implementation/backends.py|
aiida/orm/implementation/sql/backends.py|
aiida/orm/implementation/django/backend.py|
aiida/orm/implementation/sqlalchemy/backend.py|
aiida/orm/implementation/querybuilder.py|
aiida/orm/implementation/sqlalchemy/querybuilder/.*py|
aiida/orm/nodes/data/jsonable.py|
Expand Down
28 changes: 0 additions & 28 deletions aiida/backends/djsite/utils.py

This file was deleted.

5 changes: 5 additions & 0 deletions aiida/backends/sqlalchemy/models/group.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,11 @@
)


class DbGroupNode(Base):
    """Class to store group to nodes relation using SQLA backend."""
    # Bind this declarative class to the existing ``table_groups_nodes`` table object
    # instead of declaring columns on the class itself.
    __table__ = table_groups_nodes


class DbGroup(Base):
"""Class to store groups using SQLA backend."""

Expand Down
5 changes: 3 additions & 2 deletions aiida/backends/sqlalchemy/models/node.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,8 +156,9 @@ class DbLink(Base):
Integer, ForeignKey('db_dbnode.id', ondelete='CASCADE', deferrable=True, initially='DEFERRED'), index=True
)

input = relationship('DbNode', primaryjoin='DbLink.input_id == DbNode.id')
output = relationship('DbNode', primaryjoin='DbLink.output_id == DbNode.id')
# https://docs.sqlalchemy.org/en/14/errors.html#relationship-x-will-copy-column-q-to-column-p-which-conflicts-with-relationship-s-y
input = relationship('DbNode', primaryjoin='DbLink.input_id == DbNode.id', overlaps='inputs_q,outputs_q')
output = relationship('DbNode', primaryjoin='DbLink.output_id == DbNode.id', overlaps='inputs_q,outputs_q')

label = Column(String(255), index=True, nullable=False)
type = Column(String(255), index=True)
Expand Down
28 changes: 0 additions & 28 deletions aiida/backends/sqlalchemy/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,34 +11,6 @@
"""Utility functions specific to the SqlAlchemy backend."""


def delete_nodes_and_connections_sqla(pks_to_delete):  # pylint: disable=invalid-name
    """Delete all nodes corresponding to pks in the input, with their links and group memberships.

    Group memberships are removed first, then the links leaving and entering the nodes,
    and finally the nodes themselves. All statements are issued inside a single
    ``backend.transaction()`` context.

    :param pks_to_delete: A list, tuple or set of pks that should be deleted.
    """
    # pylint: disable=no-value-for-parameter
    from aiida.backends.sqlalchemy.models.group import table_groups_nodes
    from aiida.backends.sqlalchemy.models.node import DbLink, DbNode
    from aiida.manage.manager import get_manager

    backend = get_manager().get_backend()

    # Materialise the pks exactly once. Converting the input separately for every statement
    # would exhaust a one-shot iterable after the first use, so the later deletes would
    # silently match nothing.
    pks = list(pks_to_delete)

    with backend.transaction() as session:
        # First remove the membership of these nodes to groups.
        # ``table_groups_nodes`` is a ``sqlalchemy.schema.Table``, so the expression language is used
        # to build a statement that the session executes (the other deletions go through the ORM query API).
        stmt = table_groups_nodes.delete().where(table_groups_nodes.c.dbnode_id.in_(pks))
        session.execute(stmt)
        # Links are deleted before the nodes, since deletions are not cascaded here.
        # Links coming out of the nodes marked for deletion.
        session.query(DbLink).filter(DbLink.input_id.in_(pks)).delete(synchronize_session='fetch')
        # Links pointing to the nodes marked for deletion.
        session.query(DbLink).filter(DbLink.output_id.in_(pks)).delete(synchronize_session='fetch')
        # Finally delete the nodes themselves.
        session.query(DbNode).filter(DbNode.id.in_(pks)).delete(synchronize_session='fetch')


def flag_modified(instance, key):
"""Wrapper around `sqlalchemy.orm.attributes.flag_modified` to correctly dereference utils.ModelWrapper

Expand Down
15 changes: 0 additions & 15 deletions aiida/backends/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,6 @@
# For further information please visit http://www.aiida.net #
###########################################################################
"""Backend-agnostic utility functions"""
from aiida.backends import BACKEND_DJANGO, BACKEND_SQLA
from aiida.manage import configuration

AIIDA_ATTRIBUTE_SEP = '.'


Expand Down Expand Up @@ -47,15 +44,3 @@ def create_scoped_session_factory(engine, **kwargs):
"""Create scoped SQLAlchemy session factory"""
from sqlalchemy.orm import scoped_session, sessionmaker
return scoped_session(sessionmaker(bind=engine, future=True, **kwargs))


def delete_nodes_and_connections(pks):
    """Delete nodes and their connections through the implementation of the profile's database backend.

    :param pks: the node pks, forwarded unchanged to the backend-specific delete function
    :raises Exception: if the profile's database backend is neither the Django nor the SqlAlchemy one
    """
    database_backend = configuration.PROFILE.database_backend

    # Reject unsupported backends up front so the dispatch below only handles known values.
    if database_backend not in (BACKEND_DJANGO, BACKEND_SQLA):
        raise Exception(f'unknown backend {configuration.PROFILE.database_backend}')

    if database_backend == BACKEND_DJANGO:
        from aiida.backends.djsite.utils import delete_nodes_and_connections_django
        delete_nodes_and_connections_django(pks)
        return

    from aiida.backends.sqlalchemy.utils import delete_nodes_and_connections_sqla
    delete_nodes_and_connections_sqla(pks)
14 changes: 14 additions & 0 deletions aiida/orm/entities.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
"""Module for all common top level AiiDA entity classes and methods"""
import abc
import copy
from enum import Enum
import typing

from plumpy.base.utils import call_with_super_check, super_check
Expand All @@ -25,6 +26,19 @@
_NO_DEFAULT = tuple()


class EntityTypes(Enum):
    """Enum for referring to ORM entities in a backend-agnostic manner.

    Each member's value is the lower-case form of its name.
    """
    AUTHINFO = 'authinfo'
    COMMENT = 'comment'
    COMPUTER = 'computer'
    GROUP = 'group'
    LOG = 'log'
    NODE = 'node'
    USER = 'user'
    # NOTE(review): the two members below presumably refer to relations between entities
    # (node-to-node links and group-to-node membership) rather than entities — confirm.
    LINK = 'link'
    GROUP_NODE = 'group_node'


class Collection(typing.Generic[EntityType]):
"""Container class that represents the collection of objects of a particular type."""

Expand Down
71 changes: 61 additions & 10 deletions aiida/orm/implementation/backends.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,12 @@
###########################################################################
"""Generic backend related objects"""
import abc
from typing import TYPE_CHECKING
from typing import TYPE_CHECKING, ContextManager, Generic, List, Sequence, TypeVar

if TYPE_CHECKING:
from sqlalchemy.orm.session import Session

from aiida.orm.entities import EntityTypes
from aiida.orm.implementation import (
BackendAuthInfoCollection,
BackendCommentCollection,
Expand All @@ -27,12 +28,14 @@

__all__ = ('Backend',)

TransactionType = TypeVar('TransactionType')

class Backend(abc.ABC):

class Backend(abc.ABC, Generic[TransactionType]):
"""The public interface that defines a backend factory that creates backend specific concrete objects."""

@abc.abstractmethod
def migrate(self):
def migrate(self) -> None:
"""Migrate the database to the latest schema generation or version."""

@property
Expand Down Expand Up @@ -65,17 +68,24 @@ def logs(self) -> 'BackendLogCollection':
def nodes(self) -> 'BackendNodeCollection':
"""Return the collection of nodes"""

@property
@abc.abstractmethod
def users(self) -> 'BackendUserCollection':
"""Return the collection of users"""

@abc.abstractmethod
def query(self) -> 'BackendQueryBuilder':
"""Return an instance of a query builder implementation for this backend"""

@property
@abc.abstractmethod
def users(self) -> 'BackendUserCollection':
"""Return the collection of users"""
def get_session(self) -> 'Session':
"""Return a database session that can be used by the `QueryBuilder` to perform its query.

:return: an instance of :class:`sqlalchemy.orm.session.Session`
"""

@abc.abstractmethod
def transaction(self):
def transaction(self) -> ContextManager[TransactionType]:
"""
Get a context manager that can be used as a transaction context for a series of backend operations.
If there is an exception within the context then the changes will be rolled back and the state will
Expand All @@ -85,8 +95,49 @@ def transaction(self):
"""

@abc.abstractmethod
def get_session(self) -> 'Session':
"""Return a database session that can be used by the `QueryBuilder` to perform its query.
def delete_nodes_and_connections(self, pks_to_delete: Sequence[int], transaction: TransactionType):
"""Delete all nodes corresponding to pks in the input.

:return: an instance of :class:`sqlalchemy.orm.session.Session`
This method is intended to be used within a transaction context.

:param pks_to_delete: a sequence of node pks to delete
:param transaction: the returned instance from entering transaction context
"""

@abc.abstractmethod
def bulk_insert(
    self,
    entity_type: 'EntityTypes',
    rows: List[dict],
    transaction: TransactionType,
    allow_defaults: bool = False
) -> List[int]:
    """Insert a list of entities into the database, directly into a backend transaction.

    This method is intended to be used within a transaction context.

    :param entity_type: The type of the entity
    :param rows: A list of dictionaries, containing all fields of the backend model,
        except the `id` field (a.k.a primary key), which will be generated dynamically
    :param transaction: the returned object of the ``self.transaction`` context
    :param allow_defaults: If ``False``, assert that each row contains all fields (except primary key(s)),
        otherwise, allow default values for missing fields.

    :raises: ``IntegrityError`` if the keys in a row are not a subset of the columns in the table

    :returns: The list of generated primary keys for the entities
    """

@abc.abstractmethod
def bulk_update(self, entity_type: 'EntityTypes', rows: List[dict], transaction: TransactionType) -> None:
    """Update a list of entities in the database, directly with a backend transaction.

    This method is intended to be used within a transaction context.

    :param entity_type: The type of the entity
    :param rows: A list of dictionaries, containing fields of the backend model to update,
        and the `id` field (a.k.a primary key)
    :param transaction: the returned object of the ``self.transaction`` context

    :raises: ``IntegrityError`` if the keys in a row are not a subset of the columns in the table
    """
Loading