From bd87ecfb84efd4917e408c4990cc47924bf8a5cf Mon Sep 17 00:00:00 2001 From: Sebastiaan Huber Date: Fri, 30 Apr 2021 10:14:14 +0200 Subject: [PATCH] Database schema: set `{}` as default for `DbNode.repository_metadata` Initially, when the column `repository_metadata` was added to the `DbNode` table, it was set to be nullable since a significant number of nodes will not actually have any files and therefore this value would be empty. However, to prevent clients from having to deal with a null value, the front-end ORM `Node.repository_metadata` property would return an empty dictionary in this case, such that the return type is always a dictionary. The main argument for making the column nullable was that it would prevent unnecessary bytes from being stored in the database. However, a bug surfaced where some code expected a dictionary for the `repository_metadata` but got `None`. This particular instance was in the import code, which circumvents the ORM and goes straight to the database. This is of course undesirable, but it also happens through the `QueryBuilder`, which doesn't transform the returned attributes of entities through the ORM interface. Given that there are a number of layers from the ORM to the database, making sure that the typing across all layers is consistent would be tricky and prone to more bugs. The most secure solution is to simply set an empty dict as the default on the database level. The added cost to the database size should still be minimal and so is an acceptable downside to the increased stability of the code. Note that the column in the model is declared with both a server default and a default on the ORM level. The reason is that the server default is required for the migration. If the column were to be added without the default, existing rows would violate the non-nullable clause. For consistency, the server default is also added to the table column declaration. 
The ORM default is necessary to guarantee that an empty dictionary is set on a new `DbNode` instance when it is created. SqlAlchemy cannot execute the server default and so would leave it as `None`, but we require that even for unstored instances, the value defaults to an empty dictionary. --- .../db/migrations/0046_add_node_repository_metadata.py | 2 +- aiida/backends/djsite/db/models.py | 2 +- .../migrations/versions/0edcdd5a30f0_dbgroup_extras.py | 3 +++ .../versions/7536a82b2cc4_add_node_repository_metadata.py | 7 ++++++- aiida/backends/sqlalchemy/models/node.py | 2 +- aiida/orm/nodes/node.py | 3 +-- .../test_migrations_0046_add_node_repository_metadata.py | 4 ++-- .../migrations/test_migrations_0047_migrate_repository.py | 2 +- tests/backends/aiida_sqlalchemy/test_migrations.py | 4 ++-- tests/orm/implementation/test_nodes.py | 4 ++-- 10 files changed, 20 insertions(+), 13 deletions(-) diff --git a/aiida/backends/djsite/db/migrations/0046_add_node_repository_metadata.py b/aiida/backends/djsite/db/migrations/0046_add_node_repository_metadata.py index 82167f9436..110a679bde 100644 --- a/aiida/backends/djsite/db/migrations/0046_add_node_repository_metadata.py +++ b/aiida/backends/djsite/db/migrations/0046_add_node_repository_metadata.py @@ -30,7 +30,7 @@ class Migration(migrations.Migration): migrations.AddField( model_name='dbnode', name='repository_metadata', - field=django.contrib.postgres.fields.jsonb.JSONField(null=True), + field=django.contrib.postgres.fields.jsonb.JSONField(default=dict, null=True), ), upgrade_schema_version(REVISION, DOWN_REVISION), ] diff --git a/aiida/backends/djsite/db/models.py b/aiida/backends/djsite/db/models.py index 7787e72b5e..aec64055a7 100644 --- a/aiida/backends/djsite/db/models.py +++ b/aiida/backends/djsite/db/models.py @@ -127,7 +127,7 @@ class DbNode(m.Model): attributes = JSONField(default=dict, null=True) # JSON Extras extras = JSONField(default=dict, null=True) - repository_metadata = JSONField(null=True) + 
repository_metadata = JSONField(default=dict, null=True) objects = m.Manager() # Return aiida Node instances or their subclasses instead of DbNode instances diff --git a/aiida/backends/sqlalchemy/migrations/versions/0edcdd5a30f0_dbgroup_extras.py b/aiida/backends/sqlalchemy/migrations/versions/0edcdd5a30f0_dbgroup_extras.py index 5c22a1b234..b2968bf386 100644 --- a/aiida/backends/sqlalchemy/migrations/versions/0edcdd5a30f0_dbgroup_extras.py +++ b/aiida/backends/sqlalchemy/migrations/versions/0edcdd5a30f0_dbgroup_extras.py @@ -28,6 +28,9 @@ def upgrade(): """Upgrade: Add the extras column to the 'db_dbgroup' table""" + # We add the column with a `server_default` because otherwise the migration would fail since existing rows will not + # have a value and violate the not-nullable clause. However, the model doesn't use a server default but a default + # on the ORM level, so we remove the server default from the column directly after. op.add_column( 'db_dbgroup', sa.Column('extras', postgresql.JSONB(astext_type=sa.Text()), nullable=False, server_default='{}') ) diff --git a/aiida/backends/sqlalchemy/migrations/versions/7536a82b2cc4_add_node_repository_metadata.py b/aiida/backends/sqlalchemy/migrations/versions/7536a82b2cc4_add_node_repository_metadata.py index 8e8c6d3e94..0a2d9148ea 100644 --- a/aiida/backends/sqlalchemy/migrations/versions/7536a82b2cc4_add_node_repository_metadata.py +++ b/aiida/backends/sqlalchemy/migrations/versions/7536a82b2cc4_add_node_repository_metadata.py @@ -28,7 +28,12 @@ def upgrade(): """Migrations for the upgrade.""" - op.add_column('db_dbnode', sa.Column('repository_metadata', postgresql.JSONB(astext_type=sa.Text()), nullable=True)) + # We add the column with a `server_default` because otherwise the migration would fail since existing rows will not + # have a value and violate the not-nullable clause. 
+ op.add_column( + 'db_dbnode', + sa.Column('repository_metadata', postgresql.JSONB(astext_type=sa.Text()), nullable=False, server_default='{}') + ) def downgrade(): diff --git a/aiida/backends/sqlalchemy/models/node.py b/aiida/backends/sqlalchemy/models/node.py index aee04e38b2..88e88219a7 100644 --- a/aiida/backends/sqlalchemy/models/node.py +++ b/aiida/backends/sqlalchemy/models/node.py @@ -41,7 +41,7 @@ class DbNode(Base): mtime = Column(DateTime(timezone=True), default=timezone.now, onupdate=timezone.now) attributes = Column(JSONB) extras = Column(JSONB) - repository_metadata = Column(JSONB) + repository_metadata = Column(JSONB, nullable=False, default=dict, server_default='{}') dbcomputer_id = Column( Integer, diff --git a/aiida/orm/nodes/node.py b/aiida/orm/nodes/node.py index de3178b41c..49a4dec0c0 100644 --- a/aiida/orm/nodes/node.py +++ b/aiida/orm/nodes/node.py @@ -37,7 +37,6 @@ from .repository import NodeRepositoryMixin if TYPE_CHECKING: - from aiida.repository import File from ..implementation import Backend from ..implementation.nodes import BackendNode @@ -286,7 +285,7 @@ def repository_metadata(self) -> typing.Dict: :return: the repository metadata """ - return self.backend_entity.repository_metadata or {} + return self.backend_entity.repository_metadata @repository_metadata.setter def repository_metadata(self, value): diff --git a/tests/backends/aiida_django/migrations/test_migrations_0046_add_node_repository_metadata.py b/tests/backends/aiida_django/migrations/test_migrations_0046_add_node_repository_metadata.py index 0798ea8637..34708a4c0f 100644 --- a/tests/backends/aiida_django/migrations/test_migrations_0046_add_node_repository_metadata.py +++ b/tests/backends/aiida_django/migrations/test_migrations_0046_add_node_repository_metadata.py @@ -25,9 +25,9 @@ def setUpBeforeMigration(self): dbnode.save() self.node_pk = dbnode.pk - def test_group_string_update(self): + def test_migration(self): """Test that the column is added and null by 
default.""" DbNode = self.apps.get_model('db', 'DbNode') node = DbNode.objects.get(pk=self.node_pk) assert hasattr(node, 'repository_metadata') - assert node.repository_metadata is None + assert node.repository_metadata == {} diff --git a/tests/backends/aiida_django/migrations/test_migrations_0047_migrate_repository.py b/tests/backends/aiida_django/migrations/test_migrations_0047_migrate_repository.py index f889f27f8b..b62c402fb8 100644 --- a/tests/backends/aiida_django/migrations/test_migrations_0047_migrate_repository.py +++ b/tests/backends/aiida_django/migrations/test_migrations_0047_migrate_repository.py @@ -82,7 +82,7 @@ def test_migration(self): } } } - assert node_03.repository_metadata is None + assert node_03.repository_metadata == {} for hashkey, content in ( (node_01.repository_metadata['o']['sub']['o']['path']['o']['file_b.txt']['k'], b'b'), diff --git a/tests/backends/aiida_sqlalchemy/test_migrations.py b/tests/backends/aiida_sqlalchemy/test_migrations.py index 3074bcd5aa..957230e364 100644 --- a/tests/backends/aiida_sqlalchemy/test_migrations.py +++ b/tests/backends/aiida_sqlalchemy/test_migrations.py @@ -1786,7 +1786,7 @@ def test_add_node_repository_metadata(self): try: node = session.query(DbNode).filter(DbNode.id == self.node_id).one() assert hasattr(node, 'repository_metadata') - assert node.repository_metadata is None + assert node.repository_metadata == {} finally: session.close() @@ -1884,7 +1884,7 @@ def test_migration(self): } } } - assert node_03.repository_metadata is None + assert node_03.repository_metadata == {} for hashkey, content in ( (node_01.repository_metadata['o']['sub']['o']['path']['o']['file_b.txt']['k'], b'b'), diff --git a/tests/orm/implementation/test_nodes.py b/tests/orm/implementation/test_nodes.py index af6e6dceac..217567521e 100644 --- a/tests/orm/implementation/test_nodes.py +++ b/tests/orm/implementation/test_nodes.py @@ -59,7 +59,7 @@ def test_creation(self): self.assertIsNone(node.process_type) 
self.assertEqual(node.attributes, dict()) self.assertEqual(node.extras, dict()) - self.assertEqual(node.repository_metadata, None) + self.assertEqual(node.repository_metadata, {}) self.assertEqual(node.node_type, self.node_type) self.assertEqual(node.label, self.node_label) self.assertEqual(node.description, self.node_description) @@ -87,7 +87,7 @@ def test_creation(self): self.assertIsNone(node.process_type) self.assertEqual(node.attributes, dict()) self.assertEqual(node.extras, dict()) - self.assertEqual(node.repository_metadata, None) + self.assertEqual(node.repository_metadata, {}) self.assertEqual(node.node_type, self.node_type) self.assertEqual(node.label, self.node_label) self.assertEqual(node.description, self.node_description)