diff --git a/.gitmodules b/.gitmodules index f63c30593..cdf264f0f 100644 --- a/.gitmodules +++ b/.gitmodules @@ -13,6 +13,3 @@ [submodule "sub/clinicore"] path = sub/clinicore url = https://github.com/laminlabs/clinicore -[submodule "sub/ourprojects"] - path = sub/ourprojects - url = https://github.com/laminlabs/ourprojects diff --git a/docs/api.md b/docs/api.md index a74fa16bc..a2dbf68bc 100644 --- a/docs/api.md +++ b/docs/api.md @@ -26,7 +26,6 @@ rest :hidden: bionty -ourprojects wetlab clinicore cellregistry diff --git a/docs/faq/validate-fields.ipynb b/docs/faq/validate-fields.ipynb index d87bd4ce4..47b5006b2 100644 --- a/docs/faq/validate-fields.ipynb +++ b/docs/faq/validate-fields.ipynb @@ -6,9 +6,7 @@ "source": [ "# Django field validation\n", "\n", - "[Django field validation](https://docs.djangoproject.com/en/5.1/ref/validators/) are enabled for models that inherit the `ValidateFields` class.\n", - "\n", - "For instance: [`ourprojects.Reference`](https://docs.lamin.ai/ourprojects.reference)" + "[Django field validation](https://docs.djangoproject.com/en/5.1/ref/validators/) is enabled for models that inherit the `ValidateFields` class." 
] }, { @@ -17,8 +15,8 @@ "metadata": {}, "outputs": [], "source": [ - "# !pip install lamindb[ourprojects]\n", - "!lamin init --storage ./test-django-validation --modules ourprojects" + "# pip install lamindb\n", + "!lamin init --storage ./test-django-validation" ] }, { @@ -27,7 +25,7 @@ "metadata": {}, "outputs": [], "source": [ - "import ourprojects as pj\n", + "import lamindb as ln\n", "from lamindb.base.validation import FieldValidationError" ] }, @@ -38,7 +36,7 @@ "outputs": [], "source": [ "try:\n", - " pj.Reference(name=\"my ref\", doi=\"abc.ef\", url=\"myurl.com\")\n", + " ln.Reference(name=\"my ref\", doi=\"abc.ef\", url=\"myurl.com\")\n", "except FieldValidationError as e:\n", " print(e)" ] diff --git a/docs/ourprojects.md b/docs/ourprojects.md deleted file mode 100644 index 6e7806d8b..000000000 --- a/docs/ourprojects.md +++ /dev/null @@ -1,5 +0,0 @@ -# `ourprojects` - -```{eval-rst} -.. automodule:: ourprojects -``` diff --git a/docs/storage/prepare-transfer-local-to-cloud.ipynb b/docs/storage/prepare-transfer-local-to-cloud.ipynb index d59e1d96d..439d94aa6 100644 --- a/docs/storage/prepare-transfer-local-to-cloud.ipynb +++ b/docs/storage/prepare-transfer-local-to-cloud.ipynb @@ -34,7 +34,7 @@ "metadata": {}, "outputs": [], "source": [ - "ln.setup.init(storage=\"./test-transfer-to-cloud\", modules=\"bionty,wetlab,ourprojects\")\n", + "ln.setup.init(storage=\"./test-transfer-to-cloud\", modules=\"bionty,wetlab\")\n", "ln.setup.settings.auto_connect = False" ] }, diff --git a/lamindb/__init__.py b/lamindb/__init__.py index da1a63090..5798648f3 100644 --- a/lamindb/__init__.py +++ b/lamindb/__init__.py @@ -1,28 +1,37 @@ """A data framework for biology. -Core registries. +Tracking notebooks & scripts. + +.. autosummary:: + :toctree: . + + track + finish + +Registries. .. autosummary:: :toctree: . Artifact - Collection Transform + ULabel Run User Storage - ULabel Feature FeatureSet Param + Collection + Project + Reference + Person Key functionality. .. 
autosummary:: :toctree: . - track - finish connect Curator view @@ -44,7 +53,7 @@ """ # denote a release candidate for 0.1.0 with 0.1rc1, 0.1a1, 0.1b1, etc. -__version__ = "1.0a2" +__version__ = "1.0a3" from lamindb_setup._check_setup import InstanceNotSetupError as _InstanceNotSetupError from lamindb_setup._check_setup import _check_instance_setup @@ -87,6 +96,9 @@ def __getattr__(name): Feature, FeatureSet, Param, + Person, + Project, + Reference, Run, Storage, Transform, diff --git a/lamindb/base/validation.py b/lamindb/base/validation.py index 0adc4cc05..d32f3e7f9 100644 --- a/lamindb/base/validation.py +++ b/lamindb/base/validation.py @@ -21,9 +21,12 @@ def validate_literal_fields(record: "Record", kwargs) -> None: Raises: ValidationError: If any field value is not in its Literal's allowed values """ + # check is based on string to avoid circular imports if record.__class__.__name__ == "Feature": # the FeatureDtype is more complicated than a simple literal # because it allows constructs like cat[ULabel] etc. 
+ # the User model is used at startup and throws a datetime-related error otherwise + # similar for Storage & Source return None try: type_hints = get_type_hints(record.__class__) diff --git a/lamindb/migrations/0073_merge_ourprojects.py b/lamindb/migrations/0073_merge_ourprojects.py new file mode 100644 index 000000000..a1ebc147d --- /dev/null +++ b/lamindb/migrations/0073_merge_ourprojects.py @@ -0,0 +1,945 @@ +# Generated by Django 5.2 on 2025-01-13 05:55 + +import django.core.validators +import django.db.models.deletion +from django.db import migrations, models + +import lamindb.base.fields +import lamindb.base.ids +import lamindb.base.users +import lamindb.models + + +def migrate_data(apps, schema_editor): + """Check if source table exists and run migration if it does.""" + db = schema_editor.connection + cursor = db.cursor() + + # Check if table exists - works in both SQLite and PostgreSQL + if db.vendor == "sqlite": + cursor.execute(""" + SELECT count(*) + FROM sqlite_master + WHERE type='table' AND name='ourprojects_reference'; + """) + else: # postgresql + cursor.execute(""" + SELECT count(*) + FROM information_schema.tables + WHERE table_schema = 'public' + AND table_name = 'ourprojects_reference'; + """) + + table_exists = cursor.fetchone()[0] > 0 + if not table_exists: + return + + # Get initial counts + cursor.execute("SELECT COUNT(*) FROM ourprojects_reference") + old_count = cursor.fetchone()[0] + cursor.execute("SELECT COUNT(*) FROM lamindb_reference") + initial_target_count = cursor.fetchone()[0] + + # Begin transaction + cursor.execute("BEGIN") + try: + # Copy core tables; aux is not copied and takes its database default + cursor.execute(""" + INSERT INTO lamindb_reference( + id, created_at, updated_at, created_by_id, uid, name, abbr, url, + pubmed_id, doi, preprint, public, journal, description, text, + published_at + ) + SELECT + id, created_at, updated_at, created_by_id, uid, name, abbr, url, + pubmed_id, doi, preprint, public, journal, description, text, + 
published_at + FROM ourprojects_reference + """) + + cursor.execute(""" + INSERT INTO lamindb_person( + id, created_at, updated_at, created_by_id, uid, name, email, + external + ) + SELECT + id, created_at, updated_at, created_by_id, uid, name, email, + external + FROM ourprojects_person + """) + + cursor.execute(""" + INSERT INTO lamindb_project( + id, created_at, updated_at, created_by_id, uid, name, abbr, + url + ) + SELECT + id, created_at, updated_at, created_by_id, uid, name, abbr, + url + FROM ourprojects_project + """) + + # Copy the link tables backing the many-to-many relationships + cursor.execute(""" + INSERT INTO lamindb_artifactreference( + id, created_at, created_by_id, artifact_id, reference_id, + feature_id, label_ref_is_name, feature_ref_is_name + ) + SELECT + id, created_at, created_by_id, artifact_id, reference_id, + feature_id, label_ref_is_name, feature_ref_is_name + FROM ourprojects_artifactreference + """) + + cursor.execute(""" + INSERT INTO lamindb_transformreference( + id, created_at, created_by_id, transform_id, reference_id + ) + SELECT + id, created_at, created_by_id, transform_id, reference_id + FROM ourprojects_transformreference + """) + + cursor.execute(""" + INSERT INTO lamindb_collectionreference( + id, created_at, created_by_id, collection_id, reference_id + ) + SELECT + id, created_at, created_by_id, collection_id, reference_id + FROM ourprojects_collectionreference + """) + + cursor.execute(""" + INSERT INTO lamindb_artifactproject( + id, created_at, created_by_id, artifact_id, project_id, + feature_id, label_ref_is_name, feature_ref_is_name + ) + SELECT + id, created_at, created_by_id, artifact_id, project_id, + feature_id, label_ref_is_name, feature_ref_is_name + FROM ourprojects_artifactproject + """) + + cursor.execute(""" + INSERT INTO lamindb_transformproject( + id, created_at, created_by_id, transform_id, project_id + ) + SELECT + id, created_at, created_by_id, transform_id, project_id + FROM ourprojects_transformproject + """) + + 
cursor.execute(""" + INSERT INTO lamindb_collectionproject( + id, created_at, created_by_id, collection_id, project_id + ) + SELECT + id, created_at, created_by_id, collection_id, project_id + FROM ourprojects_collectionproject + """) + + # Verify migration + cursor.execute("SELECT COUNT(*) FROM lamindb_reference") + final_count = cursor.fetchone()[0] + expected_count = initial_target_count + old_count + + if final_count == expected_count: + # Clean up ourprojects content + cursor.execute("DELETE FROM django_migrations WHERE app = 'ourprojects'") + + # Drop tables - using standard SQL + tables = [ + "ourprojects_reference", + "ourprojects_person", + "ourprojects_project", + "ourprojects_artifactreference", + "ourprojects_transformreference", + "ourprojects_collectionreference", + "ourprojects_artifactproject", + "ourprojects_transformproject", + "ourprojects_collectionproject", + ] + + for table in tables: + if db.vendor == "sqlite": + cursor.execute(f"DROP TABLE IF EXISTS {table}") + else: # postgresql + cursor.execute(f"DROP TABLE IF EXISTS {table} CASCADE") + + cursor.execute("COMMIT") + print( + "Data migration from ourprojects to lamindb successful, you can now access ourprojects data through lamindb" + ) + else: + cursor.execute("ROLLBACK") + raise Exception("Migration failed: Record count mismatch") + + except Exception as e: + cursor.execute("ROLLBACK") + raise e + + +class Migration(migrations.Migration): + dependencies = [ + ("lamindb", "0072_remove_user__branch_code_remove_user_aux_and_more"), + ] + + operations = [ + migrations.CreateModel( + name="ArtifactProject", + fields=[ + ( + "created_at", + lamindb.base.fields.DateTimeField(auto_now_add=True, db_index=True), + ), + ("id", models.BigAutoField(primary_key=True, serialize=False)), + ( + "label_ref_is_name", + lamindb.base.fields.BooleanField( + blank=True, default=None, null=True + ), + ), + ( + "feature_ref_is_name", + lamindb.base.fields.BooleanField( + blank=True, default=None, null=True + ), + 
), + ( + "artifact", + lamindb.base.fields.ForeignKey( + blank=True, + on_delete=django.db.models.deletion.CASCADE, + related_name="links_project", + to="lamindb.artifact", + ), + ), + ( + "created_by", + lamindb.base.fields.ForeignKey( + blank=True, + default=lamindb.base.users.current_user_id, + on_delete=django.db.models.deletion.PROTECT, + related_name="+", + to="lamindb.user", + ), + ), + ( + "feature", + lamindb.base.fields.ForeignKey( + blank=True, + default=None, + null=True, + on_delete=django.db.models.deletion.PROTECT, + related_name="links_artifactproject", + to="lamindb.feature", + ), + ), + ( + "run", + lamindb.base.fields.ForeignKey( + blank=True, + default=lamindb.models.current_run, + null=True, + on_delete=django.db.models.deletion.PROTECT, + related_name="+", + to="lamindb.run", + ), + ), + ], + bases=(lamindb.models.LinkORM, models.Model), + ), + migrations.CreateModel( + name="ArtifactReference", + fields=[ + ( + "created_at", + lamindb.base.fields.DateTimeField(auto_now_add=True, db_index=True), + ), + ("id", models.BigAutoField(primary_key=True, serialize=False)), + ( + "label_ref_is_name", + lamindb.base.fields.BooleanField( + blank=True, default=None, null=True + ), + ), + ( + "feature_ref_is_name", + lamindb.base.fields.BooleanField( + blank=True, default=None, null=True + ), + ), + ( + "artifact", + lamindb.base.fields.ForeignKey( + blank=True, + on_delete=django.db.models.deletion.CASCADE, + related_name="links_reference", + to="lamindb.artifact", + ), + ), + ( + "created_by", + lamindb.base.fields.ForeignKey( + blank=True, + default=lamindb.base.users.current_user_id, + on_delete=django.db.models.deletion.PROTECT, + related_name="+", + to="lamindb.user", + ), + ), + ( + "feature", + lamindb.base.fields.ForeignKey( + blank=True, + default=None, + null=True, + on_delete=django.db.models.deletion.PROTECT, + related_name="links_artifactreference", + to="lamindb.feature", + ), + ), + ( + "run", + lamindb.base.fields.ForeignKey( + blank=True, 
+ default=lamindb.models.current_run, + null=True, + on_delete=django.db.models.deletion.PROTECT, + related_name="+", + to="lamindb.run", + ), + ), + ], + bases=(lamindb.models.LinkORM, models.Model), + ), + migrations.CreateModel( + name="CollectionProject", + fields=[ + ( + "created_at", + lamindb.base.fields.DateTimeField(auto_now_add=True, db_index=True), + ), + ("id", models.BigAutoField(primary_key=True, serialize=False)), + ( + "collection", + lamindb.base.fields.ForeignKey( + blank=True, + on_delete=django.db.models.deletion.CASCADE, + related_name="links_project", + to="lamindb.collection", + ), + ), + ( + "created_by", + lamindb.base.fields.ForeignKey( + blank=True, + default=lamindb.base.users.current_user_id, + on_delete=django.db.models.deletion.PROTECT, + related_name="+", + to="lamindb.user", + ), + ), + ( + "run", + lamindb.base.fields.ForeignKey( + blank=True, + default=lamindb.models.current_run, + null=True, + on_delete=django.db.models.deletion.PROTECT, + related_name="+", + to="lamindb.run", + ), + ), + ], + bases=(lamindb.models.LinkORM, models.Model), + ), + migrations.CreateModel( + name="CollectionReference", + fields=[ + ( + "created_at", + lamindb.base.fields.DateTimeField(auto_now_add=True, db_index=True), + ), + ("id", models.BigAutoField(primary_key=True, serialize=False)), + ( + "collection", + lamindb.base.fields.ForeignKey( + blank=True, + on_delete=django.db.models.deletion.CASCADE, + related_name="links_reference", + to="lamindb.collection", + ), + ), + ( + "created_by", + lamindb.base.fields.ForeignKey( + blank=True, + default=lamindb.base.users.current_user_id, + on_delete=django.db.models.deletion.PROTECT, + related_name="+", + to="lamindb.user", + ), + ), + ( + "run", + lamindb.base.fields.ForeignKey( + blank=True, + default=lamindb.models.current_run, + null=True, + on_delete=django.db.models.deletion.PROTECT, + related_name="+", + to="lamindb.run", + ), + ), + ], + bases=(lamindb.models.LinkORM, models.Model), + ), + 
migrations.CreateModel( + name="Person", + fields=[ + ( + "created_at", + lamindb.base.fields.DateTimeField(auto_now_add=True, db_index=True), + ), + ( + "updated_at", + lamindb.base.fields.DateTimeField(auto_now=True, db_index=True), + ), + ( + "_branch_code", + models.SmallIntegerField(db_default=1, db_index=True, default=1), + ), + ("aux", models.JSONField(db_default=None, default=None, null=True)), + ("id", models.AutoField(primary_key=True, serialize=False)), + ( + "uid", + lamindb.base.fields.CharField( + blank=True, + db_index=True, + default=lamindb.base.ids.base62_8, + max_length=8, + unique=True, + ), + ), + ( + "name", + lamindb.base.fields.CharField( + blank=True, db_index=True, default=None, max_length=255 + ), + ), + ( + "email", + lamindb.base.fields.EmailField( + blank=True, default=None, max_length=254, null=True + ), + ), + ( + "external", + lamindb.base.fields.BooleanField( + blank=True, db_index=True, default=True + ), + ), + ( + "created_by", + lamindb.base.fields.ForeignKey( + blank=True, + default=lamindb.base.users.current_user_id, + on_delete=django.db.models.deletion.PROTECT, + related_name="+", + to="lamindb.user", + ), + ), + ( + "run", + lamindb.base.fields.ForeignKey( + blank=True, + default=lamindb.models.current_run, + null=True, + on_delete=django.db.models.deletion.PROTECT, + related_name="+", + to="lamindb.run", + ), + ), + ( + "space", + lamindb.base.fields.ForeignKey( + blank=True, + db_default=1, + default=1, + on_delete=django.db.models.deletion.PROTECT, + to="lamindb.space", + ), + ), + ], + options={ + "abstract": False, + }, + bases=( + lamindb.models.CanCurate, + models.Model, + lamindb.models.ValidateFields, + ), + ), + migrations.CreateModel( + name="Project", + fields=[ + ( + "created_at", + lamindb.base.fields.DateTimeField(auto_now_add=True, db_index=True), + ), + ( + "updated_at", + lamindb.base.fields.DateTimeField(auto_now=True, db_index=True), + ), + ( + "_branch_code", + models.SmallIntegerField(db_default=1, 
db_index=True, default=1), + ), + ("aux", models.JSONField(db_default=None, default=None, null=True)), + ("id", models.AutoField(primary_key=True, serialize=False)), + ( + "uid", + lamindb.base.fields.CharField( + blank=True, + db_index=True, + default=lamindb.base.ids.base62_12, + max_length=12, + unique=True, + ), + ), + ( + "name", + lamindb.base.fields.CharField( + blank=True, db_index=True, default=None, max_length=255 + ), + ), + ( + "abbr", + lamindb.base.fields.CharField( + blank=True, + db_index=True, + default=None, + max_length=32, + null=True, + unique=True, + ), + ), + ( + "url", + lamindb.base.fields.URLField( + blank=True, default=None, max_length=255, null=True + ), + ), + ( + "artifacts", + models.ManyToManyField( + related_name="projects", + through="lamindb.ArtifactProject", + to="lamindb.artifact", + ), + ), + ( + "collections", + models.ManyToManyField( + related_name="projects", + through="lamindb.CollectionProject", + to="lamindb.collection", + ), + ), + ( + "contributors", + models.ManyToManyField( + related_name="projects", to="lamindb.person" + ), + ), + ( + "created_by", + lamindb.base.fields.ForeignKey( + blank=True, + default=lamindb.base.users.current_user_id, + on_delete=django.db.models.deletion.PROTECT, + related_name="+", + to="lamindb.user", + ), + ), + ( + "run", + lamindb.base.fields.ForeignKey( + blank=True, + default=lamindb.models.current_run, + null=True, + on_delete=django.db.models.deletion.PROTECT, + related_name="+", + to="lamindb.run", + ), + ), + ( + "space", + lamindb.base.fields.ForeignKey( + blank=True, + db_default=1, + default=1, + on_delete=django.db.models.deletion.PROTECT, + to="lamindb.space", + ), + ), + ], + options={ + "abstract": False, + }, + bases=( + lamindb.models.CanCurate, + models.Model, + lamindb.models.ValidateFields, + ), + ), + migrations.AddField( + model_name="collectionproject", + name="project", + field=lamindb.base.fields.ForeignKey( + blank=True, + 
on_delete=django.db.models.deletion.PROTECT, + related_name="links_collection", + to="lamindb.project", + ), + ), + migrations.AddField( + model_name="artifactproject", + name="project", + field=lamindb.base.fields.ForeignKey( + blank=True, + on_delete=django.db.models.deletion.PROTECT, + related_name="links_artifact", + to="lamindb.project", + ), + ), + migrations.CreateModel( + name="Reference", + fields=[ + ( + "created_at", + lamindb.base.fields.DateTimeField(auto_now_add=True, db_index=True), + ), + ( + "updated_at", + lamindb.base.fields.DateTimeField(auto_now=True, db_index=True), + ), + ( + "_branch_code", + models.SmallIntegerField(db_default=1, db_index=True, default=1), + ), + ("aux", models.JSONField(db_default=None, default=None, null=True)), + ("id", models.AutoField(primary_key=True, serialize=False)), + ( + "uid", + lamindb.base.fields.CharField( + blank=True, + db_index=True, + default=lamindb.base.ids.base62_12, + max_length=12, + unique=True, + ), + ), + ( + "name", + lamindb.base.fields.CharField( + blank=True, db_index=True, default=None, max_length=255 + ), + ), + ( + "abbr", + lamindb.base.fields.CharField( + blank=True, + db_index=True, + default=None, + max_length=32, + null=True, + unique=True, + ), + ), + ("url", lamindb.base.fields.URLField(blank=True, null=True)), + ( + "pubmed_id", + lamindb.base.fields.BigIntegerField( + blank=True, db_index=True, default=None, null=True + ), + ), + ( + "doi", + lamindb.base.fields.CharField( + blank=True, + db_index=True, + default=None, + max_length=255, + null=True, + validators=[ + django.core.validators.RegexValidator( + message="Must be a DOI (e.g., 10.1000/xyz123 or https://doi.org/10.1000/xyz123)", + regex="^(?:https?://(?:dx\\.)?doi\\.org/|doi:|DOI:)?10\\.\\d+/.*$", + ) + ], + ), + ), + ( + "preprint", + lamindb.base.fields.BooleanField( + blank=True, db_index=True, default=False + ), + ), + ( + "public", + lamindb.base.fields.BooleanField( + blank=True, db_index=True, default=True + ), + ), 
+ ( + "journal", + lamindb.base.fields.TextField(blank=True, default=None, null=True), + ), + ( + "description", + lamindb.base.fields.TextField(blank=True, default=None, null=True), + ), + ( + "text", + lamindb.base.fields.TextField(blank=True, default=None, null=True), + ), + ( + "published_at", + lamindb.base.fields.DateField(blank=True, default=None, null=True), + ), + ( + "artifacts", + models.ManyToManyField( + related_name="references", + through="lamindb.ArtifactReference", + to="lamindb.artifact", + ), + ), + ( + "authors", + models.ManyToManyField( + related_name="references", to="lamindb.person" + ), + ), + ( + "collections", + models.ManyToManyField( + related_name="references", + through="lamindb.CollectionReference", + to="lamindb.collection", + ), + ), + ( + "created_by", + lamindb.base.fields.ForeignKey( + blank=True, + default=lamindb.base.users.current_user_id, + on_delete=django.db.models.deletion.PROTECT, + related_name="+", + to="lamindb.user", + ), + ), + ( + "run", + lamindb.base.fields.ForeignKey( + blank=True, + default=lamindb.models.current_run, + null=True, + on_delete=django.db.models.deletion.PROTECT, + related_name="+", + to="lamindb.run", + ), + ), + ( + "space", + lamindb.base.fields.ForeignKey( + blank=True, + db_default=1, + default=1, + on_delete=django.db.models.deletion.PROTECT, + to="lamindb.space", + ), + ), + ], + options={ + "abstract": False, + }, + bases=( + lamindb.models.CanCurate, + models.Model, + lamindb.models.ValidateFields, + ), + ), + migrations.AddField( + model_name="project", + name="references", + field=models.ManyToManyField( + related_name="projects", to="lamindb.reference" + ), + ), + migrations.AddField( + model_name="collectionreference", + name="reference", + field=lamindb.base.fields.ForeignKey( + blank=True, + on_delete=django.db.models.deletion.PROTECT, + related_name="links_collection", + to="lamindb.reference", + ), + ), + migrations.AddField( + model_name="artifactreference", + name="reference", + 
field=lamindb.base.fields.ForeignKey( + blank=True, + on_delete=django.db.models.deletion.PROTECT, + related_name="links_artifact", + to="lamindb.reference", + ), + ), + migrations.CreateModel( + name="TransformProject", + fields=[ + ( + "created_at", + lamindb.base.fields.DateTimeField(auto_now_add=True, db_index=True), + ), + ("id", models.BigAutoField(primary_key=True, serialize=False)), + ( + "created_by", + lamindb.base.fields.ForeignKey( + blank=True, + default=lamindb.base.users.current_user_id, + on_delete=django.db.models.deletion.PROTECT, + related_name="+", + to="lamindb.user", + ), + ), + ( + "project", + lamindb.base.fields.ForeignKey( + blank=True, + on_delete=django.db.models.deletion.PROTECT, + related_name="links_transform", + to="lamindb.project", + ), + ), + ( + "run", + lamindb.base.fields.ForeignKey( + blank=True, + default=lamindb.models.current_run, + null=True, + on_delete=django.db.models.deletion.PROTECT, + related_name="+", + to="lamindb.run", + ), + ), + ( + "transform", + lamindb.base.fields.ForeignKey( + blank=True, + on_delete=django.db.models.deletion.CASCADE, + related_name="links_project", + to="lamindb.transform", + ), + ), + ], + options={ + "unique_together": {("transform", "project")}, + }, + bases=(lamindb.models.LinkORM, models.Model), + ), + migrations.AddField( + model_name="project", + name="transforms", + field=models.ManyToManyField( + related_name="projects", + through="lamindb.TransformProject", + to="lamindb.transform", + ), + ), + migrations.CreateModel( + name="TransformReference", + fields=[ + ( + "created_at", + lamindb.base.fields.DateTimeField(auto_now_add=True, db_index=True), + ), + ("id", models.BigAutoField(primary_key=True, serialize=False)), + ( + "created_by", + lamindb.base.fields.ForeignKey( + blank=True, + default=lamindb.base.users.current_user_id, + on_delete=django.db.models.deletion.PROTECT, + related_name="+", + to="lamindb.user", + ), + ), + ( + "reference", + lamindb.base.fields.ForeignKey( + 
blank=True, + on_delete=django.db.models.deletion.PROTECT, + related_name="links_transform", + to="lamindb.reference", + ), + ), + ( + "run", + lamindb.base.fields.ForeignKey( + blank=True, + default=lamindb.models.current_run, + null=True, + on_delete=django.db.models.deletion.PROTECT, + related_name="+", + to="lamindb.run", + ), + ), + ( + "transform", + lamindb.base.fields.ForeignKey( + blank=True, + on_delete=django.db.models.deletion.CASCADE, + related_name="links_reference", + to="lamindb.transform", + ), + ), + ], + options={ + "unique_together": {("transform", "reference")}, + }, + bases=(lamindb.models.LinkORM, models.Model), + ), + migrations.AddField( + model_name="reference", + name="transforms", + field=models.ManyToManyField( + related_name="references", + through="lamindb.TransformReference", + to="lamindb.transform", + ), + ), + migrations.AlterUniqueTogether( + name="collectionproject", + unique_together={("collection", "project")}, + ), + migrations.AlterUniqueTogether( + name="artifactproject", + unique_together={("artifact", "project", "feature")}, + ), + migrations.AlterUniqueTogether( + name="collectionreference", + unique_together={("collection", "reference")}, + ), + migrations.AlterUniqueTogether( + name="artifactreference", + unique_together={("artifact", "reference", "feature")}, + ), + migrations.RunPython( + migrate_data, + ), + ] diff --git a/lamindb/models.py b/lamindb/models.py index fdceccc10..31ba4529e 100644 --- a/lamindb/models.py +++ b/lamindb/models.py @@ -2,9 +2,7 @@ import sys from collections import defaultdict - -# has to be here for the type hinting to work -from datetime import datetime # noqa +from datetime import date, datetime # noqa: TC003 from itertools import chain from typing import ( TYPE_CHECKING, @@ -14,6 +12,7 @@ overload, ) +from django.core.validators import RegexValidator from django.db import IntegrityError, models from django.db.models import CASCADE, PROTECT, Field, Q from django.db.models.base import 
ModelBase @@ -31,11 +30,14 @@ BigIntegerField, BooleanField, CharField, + DateField, DateTimeField, + EmailField, ForeignKey, IntegerField, OneToOneField, TextField, + URLField, ) from .base.ids import base62_8, base62_12, base62_20 @@ -519,6 +521,10 @@ def query_children(self) -> QuerySet: pass +class ValidateFields: + pass + + RECORD_REGISTRY_EXAMPLE = """Example:: from lamindb import Record, fields @@ -3018,14 +3024,163 @@ def describe(self) -> None: # ------------------------------------------------------------------------------------- -# Link models +# Project management -class LinkORM: - pass +class Person(Record, CanCurate, TracksRun, TracksUpdates, ValidateFields): + """Persons. + This registry is distinct from `User` and purely exists for project management. -class ValidateFields: + You'll soon be able to conveniently create persons from users. + + Example: + >>> person = Person( + ... name="Jane Doe", + ... email="jane.doe@example.com", + ... external=False, + ... ).save() + """ + + class Meta(Record.Meta, TracksRun.Meta, TracksUpdates.Meta): + abstract = False + + id: int = models.AutoField(primary_key=True) + """Internal id, valid only in one DB instance.""" + uid: str = CharField(unique=True, max_length=8, db_index=True, default=base62_8) + """Universal id, valid across DB instances.""" + name: str = CharField(db_index=True) + """Name of the person (forename(s) lastname).""" + email: str | None = EmailField(null=True, default=None) + """Email of the person.""" + external: bool = BooleanField(default=True, db_index=True) + """Whether the person is external to the organization.""" + + +class Project(Record, CanCurate, TracksRun, TracksUpdates, ValidateFields): + """Projects. + + Example: + >>> project = Project( + ... name="My Project Name", + ... abbr="MPN", + ... url="https://example.com/my_project", + ... 
).save() + """ + + class Meta(Record.Meta, TracksRun.Meta, TracksUpdates.Meta): + abstract = False + + id: int = models.AutoField(primary_key=True) + """Internal id, valid only in one DB instance.""" + uid: str = CharField(unique=True, max_length=12, db_index=True, default=base62_12) + """Universal id, valid across DB instances.""" + name: str = CharField(db_index=True) + """Title or name of the Project.""" + abbr: str | None = CharField(max_length=32, db_index=True, unique=True, null=True) + """A unique abbreviation.""" + url: str | None = URLField(max_length=255, null=True, default=None) + """A URL to view.""" + contributors: Person = models.ManyToManyField(Person, related_name="projects") + """Contributors associated with this project.""" + references: Reference = models.ManyToManyField("Reference", related_name="projects") + """References associated with this project.""" + artifacts: Artifact = models.ManyToManyField( + Artifact, through="ArtifactProject", related_name="projects" + ) + """Artifacts labeled with this Project.""" + transforms: Transform = models.ManyToManyField( + Transform, through="TransformProject", related_name="projects" + ) + """Transforms labeled with this project.""" + collections: Collection = models.ManyToManyField( + Collection, through="CollectionProject", related_name="projects" + ) + """Collections labeled with this project.""" + + +class Reference(Record, CanCurate, TracksRun, TracksUpdates, ValidateFields): + """References such as internal studies, papers, documents, or URLs. + + Example: + >>> reference = Reference( + ... name="A Paper Title", + ... abbr="APT", + ... url="https://doi.org/10.1000/xyz123", + ... pubmed_id=12345678, + ... doi="10.1000/xyz123", + ... preprint=False, + ... journal="Nature Biotechnology", + ... description="A groundbreaking research paper.", + ... text="A really informative abstract.", + ... published_at=date(2023, 11, 21), + ... 
).save() + """ + + class Meta(Record.Meta, TracksRun.Meta, TracksUpdates.Meta): + abstract = False + + id: int = models.AutoField(primary_key=True) + """Internal id, valid only in one DB instance.""" + uid: str = CharField(unique=True, max_length=12, db_index=True, default=base62_12) + """Universal id, valid across DB instances.""" + name: str = CharField(db_index=True) + """Title or name of the reference document.""" + abbr: str | None = CharField( + max_length=32, + db_index=True, + unique=True, + null=True, + ) + """A unique abbreviation for the reference.""" + url: str | None = URLField(null=True) + """URL linking to the reference.""" + pubmed_id: int | None = BigIntegerField(null=True, db_index=True) + """A PubMed ID.""" + doi: str | None = CharField( + null=True, + db_index=True, + validators=[ + RegexValidator( + regex=r"^(?:https?://(?:dx\.)?doi\.org/|doi:|DOI:)?10\.\d+/.*$", + message="Must be a DOI (e.g., 10.1000/xyz123 or https://doi.org/10.1000/xyz123)", + ) + ], + ) + """Digital Object Identifier (DOI) for the reference.""" + preprint: bool = BooleanField(default=False, db_index=True) + """Whether the reference is from a preprint.""" + public: bool = BooleanField(default=True, db_index=True) + """Whether the reference is public.""" + journal: str | None = TextField(null=True) + """Name of the journal.""" + description: str | None = TextField(null=True) + """Description of the reference.""" + text: str | None = TextField(null=True) + """Abstract or full text of the reference.""" + published_at: date | None = DateField(null=True, default=None) + """Publication date.""" + authors: Person = models.ManyToManyField(Person, related_name="references") + """All people associated with this reference.""" + artifacts: Artifact = models.ManyToManyField( + Artifact, through="ArtifactReference", related_name="references" + ) + """Artifacts labeled with this reference.""" + transforms: Artifact = models.ManyToManyField( + Transform, through="TransformReference", 
related_name="references" + ) + """Transforms labeled with this reference.""" + collections: Artifact = models.ManyToManyField( + Collection, through="CollectionReference", related_name="references" + ) + """Collections labeled with this reference.""" + + +# ------------------------------------------------------------------------------------- +# Link models + + +class LinkORM: pass @@ -3143,6 +3298,94 @@ class Meta: unique_together = ("artifact", "paramvalue") +# ------------------------------------------------------------------------------------- +# Link models for project management + + +class ArtifactProject(BasicRecord, LinkORM, TracksRun): + id: int = models.BigAutoField(primary_key=True) + artifact: Artifact = ForeignKey(Artifact, CASCADE, related_name="links_project") + project: Project = ForeignKey(Project, PROTECT, related_name="links_artifact") + feature: Feature | None = ForeignKey( + Feature, + PROTECT, + null=True, + default=None, + related_name="links_artifactproject", + ) + label_ref_is_name: bool | None = BooleanField(null=True, default=None) + feature_ref_is_name: bool | None = BooleanField(null=True, default=None) + + class Meta: + # can have the same label linked to the same artifact if the feature is different + unique_together = ("artifact", "project", "feature") + + +class TransformProject(BasicRecord, LinkORM, TracksRun): + id: int = models.BigAutoField(primary_key=True) + transform: Transform = ForeignKey(Transform, CASCADE, related_name="links_project") + project: Project = ForeignKey(Project, PROTECT, related_name="links_transform") + + class Meta: + unique_together = ("transform", "project") + + +class CollectionProject(BasicRecord, LinkORM, TracksRun): + id: int = models.BigAutoField(primary_key=True) + collection: Collection = ForeignKey( + Collection, CASCADE, related_name="links_project" + ) + project: Project = ForeignKey(Project, PROTECT, related_name="links_collection") + + class Meta: + unique_together = ("collection", "project") 
+ + +class ArtifactReference(BasicRecord, LinkORM, TracksRun): + id: int = models.BigAutoField(primary_key=True) + artifact: Artifact = ForeignKey(Artifact, CASCADE, related_name="links_reference") + reference: Reference = ForeignKey(Reference, PROTECT, related_name="links_artifact") + feature: Feature | None = ForeignKey( + Feature, + PROTECT, + null=True, + default=None, + related_name="links_artifactreference", + ) + label_ref_is_name: bool | None = BooleanField(null=True, default=None) + feature_ref_is_name: bool | None = BooleanField(null=True, default=None) + + class Meta: + # can have the same reference linked to the same artifact if the feature is different + unique_together = ("artifact", "reference", "feature") + + +class TransformReference(BasicRecord, LinkORM, TracksRun): + id: int = models.BigAutoField(primary_key=True) + transform: Transform = ForeignKey( + Transform, CASCADE, related_name="links_reference" + ) + reference: Reference = ForeignKey( + Reference, PROTECT, related_name="links_transform" + ) + + class Meta: + unique_together = ("transform", "reference") + + +class CollectionReference(BasicRecord, LinkORM, TracksRun): + id: int = models.BigAutoField(primary_key=True) + collection: Collection = ForeignKey( + Collection, CASCADE, related_name="links_reference" + ) + reference: Reference = ForeignKey( + Reference, PROTECT, related_name="links_collection" + ) + + class Meta: + unique_together = ("collection", "reference") + + # class Migration(Record): # app = CharField(max_length=255) # name = CharField(max_length=255) diff --git a/noxfile.py b/noxfile.py index 8c15137fd..714cef2c9 100644 --- a/noxfile.py +++ b/noxfile.py @@ -109,15 +109,11 @@ def install_ci(session, group): run(session, "uv pip install --system ipywidgets") elif group == "faq": extras += "bionty,jupyter" - run( - session, - "uv pip install --system --no-deps ./sub/ourprojects", - ) elif group == "storage": extras += "zarr,bionty,jupyter" run( session, - "uv pip install --system 
--no-deps ./sub/wetlab ./sub/ourprojects", + "uv pip install --system --no-deps ./sub/wetlab", ) run(session, "uv pip install --system vitessce") elif group == "curator": @@ -139,7 +135,7 @@ def install_ci(session, group): ) run( session, - "uv pip install --system --no-deps ./sub/wetlab ./sub/clinicore ./sub/ourprojects", + "uv pip install --system --no-deps ./sub/wetlab ./sub/clinicore", ) elif group == "cli": extras += "jupyter,bionty" @@ -152,9 +148,8 @@ def install_ci(session, group): if IS_PR or group == "docs": run( session, - "uv pip install --system --no-deps ./sub/lamindb-setup ./sub/lamin-cli ./sub/ourprojects", + "uv pip install --system --no-deps ./sub/lamindb-setup ./sub/lamin-cli", ) - run(session, "uv pip uninstall --system lnschema-core") if "bionty" in extras: run( session, @@ -241,7 +236,7 @@ def docs(session): path.rename(f"./docs/{path.name}") run( session, - "lamin init --storage ./docsbuild --modules bionty,wetlab,clinicore,ourprojects", + "lamin init --storage ./docsbuild --modules bionty,wetlab,clinicore", ) def generate_cli_docs(): diff --git a/pyproject.toml b/pyproject.toml index 747caea4e..998aab5b2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -18,7 +18,6 @@ dependencies = [ "lamin_utils==0.13.10", "lamin_cli==1.0a1", "lamindb_setup[aws]==1.0a1", - "ourprojects==1.0a1", # others "pyarrow", "typing_extensions!=4.6.0", diff --git a/sub/lamindb-setup b/sub/lamindb-setup index 9899f5c0d..335d4642f 160000 --- a/sub/lamindb-setup +++ b/sub/lamindb-setup @@ -1 +1 @@ -Subproject commit 9899f5c0dc56321f918568a3e753223102619360 +Subproject commit 335d4642f7f93da15b7c22da54e3722ce9d9968f diff --git a/sub/ourprojects b/sub/ourprojects deleted file mode 160000 index 52da4a90a..000000000 --- a/sub/ourprojects +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 52da4a90af81910f24d779239bec9ef48b688869 diff --git a/tests/core/test_models.py b/tests/core/test_models.py index 4ce03b100..61153e2cf 100644 --- a/tests/core/test_models.py +++ 
b/tests/core/test_models.py @@ -64,6 +64,8 @@ def test_registry__repr__artifact(): .input_of_runs: Run .feature_sets: FeatureSet .collections: Collection + .projects: Project + .references: Reference Bionty fields .organisms: bionty.Organism .genes: bionty.Gene