diff --git a/.github/workflows/publish_pypi.yml b/.github/workflows/publish_pypi.yml new file mode 100644 index 0000000..69cbfad --- /dev/null +++ b/.github/workflows/publish_pypi.yml @@ -0,0 +1,51 @@ +name: Publish Python distribution package to PyPI + +on: + release: + types: [published] + +jobs: + build: + name: Build distribution package + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v4 + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: "3.x" + - name: Install pypa/build + run: >- + python3 -m + pip install + build + --user + - name: Build a binary wheel and a source tarball + run: python3 -m build + - name: Store the distribution packages + uses: actions/upload-artifact@v4 + with: + name: python-package-distributions + path: dist/ + + publish-to-pypi: + name: >- + Publish Python distribution package to PyPI + needs: + - build + runs-on: ubuntu-latest + environment: + name: pypi + url: https://pypi.org/p/mite_schema + permissions: + id-token: write + + steps: + - name: Download all the dists + uses: actions/download-artifact@v4 + with: + name: python-package-distributions + path: dist/ + - name: Publish distribution package to PyPI + uses: pypa/gh-action-pypi-publish@release/v1 \ No newline at end of file diff --git a/CHANGELOG.md b/CHANGELOG.md index 3c7a6e2..eb63e95 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,11 +5,18 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 
+## [1.1] 05-08-2024 + +### Changed + +- schema/enzyme: 'databaseIds' is now an object with the allowed databases as keys (before: an array of database cross-link strings) +- schema/reactions: 'databaseIds' is now an object with the allowed databases as keys (before: an array of database cross-link strings) + ## [1.0] 05-08-2024 ### Added -- Implemented MITE Schmea and SchemaManager() +- Implemented MITE Schema and SchemaManager() ### Changed diff --git a/README.md b/README.md index c79c89b..dd4cfa1 100644 --- a/README.md +++ b/README.md @@ -14,11 +14,12 @@ More information about the schema can be found in [the MITE publication](https:/ ## Quick Start +To validate your MITE-formatted .json-file, run: + - `hatch run mite_schema -i ` ### For developers -- Install `python3` - Install `hatch` using one of the methods described [here](https://hatch.pypa.io/1.12/install/) - Download or clone this repository - Run `hatch -v env create dev`. This will download and install the appropriate Python version and any required packages diff --git a/mite_schema/schema/definitions/enzyme.json b/mite_schema/schema/definitions/enzyme.json index 1c95662..773d7df 100644 --- a/mite_schema/schema/definitions/enzyme.json +++ b/mite_schema/schema/definitions/enzyme.json @@ -17,11 +17,19 @@ "type": "string" }, "databaseIds": { - "type": "array", - "items": { - "$ref": "#/$defs/databaseId" - }, - "uniqueItems": true + "type": "object", + "additionalProperties": false, + "properties": { + "uniprot": { + "$ref": "#/$defs/uniprot" + }, + "genpept": { + "$ref": "#/$defs/genpept" + }, + "mibig": { + "$ref": "#/$defs/mibig" + } + } }, "auxiliaryEnzymes": { "title": "Specify any required auxiliary enzymes that are co-forming the maturation machinery.", @@ -39,11 +47,16 @@ "type": "string" }, "databaseIds": { - "type": "array", - "items": { - "$ref": "#/$defs/databaseId" - }, - "uniqueItems": true + "type": "object", + "additionalProperties": false, + "properties": { + "uniprot": { + "$ref": 
"#/$defs/uniprot" + }, + "genpept": { + "$ref": "#/$defs/genpept" + } + } } } } @@ -58,26 +71,20 @@ } }, "$defs": { - "databaseId": { + "uniprot": { + "title": "Uniprot ID reference.", "type": "string", - "oneOf": [ - { - "title": "Uniprot ID reference.", - "pattern": "^uniprot:[A-Z0-9]+$" - }, - { - "title": "GenBank GenPept ID (= protein ID, GenBank gene products)", - "pattern": "^genpept:[A-Z]{3}[0-9]{5,7}\\.[0-9]+$" - }, - { - "title": "MIBiG ID reference", - "pattern": "^mibig:BGC\\d{7}$" - }, - { - "title": "Temporary MIBiG ID reference", - "pattern": "^mibig:new\\d+$" - } - ] + "pattern": "^[A-Z0-9]+$" + }, + "genpept": { + "title": "GenBank GenPept ID (= protein ID, GenBank gene products)", + "type": "string", + "pattern": "^[A-Z]{3}[0-9]{5,7}\\.[0-9]+$" + }, + "mibig": { + "title": "MIBiG ID reference", + "type": "string", + "pattern": "^BGC\\d{7}$|^new\\d+$" } } } \ No newline at end of file diff --git a/mite_schema/schema/definitions/reactions.json b/mite_schema/schema/definitions/reactions.json index 61ca514..9c918e1 100644 --- a/mite_schema/schema/definitions/reactions.json +++ b/mite_schema/schema/definitions/reactions.json @@ -30,11 +30,19 @@ "uniqueItems": true }, "databaseIds": { - "type": "array", - "items": { - "$ref": "#/$defs/databaseId" - }, - "uniqueItems": true + "type": "object", + "additionalProperties": false, + "properties": { + "rhea": { + "$ref": "#/$defs/rhea" + }, + "mite": { + "$ref": "#/$defs/mite" + }, + "ec": { + "$ref": "#/$defs/ec" + } + } }, "reactionSMARTS": { "title": "A reaction SMARTS pattern describing the (sub)structure to match and the reaction to perform.", @@ -139,20 +147,20 @@ "Other" ] }, - "databaseId": { - "title": "Cross-reference to databases.", + "rhea": { + "title": "RHEA cross-reference", "type": "string", - "oneOf": [ - { - "pattern": "^rhea:(\\d+)$" - }, - { - "pattern": "^MITE(\\d{7,7})$" - }, - { - "pattern": "^EC [0-9]+(\\.[0-9]+){0,3}$" - } - ] + "pattern": "^(\\d+)$" + }, + "mite": { + "title": "MITE 
cross-reference", + "type": "string", + "pattern": "^MITE(\\d{7})$" + }, + "ec": { + "title": "EC number (2-4 identifiers)", + "type": "string", + "pattern": "^EC [0-9]+(\\.[0-9]+){0,3}$" }, "evidence": { "type": "object", diff --git a/mite_schema/schema_manager/__init__.py b/mite_schema/schema_manager/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/mite_schema/schema_manager/schema_manager.py b/mite_schema/schema_manager/schema_manager.py index 996f4c2..a8fe94d 100644 --- a/mite_schema/schema_manager/schema_manager.py +++ b/mite_schema/schema_manager/schema_manager.py @@ -148,5 +148,6 @@ def validate_mite(self: Self, instance: dict): jsonschema.validate(instance=instance, schema=entry, registry=registry) except jsonschema.exceptions.ValidationError as e: raise ValueError( - f"SchemaManager: Validation of instance against MITE schema led to an error: '{e!s}" + f"SchemaManager: Validation of instance against " + f"MITE schema led to an error: '{e!s}" ) from e diff --git a/pyproject.toml b/pyproject.toml index 869f565..0152011 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "hatchling.build" [project] name = "mite_schema" -version = "1.0" +version = "1.1" description = "Containing the Minimum Information about a Tailoring Enzymes schema and auxiliary methods" readme = "README.md" requires-python = ">=3.12" @@ -24,7 +24,6 @@ classifiers = [ "Topic :: Scientific/Engineering :: Bio-Informatics", "Topic :: Scientific/Engineering :: Chemistry" ] - dependencies = [ "argparse~=1.4", "coloredlogs~=15.0", @@ -33,6 +32,10 @@ dependencies = [ "referencing~=0.35", ] +[project.urls] +Homepage = "https://github.com/mite-standard/mite_schema" +Issues = "https://github.com/mite-standard/mite_schema/issues" + [project.scripts] mite_schema = "mite_schema.main:main" diff --git a/tests/example_files/example_valid.json b/tests/example_files/example_valid.json index 4ce0b90..e60c54b 100644 --- a/tests/example_files/example_valid.json 
+++ b/tests/example_files/example_valid.json @@ -26,11 +26,11 @@ "enzyme": { "name": "McjC", "description": "Lassopeptide ATP-dependent lactam synthetase (C-protein, asparagine synthetase-like)", - "databaseIds": [ - "uniprot:Q9X2V9", - "genpept:AAD28496.1", - "mibig:BGC0000581" - ], + "databaseIds": { + "uniprot": "Q9X2V9", + "genpept": "AAD28496.1", + "mibig": "BGC0000581" + }, "references": [ "doi:10.1128/jb.181.8.2659-2662.1999", "doi:10.1002/cbic.201200016" @@ -39,10 +39,10 @@ { "name": "McjB", "description": "Lassopeptide cysteine protease. Required for macrolactam-synthethase function (folds and cleaves precursor peptide)", - "databaseIds": [ - "uniprot:Q9X2V8", - "genpept:AAD28495.1" - ] + "databaseIds": { + "uniprot": "Q9X2V8", + "genpept": "AAD28495.1" + } } ] }, @@ -67,6 +67,9 @@ "description": "Lasso macrocyclisation of microcin J25 after precursor cleavage, leading to mature product." } ], + "databaseIds": { + "mite": "MITE0000000" + }, "evidence": [ { "evidenceCode": [