cancervariants · korikuzma · Aug 7, 2023 · May 10, 2023 · May 10, 2023 · May 10, 2023
diff --git a/.flake8 b/.flake8
diff --git a/.github/workflows/checks.yml b/.github/workflows/checks.yml
@@ -2,6 +2,7 @@ name: checks
 on: [push, pull_request]
 jobs:
   deps:
+    name: deps py${{ matrix.python-version }}
     runs-on: ubuntu-latest
     strategy:
       fail-fast: false
@@ -19,3 +20,13 @@ jobs:
         run: |
           python -m pip install pipenv
           pipenv install --skip-lock  # this is what Elastic beanstalk uses
+  lint:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v3
+
+      - name: black
+        uses: psf/black@stable
+
+      - name: ruff
+        uses: chartboost/ruff-action@v1
diff --git a/.gitignore b/.gitignore
@@ -25,8 +25,6 @@ Pipfile.lock
 
 .python-version
 
-pyproject.toml
-
 # Jupyter Notebook
 .ipynb_checkpoints/
 
@@ -38,3 +36,5 @@ pyproject.toml
 
 build/
 dynamodb_local_latest/
+
+*.http
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -1,12 +1,22 @@
 # See https://pre-commit.com for more information
 # See https://pre-commit.com/hooks.html for more hooks
 repos:
--   repo: https://github.com/pre-commit/pre-commit-hooks
-    rev: v1.4.0
-    hooks:
-    - id: flake8
-      additional_dependencies: [flake8-docstrings, flake8-quotes, flake8-import-order, flake8-annotations]
-    - id: check-added-large-files
-    - id: detect-private-key
-    - id: trailing-whitespace
-    - id: end-of-file-fixer
+- repo: https://github.com/pre-commit/pre-commit-hooks
+  rev: v1.4.0
+  hooks:
+  - id: check-added-large-files
+  - id: detect-private-key
+  - id: trailing-whitespace
+  - id: end-of-file-fixer
+- repo: https://github.com/astral-sh/ruff-pre-commit
+  # Ruff version.
+  rev: v0.0.280
+  hooks:
+    - id: ruff
+      args: [ --fix, --exit-non-zero-on-fix ]
+- repo: https://github.com/psf/black
+  rev: 23.7.0
+  hooks:
+    - id: black
+      args: ["--check"]
+      language_version: python3.10
diff --git a/LICENSE b/LICENSE
@@ -1,6 +1,6 @@
 MIT License
 
-Copyright (c) 2018 VICC
+Copyright (c) 2018-2023 VICC
 
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
@@ -19,4 +19,3 @@ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 SOFTWARE.
-
diff --git a/Pipfile b/Pipfile
@@ -7,16 +7,13 @@ verify_ssl = true
 pytest = "*"
 pytest-asyncio = "*"
 pytest-cov = "*"
-flake8 = "*"
-flake8-docstrings = "*"
-flake8-quotes = "*"
-flake8-annotations = "*"
-flake8-import-order = "*"
 pre-commit = "*"
 variation-normalizer = {editable = true, path = "."}
 jupyter = "*"
 ipykernel = "*"
 psycopg2-binary = "*"
+ruff = "*"
+black = "*"
 
 [packages]
 "biocommons.seqrepo" = "*"
@@ -28,4 +25,4 @@ gene-normalizer = "~=0.1.36"
 pyliftover = "*"
 boto3 = "*"
 "ga4gh.vrsatile.pydantic" = "~=0.0.13"
-cool-seq-tool = ">=0.1.13"
+cool-seq-tool = ">=0.1.14.dev0"
diff --git a/README.md b/README.md
@@ -1,28 +1,30 @@
-[![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.5894937.svg)](https://doi.org/10.5281/zenodo.5894937)
-
 # Variation Normalization
 
+[![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.5894937.svg)](https://doi.org/10.5281/zenodo.5894937)
+
 Services and guidelines for normalizing variation terms into [VRS](https://vrs.ga4gh.org/en/latest) and [VRSATILE](https://vrsatile.readthedocs.io/en/latest/) compatible representations.
 
-Public OpenAPI endpoint: https://normalize.cancervariants.org/variation
+Public OpenAPI endpoint: <https://normalize.cancervariants.org/variation>
 
 Installing with pip:
 
-```commandline
+```shell
 pip install variation-normalizer
 ```
 
 The variation-normalization repo depends on VRS and VRSATILE models, and therefore each variation-normalizer package on PyPI uses a particular version of VRS and VRSATILE. The correspondences between packages may be summarized as:
 
 | variation-normalization branch | variation-normalizer version | gene-normalizer version | ga4gh.vrsatile.pydantic version | VRS version | VRSATILE version |
 | ---- | --- | ---- | --- | --- | --- |
-| [main](https://github.com/cancervariants/variation-normalization/tree/main) | 0.5.X | 0.1.X | 0.0.X | [1.X.X](https://github.com/ga4gh/vrs) | [main](https://github.com/ga4gh/vrsatile/tree/main)
+| [main](https://github.com/cancervariants/variation-normalization/tree/main) | 0.6.X | 0.1.X | 0.0.X | [1.X.X](https://github.com/ga4gh/vrs) | [main](https://github.com/ga4gh/vrsatile/tree/main)
 | [staging](https://github.com/cancervariants/variation-normalization/tree/staging) | 0.7.X | 0.2.X | 0.1.X | [metaschema-update](https://github.com/ga4gh/vrs/tree/metaschema-update) | [metaschema-update](https://github.com/ga4gh/vrsatile/tree/metaschema-update)
 
 ## About
+
 Variation Normalization works by using four main steps: tokenization, classification, validation, and translation. During tokenization, we split strings on whitespace and parse to determine the type of token. During classification, we specify the order of tokens a classification can have. We then do validation checks such as ensuring references for a nucleotide or amino acid matches the expected value and validating a position exists on the given transcript. During translation, we return a VRS Allele object.
 
 Variation Normalization is limited to the following types of variants:
+
 * HGVS expressions and text representations (ex: `BRAF V600E`):
   * **protein (p.)**: substitution, deletion, insertion, deletion-insertion
   * **coding DNA (c.)**: substitution, deletion, insertion, deletion-insertion
@@ -36,14 +38,21 @@ We are working towards adding more types of variations, coordinates, and represe
 
 ### Endpoints
 
-The `/to_vrs` endpoint returns a list of validated VRS [Variations](https://vrs.ga4gh.org/en/1.2.0/terms_and_model.html#variation).
+#### `/to_vrs`
+
+Returns a list of validated VRS [Variations](https://vrs.ga4gh.org/en/stable/terms_and_model.html#variation).
+
+#### `/normalize`
 
-The `/normalize` endpoint returns a [Variation Descriptor](https://vrsatile.readthedocs.io/en/latest/value_object_descriptor/vod_index.html#variation-descriptor) containing the MANE Transcript, if one is found. If a genomic query is not given a gene, `normalize` will return its GRCh38 representation. Variation Normalizer relies on [**C**ommon **O**perations **O**n **L**ots-of **Seq**uences Tool (cool-seq-tool)](https://github.com/GenomicMedLab/cool-seq-tool) for retrieving MANE Transcript data. More information on the transcript selection algorithm can be found [here](https://github.com/GenomicMedLab/cool-seq-tool/blob/main/docs/TranscriptSelectionPriority.md).
+Returns a [Variation Descriptor](https://vrsatile.readthedocs.io/en/latest/value_object_descriptor/vod_index.html#variation-descriptor) aligned to the prioritized transcript. The Variation Normalizer relies on [**C**ommon **O**perations **O**n **L**ots-of **Seq**uences Tool (cool-seq-tool)](https://github.com/GenomicMedLab/cool-seq-tool) for retrieving the prioritized transcript data. More information on the transcript selection algorithm can be found [here](https://github.com/GenomicMedLab/cool-seq-tool/blob/main/docs/TranscriptSelectionPriority.md).
+
+If a genomic variation query _is_ given a gene (e.g. `BRAF g.140753336A>T`), the associated cDNA representation will be returned. This is because the gene provides additional strand context. If a genomic variation query is _not_ given a gene, the GRCh38 representation will be returned.
 
 ## Developer Instructions
 
 Clone the repo:
-```
+
+```shell
 git clone https://github.com/cancervariants/variation-normalization.git
 cd variation-normalization
 ```
@@ -54,10 +63,9 @@ for direction on installing pipenv in your compute environment.
 
 Once installed, from the project root dir, just run:
 
-```commandline
+```shell
 pipenv shell
-pipenv lock && pipenv sync
-pipenv install --dev
+pipenv update && pipenv install --dev
 ```
 
 ### Backend Services
@@ -73,74 +81,83 @@ You must also have Gene Normalization's DynamoDB running in a separate terminal
 For more information about the gene-normalizer and how to load the database, visit the [README](https://github.com/cancervariants/gene-normalization/blob/main/README.md).
 
 #### SeqRepo
+
 Variation Normalization relies on [seqrepo](https://github.com/biocommons/biocommons.seqrepo), which you must download yourself.
 
 Variation Normalizer uses seqrepo to retrieve sequences at given positions on a transcript.
 
 From the _root_ directory:
-```
+
+```shell
 pip install seqrepo
 sudo mkdir /usr/local/share/seqrepo
 sudo chown $USER /usr/local/share/seqrepo
 seqrepo pull -i 2021-01-29  # Replace with latest version using `seqrepo list-remote-instances` if outdated
 ```
 
 If you get an error similar to the one below:
-```
+
+```shell
 PermissionError: [Error 13] Permission denied: '/usr/local/share/seqrepo/2021-01-29._fkuefgd' -> '/usr/local/share/seqrepo/2021-01-29'
 ```
 
 You will want to do the following:\
 (*Might not be ._fkuefgd, so replace with your error message path*)
-```console
+
+```shell
 sudo mv /usr/local/share/seqrepo/2021-01-29._fkuefgd /usr/local/share/seqrepo/2021-01-29
 exit
 ```
 
 Use the `SEQREPO_ROOT_DIR` environment variable to set the path of an already existing SeqRepo directory. The default is `/usr/local/share/seqrepo/latest`.
 
 #### UTA
+
 Variation Normalizer also uses [**C**ommon **O**perations **O**n **L**ots-of **Seq**uences Tool (cool-seq-tool)](https://github.com/GenomicMedLab/cool-seq-tool) which uses [UTA](https://github.com/biocommons/uta) as the underlying PostgreSQL database.
 
 _The following commands will likely need modification appropriate for the installation environment._
+
 1. Install [PostgreSQL](https://www.postgresql.org/)
 2. Create user and database.
 
-    ```
-    $ createuser -U postgres uta_admin
-    $ createuser -U postgres anonymous
-    $ createdb -U postgres -O uta_admin uta
+    ```shell
+    createuser -U postgres uta_admin
+    createuser -U postgres anonymous
+    createdb -U postgres -O uta_admin uta
     ```
 
 3. To install locally, from the _variation/data_ directory:
-```
+
+```shell
 export UTA_VERSION=uta_20210129.pgd.gz
 curl -O http://dl.biocommons.org/uta/$UTA_VERSION
 gzip -cdq ${UTA_VERSION} | grep -v "^REFRESH MATERIALIZED VIEW" | psql -h localhost -U uta_admin --echo-errors --single-transaction -v ON_ERROR_STOP=1 -d uta -p 5433
 ```
 
 ##### UTA Installation Issues
+
 If you have trouble installing UTA, you can visit [these two READMEs](https://github.com/ga4gh/vrs-python/tree/main/docs/setup_help).
 
 ##### Connecting to the UTA database
-To connect to the UTA database, you can use the default url (`postgresql://uta_admin@localhost:5433/uta/uta_20210129`). If you use the default url, you must either set the password using environment variable `UTA_PASSWORD` or setting the parameter `db_pwd` in the UTA class.
 
-If you do not wish to use the default, you must set the environment variable `UTA_DB_URL` which has the format of `driver://user:pass@host:port/database/schema`.
+To connect to the UTA database, you can use the default url (`postgresql://uta_admin@localhost:5433/uta/uta_20210129`). If you do not wish to use the default, you must set the environment variable `UTA_DB_URL` which has the format of `driver://user:pass@host:port/database/schema`.
 
 ## Starting the Variation Normalization Service Locally
+
 `gene-normalizer`'s DynamoDB and the `uta` database must be running.
 
 To start the service, run the following:
 
-```commandline
+```shell
 uvicorn variation.main:app --reload
 ```
 
 Next, view the OpenAPI docs on your local machine:
-http://127.0.0.1:8000/variation
+<http://127.0.0.1:8000/variation>
 
 ### Init coding style tests
-Code style is managed by [flake8](https://github.com/PyCQA/flake8) and checked prior to commit.
+
+Code style is managed by [Ruff](https://github.com/astral-sh/ruff) and checked prior to commit.
 
 We use [pre-commit](https://pre-commit.com/#usage) to run conformance tests.
 
@@ -153,12 +170,14 @@ This ensures:
 
 Before first commit run:
 
-```
+```shell
 pre-commit install
 ```
 
 ### Testing
+
 From the _root_ directory of the repository:
-```
+
+```shell
 pytest tests/
 ```