diff --git a/.coveragerc b/.coveragerc deleted file mode 100644 index ba56d350..00000000 --- a/.coveragerc +++ /dev/null @@ -1,7 +0,0 @@ -[run] -source = - variant/ - tests/ -omit = - setup.py - docs/* \ No newline at end of file diff --git a/.ebextensions/01_download_data.config b/.ebextensions/01_download_data.config index e1358921..8ee01fd2 100644 --- a/.ebextensions/01_download_data.config +++ b/.ebextensions/01_download_data.config @@ -3,46 +3,24 @@ commands: command: "yum install -y python-devel postgresql-devel" 02_install_aws_cli: command: "yum install -y awscli" - 03_install_p7zip: - command: "yum install -y p7zip" - 04_eb_packages: + 03_eb_packages: command: "/var/app/venv/staging-LQM1lest/bin/pip install uvloop websockets httptools typing-extensions" - 05_export_eb_env_var: + 04_export_eb_env_var: command: "export $(cat /opt/elasticbeanstalk/deployment/env | xargs)" container_commands: - 01_cool_seq_tool_permissions: - test: test -d "/var/app/venv/staging-LQM1lest/lib/python3.11/site-packages/cool_seq_tool" - command: "chmod -R 777 /var/app/venv/staging-LQM1lest/lib/python3.11/site-packages/cool_seq_tool/data" + 01_s3_download: + test: test ! -d "/usr/local/share/seqrepo/2024-02-20" + command: "aws s3 cp s3://${AWS_BUCKET_NAME}/${AWS_SEQREPO_OBJECT} /usr/local/share/seqrepo.tar.gz --region us-east-2" - 02_s3_download: - test: test ! 
-d "/usr/local/share/seqrepo" - command: "aws s3 cp s3://${AWS_BUCKET_NAME}/${AWS_SEQREPO_OBJECT} /usr/local/share/seqrepo.zip --region us-east-2" + 02_extract_seqrepo: + test: test -f "/usr/local/share/seqrepo.tar.gz" + command: "mkdir -p /usr/local/share/seqrepo/2024-02-20 && tar -xzvf /usr/local/share/seqrepo.tar.gz -C /usr/local/share/seqrepo/2024-02-20" - 03_p7zip_seqrepo: - test: test -f "/usr/local/share/seqrepo.zip" - command: "7za x /usr/local/share/seqrepo.zip -o/usr/local/share -y" + 03_seqrepo_zip_permission: + test: test -f "/usr/local/share/seqrepo.tar.gz" + command: "chmod +wr /usr/local/share/seqrepo.tar.gz" - 04_seqrepo_permission: - test: test -d "/usr/local/share/seqrepo" - command: "chmod -R 777 /usr/local/share/seqrepo" - - 05_macosx_permission: - test: test -d "/usr/local/share/__MACOSX" - command: "chmod -R +wr /usr/local/share/__MACOSX" - - 06_seqrepo_zip_permission: - test: test -f "/usr/local/share/seqrepo.zip" - command: "chmod +wr /usr/local/share/seqrepo.zip" - - 07_remove_macosx: - test: test -d "/usr/local/share/__MACOSX" - command: "rm -R /usr/local/share/__MACOSX" - - 08_remove_seqrepo_zip: - test: test -f "/usr/local/share/seqrepo.zip" - command: "rm /usr/local/share/seqrepo.zip" - - 9_data_permission: - test: test -d "/usr/local/share/seqrepo" - command: "chmod -R +wrx /usr/local/share/seqrepo" + 04_remove_seqrepo_zip: + test: test -f "/usr/local/share/seqrepo.tar.gz" + command: "rm /usr/local/share/seqrepo.tar.gz" diff --git a/.github/ISSUE_TEMPLATE/bug-report.yaml b/.github/ISSUE_TEMPLATE/bug-report.yaml new file mode 100644 index 00000000..5b3f7c8d --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug-report.yaml @@ -0,0 +1,85 @@ +name: Bug Report +description: Report a bug. +labels: ["bug"] +body: + - type: textarea + attributes: + label: Describe the bug + description: Provide a clear and concise description of what the bug is. 
+ validations: + required: true + - type: textarea + attributes: + label: Steps to reproduce + description: Provide detailed steps to replicate the bug. + placeholder: | + 1. In this environment... + 2. With this config... + 3. Run '...' + 4. See error... + validations: + required: true + - type: textarea + attributes: + label: Expected behavior + description: What did you expect to happen? + validations: + required: true + - type: textarea + attributes: + label: Current behavior + description: | + What actually happened? + + Include full errors, stack traces, and/or relevant logs. + validations: + required: true + - type: textarea + attributes: + label: Possible reason(s) + description: Provide any insights into what might be causing the issue. + validations: + required: false + - type: textarea + attributes: + label: Suggested fix + description: Provide any suggestions on how to resolve the bug. + validations: + required: false + - type: textarea + attributes: + label: Branch, commit, and/or version + description: Provide the branch, commit, and/or version you're using. + placeholder: | + branch: issue-1 + commit: abc123d + validations: + required: true + - type: textarea + attributes: + label: Screenshots + description: If applicable, add screenshots with descriptions to help explain your problem. + validations: + required: false + - type: textarea + attributes: + label: Environment details + description: Provide environment details (OS name and version, etc). + validations: + required: true + - type: textarea + attributes: + label: Additional details + description: Provide any other additional details about the problem. + validations: + required: false + - type: dropdown + attributes: + label: Contribution + description: Can you contribute to the development of this feature? + options: + - "Yes, I can create a PR for this fix." + - "Yes, but I can only provide ideas and feedback." + - "No, I cannot contribute." 
+ validations: + required: false diff --git a/.github/ISSUE_TEMPLATE/feature-request.yaml b/.github/ISSUE_TEMPLATE/feature-request.yaml new file mode 100644 index 00000000..2e1ae644 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/feature-request.yaml @@ -0,0 +1,60 @@ +name: Feature Request +description: Suggest an idea for this project. +labels: ["enhancement"] +body: + - type: textarea + attributes: + label: Feature description + description: Provide a clear and concise description of what you want to happen. + validations: + required: true + - type: textarea + attributes: + label: Use case + description: | + Why do you need this feature? For example: "I'm always frustrated when..." + validations: + required: true + - type: textarea + attributes: + label: Proposed solution + description: Provide proposed solution. + validations: + required: false + - type: textarea + attributes: + label: Alternatives considered + description: Describe any alternative solutions you've considered. + validations: + required: false + - type: textarea + attributes: + label: Implementation details + description: Provide any technical details on how the feature might be implemented. + validations: + required: false + - type: textarea + attributes: + label: Potential Impact + description: | + Discuss any potential impacts of this feature on existing functionality or performance, if known. + Will this feature cause breaking changes? + What challenges might arise? + validations: + required: false + - type: textarea + attributes: + label: Additional context + description: Provide any other context or screenshots about the feature. + validations: + required: false + - type: dropdown + attributes: + label: Contribution + description: Can you contribute to the development of this feature? + options: + - "Yes, I can create a PR for this feature." + - "Yes, but I can only provide ideas and feedback." + - "No, I cannot contribute." 
+ validations: + required: false diff --git a/.github/workflows/checks.yml b/.github/workflows/checks.yaml similarity index 80% rename from .github/workflows/checks.yml rename to .github/workflows/checks.yaml index ad485bc0..1faa72b7 100644 --- a/.github/workflows/checks.yml +++ b/.github/workflows/checks.yaml @@ -7,12 +7,12 @@ jobs: strategy: fail-fast: false matrix: - python-version: ["3.8", "3.9", "3.10", "3.11"] + python-version: ["3.10", "3.11", "3.12"] steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} @@ -23,10 +23,10 @@ jobs: lint: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Setup Python - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: 3.11 diff --git a/.github/workflows/pr-priority-label.yaml b/.github/workflows/pr-priority-label.yaml new file mode 100644 index 00000000..0bef462d --- /dev/null +++ b/.github/workflows/pr-priority-label.yaml @@ -0,0 +1,23 @@ +name: Pull Request Has Priority Label +on: + pull_request: + types: [opened, labeled, unlabeled, synchronize] +jobs: + pr-priority-label: + runs-on: ubuntu-latest + permissions: + issues: write + pull-requests: write + outputs: + status: ${{ steps.check-labels.outputs.status }} + steps: + - id: check-labels + uses: mheap/github-action-required-labels@v5 + with: + mode: exactly + count: 1 + labels: "priority:*" + use_regex: true + add_comment: true + message: "PRs require a priority label. Please add one." 
+ exit_type: failure diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 1d8c7ac3..c3bff3c7 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -13,7 +13,7 @@ jobs: steps: - uses: actions/checkout@v4 - name: Set up Python - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: "3.x" - name: Install pypa/build @@ -25,7 +25,7 @@ jobs: - name: Build a binary wheel and a source tarball run: python3 -m build - name: Store the distribution packages - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: name: python-package-distributions path: dist/ @@ -42,7 +42,7 @@ jobs: id-token: write # IMPORTANT: mandatory for trusted publishing steps: - name: Download all the dists - uses: actions/download-artifact@v3 + uses: actions/download-artifact@v4 with: name: python-package-distributions path: dist/ diff --git a/.github/workflows/stale.yaml b/.github/workflows/stale.yaml new file mode 100644 index 00000000..66bb8613 --- /dev/null +++ b/.github/workflows/stale.yaml @@ -0,0 +1,27 @@ +name: "Stalebot for issues and PRs" + +on: + schedule: + - cron: "30 13 * * 1-5" + +jobs: + stale-high-priority: + uses: genomicmedlab/software-templates/.github/workflows/reusable-stale.yaml@main + with: + days-before-issue-stale: 90 + days-before-pr-stale: 1 + labels: priority:high + + stale-medium-priority: + uses: genomicmedlab/software-templates/.github/workflows/reusable-stale.yaml@main + with: + days-before-issue-stale: 135 + days-before-pr-stale: 3 + labels: priority:medium + + stale-low-priority: + uses: genomicmedlab/software-templates/.github/workflows/reusable-stale.yaml@main + with: + days-before-issue-stale: 180 + days-before-pr-stale: 7 + labels: priority:low diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 8e29f947..c4fdba47 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -2,15 +2,19 @@ # See https://pre-commit.com/hooks.html for 
more hooks repos: - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.5.0 + rev: v4.6.0 # pre-commit-hooks version hooks: - id: check-added-large-files - id: detect-private-key - id: trailing-whitespace - id: end-of-file-fixer + - id: check-merge-conflict + - id: detect-aws-credentials + args: [ --allow-missing-credentials ] - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.2.0 + rev: v0.5.0 # ruff version hooks: - id: ruff-format - id: ruff args: [ --fix, --exit-non-zero-on-fix ] +minimum_pre_commit_version: 3.7.1 diff --git a/Dockerfile b/Dockerfile index d15f4c3c..8b1d8cb9 100644 --- a/Dockerfile +++ b/Dockerfile @@ -2,7 +2,7 @@ # Runs service on port 80. # Healthchecks service up every 5m. -FROM python:3.7 +FROM python:3.10 RUN apt update ; apt install -y rsync RUN pip install pipenv uvicorn[standard] COPY . /app @@ -13,4 +13,4 @@ EXPOSE 80 HEALTHCHECK --interval=5m --timeout=3s \ CMD curl -f http://localhost/variation || exit 1 -CMD pipenv run uvicorn variation.main:app --port 80 --host 0.0.0.0 +CMD pipenv run uvicorn variation.main:app --port 80 --host 0.0.0.0 \ No newline at end of file diff --git a/LICENSE b/LICENSE index 70dd49d6..de52c853 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,6 @@ MIT License -Copyright (c) 2018-2023 VICC +Copyright (c) 2018-2024 VICC Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/Pipfile b/Pipfile index ae4f085d..f8bef445 100644 --- a/Pipfile +++ b/Pipfile @@ -7,20 +7,20 @@ verify_ssl = true pytest = "*" pytest-asyncio = "*" pytest-cov = "*" -pre-commit = "*" +pre-commit = ">=3.7.1" variation-normalizer = {editable = true, path = "."} jupyter = "*" ipykernel = "*" psycopg2-binary = "*" -ruff = "==0.2.0" +ruff = "==0.5.0" [packages] "biocommons.seqrepo" = "*" fastapi = "*" uvicorn = "*" pydantic = "==2.*" -"ga4gh.vrs" = {version = "~=2.0.0a5", extras = ["extras"]} -gene-normalizer = 
"~=0.3.0.dev1" +"ga4gh.vrs" = {version = "~=2.0.0a10", extras = ["extras"]} +gene-normalizer = "~=0.4.0" boto3 = "*" -cool-seq-tool = "~=0.4.0.dev1" +cool-seq-tool = "~=0.5.0" bioutils = "*" diff --git a/README.md b/README.md index 2d4e7425..e7aaff74 100644 --- a/README.md +++ b/README.md @@ -1,25 +1,28 @@ # Variation Normalization -[![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.5894937.svg)](https://doi.org/10.5281/zenodo.5894937) +[![image](https://img.shields.io/pypi/v/variation-normalizer.svg)](https://pypi.python.org/pypi/variation-normalizer) [![image](https://img.shields.io/pypi/l/variation-normalizer.svg)](https://pypi.python.org/pypi/variation-normalizer) [![image](https://img.shields.io/pypi/pyversions/variation-normalizer.svg)](https://pypi.python.org/pypi/variation-normalizer) [![Actions status](https://github.com/cancervariants/variation-normalization/actions/workflows/checks.yaml/badge.svg)](https://github.com/cancervariants/variation-normalization/actions/checks.yaml)[![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.5894937.svg)](https://doi.org/10.5281/zenodo.5894937) -Services and guidelines for normalizing variation terms to [VRS](https://vrs.ga4gh.org/en/latest) compatible representations. + +The Variation Normalizer parses and translates free-text descriptions of genomic variations into computable objects conforming to the [Variation Representation Specification (VRS)](https://vrs.ga4gh.org/en/latest), enabling consistent and accurate variant harmonization across a diversity of genomic knowledge resources. 
+ -Public OpenAPI endpoint: +--- -Installing with pip: +[Live OpenAPI endpoint](https://normalize.cancervariants.org/variation) + +--- + +## Installation + +Install from [PyPI](https://pypi.org/project/variation-normalizer): ```shell -pip install variation-normalizer +python3 -m pip install variation-normalizer ``` -The variation-normalization repo depends on VRS models, and therefore each variation-normalizer package on PyPI uses a particular version of VRS. The correspondences between packages may be summarized as: - -| variation-normalization branch | variation-normalizer version | gene-normalizer version | VRS version | -| ---- | --- | ---- | --- | -| [main](https://github.com/cancervariants/variation-normalization/tree/main) | 0.6.X | 0.1.X | [1.X.X](https://github.com/ga4gh/vrs) | -| [staging](https://github.com/cancervariants/variation-normalization/tree/staging) | 0.8.X | 0.3.X | [2.0-alpha](https://github.com/ga4gh/vrs/tree/2.0-alpha) | +--- -## About +## Normalization Variation Normalization works by using four main steps: tokenization, classification, validation, and translation. During tokenization, we split strings on whitespace and parse to determine the type of token. During classification, we specify the order of tokens a classification can have. We then do validation checks such as ensuring references for a nucleotide or amino acid matches the expected value and validating a position exists on the given transcript. During translation, we return a VRS Allele object. @@ -36,7 +39,16 @@ Variation Normalizer accepts input from GRCh37 or GRCh8 assemblies. We are working towards adding more types of variations, coordinates, and representations. -### Endpoints +### VRS Versioning + +The variation-normalization repo depends on VRS models, and therefore each variation-normalizer package on PyPI uses a particular version of VRS. 
The correspondences between packages may be summarized as: + +| variation-normalization branch | variation-normalizer version | gene-normalizer version | VRS version | +| ---- | --- | ---- | --- | +| [main](https://github.com/cancervariants/variation-normalization/tree/main) | 0.6.X | 0.1.X | [1.X.X](https://github.com/ga4gh/vrs) | +| [staging](https://github.com/cancervariants/variation-normalization/tree/staging) | >=0.8.X | >=0.3.X | [2.0-alpha](https://github.com/ga4gh/vrs/tree/2.0-alpha) | + +### Available Endpoints #### `/to_vrs` @@ -48,7 +60,7 @@ Returns a VRS Variation aligned to the prioritized transcript. The Variation Nor If a genomic variation query _is_ given a gene (E.g. `BRAF g.140753336A>T`), the associated cDNA representation will be returned. This is because the gene provides additional strand context. If a genomic variation query is _not_ given a gene, the GRCh38 representation will be returned. -## Developer Instructions +## Development Clone the repo: @@ -68,7 +80,7 @@ pipenv shell pipenv update && pipenv install --dev ``` -### Backend Services +### Required resources Variation Normalization relies on some local data caches which you will need to set up. It uses pipenv to manage its environment, which you will also need to install. 
@@ -91,20 +103,20 @@ From the _root_ directory: pip install seqrepo sudo mkdir /usr/local/share/seqrepo sudo chown $USER /usr/local/share/seqrepo -seqrepo pull -i 2021-01-29 # Replace with latest version using `seqrepo list-remote-instances` if outdated +seqrepo pull -i 2024-02-20/ # Replace with latest version using `seqrepo list-remote-instances` if outdated ``` If you get an error similar to the one below: ```shell -PermissionError: [Error 13] Permission denied: '/usr/local/share/seqrepo/2021-01-29._fkuefgd' -> '/usr/local/share/seqrepo/2021-01-29' +PermissionError: [Error 13] Permission denied: '/usr/local/share/seqrepo/2024-02-20/._fkuefgd' -> '/usr/local/share/seqrepo/2024-02-20/' ``` You will want to do the following:\ (*Might not be ._fkuefgd, so replace with your error message path*) ```shell -sudo mv /usr/local/share/seqrepo/2021-01-29._fkuefgd /usr/local/share/seqrepo/2021-01-29 +sudo mv /usr/local/share/seqrepo/2024-02-20._fkuefgd /usr/local/share/seqrepo/2024-02-20 exit ``` @@ -154,11 +166,11 @@ uvicorn variation.main:app --reload Next, view the OpenAPI docs on your local machine: -### Init coding style tests +### Code QC Code style is managed by [Ruff](https://docs.astral.sh/ruff/) and checked prior to commit. -Check style with `ruff`: +To perform formatting and check style: ```shell python3 -m ruff format . && python3 -m ruff check --fix . @@ -186,3 +198,25 @@ From the _root_ directory of the repository: ```shell pytest tests/ ``` + +### Docker Setup + +From the root directory , where Docker and docker-compose file are , run the following commands: + +*docker-compose up -d db +*docker-compose up -d uta +*docker-compose up -d dynamodb-local + +Wait for all containers to start , Db service which is seqrepo container takes time and exits once the sequences download is over. + +Before starting the Variation-normalizer service, check the following: + +* Db container(seqrepo) has finished downloading data and has exited. 
+* UTA service has displayed the message "database system is ready to accept connections" in logs. +* Dynamodb container is up and running. + +Start the Variation Normalizer container with the following command: + +docker-compose up -d app + +The container will be up and running however, it downloads the gene normalizer data and that takes time. Please wait till the ETL for gene database is done. After that Variation normalizer is ready to use. \ No newline at end of file diff --git a/biomart.png b/biomart.png deleted file mode 100644 index e2f3e1f8..00000000 Binary files a/biomart.png and /dev/null differ diff --git a/cron.yaml b/cron.yaml deleted file mode 100644 index 675190d5..00000000 --- a/cron.yaml +++ /dev/null @@ -1,5 +0,0 @@ -version: 1 -cron: - - name: "task1" - url: "/scheduled" - schedule: "* * * * *" diff --git a/docker-compose.yaml b/docker-compose.yaml new file mode 100644 index 00000000..12eb3191 --- /dev/null +++ b/docker-compose.yaml @@ -0,0 +1,58 @@ +version: "3.9" + +services: + app: + build: + context: . 
+ environment: + SEQREPO_ROOT_DIR: /usr/local/share/seqrepo/2024-02-20 + AWS_ACCESS_KEY_ID: 'DUMMYIDEXAMPLE' + AWS_SECRET_ACCESS_KEY: 'DUMMYEXAMPLEKEY' + AWS_DEFAULT_REGION: 'us-west-2' + GENE_NORM_DB_URL: http://dynamodb-local:8000 + + links: + - "dynamodb-local" + + ports: + - "8001:80" + + volumes: + - seqrepo_vol:/usr/local/share/seqrepo + command: > + sh -c "pipenv run gene_norm_update --update_all --update_merged && + cd src wait_for_db && + pipenv run uvicorn variation.main:app --log-level debug --port 80 --host 0.0.0.0" + depends_on: + - db + - dynamodb-local + - uta + + db: + image: biocommons/seqrepo:latest + volumes: + - seqrepo_vol:/usr/local/share/seqrepo + + uta: + image: biocommons/uta:uta_20210129b + environment: + - POSTGRES_PASSWORD=some-password-that-you-make-up + volumes: + - seqrepo_vol:/var/lib/postgresql/data + ports: + - 5432:5432 + + dynamodb-local: + command: "-jar DynamoDBLocal.jar -sharedDb -dbPath ./data" + image: "amazon/dynamodb-local:1.18.0" + container_name: dynamodb-local + ports: + - "8000:8000" + user: root + volumes: + - "seqrepo_vol:/home/dynamodblocal/data" + working_dir: /home/dynamodblocal + +volumes: + seqrepo_vol: + uta_vol: \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index 8022269a..ecfda5ae 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -16,12 +16,11 @@ classifiers = [ "Topic :: Scientific/Engineering :: Bio-Informatics", "License :: OSI Approved :: MIT License", "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.8", - "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", ] -requires-python = ">=3.8" +requires-python = ">=3.10" description = "VICC normalization routine for variations" license = {file = "LICENSE"} dependencies = [ @@ -29,17 +28,17 @@ dependencies = [ "fastapi", "uvicorn", "pydantic ==2.*", - "ga4gh.vrs[extras] ~= 2.0.0a5", - 
"gene-normalizer ~=0.3.0.dev1", + "ga4gh.vrs[extras] ~= 2.0.0a10", + "gene-normalizer ~=0.4.0", "boto3", - "cool-seq-tool ~=0.4.0.dev1", + "cool-seq-tool ~=0.5.0", "bioutils" ] dynamic = ["version"] [project.optional-dependencies] test = ["pytest>=6.0", "pytest-cov", "pytest-asyncio"] -dev = ["pre-commit", "ruff==0.2.0", "psycopg2-binary", "jupyter", "ipykernel"] +dev = ["pre-commit>=3.7.1", "ruff==0.5.0", "psycopg2-binary", "jupyter", "ipykernel"] [project.urls] Homepage = "https://github.com/cancervariants/variation-normalization" @@ -49,18 +48,10 @@ Source = "https://github.com/cancervariants/variation-normalization" "Bug Tracker" = "https://github.com/cancervariants/variation-normalization/issues" [build-system] -requires = ["setuptools>=61.0"] +requires = ["setuptools>=64", "setuptools_scm>=8"] build-backend = "setuptools.build_meta" -[tool.setuptools.dynamic] -version = {attr = "variation.version.__version__"} - -# Scanning for namespace packages in the ``src`` directory is true by -# default in pyproject.toml, so you do NOT need to include the -# `tool.setuptools.packages.find` if it looks like the following: -# [tool.setuptools.packages.find] -# namespaces = true -# where = ["src"] +[tool.setuptools_scm] [tool.pytest.ini_options] addopts = "--cov=src --cov-report term-missing" @@ -71,8 +62,12 @@ branch = true [tool.ruff] src = ["src"] +extend-exclude = [ + "codebuild/*" +] -lint.select = [ +[tool.ruff.lint] +select = [ "F", # https://docs.astral.sh/ruff/rules/#pyflakes-f "E", "W", # https://docs.astral.sh/ruff/rules/#pycodestyle-e-w "I", # https://docs.astral.sh/ruff/rules/#isort-i @@ -88,20 +83,25 @@ lint.select = [ "DTZ", # https://docs.astral.sh/ruff/rules/#flake8-datetimez-dtz "T10", # https://docs.astral.sh/ruff/rules/#flake8-datetimez-dtz "EM", # https://docs.astral.sh/ruff/rules/#flake8-errmsg-em + "LOG", # https://docs.astral.sh/ruff/rules/#flake8-logging-log "G", # https://docs.astral.sh/ruff/rules/#flake8-logging-format-g + "INP", # 
https://docs.astral.sh/ruff/rules/#flake8-no-pep420-inp "PIE", # https://docs.astral.sh/ruff/rules/#flake8-pie-pie "T20", # https://docs.astral.sh/ruff/rules/#flake8-print-t20 "PT", # https://docs.astral.sh/ruff/rules/#flake8-pytest-style-pt "Q", # https://docs.astral.sh/ruff/rules/#flake8-quotes-q "RSE", # https://docs.astral.sh/ruff/rules/#flake8-raise-rse "RET", # https://docs.astral.sh/ruff/rules/#flake8-return-ret + "SLF", # https://docs.astral.sh/ruff/rules/#flake8-self-slf "SIM", # https://docs.astral.sh/ruff/rules/#flake8-simplify-sim + "ARG", # https://docs.astral.sh/ruff/rules/#flake8-unused-arguments-arg "PTH", # https://docs.astral.sh/ruff/rules/#flake8-use-pathlib-pth "PGH", # https://docs.astral.sh/ruff/rules/#pygrep-hooks-pgh + "PERF", # https://docs.astral.sh/ruff/rules/#perflint-perf + "FURB", # https://docs.astral.sh/ruff/rules/#refurb-furb "RUF", # https://docs.astral.sh/ruff/rules/#ruff-specific-rules-ruf ] - -lint.fixable = [ +fixable = [ "I", "F401", "D", @@ -109,15 +109,19 @@ lint.fixable = [ "ANN", "B", "C4", + "LOG", "G", "PIE", "PT", "RSE", "SIM", + "PERF", + "FURB", "RUF" ] -# ANN101 - missing-type-self # ANN003 - missing-type-kwargs +# ANN101 - missing-type-self +# ANN102 - missing-type-cls # D203 - one-blank-line-before-class # D205 - blank-line-after-summary # D206 - indent-with-spaces* @@ -132,8 +136,8 @@ lint.fixable = [ # W191 - tab-indentation* # PGH003 - blanket-type-ignore # *ignored for compatibility with formatter -lint.ignore = [ - "ANN101", "ANN003", +ignore = [ + "ANN003", "ANN101", "ANN102", "D203", "D205", "D206", "D213", "D300", "D400", "D415", "E111", "E114", "E117", "E501", "W191", @@ -145,15 +149,22 @@ lint.ignore = [ # ANN001 - missing-type-function-argument # ANN2 - missing-return-type # ANN201 - Missing type annotation -# ANN102 - missing-type-cls # N805 - invalid-first-argument-name-for-method # S101 - assert # B011 - assert-false +# SLF001 - private-member-access +# INP001 - implicit-namespace-package # RUF001 - 
ambiguous-unicode-character-string -"tests/*" = ["ANN001", "ANN102", "ANN2", "S101", "B011"] +# ARG002 - unused-method-argument +"tests/*" = ["ANN001", "ANN2", "S101", "B011", "SLF001", "INP001"] "src/variation/schemas/*" = ["ANN001", "ANN201", "N805", "S101"] -"codebuild/*" = ["T201"] +"codebuild/*" = ["T201", "INP001"] +"src/variation/validators/*" = ["ARG002"] +"src/variation/translators/*" = ["ARG002"] [tool.ruff.lint.flake8-bugbear] # Allow default arguments like, e.g., `data: List[str] = fastapi.Query(None)`. extend-immutable-calls = ["fastapi.Query"] + +[tool.ruff.format] +docstring-code-format = true diff --git a/src/variation/__init__.py b/src/variation/__init__.py index a7a9ad81..d6ef6def 100644 --- a/src/variation/__init__.py +++ b/src/variation/__init__.py @@ -1,30 +1,10 @@ """The Variation Normalization package.""" -import logging -from os import environ -if "VARIATION_NORM_EB_PROD" in environ: - LOG_FN = "/tmp/variation.log" # noqa: S108 -else: - LOG_FN = "variation.log" +from importlib.metadata import PackageNotFoundError, version -logging.basicConfig( - filename=LOG_FN, format="[%(asctime)s] - %(name)s - %(levelname)s : %(message)s" -) -logger = logging.getLogger("variation") -logger.setLevel(logging.DEBUG) -logger.handlers = [] - -logging.getLogger("cool_seq_tool").setLevel(logging.INFO) -logging.getLogger("boto3").setLevel(logging.INFO) -logging.getLogger("botocore").setLevel(logging.INFO) -logging.getLogger("urllib3").setLevel(logging.INFO) -logging.getLogger("python_jsonschema_objects").setLevel(logging.INFO) -logging.getLogger("hgvs.parser").setLevel(logging.INFO) -logging.getLogger("biocommons.seqrepo.seqaliasdb.seqaliasdb").setLevel(logging.INFO) -logging.getLogger("biocommons.seqrepo.fastadir.fastadir").setLevel(logging.INFO) -logging.getLogger("asyncio").setLevel(logging.INFO) - -if "VARIATION_NORM_EB_PROD" in environ: - ch = logging.StreamHandler() - ch.setLevel(logging.INFO) - logger.addHandler(ch) +try: + __version__ = 
version("variation-normalizer") +except PackageNotFoundError: + __version__ = "unknown" +finally: + del version, PackageNotFoundError diff --git a/src/variation/classifiers/__init__.py b/src/variation/classifiers/__init__.py index 6b68ec5f..fd43e822 100644 --- a/src/variation/classifiers/__init__.py +++ b/src/variation/classifiers/__init__.py @@ -1,4 +1,5 @@ """Classifier package level import.""" + from .amplification_classifier import AmplificationClassifier from .cdna_deletion_classifier import CdnaDeletionClassifier from .cdna_delins_classifier import CdnaDelInsClassifier diff --git a/src/variation/classifiers/amplification_classifier.py b/src/variation/classifiers/amplification_classifier.py index 69bb6832..b6c6a072 100644 --- a/src/variation/classifiers/amplification_classifier.py +++ b/src/variation/classifiers/amplification_classifier.py @@ -1,5 +1,4 @@ """A module for the Amplification Classifier""" -from typing import List from variation.classifiers.classifier import Classifier from variation.schemas.classification_response_schema import ( @@ -12,7 +11,7 @@ class AmplificationClassifier(Classifier): """The Amplification Classifier class""" - def exact_match_candidates(self) -> List[List[TokenType]]: + def exact_match_candidates(self) -> list[list[TokenType]]: """Return the token match candidates for the amplification classification. :return: List of list of tokens, where order matters, that represent an @@ -20,7 +19,7 @@ def exact_match_candidates(self) -> List[List[TokenType]]: """ return [[TokenType.GENE, TokenType.AMPLIFICATION]] - def match(self, tokens: List[Token]) -> AmplificationClassification: + def match(self, tokens: list[Token]) -> AmplificationClassification: """Return the amplification classification from a list of token matches. 
:param tokens: List of ordered tokens that are exact match candidates for an diff --git a/src/variation/classifiers/cdna_deletion_classifier.py b/src/variation/classifiers/cdna_deletion_classifier.py index 929854c1..e2f8965c 100644 --- a/src/variation/classifiers/cdna_deletion_classifier.py +++ b/src/variation/classifiers/cdna_deletion_classifier.py @@ -1,5 +1,4 @@ """A module for the Cdna Deletion Classifier.""" -from typing import List from variation.classifiers.classifier import Classifier from variation.schemas.classification_response_schema import ( @@ -12,7 +11,7 @@ class CdnaDeletionClassifier(Classifier): """The Cdna Deletion Classifier class.""" - def exact_match_candidates(self) -> List[List[TokenType]]: + def exact_match_candidates(self) -> list[list[TokenType]]: """Return the token match candidates for the cdna deletion classification. :return: List of list of tokens, where order matters, that represent a cdna @@ -20,7 +19,7 @@ def exact_match_candidates(self) -> List[List[TokenType]]: """ return [[TokenType.GENE, TokenType.CDNA_DELETION]] - def match(self, tokens: List[Token]) -> CdnaDeletionClassification: + def match(self, tokens: list[Token]) -> CdnaDeletionClassification: """Return the cdna deletion classification from a list of token matches. 
:param tokens: List of ordered tokens that are exact match candidates for a diff --git a/src/variation/classifiers/cdna_delins_classifier.py b/src/variation/classifiers/cdna_delins_classifier.py index 891906e1..50503502 100644 --- a/src/variation/classifiers/cdna_delins_classifier.py +++ b/src/variation/classifiers/cdna_delins_classifier.py @@ -1,5 +1,4 @@ """A module for the Cdna DelIns Classifier.""" -from typing import List from variation.classifiers.classifier import Classifier from variation.schemas.classification_response_schema import ( @@ -12,7 +11,7 @@ class CdnaDelInsClassifier(Classifier): """The Cdna DelIns Classifier class.""" - def exact_match_candidates(self) -> List[List[TokenType]]: + def exact_match_candidates(self) -> list[list[TokenType]]: """Return the token match candidates for the cdna delins classification. :return: List of list of tokens, where order matters, that represent a cdna @@ -20,7 +19,7 @@ def exact_match_candidates(self) -> List[List[TokenType]]: """ return [[TokenType.GENE, TokenType.CDNA_DELINS]] - def match(self, tokens: List[Token]) -> CdnaDelInsClassification: + def match(self, tokens: list[Token]) -> CdnaDelInsClassification: """Return the cdna delins classification from a list of token matches. 
:param tokens: List of ordered tokens that are exact match candidates for a diff --git a/src/variation/classifiers/cdna_insertion_classifier.py b/src/variation/classifiers/cdna_insertion_classifier.py index dd4daf01..7a05a9f9 100644 --- a/src/variation/classifiers/cdna_insertion_classifier.py +++ b/src/variation/classifiers/cdna_insertion_classifier.py @@ -1,5 +1,4 @@ """A module for the Cdna insertion Classifier.""" -from typing import List from variation.classifiers.classifier import Classifier from variation.schemas.classification_response_schema import ( @@ -12,7 +11,7 @@ class CdnaInsertionClassifier(Classifier): """The Cdna insertion Classifier class.""" - def exact_match_candidates(self) -> List[List[TokenType]]: + def exact_match_candidates(self) -> list[list[TokenType]]: """Return the token match candidates for the cdna insertion classification. :return: List of list of tokens, where order matters, that represent a cdna @@ -20,7 +19,7 @@ def exact_match_candidates(self) -> List[List[TokenType]]: """ return [[TokenType.GENE, TokenType.CDNA_INSERTION]] - def match(self, tokens: List[Token]) -> CdnaInsertionClassification: + def match(self, tokens: list[Token]) -> CdnaInsertionClassification: """Return the cdna insertion classification from a list of token matches. 
:param tokens: List of ordered tokens that are exact match candidates for a diff --git a/src/variation/classifiers/cdna_reference_agree_classifier.py b/src/variation/classifiers/cdna_reference_agree_classifier.py index 297dc7eb..b9aa5d60 100644 --- a/src/variation/classifiers/cdna_reference_agree_classifier.py +++ b/src/variation/classifiers/cdna_reference_agree_classifier.py @@ -1,5 +1,4 @@ """A module for the cDNA Reference Agree Classifier.""" -from typing import List from variation.classifiers.classifier import Classifier from variation.schemas.classification_response_schema import ( @@ -12,7 +11,7 @@ class CdnaReferenceAgreeClassifier(Classifier): """The Cdna Reference Agree Classifier class.""" - def exact_match_candidates(self) -> List[List[TokenType]]: + def exact_match_candidates(self) -> list[list[TokenType]]: """Return the token match candidates for the cdna reference agree classification. @@ -28,7 +27,7 @@ def exact_match_candidates(self) -> List[List[TokenType]]: ], ] - def match(self, tokens: List[Token]) -> CdnaReferenceAgreeClassification: + def match(self, tokens: list[Token]) -> CdnaReferenceAgreeClassification: """Return the cdna reference agree classification from a list of token matches. 
:param tokens: List of ordered tokens that are exact match candidates for a diff --git a/src/variation/classifiers/cdna_substitution_classifier.py b/src/variation/classifiers/cdna_substitution_classifier.py index aad05ba1..bf28bca2 100644 --- a/src/variation/classifiers/cdna_substitution_classifier.py +++ b/src/variation/classifiers/cdna_substitution_classifier.py @@ -1,5 +1,4 @@ """A module for the Cdna Substitution Classifier.""" -from typing import List, Optional from variation.classifiers.classifier import Classifier from variation.schemas.classification_response_schema import ( @@ -12,7 +11,7 @@ class CdnaSubstitutionClassifier(Classifier): """The Cdna Substitution Classifier class.""" - def exact_match_candidates(self) -> List[List[TokenType]]: + def exact_match_candidates(self) -> list[list[TokenType]]: """Return the token match candidates for the cdna substitution classification. :return: List of list of tokens, where order matters, that represent a cdna @@ -27,7 +26,7 @@ def exact_match_candidates(self) -> List[List[TokenType]]: ], ] - def match(self, tokens: List[Token]) -> Optional[CdnaSubstitutionClassification]: + def match(self, tokens: list[Token]) -> CdnaSubstitutionClassification | None: """Return the cdna substitution classification from a list of token matches. 
:param tokens: List of ordered tokens that are exact match candidates for a diff --git a/src/variation/classifiers/classifier.py b/src/variation/classifiers/classifier.py index beaea3ce..6105e28a 100644 --- a/src/variation/classifiers/classifier.py +++ b/src/variation/classifiers/classifier.py @@ -1,6 +1,6 @@ """Module for Classification methods.""" + from abc import ABC, abstractmethod -from typing import List, Optional from variation.schemas.classification_response_schema import Classification from variation.schemas.token_response_schema import Token, TokenType @@ -10,7 +10,7 @@ class Classifier(ABC): """The Classifier class.""" @abstractmethod - def match(self, tokens: List[Token]) -> Optional[Classification]: + def match(self, tokens: list[Token]) -> Classification | None: """Return the classification from a list of token matches. :param tokens: List of ordered tokens that are exact match candidates for a @@ -19,14 +19,14 @@ def match(self, tokens: List[Token]) -> Optional[Classification]: """ @abstractmethod - def exact_match_candidates(self) -> List[List[TokenType]]: + def exact_match_candidates(self) -> list[list[TokenType]]: """Return the token match candidates for a given classification. :return: List of list of tokens, where order matters, that represent a given classification. """ - def can_classify(self, tokens: List[Token]) -> bool: + def can_classify(self, tokens: list[Token]) -> bool: """Return whether or not a list of tokens can be classified by a given classification @@ -35,10 +35,8 @@ def can_classify(self, tokens: List[Token]) -> bool: matters, to represent a given classification. `False`, otherwise. 
""" token_types = [t.token_type for t in tokens] - exact_matches: List[List[str]] = [] - - for candidate in self.exact_match_candidates(): - if token_types == candidate: - exact_matches.append(candidate) + exact_matches: list[list[TokenType]] = [ + c for c in self.exact_match_candidates() if token_types == c + ] return len(exact_matches) == 1 diff --git a/src/variation/classifiers/genomic_deletion_ambiguous.py b/src/variation/classifiers/genomic_deletion_ambiguous.py index 6173b651..fa93362f 100644 --- a/src/variation/classifiers/genomic_deletion_ambiguous.py +++ b/src/variation/classifiers/genomic_deletion_ambiguous.py @@ -1,5 +1,4 @@ """A module for the Genomic Deletion Ambiguous Classifier.""" -from typing import List from variation.classifiers.classifier import Classifier from variation.schemas.classification_response_schema import ( @@ -13,7 +12,7 @@ class GenomicDeletionAmbiguousClassifier(Classifier): """The Genomic Deletion Ambiguous Classifier class.""" - def exact_match_candidates(self) -> List[List[TokenType]]: + def exact_match_candidates(self) -> list[list[TokenType]]: """Return the token match candidates for the genomic ambiguous deletion classification. @@ -22,7 +21,7 @@ def exact_match_candidates(self) -> List[List[TokenType]]: """ return [[TokenType.GENE, TokenType.GENOMIC_DELETION_AMBIGUOUS]] - def match(self, tokens: List[Token]) -> GenomicDeletionAmbiguousClassification: + def match(self, tokens: list[Token]) -> GenomicDeletionAmbiguousClassification: """Return the genomic ambiguous deletion classification from a list of token matches. 
diff --git a/src/variation/classifiers/genomic_deletion_classifier.py b/src/variation/classifiers/genomic_deletion_classifier.py index e5b60f07..14271df1 100644 --- a/src/variation/classifiers/genomic_deletion_classifier.py +++ b/src/variation/classifiers/genomic_deletion_classifier.py @@ -1,5 +1,4 @@ """A module for the Genomic Deletion Classifier.""" -from typing import List from variation.classifiers.classifier import Classifier from variation.schemas.classification_response_schema import ( @@ -12,7 +11,7 @@ class GenomicDeletionClassifier(Classifier): """The Genomic Deletion Classifier class.""" - def exact_match_candidates(self) -> List[List[TokenType]]: + def exact_match_candidates(self) -> list[list[TokenType]]: """Return the token match candidates for the genomic deletion classification. :return: List of list of tokens, where order matters, that represent a genomic @@ -20,7 +19,7 @@ def exact_match_candidates(self) -> List[List[TokenType]]: """ return [[TokenType.GENE, TokenType.GENOMIC_DELETION]] - def match(self, tokens: List[Token]) -> GenomicDeletionClassification: + def match(self, tokens: list[Token]) -> GenomicDeletionClassification: """Return the genomic deletion classification from a list of token matches. 
:param tokens: List of ordered tokens that are exact match candidates for a diff --git a/src/variation/classifiers/genomic_delins_classifier.py b/src/variation/classifiers/genomic_delins_classifier.py index 9be0530b..77571c9c 100644 --- a/src/variation/classifiers/genomic_delins_classifier.py +++ b/src/variation/classifiers/genomic_delins_classifier.py @@ -1,5 +1,4 @@ """A module for the Genomic DelIns Classifier.""" -from typing import List from variation.classifiers.classifier import Classifier from variation.schemas.classification_response_schema import ( @@ -12,7 +11,7 @@ class GenomicDelInsClassifier(Classifier): """The Genomic DelIns Classifier class.""" - def exact_match_candidates(self) -> List[List[TokenType]]: + def exact_match_candidates(self) -> list[list[TokenType]]: """Return the token match candidates for the genomic delins classification. :return: List of list of tokens, where order matters, that represent a genomic @@ -20,7 +19,7 @@ def exact_match_candidates(self) -> List[List[TokenType]]: """ return [[TokenType.GENE, TokenType.GENOMIC_DELINS]] - def match(self, tokens: List[Token]) -> GenomicDelInsClassification: + def match(self, tokens: list[Token]) -> GenomicDelInsClassification: """Return the genomic delins classification from a list of token matches. 
:param tokens: List of ordered tokens that are exact match candidates for a diff --git a/src/variation/classifiers/genomic_duplication_ambiguous.py b/src/variation/classifiers/genomic_duplication_ambiguous.py index 6bcf9a5b..a2037399 100644 --- a/src/variation/classifiers/genomic_duplication_ambiguous.py +++ b/src/variation/classifiers/genomic_duplication_ambiguous.py @@ -1,5 +1,4 @@ """A module for the Genomic Duplication Ambiguous Classifier.""" -from typing import List from variation.classifiers.classifier import Classifier from variation.schemas.classification_response_schema import ( @@ -13,7 +12,7 @@ class GenomicDuplicationAmbiguousClassifier(Classifier): """The Genomic Duplication Ambiguous Classifier class.""" - def exact_match_candidates(self) -> List[List[TokenType]]: + def exact_match_candidates(self) -> list[list[TokenType]]: """Return the token match candidates for the genomic ambiguous duplication classification. @@ -22,7 +21,7 @@ def exact_match_candidates(self) -> List[List[TokenType]]: """ return [[TokenType.GENE, TokenType.GENOMIC_DUPLICATION_AMBIGUOUS]] - def match(self, tokens: List[Token]) -> GenomicDuplicationAmbiguousClassification: + def match(self, tokens: list[Token]) -> GenomicDuplicationAmbiguousClassification: """Return the genomic ambiguous duplication classification from a list of token matches. 
diff --git a/src/variation/classifiers/genomic_duplication_classifier.py b/src/variation/classifiers/genomic_duplication_classifier.py index 271eb962..722bd3aa 100644 --- a/src/variation/classifiers/genomic_duplication_classifier.py +++ b/src/variation/classifiers/genomic_duplication_classifier.py @@ -1,5 +1,4 @@ """A module for the Genomic Duplication Classifier.""" -from typing import List from variation.classifiers.classifier import Classifier from variation.schemas.classification_response_schema import ( @@ -12,7 +11,7 @@ class GenomicDuplicationClassifier(Classifier): """The Genomic Duplication Classifier class.""" - def exact_match_candidates(self) -> List[List[TokenType]]: + def exact_match_candidates(self) -> list[list[TokenType]]: """Return the token match candidates for the genomic duplication classification. :return: List of list of tokens, where order matters, that represent a genomic @@ -20,7 +19,7 @@ def exact_match_candidates(self) -> List[List[TokenType]]: """ return [[TokenType.GENE, TokenType.GENOMIC_DUPLICATION]] - def match(self, tokens: List[Token]) -> GenomicDuplicationClassification: + def match(self, tokens: list[Token]) -> GenomicDuplicationClassification: """Return the genomic duplication classification from a list of token matches. 
:param tokens: List of ordered tokens that are exact match candidates for a diff --git a/src/variation/classifiers/genomic_insertion_classifier.py b/src/variation/classifiers/genomic_insertion_classifier.py index 09a362e8..50835623 100644 --- a/src/variation/classifiers/genomic_insertion_classifier.py +++ b/src/variation/classifiers/genomic_insertion_classifier.py @@ -1,5 +1,4 @@ """A module for the Genomic Insertion Classifier.""" -from typing import List from variation.classifiers.classifier import Classifier from variation.schemas.classification_response_schema import ( @@ -12,7 +11,7 @@ class GenomicInsertionClassifier(Classifier): """The Genomic Insertion Classifier class.""" - def exact_match_candidates(self) -> List[List[TokenType]]: + def exact_match_candidates(self) -> list[list[TokenType]]: """Return the token match candidates for the genomic insertion classification. :return: List of list of tokens, where order matters, that represent a genomic @@ -20,7 +19,7 @@ def exact_match_candidates(self) -> List[List[TokenType]]: """ return [[TokenType.GENE, TokenType.GENOMIC_INSERTION]] - def match(self, tokens: List[Token]) -> GenomicInsertionClassification: + def match(self, tokens: list[Token]) -> GenomicInsertionClassification: """Return the genomic insertion classification from a list of token matches. 
:param tokens: List of ordered tokens that are exact match candidates for a diff --git a/src/variation/classifiers/genomic_reference_agree_classifier.py b/src/variation/classifiers/genomic_reference_agree_classifier.py index ccf9d939..05e7febf 100644 --- a/src/variation/classifiers/genomic_reference_agree_classifier.py +++ b/src/variation/classifiers/genomic_reference_agree_classifier.py @@ -1,5 +1,4 @@ """A module for the Genomic Reference Agree Classifier.""" -from typing import List from variation.classifiers.classifier import Classifier from variation.schemas.classification_response_schema import ( @@ -12,7 +11,7 @@ class GenomicReferenceAgreeClassifier(Classifier): """The Genomic Reference Agree Classifier class.""" - def exact_match_candidates(self) -> List[List[TokenType]]: + def exact_match_candidates(self) -> list[list[TokenType]]: """Return the token match candidates for the genomic reference agree classification. @@ -21,7 +20,7 @@ def exact_match_candidates(self) -> List[List[TokenType]]: """ return [[TokenType.GENE, TokenType.GENOMIC_REFERENCE_AGREE]] - def match(self, tokens: List[Token]) -> GenomicReferenceAgreeClassification: + def match(self, tokens: list[Token]) -> GenomicReferenceAgreeClassification: """Return the genomic reference agree classification from a list of token matches. 
diff --git a/src/variation/classifiers/genomic_substitution_classifier.py b/src/variation/classifiers/genomic_substitution_classifier.py index 0c2c47a7..a11bb145 100644 --- a/src/variation/classifiers/genomic_substitution_classifier.py +++ b/src/variation/classifiers/genomic_substitution_classifier.py @@ -1,5 +1,4 @@ """A module for the Genomic Substitution Classifier.""" -from typing import List, Optional from variation.classifiers.classifier import Classifier from variation.schemas.classification_response_schema import ( @@ -12,7 +11,7 @@ class GenomicSubstitutionClassifier(Classifier): """The Genomic Substitution Classifier class.""" - def exact_match_candidates(self) -> List[List[TokenType]]: + def exact_match_candidates(self) -> list[list[TokenType]]: """Return the token match candidates for the genomic substitution classification. @@ -28,7 +27,7 @@ def exact_match_candidates(self) -> List[List[TokenType]]: ], ] - def match(self, tokens: List[Token]) -> Optional[GenomicSubstitutionClassification]: + def match(self, tokens: list[Token]) -> GenomicSubstitutionClassification | None: """Return the genomic substitution classification from a list of token matches. 
:param tokens: List of ordered tokens that are exact match candidates for a diff --git a/src/variation/classifiers/gnomad_vcf_classifier.py b/src/variation/classifiers/gnomad_vcf_classifier.py index 9a5b8a3e..13938fd5 100644 --- a/src/variation/classifiers/gnomad_vcf_classifier.py +++ b/src/variation/classifiers/gnomad_vcf_classifier.py @@ -1,5 +1,4 @@ """A module for the gnomAD VCF Classifier""" -from typing import List, Optional, Union from variation.classifiers.classifier import Classifier from variation.schemas.classification_response_schema import ( @@ -16,7 +15,7 @@ class GnomadVcfClassifier(Classifier): """The gnomAD VCF Classifier""" - def exact_match_candidates(self) -> List[List[TokenType]]: + def exact_match_candidates(self) -> list[list[TokenType]]: """Return the token match candidates for the gnomad vcf classification. :return: List of list of tokens, where order matters, that represent a gnomad @@ -26,14 +25,13 @@ def exact_match_candidates(self) -> List[List[TokenType]]: def match( self, token: GnomadVcfToken - ) -> Optional[ - Union[ - GenomicReferenceAgreeClassification, - GenomicSubstitutionClassification, - GenomicInsertionClassification, - GenomicDeletionClassification, - ] - ]: + ) -> ( + GenomicReferenceAgreeClassification + | GenomicSubstitutionClassification + | GenomicInsertionClassification + | GenomicDeletionClassification + | None + ): """Return the genomic classification (either reference agree, substitution, insertion, or deletion) from a gnomad vcf token. Currently only support simple genomic variation. 
diff --git a/src/variation/classifiers/hgvs_classifier.py b/src/variation/classifiers/hgvs_classifier.py index f81b8391..e29ab584 100644 --- a/src/variation/classifiers/hgvs_classifier.py +++ b/src/variation/classifiers/hgvs_classifier.py @@ -1,6 +1,6 @@ """A module for the HGVS Classifier.""" + from re import Match, Pattern -from typing import Dict, List, Optional from cool_seq_tool.schemas import AnnotationLayer @@ -44,7 +44,7 @@ class HgvsClassifier(Classifier): """The HGVS Classifier.""" - def exact_match_candidates(self) -> List[List[TokenType]]: + def exact_match_candidates(self) -> list[list[TokenType]]: """Return the token match candidates for the hgvs classification. :return: List of list of tokens, where order matters, that represent a hgvs @@ -52,7 +52,7 @@ def exact_match_candidates(self) -> List[List[TokenType]]: """ return [[TokenType.HGVS]] - def match(self, token: HgvsToken) -> Optional[Classification]: + def match(self, token: HgvsToken) -> Classification | None: """Return the classification from a hgvs token using regex matches to determine the type of classification. 
@@ -80,7 +80,7 @@ def match(self, token: HgvsToken) -> Optional[Classification]: return classification @staticmethod - def _regex_match(change: str, regex: Pattern) -> Optional[Match]: + def _regex_match(change: str, regex: Pattern) -> Match | None: """Strip parentheses from `change` and return whether or not `change` matches the `regex` @@ -95,8 +95,8 @@ def _regex_match(change: str, regex: Pattern) -> Optional[Match]: return match def _protein_classification( - self, token: HgvsToken, params: Dict - ) -> Optional[Classification]: + self, token: HgvsToken, params: dict + ) -> Classification | None: """Determine if hgvs token matches regex checks and return corresponding protein classification if a match is found @@ -154,8 +154,8 @@ def _protein_classification( return None def _cdna_classification( - self, token: HgvsToken, params: Dict - ) -> Optional[Classification]: + self, token: HgvsToken, params: dict + ) -> Classification | None: """Determine if hgvs token matches regex checks and return corresponding cdna classification if a match is found @@ -209,8 +209,8 @@ def _cdna_classification( return None def _genomic_classification( - self, token: HgvsToken, params: Dict - ) -> Optional[Classification]: + self, token: HgvsToken, params: dict + ) -> Classification | None: """Determine if hgvs token matches regex checks and return corresponding genomic classification if a match is found. Only checks against 'simple' duplication/deletions. @@ -274,8 +274,8 @@ def _genomic_classification( return None def _genomic_ambiguous_classification( - self, token: HgvsToken, params: Dict - ) -> Optional[Classification]: + self, token: HgvsToken, params: dict + ) -> Classification | None: """Determine if hgvs token matches regex checks and return corresponding genomic ambiguous classification if a match is found. Only checks against ambiguous duplication/deletions. 
@@ -295,7 +295,7 @@ def _genomic_ambiguous_classification( return None @staticmethod - def _update_ambiguous_params(params: Dict, regex_type: AmbiguousRegexType) -> None: + def _update_ambiguous_params(params: dict, regex_type: AmbiguousRegexType) -> None: """Mutates `params` to match correct types and gets associated ambiguous type from fields in `params` @@ -331,8 +331,8 @@ def _update_ambiguous_params(params: Dict, regex_type: AmbiguousRegexType) -> No params["ambiguous_type"] = ambiguous_type def _genomic_dup_ambiguous_classification( - self, token: HgvsToken, params: Dict - ) -> Optional[Classification]: + self, token: HgvsToken, params: dict + ) -> Classification | None: """Determine if hgvs token matches regex checks and return corresponding genomic ambiguous duplication classification if a match is found. Only checks against genomic ambiguous duplications. @@ -362,8 +362,8 @@ def _genomic_dup_ambiguous_classification( return None def _genomic_del_ambiguous_classification( - self, token: HgvsToken, params: Dict - ) -> Optional[Classification]: + self, token: HgvsToken, params: dict + ) -> Classification | None: """Determine if hgvs token matches regex checks and return corresponding genomic ambiguous deletion classification if a match is found. Only checks against genomic ambiguous deletion. 
diff --git a/src/variation/classifiers/protein_deletion_classifier.py b/src/variation/classifiers/protein_deletion_classifier.py index 402df0a4..a7ac2a12 100644 --- a/src/variation/classifiers/protein_deletion_classifier.py +++ b/src/variation/classifiers/protein_deletion_classifier.py @@ -1,5 +1,4 @@ """A module for the Protein Deletion Classifier.""" -from typing import List from variation.classifiers.classifier import Classifier from variation.schemas.classification_response_schema import ( @@ -12,7 +11,7 @@ class ProteinDeletionClassifier(Classifier): """The Protein Deletion Classifier class.""" - def exact_match_candidates(self) -> List[List[TokenType]]: + def exact_match_candidates(self) -> list[list[TokenType]]: """Return the token match candidates for the protein deletion classification. :return: List of list of tokens, where order matters, that represent a protein @@ -20,7 +19,7 @@ def exact_match_candidates(self) -> List[List[TokenType]]: """ return [[TokenType.GENE, TokenType.PROTEIN_DELETION]] - def match(self, tokens: List[Token]) -> ProteinDeletionClassification: + def match(self, tokens: list[Token]) -> ProteinDeletionClassification: """Return the protein deletion classification from a list of token matches. 
:param tokens: List of ordered tokens that are exact match candidates for a diff --git a/src/variation/classifiers/protein_delins_classifier.py b/src/variation/classifiers/protein_delins_classifier.py index 2574288e..c84c8203 100644 --- a/src/variation/classifiers/protein_delins_classifier.py +++ b/src/variation/classifiers/protein_delins_classifier.py @@ -1,5 +1,4 @@ """A module for the Protein DelIns Classifier.""" -from typing import List from variation.classifiers.classifier import Classifier from variation.schemas.classification_response_schema import ( @@ -12,7 +11,7 @@ class ProteinDelInsClassifier(Classifier): """The Protein DelIns Classifier class.""" - def exact_match_candidates(self) -> List[List[TokenType]]: + def exact_match_candidates(self) -> list[list[TokenType]]: """Return the token match candidates for the protein delins classification. :return: List of list of tokens, where order matters, that represent a protein @@ -20,7 +19,7 @@ def exact_match_candidates(self) -> List[List[TokenType]]: """ return [[TokenType.GENE, TokenType.PROTEIN_DELINS]] - def match(self, tokens: List[Token]) -> ProteinDelInsClassification: + def match(self, tokens: list[Token]) -> ProteinDelInsClassification: """Return the protein delins classification from a list of token matches. 
:param tokens: List of ordered tokens that are exact match candidates for a diff --git a/src/variation/classifiers/protein_insertion_classifier.py b/src/variation/classifiers/protein_insertion_classifier.py index e0f6cb4d..44942366 100644 --- a/src/variation/classifiers/protein_insertion_classifier.py +++ b/src/variation/classifiers/protein_insertion_classifier.py @@ -1,5 +1,4 @@ """A module for the Protein Insertion Classifier.""" -from typing import List from variation.classifiers.classifier import Classifier from variation.schemas.classification_response_schema import ( @@ -12,7 +11,7 @@ class ProteinInsertionClassifier(Classifier): """The Protein Insertion Classifier class.""" - def exact_match_candidates(self) -> List[List[TokenType]]: + def exact_match_candidates(self) -> list[list[TokenType]]: """Return the token match candidates for the protein insertion classification. :return: List of list of tokens, where order matters, that represent a protein @@ -20,7 +19,7 @@ def exact_match_candidates(self) -> List[List[TokenType]]: """ return [[TokenType.GENE, TokenType.PROTEIN_INSERTION]] - def match(self, tokens: List[Token]) -> ProteinInsertionClassification: + def match(self, tokens: list[Token]) -> ProteinInsertionClassification: """Return the protein insertion classification from a list of token matches. 
:param tokens: List of ordered tokens that are exact match candidates for a diff --git a/src/variation/classifiers/protein_reference_agree.py b/src/variation/classifiers/protein_reference_agree.py index b0c6cd91..fec70e7a 100644 --- a/src/variation/classifiers/protein_reference_agree.py +++ b/src/variation/classifiers/protein_reference_agree.py @@ -1,5 +1,4 @@ """A module for the Reference Agree Classifier.""" -from typing import List from variation.classifiers.classifier import Classifier from variation.schemas.classification_response_schema import ( @@ -12,7 +11,7 @@ class ProteinReferenceAgreeClassifier(Classifier): """The Reference Agree Classifier class.""" - def exact_match_candidates(self) -> List[List[TokenType]]: + def exact_match_candidates(self) -> list[list[TokenType]]: """Return the token match candidates for the protein reference agree classification. @@ -21,7 +20,7 @@ def exact_match_candidates(self) -> List[List[TokenType]]: """ return [[TokenType.GENE, TokenType.PROTEIN_REFERENCE_AGREE]] - def match(self, tokens: List[Token]) -> ProteinReferenceAgreeClassification: + def match(self, tokens: list[Token]) -> ProteinReferenceAgreeClassification: """Return the protein reference agree classification from a list of token matches. 
diff --git a/src/variation/classifiers/protein_stop_gain_classifier.py b/src/variation/classifiers/protein_stop_gain_classifier.py index 1c959865..c18bf378 100644 --- a/src/variation/classifiers/protein_stop_gain_classifier.py +++ b/src/variation/classifiers/protein_stop_gain_classifier.py @@ -1,5 +1,4 @@ """A module for the Protein Stop Gain Classifier.""" -from typing import List from variation.classifiers.classifier import Classifier from variation.schemas.classification_response_schema import ( @@ -12,7 +11,7 @@ class ProteinStopGainClassifier(Classifier): """The Protein Stop Gain Classifier class.""" - def exact_match_candidates(self) -> List[List[TokenType]]: + def exact_match_candidates(self) -> list[list[TokenType]]: """Return the token match candidates for the protein stop gain classification. :return: List of list of tokens, where order matters, that represent a protein @@ -20,7 +19,7 @@ def exact_match_candidates(self) -> List[List[TokenType]]: """ return [[TokenType.GENE, TokenType.PROTEIN_STOP_GAIN]] - def match(self, tokens: List[Token]) -> ProteinStopGainClassification: + def match(self, tokens: list[Token]) -> ProteinStopGainClassification: """Return the protein stop gain classification from a list of token matches. 
:param tokens: List of ordered tokens that are exact match candidates for a diff --git a/src/variation/classifiers/protein_substitution_classifier.py b/src/variation/classifiers/protein_substitution_classifier.py index 7181f340..a2380bcb 100644 --- a/src/variation/classifiers/protein_substitution_classifier.py +++ b/src/variation/classifiers/protein_substitution_classifier.py @@ -1,5 +1,4 @@ """A module for the Protein Substitution Classifier.""" -from typing import List from variation.classifiers.classifier import Classifier from variation.schemas.classification_response_schema import ( @@ -12,7 +11,7 @@ class ProteinSubstitutionClassifier(Classifier): """The ProteinSubstitution Classifier class.""" - def exact_match_candidates(self) -> List[List[TokenType]]: + def exact_match_candidates(self) -> list[list[TokenType]]: """Return the token match candidates for the protein substitution classification. @@ -21,7 +20,7 @@ def exact_match_candidates(self) -> List[List[TokenType]]: """ return [[TokenType.GENE, TokenType.PROTEIN_SUBSTITUTION]] - def match(self, tokens: List[Token]) -> ProteinSubstitutionClassification: + def match(self, tokens: list[Token]) -> ProteinSubstitutionClassification: """Return the protein substitution classification from a list of token matches. 
:param tokens: List of ordered tokens that are exact match candidates for a diff --git a/src/variation/classify.py b/src/variation/classify.py index cd1a4728..5b109f4a 100644 --- a/src/variation/classify.py +++ b/src/variation/classify.py @@ -1,5 +1,6 @@ """Module for classification.""" -from typing import ClassVar, List, Optional + +from typing import ClassVar from variation.classifiers import ( AmplificationClassifier, @@ -35,7 +36,7 @@ class Classify: hgvs_classifier = HgvsClassifier() gnomad_vcf_classifier = GnomadVcfClassifier() - classifiers: ClassVar[List[Classifier]] = [ + classifiers: ClassVar[list[Classifier]] = [ ProteinDelInsClassifier(), ProteinSubstitutionClassifier(), ProteinStopGainClassifier(), @@ -59,7 +60,7 @@ class Classify: AmplificationClassifier(), ] - def perform(self, tokens: List[Token]) -> Optional[Classification]: + def perform(self, tokens: list[Token]) -> Classification | None: """Classify a list of tokens. :param tokens: List of tokens found diff --git a/src/variation/gnomad_vcf_to_protein_variation.py b/src/variation/gnomad_vcf_to_protein_variation.py index 2cb8e4b0..270dfe31 100644 --- a/src/variation/gnomad_vcf_to_protein_variation.py +++ b/src/variation/gnomad_vcf_to_protein_variation.py @@ -1,15 +1,16 @@ """Module for translating VCF-like to protein VRS Allele representation""" + import datetime -from typing import List, Optional, Tuple from cool_seq_tool.handlers import SeqRepoAccess from cool_seq_tool.mappers import ManeTranscript -from cool_seq_tool.schemas import ResidueMode, Strand -from ga4gh.core import core_models, ga4gh_identify +from cool_seq_tool.schemas import Strand +from ga4gh.core import domain_models, ga4gh_identify from ga4gh.vrs import models, normalize from gene.query import QueryHandler as GeneQueryHandler from gene.schemas import MatchType as GeneMatchType +from variation import __version__ from variation.classify import Classify from variation.schemas.classification_response_schema import Nomenclature from 
variation.schemas.gnomad_vcf_to_protein_schema import GnomadVcfToProteinService @@ -18,8 +19,8 @@ from variation.schemas.validation_response_schema import ValidationResult from variation.tokenize import Tokenize from variation.translate import Translate +from variation.utils import get_vrs_loc_seq from variation.validate import Validate -from variation.version import __version__ class GnomadVcfToProteinError(Exception): @@ -50,7 +51,7 @@ def _get_char_match_count( def _trim_prefix_or_suffix( aa_ref: str, aa_alt: str, aa_start_pos: int = 0, trim_prefix: bool = True -) -> Tuple[str, str, int]: +) -> tuple[str, str, int]: """Trim prefix or suffix matches :param aa_ref: Amino acid reference sequence @@ -67,7 +68,7 @@ def _trim_prefix_or_suffix( len_aa_alt = len(aa_alt) # Trim prefixes - range_len = len_aa_ref if len_aa_ref < len_aa_alt else len_aa_alt + range_len = min(len_aa_alt, len_aa_ref) aa_match = _get_char_match_count( range_len, aa_ref, aa_alt, trim_prefix=trim_prefix ) @@ -181,8 +182,8 @@ def __init__( self.gene_normalizer = gene_normalizer async def _get_valid_result( - self, vcf_query: str, warnings: List - ) -> List[ValidationResult]: + self, vcf_query: str, warnings: list + ) -> list[ValidationResult]: """Get gnomad vcf validation summary :param vcf_query: gnomad vcf input query @@ -221,7 +222,7 @@ async def _get_valid_result( @staticmethod def _get_alt_type_and_prefix_match( len_g_ref: int, len_g_alt: int, g_ref: str, g_alt: str - ) -> Tuple[AltType, int]: + ) -> tuple[AltType, int]: """Get genomic alteration type and number of prefixes match :param len_g_ref: Length of genomic reference sequence @@ -259,7 +260,7 @@ def _get_genomic_pos_range( strand: Strand, g_start_pos: int, g_end_pos: int, - ) -> Tuple[int, int, int]: + ) -> tuple[int, int, int]: """Get genomic positions to cover the range of codons :param c_start_pos: cDNA start position @@ -271,8 +272,8 @@ def _get_genomic_pos_range( for the original position change """ # Get cDNA reading frame - 
start_reading_frame = self.mane_transcript._get_reading_frame(c_start_pos + 1) - end_reading_frame = self.mane_transcript._get_reading_frame(c_end_pos) + start_reading_frame = self.mane_transcript._get_reading_frame(c_start_pos + 1) # noqa: SLF001 + end_reading_frame = self.mane_transcript._get_reading_frame(c_end_pos) # noqa: SLF001 # Get genomic position range change # This ensures that there 3 nucleotides needed for codon @@ -372,11 +373,17 @@ def _dna_to_aa(dna_seq: str, strand: Strand) -> str: return aa def _get_protein_representation( - self, ga4gh_seq_id: str, aa_start_pos: int, aa_end_pos: int, aa_alt: str + self, + ga4gh_seq_id: str, + p_ac: str, + aa_start_pos: int, + aa_end_pos: int, + aa_alt: str, ) -> models.Allele: """Create VRS Allele for protein representation :param ga4gh_seq_id: GA4GH identifier for protein accession + :param p_ac: RefSeq or Ensembl protein accession :param aa_start_pos: Protein start position (inter-residue coordinates) :param aa_end_pos: Protein end position (inter-residue coordinates) :param aa_alt: Protein alternate sequence @@ -402,12 +409,18 @@ def _get_protein_representation( msg = f"VRS-Python unable to normalize allele: {e}" raise GnomadVcfToProteinError(msg) from e + loc_seq = get_vrs_loc_seq( + self.seqrepo_access, p_ac, variation.location.start, variation.location.end + ) + if loc_seq: + variation.location.sequence = models.SequenceString(root=loc_seq) + # Add VRS digests for VRS Allele and VRS Sequence Location variation.id = ga4gh_identify(variation) variation.location.id = ga4gh_identify(variation.location) return variation - def _get_gene_context(self, gene: str) -> Optional[core_models.Gene]: + def _get_gene_context(self, gene: str) -> domain_models.Gene | None: """Get additional gene information from gene-normalizer :param gene: Gene symbol @@ -420,25 +433,6 @@ def _get_gene_context(self, gene: str) -> Optional[core_models.Gene]: else None ) - def _get_vrs_ref_allele_seq( - self, location: 
models.SequenceLocation, p_ac: str - ) -> Optional[str]: - """Return reference sequence given a VRS location. - - :param location: VRS Location object - :param identifier: Identifier for allele - :return: VRS ref seq allele - """ - start = location.start - end = location.end - if isinstance(start, int) and isinstance(end, int) and (start != end): - ref, _ = self.seqrepo_access.get_reference_sequence( - p_ac, start, end, residue_mode=ResidueMode.INTER_RESIDUE - ) - else: - ref = None - return ref - async def gnomad_vcf_to_protein(self, vcf_query: str) -> GnomadVcfToProteinService: """Get protein consequence for gnomAD-VCF like expression Assumes input query uses GRCh38 representation @@ -576,7 +570,7 @@ async def gnomad_vcf_to_protein(self, vcf_query: str) -> GnomadVcfToProteinServi # Create the protein VRS Allele try: variation = self._get_protein_representation( - p_ga4gh_seq_id, aa_start_pos, aa_end_pos, aa_alt + p_ga4gh_seq_id, p_ac, aa_start_pos, aa_end_pos, aa_alt ) except GnomadVcfToProteinError as e: warnings.append(str(e)) @@ -591,7 +585,6 @@ async def gnomad_vcf_to_protein(self, vcf_query: str) -> GnomadVcfToProteinServi return GnomadVcfToProteinService( variation_query=vcf_query, variation=variation, - vrs_ref_allele_seq=self._get_vrs_ref_allele_seq(variation.location, p_ac), gene_context=gene_context, warnings=warnings, service_meta_=ServiceMeta( diff --git a/src/variation/hgvs_dup_del_mode.py b/src/variation/hgvs_dup_del_mode.py index c7df9e1c..722c221e 100644 --- a/src/variation/hgvs_dup_del_mode.py +++ b/src/variation/hgvs_dup_del_mode.py @@ -1,5 +1,4 @@ """Module for hgvs_dup_del_mode in normalize endpoint.""" -from typing import Dict, List, Optional, Union from cool_seq_tool.handlers import SeqRepoAccess from cool_seq_tool.schemas import ResidueMode @@ -12,6 +11,25 @@ # Define deletion alt types DELS = {AltType.DELETION_AMBIGUOUS, AltType.DELETION} +# Define supported alt types for HGVS Dup Del Mode +DELS_DUPS = { + AltType.DELETION, + 
AltType.DELETION_AMBIGUOUS, + AltType.DUPLICATION, + AltType.DUPLICATION_AMBIGUOUS, +} + + +def _check_supported_alt_type(alt_type: AltType) -> None: + """Check that ``alt_type`` is one of ``DUP_DELS`` + + :param alt_type: Alteration type + :raises ValueError: If ``alt_type`` not one of ``DELS_DUPS``. + """ + if alt_type not in DELS_DUPS: + err_msg = f"`alt_type` must be one of: {DELS_DUPS}" + raise ValueError(err_msg) + class HGVSDupDelMode: """Class for handling how to interpret HGVS duplications and deletions.""" @@ -25,18 +43,13 @@ def __init__(self, seqrepo_access: SeqRepoAccess) -> None: def default_mode( self, - alt_type: Union[ - AltType.DELETION, - AltType.DELETION_AMBIGUOUS, - AltType.DUPLICATION, - AltType.DUPLICATION_AMBIGUOUS, - ], - location: Dict, + alt_type: AltType, + location: dict, vrs_seq_loc_ac: str, - baseline_copies: Optional[int] = None, - copy_change: Optional[models.CopyChange] = None, - alt: Optional[str] = None, - ) -> Optional[Dict]: + baseline_copies: int | None = None, + copy_change: models.CopyChange | None = None, + alt: str | None = None, + ) -> dict | None: """Use default characteristics to return a variation. If baseline_copies not provided and endpoints are ambiguous - copy_number_change if copy_change not provided: @@ -46,14 +59,17 @@ def default_mode( else allele - :param alt_type: The type of alteration + :param alt_type: The type of alteration. Must be one of ``DELS_DUPS``. :param location: Sequence Location object :param vrs_seq_loc_ac: Accession used in VRS Sequence Location :param baseline_copies: Baseline copies for Copy Number Count variation :param copy_change: copy change for Copy Number Change Variation :param alt: Alteration + :raises ValueError: If ``alt_type`` not one of ``DELS_DUPS``. 
:return: VRS Variation object represented as a dict """ + _check_supported_alt_type(alt_type) + variation = None if not baseline_copies and alt_type in AMBIGUOUS_REGIONS: variation = self.copy_number_change_mode(alt_type, location, copy_change) @@ -65,22 +81,20 @@ def default_mode( def copy_number_count_mode( self, - alt_type: Union[ - AltType.DELETION, - AltType.DELETION_AMBIGUOUS, - AltType.DUPLICATION, - AltType.DUPLICATION_AMBIGUOUS, - ], - location: Dict, + alt_type: AltType, + location: dict, baseline_copies: int, - ) -> Dict: + ) -> dict: """Return a VRS Copy Number Variation. - :param alt_type: The type of alteration + :param alt_type: The type of alteration. Must be one of ``DELS_DUPS``. :param location: VRS SequenceLocation :param baseline_copies: Baseline copies number + :raises ValueError: If ``alt_type`` not one of ``DELS_DUPS``. :return: VRS Copy Number object represented as a dict """ + _check_supported_alt_type(alt_type) + copies = baseline_copies - 1 if alt_type in DELS else baseline_copies + 1 seq_loc = models.SequenceLocation(**location) seq_loc.id = ga4gh_identify(seq_loc) @@ -90,22 +104,20 @@ def copy_number_count_mode( def copy_number_change_mode( self, - alt_type: Union[ - AltType.DELETION, - AltType.DELETION_AMBIGUOUS, - AltType.DUPLICATION, - AltType.DUPLICATION_AMBIGUOUS, - ], - location: Dict, - copy_change: Optional[models.CopyChange] = None, - ) -> Dict: + alt_type: AltType, + location: dict, + copy_change: models.CopyChange | None = None, + ) -> dict: """Return copy number change variation - :param alt_type: The type of alteration + :param alt_type: The type of alteration. Must be one of ``DELS_DUPS``. :param location: VRS SequenceLocation :param copy_change: The copy change + :raises ValueError: If ``alt_type`` not one of ``DELS_DUPS``. 
:return: Copy Number Change variation as a dict """ + _check_supported_alt_type(alt_type) + if not copy_change: copy_change = ( models.CopyChange.EFO_0030067 @@ -121,11 +133,11 @@ def copy_number_change_mode( def allele_mode( self, - location: Dict, + location: dict, alt_type: AltType, vrs_seq_loc_ac: str, alt: str, - ) -> Optional[Dict]: + ) -> dict | None: """Return a VRS Allele with a normalized LiteralSequenceExpression or ReferenceLengthExpression. @@ -170,14 +182,14 @@ def allele_mode( def interpret_variation( self, alt_type: AltType, - location: Dict, - errors: List, + location: dict, + errors: list, hgvs_dup_del_mode: HGVSDupDelModeOption, vrs_seq_loc_ac: str, - baseline_copies: Optional[int] = None, - copy_change: Optional[models.CopyChange] = None, - alt: Optional[str] = None, - ) -> Dict: + baseline_copies: int | None = None, + copy_change: models.CopyChange | None = None, + alt: str | None = None, + ) -> dict: """Interpret variation using HGVSDupDelMode :param alt_type: Alteration type diff --git a/src/variation/log_config.py b/src/variation/log_config.py new file mode 100644 index 00000000..4360dfe8 --- /dev/null +++ b/src/variation/log_config.py @@ -0,0 +1,55 @@ +"""Provide functions and variables for logging setup. + +We may need to set up common logging protocols for a few different entry points when +running the Variation Normalizer as an app, but we shouldn't dictate anything to +downstream users. Functions in this module should only be called from locations in code +that are being executed directly by users, not from anywhere that might be imported as +a library. 
+""" + +import logging +import os + + +def _quiet_upstream_libs() -> None: + """Turn off debug logging for chatty upstream library loggers.""" + for lib in ( + "boto3", + "botocore", + "urllib3", + "hgvs.parser", + "biocommons.seqrepo.seqaliasdb.seqaliasdb", + "biocommons.seqrepo.fastadir.fastadir", + "asyncio", + ): + logging.getLogger(lib).setLevel(logging.INFO) + + +def configure_logging( + log_level: int = logging.DEBUG, quiet_upstream: bool = True +) -> None: + """Configure logging. + + :param log_level: global log level to set + :param quiet_upstream: if True, turn off debug logging for a selection of libraries + """ + log_filename = ( + "/tmp/variation.log" # noqa: S108 + if "VARIATION_NORM_EB_PROD" in os.environ + else "variation.log" + ) + if quiet_upstream: + _quiet_upstream_libs() + logging.basicConfig( + filename=log_filename, + format="[%(asctime)s] - %(name)s - %(levelname)s : %(message)s", + ) + logger = logging.getLogger("variation") + logger.setLevel(log_level) + + if "VARIATION_NORM_EB_PROD" in os.environ: + # force debug logging in production server + logger.handlers = [] + handler = logging.StreamHandler() + handler.setLevel(logging.DEBUG) + logger.addHandler(handler) diff --git a/src/variation/main.py b/src/variation/main.py index bdb675d2..006bb0a2 100644 --- a/src/variation/main.py +++ b/src/variation/main.py @@ -1,8 +1,11 @@ """Main application for FastAPI.""" + import datetime +import logging import traceback +from collections.abc import AsyncGenerator +from contextlib import asynccontextmanager from enum import Enum -from typing import List, Optional, Union from urllib.parse import unquote import pkg_resources @@ -10,19 +13,26 @@ from cool_seq_tool.schemas import Assembly, ResidueMode from fastapi import FastAPI, Query from ga4gh.vrs import models -from ga4gh.vrs.extras.translator import ValidationError as VrsPythonValidationError +from ga4gh.vrs.dataproxy import DataProxyValidationError from hgvs.exceptions import HGVSError from pydantic 
import ValidationError -from variation import logger +from variation import __version__ +from variation.log_config import configure_logging from variation.query import QueryHandler -from variation.schemas import ServiceMeta +from variation.schemas import NormalizeService, ServiceMeta, ToVRSService from variation.schemas.copy_number_schema import ( + AmplificationToCxVarService, ParsedToCnVarQuery, ParsedToCnVarService, ParsedToCxVarQuery, ParsedToCxVarService, ) +from variation.schemas.gnomad_vcf_to_protein_schema import GnomadVcfToProteinService +from variation.schemas.hgvs_to_copy_number_schema import ( + HgvsToCopyNumberChangeService, + HgvsToCopyNumberCountService, +) from variation.schemas.normalize_response_schema import ( HGVSDupDelModeOption, TranslateIdentifierService, @@ -40,7 +50,8 @@ TranslateToService, VrsPythonMeta, ) -from variation.version import __version__ + +_logger = logging.getLogger(__name__) class Tag(Enum): @@ -57,6 +68,17 @@ class Tag(Enum): query_handler = QueryHandler() +@asynccontextmanager +async def lifespan(app: FastAPI) -> AsyncGenerator: # noqa: ARG001 + """Configure FastAPI instance lifespan. + + :param app: FastAPI app instance + :return: async context handler + """ + configure_logging() + yield + + app = FastAPI( title="The VICC Variation Normalizer", description="Services and guidelines for normalizing variations.", @@ -92,13 +114,15 @@ class Tag(Enum): @app.get( "/variation/to_vrs", summary=translate_summary, + response_model=ToVRSService, + response_model_exclude_none=True, response_description=translate_response_description, description=translate_description, tags=[Tag.MAIN], ) async def to_vrs( q: str = Query(..., description=q_description), -) -> dict: +) -> ToVRSService: """Translate a HGVS, gnomAD VCF and Free Text descriptions to VRS variation(s). Performs fully-justified allele normalization. Does not do any liftover operations or make any inferences about the query. 
@@ -106,8 +130,7 @@ async def to_vrs( :param q: HGVS, gnomAD VCF or Free Text description on GRCh37 or GRCh38 assembly :return: ToVRSService model for variation """ - resp = await query_handler.to_vrs_handler.to_vrs(unquote(q)) - return resp.model_dump(exclude_none=True) + return await query_handler.to_vrs_handler.to_vrs(unquote(q)) normalize_summary = ( @@ -129,24 +152,26 @@ async def to_vrs( @app.get( "/variation/normalize", summary=normalize_summary, + response_model=NormalizeService, + response_model_exclude_none=True, response_description=normalize_response_description, description=normalize_description, tags=[Tag.MAIN], ) async def normalize( q: str = Query(..., description=q_description), - hgvs_dup_del_mode: Optional[HGVSDupDelModeOption] = Query( + hgvs_dup_del_mode: HGVSDupDelModeOption | None = Query( HGVSDupDelModeOption.DEFAULT, description=hgvs_dup_del_mode_decsr ), - baseline_copies: Optional[int] = Query( + baseline_copies: int | None = Query( None, description="Baseline copies for HGVS duplications and deletions represented as Copy Number Count Variation", ), - copy_change: Optional[models.CopyChange] = Query( + copy_change: models.CopyChange | None = Query( None, description="The copy change for HGVS duplications and deletions represented as Copy Number Change Variation.", ), -) -> dict: +) -> NormalizeService: """Normalize and translate a HGVS, gnomAD VCF or Free Text description on GRCh37 or GRCh38 assembly to a single VRS Variation. Performs fully-justified allele normalization. Will liftover to GRCh38 and aligns to a priority transcript. Will @@ -162,13 +187,12 @@ async def normalize( query. 
:return: NormalizeService for variation """ - resp = await query_handler.normalize_handler.normalize( + return await query_handler.normalize_handler.normalize( unquote(q), hgvs_dup_del_mode=hgvs_dup_del_mode, baseline_copies=baseline_copies, copy_change=copy_change, ) - return resp.model_dump(exclude_none=True) @app.get( @@ -182,7 +206,7 @@ async def normalize( ) def translate_identifier( identifier: str = Query(..., description="The identifier to find aliases for"), - target_namespaces: Optional[str] = Query( + target_namespaces: str | None = Query( None, description="The namespaces of the aliases, separated by commas" ), ) -> TranslateIdentifierService: @@ -224,6 +248,8 @@ def translate_identifier( @app.get( "/variation/translate_from", summary="Given variation as beacon, gnomad, hgvs or spdi representation, return VRS Allele object using VRS-Python's AlleleTranslator class", + response_model=TranslateFromService, + response_model_exclude_none=True, response_description="A response to a validly-formed query.", description="Return VRS Allele object", tags=[Tag.VRS_PYTHON], @@ -233,14 +259,14 @@ def vrs_python_translate_from( ..., description="Variation to translate to VRS object. Must be represented as either beacon, gnomad, hgvs, or spdi.", ), - fmt: Optional[TranslateFromFormat] = Query(None, description=from_fmt_descr), + fmt: TranslateFromFormat | None = Query(None, description=from_fmt_descr), assembly_name: str = Query( "GRCh38", description="Assembly used for `variation`. Only used for beacon and gnomad.", ), require_validation: bool = Query(True, description=require_validation_descr), - rle_seq_limit: Optional[int] = Query(50, description=rle_seq_limit_descr), -) -> dict: + rle_seq_limit: int | None = Query(50, description=rle_seq_limit_descr), +) -> TranslateFromService: """Given variation query, return VRS Allele object. 
This endpoint exposes vrs-python AlleleTranslator's translate_from method @@ -273,7 +299,7 @@ def vrs_python_translate_from( KeyError, ValueError, ValidationError, - VrsPythonValidationError, + DataProxyValidationError, ) as e: warnings.append(f"vrs-python translator raised {type(e).__name__}: {e}") except HGVSError as e: @@ -281,7 +307,7 @@ def vrs_python_translate_from( except BioutilsError as e: warnings.append(f"bioutils raised {type(e).__name__}: {e}") else: - vrs_variation = resp.model_dump(exclude_none=True) + vrs_variation = resp return TranslateFromService( query=TranslateFromQuery(variation=variation_query, fmt=fmt), @@ -294,7 +320,7 @@ def vrs_python_translate_from( vrs_python_meta_=VrsPythonMeta( version=pkg_resources.get_distribution("ga4gh.vrs").version ), - ).model_dump(exclude_none=True) + ) g_to_p_summary = ( @@ -312,21 +338,22 @@ def vrs_python_translate_from( @app.get( "/variation/gnomad_vcf_to_protein", summary=g_to_p_summary, + response_model=GnomadVcfToProteinService, + response_model_exclude_none=True, response_description=g_to_p_response_description, description=g_to_p_description, tags=[Tag.TO_PROTEIN_VARIATION], ) async def gnomad_vcf_to_protein( q: str = Query(..., description=q_description), -) -> dict: +) -> GnomadVcfToProteinService: """Return VRS representation for variation on protein coordinate. :param q: gnomad VCF to normalize to protein variation. 
:return: GnomadVcfToProteinService for variation """ q = unquote(q.strip()) - resp = await query_handler.gnomad_vcf_to_protein_handler.gnomad_vcf_to_protein(q) - return resp.model_dump(exclude_none=True) + return await query_handler.gnomad_vcf_to_protein_handler.gnomad_vcf_to_protein(q) hgvs_dup_del_mode_decsr = ( @@ -335,8 +362,8 @@ async def gnomad_vcf_to_protein( def _get_allele( - request_body: Union[TranslateToQuery, TranslateToHGVSQuery], warnings: List -) -> Optional[models.Allele]: + request_body: TranslateToQuery | TranslateToHGVSQuery, warnings: list +) -> models.Allele | None: """Return VRS allele object from request body. `warnings` will get updated if exceptions are raised @@ -357,6 +384,8 @@ def _get_allele( "/variation/translate_to", summary="Given VRS Allele object as a dict, return variation expressed as " "queried format using vrs-python's translator class", + response_model=TranslateToService, + response_model_exclude_none=True, response_description="A response to a validly-formed query.", description="Return variation in queried format representation. " "Request body must contain `variation` and `fmt`. 
`variation` is" @@ -364,7 +393,7 @@ def _get_allele( " `spdi` or `hgvs`", tags=[Tag.VRS_PYTHON], ) -async def vrs_python_translate_to(request_body: TranslateToQuery) -> dict: +async def vrs_python_translate_to(request_body: TranslateToQuery) -> TranslateToService: """Given VRS Allele object as a dict, return variation expressed as queried format using vrs-python's translator class @@ -399,7 +428,7 @@ async def vrs_python_translate_to(request_body: TranslateToQuery) -> dict: vrs_python_meta_=VrsPythonMeta( version=pkg_resources.get_distribution("ga4gh.vrs").version ), - ).model_dump(exclude_none=True) + ) to_hgvs_descr = ( @@ -414,11 +443,13 @@ async def vrs_python_translate_to(request_body: TranslateToQuery) -> dict: @app.post( "/variation/vrs_allele_to_hgvs", summary="Given VRS Allele object as a dict, return HGVS expression(s)", + response_model=TranslateToService, + response_model_exclude_none=True, response_description="A response to a validly-formed query.", description=to_hgvs_descr, tags=[Tag.VRS_PYTHON], ) -async def vrs_python_to_hgvs(request_body: TranslateToHGVSQuery) -> dict: +async def vrs_python_to_hgvs(request_body: TranslateToHGVSQuery) -> TranslateToService: """Given VRS Allele object as a dict, return variation expressed as HGVS expression(s) @@ -438,7 +469,7 @@ async def vrs_python_to_hgvs(request_body: TranslateToHGVSQuery) -> dict: variations = [] if allele: try: - variations = query_handler.vrs_python_tlr._to_hgvs( + variations = query_handler.vrs_python_tlr._to_hgvs( # noqa: SLF001 allele, namespace=request_body.get("namespace") or "refseq" ) except ValueError as e: @@ -455,25 +486,27 @@ async def vrs_python_to_hgvs(request_body: TranslateToHGVSQuery) -> dict: vrs_python_meta_=VrsPythonMeta( version=pkg_resources.get_distribution("ga4gh.vrs").version ), - ).model_dump(exclude_none=True) + ) @app.get( "/variation/hgvs_to_copy_number_count", summary="Given HGVS expression, return VRS Copy Number Count Variation", + 
response_model=HgvsToCopyNumberCountService, + response_model_exclude_none=True, response_description="A response to a validly-formed query.", description="Return VRS Copy Number Count Variation", tags=[Tag.TO_COPY_NUMBER_VARIATION], ) async def hgvs_to_copy_number_count( hgvs_expr: str = Query(..., description="Variation query"), - baseline_copies: Optional[int] = Query( + baseline_copies: int | None = Query( ..., description="Baseline copies for duplication" ), do_liftover: bool = Query( False, description="Whether or not to liftover " "to GRCh38 assembly." ), -) -> dict: +) -> HgvsToCopyNumberCountService: """Given hgvs expression, return copy number count variation :param hgvs_expr: HGVS expression @@ -481,17 +514,18 @@ async def hgvs_to_copy_number_count( :param do_liftover: Whether or not to liftover to GRCh38 assembly :return: HgvsToCopyNumberCountService """ - resp = await query_handler.to_copy_number_handler.hgvs_to_copy_number_count( + return await query_handler.to_copy_number_handler.hgvs_to_copy_number_count( unquote(hgvs_expr.strip()), baseline_copies, do_liftover, ) - return resp.model_dump(exclude_none=True) @app.get( "/variation/hgvs_to_copy_number_change", summary="Given HGVS expression, return VRS Copy Number Change Variation", + response_model=HgvsToCopyNumberChangeService, + response_model_exclude_none=True, response_description="A response to a validly-formed query.", description="Return VRS Copy Number Change Variation", tags=[Tag.TO_COPY_NUMBER_VARIATION], @@ -502,7 +536,7 @@ async def hgvs_to_copy_number_change( do_liftover: bool = Query( False, description="Whether or not to liftover " "to GRCh38 assembly." 
), -) -> dict: +) -> HgvsToCopyNumberChangeService: """Given hgvs expression, return copy number change variation :param hgvs_expr: HGVS expression @@ -510,23 +544,24 @@ async def hgvs_to_copy_number_change( :param do_liftover: Whether or not to liftover to GRCh38 assembly :return: HgvsToCopyNumberChangeService """ - resp = await query_handler.to_copy_number_handler.hgvs_to_copy_number_change( + return await query_handler.to_copy_number_handler.hgvs_to_copy_number_change( unquote(hgvs_expr.strip()), copy_change, do_liftover, ) - return resp.model_dump(exclude_none=True) @app.post( "/variation/parsed_to_cn_var", summary="Given parsed genomic components, return VRS Copy Number Count " "Variation", + response_model=ParsedToCnVarService, + response_model_exclude_none=True, response_description="A response to a validly-formed query.", description="Return VRS Copy Number Count Variation", tags=[Tag.TO_COPY_NUMBER_VARIATION], ) -def parsed_to_cn_var(request_body: ParsedToCnVarQuery) -> dict: +def parsed_to_cn_var(request_body: ParsedToCnVarQuery) -> ParsedToCnVarService: """Given parsed genomic components, return Copy Number Count Variation. 
:param request_body: Request body @@ -534,10 +569,10 @@ def parsed_to_cn_var(request_body: ParsedToCnVarQuery) -> dict: warnings """ try: - resp = query_handler.to_copy_number_handler.parsed_to_copy_number(request_body) + return query_handler.to_copy_number_handler.parsed_to_copy_number(request_body) except Exception: traceback_resp = traceback.format_exc().splitlines() - logger.exception(traceback_resp) + _logger.exception(traceback_resp) return ParsedToCnVarService( copy_number_count=None, @@ -547,19 +582,19 @@ def parsed_to_cn_var(request_body: ParsedToCnVarQuery) -> dict: response_datetime=datetime.datetime.now(tz=datetime.timezone.utc), ), ) - else: - return resp.model_dump(exclude_none=True) @app.post( "/variation/parsed_to_cx_var", summary="Given parsed genomic components, return VRS Copy Number Change " "Variation", + response_model=ParsedToCxVarService, + response_model_exclude_none=True, response_description="A response to a validly-formed query.", description="Return VRS Copy Number Change Variation", tags=[Tag.TO_COPY_NUMBER_VARIATION], ) -def parsed_to_cx_var(request_body: ParsedToCxVarQuery) -> dict: +def parsed_to_cx_var(request_body: ParsedToCxVarQuery) -> ParsedToCxVarService: """Given parsed genomic components, return Copy Number Change Variation :param request_body: Request body @@ -567,10 +602,10 @@ def parsed_to_cx_var(request_body: ParsedToCxVarQuery) -> dict: warnings """ try: - resp = query_handler.to_copy_number_handler.parsed_to_copy_number(request_body) + return query_handler.to_copy_number_handler.parsed_to_copy_number(request_body) except Exception: traceback_resp = traceback.format_exc().splitlines() - logger.exception(traceback_resp) + _logger.exception(traceback_resp) return ParsedToCxVarService( copy_number_count=None, @@ -580,8 +615,6 @@ def parsed_to_cx_var(request_body: ParsedToCxVarQuery) -> dict: response_datetime=datetime.datetime.now(tz=datetime.timezone.utc), ), ) - else: - return resp.model_dump(exclude_none=True) 
amplification_to_cx_var_descr = ( @@ -596,18 +629,18 @@ def parsed_to_cx_var(request_body: ParsedToCxVarQuery) -> dict: @app.get( "/variation/amplification_to_cx_var", summary="Given amplification query, return VRS Copy Number Change Variation", + response_model=AmplificationToCxVarService, + response_model_exclude_none=True, response_description="A response to a validly-formed query.", description=amplification_to_cx_var_descr, tags=[Tag.TO_COPY_NUMBER_VARIATION], ) def amplification_to_cx_var( gene: str = Query(..., description="Gene query"), - sequence_id: Optional[str] = Query(None, description="Sequence identifier"), - start: Optional[int] = Query( - None, description="Start position as residue coordinate" - ), - end: Optional[int] = Query(None, description="End position as residue coordinate"), -) -> dict: + sequence_id: str | None = Query(None, description="Sequence identifier"), + start: int | None = Query(None, description="Start position as residue coordinate"), + end: int | None = Query(None, description="End position as residue coordinate"), +) -> AmplificationToCxVarService: """Given amplification query, return Copy Number Change Variation Parameter priority: 1. sequence, start, end (must provide ALL) @@ -628,7 +661,7 @@ def amplification_to_cx_var( sequence_id=sequence_id, start=start, end=end, - ).model_dump(exclude_none=True) + ) @app.get( @@ -664,7 +697,7 @@ async def p_to_c( p_ac, p_start_pos, p_end_pos, residue_mode ) except Exception as e: - logger.error("Unhandled exception: %s", str(e)) + _logger.error("Unhandled exception: %s", str(e)) w = "Unhandled exception. See logs for more information." 
c_data = None return ToCdnaService( @@ -691,7 +724,7 @@ async def c_to_g( c_ac: str = Query(..., description="cDNA RefSeq accession"), c_start_pos: int = Query(..., description="cDNA start position for codon"), c_end_pos: int = Query(..., description="cDNA end position for codon"), - cds_start: Optional[int] = Query( + cds_start: int | None = Query( None, description="CDS start site. If not provided, this will be computed." ), residue_mode: ResidueMode = Query( @@ -724,7 +757,7 @@ async def c_to_g( target_genome_assembly=target_genome_assembly, ) except Exception as e: - logger.error("Unhandled exception: %s", str(e)) + _logger.error("Unhandled exception: %s", str(e)) w = "Unhandled exception. See logs for more information." g_data = None return ToGenomicService( @@ -778,7 +811,7 @@ async def p_to_g( target_genome_assembly=target_genome_assembly, ) except Exception as e: - logger.error("Unhandled exception: %s", str(e)) + _logger.error("Unhandled exception: %s", str(e)) w = "Unhandled exception. See logs for more information." 
g_data = None return ToGenomicService( diff --git a/src/variation/normalize.py b/src/variation/normalize.py index 699d7f81..b7759984 100644 --- a/src/variation/normalize.py +++ b/src/variation/normalize.py @@ -1,14 +1,16 @@ """Module for Variation Normalization.""" + import datetime -from typing import List, Optional, Tuple from urllib.parse import unquote from cool_seq_tool.handlers import SeqRepoAccess from cool_seq_tool.sources import UtaDatabase from ga4gh.vrs import models +from variation import __version__ from variation.classify import Classify from variation.schemas.app_schemas import Endpoint +from variation.schemas.classification_response_schema import ClassificationType from variation.schemas.normalize_response_schema import ( HGVSDupDelModeOption, NormalizeService, @@ -20,12 +22,12 @@ TranslationResult, VrsSeqLocAcStatus, ) +from variation.schemas.validation_response_schema import ValidationSummary from variation.to_vrs import ToVRS from variation.tokenize import Tokenize from variation.translate import Translate -from variation.utils import update_warnings_for_no_resp +from variation.utils import get_vrs_loc_seq, update_warnings_for_no_resp from variation.validate import Validate -from variation.version import __version__ class Normalize(ToVRS): @@ -60,8 +62,8 @@ def __init__( @staticmethod def _get_priority_translation_result( - translations: List[TranslationResult], ac_status: VrsSeqLocAcStatus - ) -> Optional[TranslationResult]: + translations: list[TranslationResult], ac_status: VrsSeqLocAcStatus + ) -> TranslationResult | None: """Get prioritized translation result. Tries to find translation results with the same `vrs_seq_loc_ac_status` as `ac_status`. 
If more than one translation result is found, will try to find translation result where `og_ac` (original @@ -108,10 +110,10 @@ def _get_priority_translation_result( @staticmethod def get_hgvs_dup_del_mode( - tokens: List[Token], - hgvs_dup_del_mode: Optional[HGVSDupDelModeOption] = None, - baseline_copies: Optional[int] = None, - ) -> Tuple[Optional[HGVSDupDelModeOption], Optional[str]]: + tokens: list[Token], + hgvs_dup_del_mode: HGVSDupDelModeOption | None = None, + baseline_copies: int | None = None, + ) -> tuple[HGVSDupDelModeOption | None, str | None]: """Get option to use for hgvs dup del mode :param tokens: List of tokens found in an input query @@ -138,14 +140,46 @@ def get_hgvs_dup_del_mode( return hgvs_dup_del_mode, warning + def _get_location_seq( + self, + validation_summary: ValidationSummary, + variation: dict, + priority_translation_result: TranslationResult, + ) -> str | None: + """Get reference sequence for a Sequence Location + + Does not support: + - Ambiguous genomic deletions or duplications + - Amplifications + - Variations that are not Allele or Copy Number + + :param validation_summary: Validation summary for classification containing + valid and invalid results + :param variation: VRS Variation object + :param priority_translation_result: Prioritized translation result + :return: Reference sequence for a sequence location if found + """ + valid_result = validation_summary.valid_results[0] + classification_type = valid_result.classification.classification_type + if classification_type not in { + ClassificationType.GENOMIC_DELETION_AMBIGUOUS, + ClassificationType.GENOMIC_DUPLICATION_AMBIGUOUS, + ClassificationType.AMPLIFICATION, + } and variation["type"] in {"Allele", "CopyNumberChange", "CopyNumberCount"}: + return get_vrs_loc_seq( + self.seqrepo_access, + priority_translation_result.vrs_seq_loc_ac, + variation["location"]["start"], + variation["location"]["end"], + ) + return None + async def normalize( self, q: str, - hgvs_dup_del_mode: 
Optional[ - HGVSDupDelModeOption - ] = HGVSDupDelModeOption.DEFAULT, - baseline_copies: Optional[int] = None, - copy_change: Optional[models.CopyChange] = None, + hgvs_dup_del_mode: HGVSDupDelModeOption | None = HGVSDupDelModeOption.DEFAULT, + baseline_copies: int | None = None, + copy_change: models.CopyChange | None = None, ) -> NormalizeService: """Normalize a given variation. @@ -233,26 +267,11 @@ async def normalize( try: variation = translation_result.vrs_variation except AttributeError as e: - # vrs_ref_allele_seq = None warnings.append(str(e)) else: - pass - # valid_result = validation_summary.valid_results[0] - # classification_type = valid_result.classification.classification_type - # if classification_type not in { - # ClassificationType.GENOMIC_DELETION_AMBIGUOUS, - # ClassificationType.GENOMIC_DUPLICATION_AMBIGUOUS, - # ClassificationType.AMPLIFICATION, - # }: - # variation_type = variation["type"] - # if variation_type in { - # "Allele", "CopyNumberChange", "CopyNumberCount" - # }: - # vrs_ref_allele_seq = self.get_ref_allele_seq( - # variation["location"], translation_result.vrs_seq_loc_ac - # ) - # else: - # vrs_ref_allele_seq = None + variation["location"]["sequence"] = self._get_location_seq( + validation_summary, variation, translation_result + ) if not variation: update_warnings_for_no_resp(label, warnings) diff --git a/src/variation/query.py b/src/variation/query.py index c0c68a3e..26e995c3 100644 --- a/src/variation/query.py +++ b/src/variation/query.py @@ -1,5 +1,4 @@ """Module for providing methods for handling queries.""" -from typing import Optional from cool_seq_tool.app import CoolSeqTool from ga4gh.vrs.extras.translator import AlleleTranslator as VrsPythonTranslator @@ -23,7 +22,7 @@ class QueryHandler: def __init__( self, - gene_query_handler: Optional[GeneQueryHandler] = None, + gene_query_handler: GeneQueryHandler | None = None, ) -> None: """Initialize QueryHandler instance. 
:param gene_query_handler: Gene normalizer query handler instance. If this is @@ -45,8 +44,13 @@ def __init__( mane_transcript = cool_seq_tool.mane_transcript transcript_mappings = cool_seq_tool.transcript_mappings self.vrs_python_tlr = VrsPythonTranslator(data_proxy=self.seqrepo_access) + liftover = cool_seq_tool.liftover validator = Validate( - self.seqrepo_access, transcript_mappings, uta_db, gene_query_handler + self.seqrepo_access, + transcript_mappings, + uta_db, + gene_query_handler, + liftover, ) hgvs_dup_del_mode = HGVSDupDelMode(self.seqrepo_access) translator = Translate( @@ -69,5 +73,5 @@ def __init__( *[*to_vrs_params, mane_transcript, gene_query_handler] ) self.to_copy_number_handler = ToCopyNumberVariation( - *[*to_vrs_params, gene_query_handler, uta_db] + *[*to_vrs_params, gene_query_handler, uta_db, liftover] ) diff --git a/src/variation/regex.py b/src/variation/regex.py index 182e98c7..1d112712 100644 --- a/src/variation/regex.py +++ b/src/variation/regex.py @@ -1,6 +1,7 @@ """Module containing regex patterns""" + import re -from typing import Any, List, Tuple +from typing import Any from variation.schemas.app_schemas import AmbiguousRegexType from variation.schemas.classification_response_schema import ClassificationType @@ -90,7 +91,7 @@ # classification type # Note: Order matters for regexprs -PROTEIN_REGEXPRS: List[Tuple[Any, TokenType, ClassificationType]] = [ +PROTEIN_REGEXPRS: list[tuple[Any, TokenType, ClassificationType]] = [ (PROTEIN_DELINS, TokenType.PROTEIN_DELINS, ClassificationType.PROTEIN_DELINS), (PROTEIN_DELETION, TokenType.PROTEIN_DELETION, ClassificationType.PROTEIN_DELETION), ( @@ -111,7 +112,7 @@ ] # Note: Order matters for regexprs -CDNA_REGEXPRS: List[Tuple[Any, TokenType, ClassificationType]] = [ +CDNA_REGEXPRS: list[tuple[Any, TokenType, ClassificationType]] = [ (CDNA_GENOMIC_DELINS, TokenType.CDNA_DELINS, ClassificationType.CDNA_DELINS), (CNDA_GENOMIC_DELETION, TokenType.CDNA_DELETION, ClassificationType.CDNA_DELETION), ( 
@@ -132,7 +133,7 @@ ] # Note: Order matters for regexprs -GENOMIC_REGEXPRS: List[Tuple[Any, TokenType, ClassificationType]] = [ +GENOMIC_REGEXPRS: list[tuple[Any, TokenType, ClassificationType]] = [ (CDNA_GENOMIC_DELINS, TokenType.GENOMIC_DELINS, ClassificationType.GENOMIC_DELINS), ( CNDA_GENOMIC_DELETION, @@ -163,8 +164,8 @@ # Note: Order matters for regexprs -GENOMIC_DUP_AMBIGUOUS_REGEXPRS: List[ - Tuple[Any, TokenType, ClassificationType, AmbiguousRegexType] +GENOMIC_DUP_AMBIGUOUS_REGEXPRS: list[ + tuple[Any, TokenType, ClassificationType, AmbiguousRegexType] ] = [ ( GENOMIC_DUPLICATION_AMBIGUOUS_1, @@ -188,8 +189,8 @@ # Note: Order matters for regexprs -GENOMIC_DEL_AMBIGUOUS_REGEXPRS: List[ - Tuple[Any, TokenType, ClassificationType, AmbiguousRegexType] +GENOMIC_DEL_AMBIGUOUS_REGEXPRS: list[ + tuple[Any, TokenType, ClassificationType, AmbiguousRegexType] ] = [ ( GENOMIC_DELETION_AMBIGUOUS_1, diff --git a/src/variation/schemas/__init__.py b/src/variation/schemas/__init__.py index e6bfc5d6..40a7591e 100644 --- a/src/variation/schemas/__init__.py +++ b/src/variation/schemas/__init__.py @@ -1,4 +1,5 @@ """Package level import.""" + from .normalize_response_schema import NormalizeService, ServiceMeta from .to_vrs_response_schema import ToVRSService diff --git a/src/variation/schemas/app_schemas.py b/src/variation/schemas/app_schemas.py index 17f9d09b..15c71b29 100644 --- a/src/variation/schemas/app_schemas.py +++ b/src/variation/schemas/app_schemas.py @@ -1,4 +1,5 @@ """Module for schemas used throughout the app""" + from enum import Enum, IntEnum diff --git a/src/variation/schemas/classification_response_schema.py b/src/variation/schemas/classification_response_schema.py index 9b66fd9f..475661b9 100644 --- a/src/variation/schemas/classification_response_schema.py +++ b/src/variation/schemas/classification_response_schema.py @@ -1,6 +1,7 @@ """Module for Classification schema.""" + from enum import Enum -from typing import List, Literal, Optional +from typing import 
Literal from pydantic import BaseModel, StrictStr @@ -58,146 +59,146 @@ class Classification(BaseModel): """Classification for a list of tokens.""" classification_type: ClassificationType - matching_tokens: List[Token] = [] + matching_tokens: list[Token] = [] nomenclature: Nomenclature - gene_token: Optional[GeneToken] = None - ac: Optional[StrictStr] = None + gene_token: GeneToken | None = None + ac: StrictStr | None = None class ProteinSubstitutionClassification(Classification, Substitution): """Define protein substitution classification""" - classification_type: Literal[ + classification_type: Literal[ClassificationType.PROTEIN_SUBSTITUTION] = ( ClassificationType.PROTEIN_SUBSTITUTION - ] = ClassificationType.PROTEIN_SUBSTITUTION + ) class GenomicSubstitutionClassification(Classification, Substitution): """Define genomic substitution classification""" - classification_type: Literal[ + classification_type: Literal[ClassificationType.GENOMIC_SUBSTITUTION] = ( ClassificationType.GENOMIC_SUBSTITUTION - ] = ClassificationType.GENOMIC_SUBSTITUTION + ) class CdnaSubstitutionClassification(Classification, Substitution): """Define cdna substitution classification""" - classification_type: Literal[ + classification_type: Literal[ClassificationType.CDNA_SUBSTITUTION] = ( ClassificationType.CDNA_SUBSTITUTION - ] = ClassificationType.CDNA_SUBSTITUTION + ) class ProteinStopGainClassification(Classification, StopGain): """Define protein stop gain classification""" - classification_type: Literal[ + classification_type: Literal[ClassificationType.PROTEIN_STOP_GAIN] = ( ClassificationType.PROTEIN_STOP_GAIN - ] = ClassificationType.PROTEIN_STOP_GAIN + ) class ProteinReferenceAgreeClassification(Classification, ProteinReferenceAgree): """Define protein reference agree classification""" - classification_type: Literal[ + classification_type: Literal[ClassificationType.PROTEIN_REFERENCE_AGREE] = ( ClassificationType.PROTEIN_REFERENCE_AGREE - ] = 
ClassificationType.PROTEIN_REFERENCE_AGREE + ) class CdnaReferenceAgreeClassification(Classification, ReferenceAgree): """Define cdna reference agree classification""" - classification_type: Literal[ + classification_type: Literal[ClassificationType.CDNA_REFERENCE_AGREE] = ( ClassificationType.CDNA_REFERENCE_AGREE - ] = ClassificationType.CDNA_REFERENCE_AGREE + ) class GenomicReferenceAgreeClassification(Classification, ReferenceAgree): """Define genomic reference agree classification""" - classification_type: Literal[ + classification_type: Literal[ClassificationType.GENOMIC_REFERENCE_AGREE] = ( ClassificationType.GENOMIC_REFERENCE_AGREE - ] = ClassificationType.GENOMIC_REFERENCE_AGREE + ) class ProteinInsertionClassification(Classification, ProteinInsertion): """Define protein insertion classification""" - classification_type: Literal[ + classification_type: Literal[ClassificationType.PROTEIN_INSERTION] = ( ClassificationType.PROTEIN_INSERTION - ] = ClassificationType.PROTEIN_INSERTION + ) class CdnaInsertionClassification(Classification, Insertion): """Define cdna insertion classification""" - classification_type: Literal[ + classification_type: Literal[ClassificationType.CDNA_INSERTION] = ( ClassificationType.CDNA_INSERTION - ] = ClassificationType.CDNA_INSERTION + ) class GenomicInsertionClassification(Classification, Insertion): """Define genomic insertion classification""" - classification_type: Literal[ + classification_type: Literal[ClassificationType.GENOMIC_INSERTION] = ( ClassificationType.GENOMIC_INSERTION - ] = ClassificationType.GENOMIC_INSERTION + ) class ProteinDeletionClassification(Classification, ProteinDeletion): """Define protein deletion classification""" - classification_type: Literal[ + classification_type: Literal[ClassificationType.PROTEIN_DELETION] = ( ClassificationType.PROTEIN_DELETION - ] = ClassificationType.PROTEIN_DELETION + ) class GenomicDeletionClassification(Classification, Deletion): """Define genomic deletion 
classification""" - classification_type: Literal[ + classification_type: Literal[ClassificationType.GENOMIC_DELETION] = ( ClassificationType.GENOMIC_DELETION - ] = ClassificationType.GENOMIC_DELETION + ) class CdnaDeletionClassification(Classification, Deletion): """Define cdna classification""" - classification_type: Literal[ + classification_type: Literal[ClassificationType.CDNA_DELETION] = ( ClassificationType.CDNA_DELETION - ] = ClassificationType.CDNA_DELETION + ) class ProteinDelInsClassification(Classification, ProteinDelIns): """Define protein delins classification""" - classification_type: Literal[ + classification_type: Literal[ClassificationType.PROTEIN_DELINS] = ( ClassificationType.PROTEIN_DELINS - ] = ClassificationType.PROTEIN_DELINS + ) class CdnaDelInsClassification(Classification, DelIns): """Define cdna delins classification""" - classification_type: Literal[ + classification_type: Literal[ClassificationType.CDNA_DELINS] = ( ClassificationType.CDNA_DELINS - ] = ClassificationType.CDNA_DELINS + ) class GenomicDelInsClassification(Classification, DelIns): """Define genomic delins classification""" - classification_type: Literal[ + classification_type: Literal[ClassificationType.GENOMIC_DELINS] = ( ClassificationType.GENOMIC_DELINS - ] = ClassificationType.GENOMIC_DELINS + ) class GenomicDuplicationClassification(Classification, Duplication): """Define genomic duplication classification""" - classification_type: Literal[ + classification_type: Literal[ClassificationType.GENOMIC_DUPLICATION] = ( ClassificationType.GENOMIC_DUPLICATION - ] = ClassificationType.GENOMIC_DUPLICATION + ) class AmbiguousType(str, Enum): @@ -215,24 +216,24 @@ class AmbiguousType(str, Enum): class GenomicDuplicationAmbiguousClassification(Classification, DupDelAmbiguous): """Define genomic duplication ambiguous classification""" - classification_type: Literal[ + classification_type: Literal[ClassificationType.GENOMIC_DUPLICATION_AMBIGUOUS] = ( 
ClassificationType.GENOMIC_DUPLICATION_AMBIGUOUS - ] = ClassificationType.GENOMIC_DUPLICATION_AMBIGUOUS + ) ambiguous_type: AmbiguousType class GenomicDeletionAmbiguousClassification(Classification, DupDelAmbiguous): """Define genomic deletion ambiguous classification""" - classification_type: Literal[ + classification_type: Literal[ClassificationType.GENOMIC_DELETION_AMBIGUOUS] = ( ClassificationType.GENOMIC_DELETION_AMBIGUOUS - ] = ClassificationType.GENOMIC_DELETION_AMBIGUOUS + ) ambiguous_type: AmbiguousType class AmplificationClassification(Classification): """Define amplification classification""" - classification_type: Literal[ + classification_type: Literal[ClassificationType.AMPLIFICATION] = ( ClassificationType.AMPLIFICATION - ] = ClassificationType.AMPLIFICATION + ) diff --git a/src/variation/schemas/copy_number_schema.py b/src/variation/schemas/copy_number_schema.py index cf53717f..0af71e15 100644 --- a/src/variation/schemas/copy_number_schema.py +++ b/src/variation/schemas/copy_number_schema.py @@ -1,7 +1,7 @@ """Module containing schemas for services""" + import re from enum import Enum -from typing import Dict, Optional from ga4gh.vrs import models from pydantic import ( @@ -14,8 +14,8 @@ model_validator, ) +from variation import __version__ from variation.schemas.normalize_response_schema import ServiceResponse -from variation.version import __version__ class ParsedPosType(str, Enum): @@ -44,7 +44,7 @@ class ClinVarAssembly(str, Enum): HG18 = "hg18" -def validate_parsed_fields(cls, v: Dict) -> Dict: +def validate_parsed_fields(cls, v: dict) -> dict: # noqa: ARG001 """Validate base copy number query fields - `accession` or both `assembly` and `chromosome` must be provided - `start1` is required when `start_pos_type` is a definite @@ -99,20 +99,20 @@ def validate_parsed_fields(cls, v: Dict) -> Dict: class ParsedToCopyNumberQuery(BaseModel): """Define base model for parsed to copy number queries""" - assembly: Optional[ClinVarAssembly] = Field( + 
assembly: ClinVarAssembly | None = Field( default=None, description=( "Assembly. Ignored, along with `chromosome`, if `accession` is " "provided." ), ) - chromosome: Optional[StrictStr] = Field( + chromosome: StrictStr | None = Field( default=None, description=( "Chromosome. Must contain `chr` prefix, i.e. 'chr7'. Must provide " "when `assembly` is provided." ), ) - accession: Optional[StrictStr] = Field( + accession: StrictStr | None = Field( default=None, description=( "Genomic RefSeq accession. If `accession` is provided, will " @@ -132,7 +132,7 @@ class ParsedToCopyNumberQuery(BaseModel): "range, this will be the min end position." ), ) - start_pos_comparator: Optional[Comparator] = Field( + start_pos_comparator: Comparator | None = Field( default=None, description=( "Must provide when `start_pos_type` is an Indefinite Range. " @@ -140,7 +140,7 @@ class ParsedToCopyNumberQuery(BaseModel): "(#_?), set to '<='. To represent (?_#), set to '>='." ), ) - end_pos_comparator: Optional[Comparator] = Field( + end_pos_comparator: Comparator | None = Field( default=None, description=( "Must provide when `end_pos_type` is an Indefinite Range. " @@ -156,14 +156,14 @@ class ParsedToCopyNumberQuery(BaseModel): default=ParsedPosType.NUMBER, description="Type of the end value in the VRS SequenceLocation", ) - start1: Optional[StrictInt] = Field( + start1: StrictInt | None = Field( default=None, description=( "Only provided when `start_pos_type` is a Definite Range, this " "will be the max start position." ), ) - end1: Optional[StrictInt] = Field( + end1: StrictInt | None = Field( default=None, description=( "Only provided when `end_pos_type` is a Definite Range, this " @@ -185,7 +185,7 @@ class ParsedToCnVarQuery(ParsedToCopyNumberQuery): "is an Definite Range, this will be the `min` copies." ), ) - copies1: Optional[StrictInt] = Field( + copies1: StrictInt | None = Field( default=None, description=( "Must provide when `copies_type` is a Definite Range. 
This will " @@ -196,7 +196,7 @@ class ParsedToCnVarQuery(ParsedToCopyNumberQuery): default=ParsedPosType.NUMBER, description="Type for the `copies` in the `location`", ) - copies_comparator: Optional[Comparator] = Field( + copies_comparator: Comparator | None = Field( default=None, description=( "Must provide when `copies_type` is an Indefinite Range. " @@ -205,7 +205,7 @@ class ParsedToCnVarQuery(ParsedToCopyNumberQuery): ) @model_validator(mode="after") - def validate_fields(cls, v: Dict) -> Dict: + def validate_fields(cls, v: dict) -> dict: """Validate fields. - `copies1` should exist when `copies_type == ParsedPosType.DEFINITE_RANGE` @@ -251,7 +251,7 @@ def validate_fields(cls, v: Dict) -> Dict: class ParsedToCnVarService(ServiceResponse): """A response for translating parsed components to Copy Number Count""" - copy_number_count: Optional[models.CopyNumberCount] = None + copy_number_count: models.CopyNumberCount | None = None model_config = ConfigDict( json_schema_extra={ @@ -290,7 +290,7 @@ class ParsedToCxVarQuery(ParsedToCopyNumberQuery): copy_change: models.CopyChange @model_validator(mode="after") - def validate_fields(cls, v: Dict) -> Dict: + def validate_fields(cls, v: dict) -> dict: """Validate fields""" validate_parsed_fields(cls, v) return v @@ -317,7 +317,7 @@ def validate_fields(cls, v: Dict) -> Dict: class ParsedToCxVarService(ServiceResponse): """A response for translating parsed components to Copy Number Change""" - copy_number_change: Optional[models.CopyNumberChange] = None + copy_number_change: models.CopyNumberChange | None = None model_config = ConfigDict( json_schema_extra={ @@ -354,18 +354,18 @@ class AmplificationToCxVarQuery(BaseModel): """Define query for amplification to copy number change variation endpoint""" gene: str - sequence_id: Optional[str] = None - start: Optional[int] = None - end: Optional[int] = None - sequence_location: Optional[models.SequenceLocation] = None + sequence_id: str | None = None + start: int | None = None + 
end: int | None = None + sequence_location: models.SequenceLocation | None = None class AmplificationToCxVarService(ServiceResponse): """A response for translating Amplification queries to Copy Number Change""" - query: Optional[AmplificationToCxVarQuery] = None - amplification_label: Optional[str] - copy_number_change: Optional[models.CopyNumberChange] + query: AmplificationToCxVarQuery | None = None + amplification_label: str | None + copy_number_change: models.CopyNumberChange | None model_config = ConfigDict( json_schema_extra={ diff --git a/src/variation/schemas/gnomad_vcf_to_protein_schema.py b/src/variation/schemas/gnomad_vcf_to_protein_schema.py index 2eb7dc6f..435d4e98 100644 --- a/src/variation/schemas/gnomad_vcf_to_protein_schema.py +++ b/src/variation/schemas/gnomad_vcf_to_protein_schema.py @@ -1,8 +1,6 @@ """Module for gnomad vcf to protein response schema""" -from typing import Optional -from ga4gh.core import core_models -from pydantic import StrictStr +from ga4gh.core import domain_models from variation.schemas.normalize_response_schema import NormalizeService @@ -10,5 +8,4 @@ class GnomadVcfToProteinService(NormalizeService): """Define response for gnomad vcf to protein service""" - gene_context: Optional[core_models.Gene] = None - vrs_ref_allele_seq: Optional[StrictStr] = None + gene_context: domain_models.Gene | None = None diff --git a/src/variation/schemas/hgvs_to_copy_number_schema.py b/src/variation/schemas/hgvs_to_copy_number_schema.py index 9ea9f60d..b1dad176 100644 --- a/src/variation/schemas/hgvs_to_copy_number_schema.py +++ b/src/variation/schemas/hgvs_to_copy_number_schema.py @@ -1,18 +1,17 @@ """Module containing schemas used in HGVS To Copy Number endpoints""" -from typing import Optional from ga4gh.vrs import models from pydantic import ConfigDict, StrictStr +from variation import __version__ from variation.schemas.normalize_response_schema import ServiceResponse -from variation.version import __version__ class 
HgvsToCopyNumberCountService(ServiceResponse): """A response for translating HGVS to copy number count.""" hgvs_expr: StrictStr - copy_number_count: Optional[models.CopyNumberCount] = None + copy_number_count: models.CopyNumberCount | None = None model_config = ConfigDict( json_schema_extra={ @@ -50,7 +49,7 @@ class HgvsToCopyNumberChangeService(ServiceResponse): """A response for translating HGVS to copy number change.""" hgvs_expr: StrictStr - copy_number_change: Optional[models.CopyNumberChange] = None + copy_number_change: models.CopyNumberChange | None = None model_config = ConfigDict( json_schema_extra={ diff --git a/src/variation/schemas/normalize_response_schema.py b/src/variation/schemas/normalize_response_schema.py index 3bdec411..76e1a958 100644 --- a/src/variation/schemas/normalize_response_schema.py +++ b/src/variation/schemas/normalize_response_schema.py @@ -1,12 +1,13 @@ """Module for normalize endpoint response schema.""" + from datetime import datetime from enum import Enum -from typing import List, Literal, Optional, Union +from typing import Literal from ga4gh.vrs import models from pydantic import BaseModel, ConfigDict, StrictStr, model_validator -from variation.version import __version__ +from variation import __version__ class HGVSDupDelModeOption(str, Enum): @@ -26,9 +27,9 @@ class ServiceMeta(BaseModel): name: Literal["variation-normalizer"] = "variation-normalizer" version: StrictStr response_datetime: datetime - url: Literal[ + url: Literal["https://github.com/cancervariants/variation-normalization"] = ( "https://github.com/cancervariants/variation-normalization" - ] = "https://github.com/cancervariants/variation-normalization" + ) model_config = ConfigDict( json_schema_extra={ @@ -45,7 +46,7 @@ class ServiceMeta(BaseModel): class ServiceResponse(BaseModel): """Base response model for services""" - warnings: List[StrictStr] = [] + warnings: list[StrictStr] = [] service_meta_: ServiceMeta @model_validator(mode="after") @@ -73,9 +74,9 @@ 
class NormalizeService(ServiceResponse): """A response to normalizing a variation to a single GA4GH VRS Variation""" variation_query: StrictStr - variation: Optional[ - Union[models.Allele, models.CopyNumberCount, models.CopyNumberChange] - ] = None + variation: ( + models.Allele | models.CopyNumberCount | models.CopyNumberChange | None + ) = None model_config = ConfigDict( json_schema_extra={ @@ -113,7 +114,7 @@ class TranslateIdentifierService(ServiceResponse): """A response to translating identifiers.""" identifier_query: StrictStr - aliases: List[StrictStr] = [] + aliases: list[StrictStr] = [] model_config = ConfigDict( json_schema_extra={ diff --git a/src/variation/schemas/service_schema.py b/src/variation/schemas/service_schema.py index acc1466d..87526131 100644 --- a/src/variation/schemas/service_schema.py +++ b/src/variation/schemas/service_schema.py @@ -1,12 +1,13 @@ """Module containing schemas for services""" + from enum import Enum from cool_seq_tool.schemas import ToCdnaService as ToCdna from cool_seq_tool.schemas import ToGenomicService as ToGenomic from pydantic import ConfigDict +from variation import __version__ from variation.schemas.normalize_response_schema import ServiceMeta -from variation.version import __version__ class ClinVarAssembly(str, Enum): diff --git a/src/variation/schemas/to_vrs_response_schema.py b/src/variation/schemas/to_vrs_response_schema.py index f5dcafd3..a52e138f 100644 --- a/src/variation/schemas/to_vrs_response_schema.py +++ b/src/variation/schemas/to_vrs_response_schema.py @@ -1,23 +1,22 @@ """Module for to_vrs endpoint response schema.""" -from typing import List, Union from ga4gh.vrs import models from pydantic import BaseModel, ConfigDict, StrictStr +from variation import __version__ from variation.schemas.normalize_response_schema import ServiceMeta -from variation.version import __version__ class ToVRSService(BaseModel): """Define model for translation response.""" search_term: StrictStr - warnings: List[StrictStr] 
= [] - variations: Union[ - List[models.Allele], - List[models.CopyNumberCount], - List[models.CopyNumberChange], - ] = [] + warnings: list[StrictStr] = [] + variations: ( + list[models.Allele] + | list[models.CopyNumberCount] + | list[models.CopyNumberChange] + ) = [] service_meta_: ServiceMeta model_config = ConfigDict( diff --git a/src/variation/schemas/token_response_schema.py b/src/variation/schemas/token_response_schema.py index e7b84db6..10b4ce70 100644 --- a/src/variation/schemas/token_response_schema.py +++ b/src/variation/schemas/token_response_schema.py @@ -1,9 +1,10 @@ """Module for schemas related to tokenization.""" + from enum import Enum -from typing import Literal, Optional +from typing import Literal from cool_seq_tool.schemas import AnnotationLayer -from ga4gh.core import core_models +from ga4gh.core import domain_models from pydantic import BaseModel, StrictInt, StrictStr from variation.schemas.app_schemas import AmbiguousRegexType @@ -131,9 +132,9 @@ class ProteinStopGainToken(Token, StopGain): class ProteinReferenceAgreeToken(Token, ProteinReferenceAgree): """Token for reference agree on protein reference sequence""" - token_type: Literal[ + token_type: Literal[TokenType.PROTEIN_REFERENCE_AGREE] = ( TokenType.PROTEIN_REFERENCE_AGREE - ] = TokenType.PROTEIN_REFERENCE_AGREE + ) coordinate_type: Literal[AnnotationLayer.PROTEIN] = AnnotationLayer.PROTEIN @@ -148,9 +149,9 @@ class GenomicReferenceAgreeToken(Token, ReferenceAgree): """Token for reference agree on genomic reference sequence""" coordinate_type: Literal[AnnotationLayer.GENOMIC] = AnnotationLayer.GENOMIC - token_type: Literal[ + token_type: Literal[TokenType.GENOMIC_REFERENCE_AGREE] = ( TokenType.GENOMIC_REFERENCE_AGREE - ] = TokenType.GENOMIC_REFERENCE_AGREE + ) class ProteinDeletionToken(Token, ProteinDeletion): @@ -177,9 +178,9 @@ class GenomicDeletionToken(Token, Deletion): class GenomicDeletionAmbiguousToken(Token, DupDelAmbiguous): """Token for ambiguous deletion on genomic 
reference sequence""" - token_type: Literal[ + token_type: Literal[TokenType.GENOMIC_DELETION_AMBIGUOUS] = ( TokenType.GENOMIC_DELETION_AMBIGUOUS - ] = TokenType.GENOMIC_DELETION_AMBIGUOUS + ) coordinate_type: Literal[AnnotationLayer.GENOMIC] = AnnotationLayer.GENOMIC ambiguous_regex_type: AmbiguousRegexType @@ -236,9 +237,9 @@ class GenomicDuplicationToken(Token, Duplication): class GenomicDuplicationAmbiguousToken(Token, DupDelAmbiguous): """Ambiguous duplication on genomic reference sequence""" - token_type: Literal[ + token_type: Literal[TokenType.GENOMIC_DUPLICATION_AMBIGUOUS] = ( TokenType.GENOMIC_DUPLICATION_AMBIGUOUS - ] = TokenType.GENOMIC_DUPLICATION_AMBIGUOUS + ) coordinate_type: Literal[AnnotationLayer.GENOMIC] = AnnotationLayer.GENOMIC ambiguous_regex_type: AmbiguousRegexType @@ -254,4 +255,4 @@ class GeneToken(Token): matched_value: StrictStr token_type: Literal[TokenType.GENE] = TokenType.GENE - gene: Optional[core_models.Gene] = None + gene: domain_models.Gene | None = None diff --git a/src/variation/schemas/translation_response_schema.py b/src/variation/schemas/translation_response_schema.py index 73ab363f..426bcc3c 100644 --- a/src/variation/schemas/translation_response_schema.py +++ b/src/variation/schemas/translation_response_schema.py @@ -1,6 +1,6 @@ """Module for Translation Response Schema.""" + from enum import Enum -from typing import Dict, Optional from cool_seq_tool.schemas import TranscriptPriority from pydantic import BaseModel, StrictStr @@ -27,8 +27,8 @@ class VrsSeqLocAcStatus(str, Enum): class TranslationResult(BaseModel): """Translation Result""" - vrs_variation: Optional[Dict] = {} - vrs_seq_loc_ac: Optional[StrictStr] = None + vrs_variation: dict | None = {} + vrs_seq_loc_ac: StrictStr | None = None vrs_seq_loc_ac_status: VrsSeqLocAcStatus = VrsSeqLocAcStatus.NA - og_ac: Optional[StrictStr] = None + og_ac: StrictStr | None = None validation_result: ValidationResult diff --git a/src/variation/schemas/validation_response_schema.py 
b/src/variation/schemas/validation_response_schema.py index 00e8eb2e..150129ee 100644 --- a/src/variation/schemas/validation_response_schema.py +++ b/src/variation/schemas/validation_response_schema.py @@ -1,5 +1,4 @@ """Module for Validation Response Schema.""" -from typing import List, Optional from pydantic import BaseModel, StrictBool, StrictInt, StrictStr @@ -9,16 +8,16 @@ class ValidationResult(BaseModel): """Validation Results for a given input""" - accession: Optional[StrictStr] = None - cds_start: Optional[StrictInt] = None # This is only for cDNA + accession: StrictStr | None = None + cds_start: StrictInt | None = None # This is only for cDNA classification: Classification is_valid: StrictBool - errors: List[StrictStr] = [] + errors: list[StrictStr] = [] class ValidationSummary(BaseModel): """Give Valid and Invalid Results for a given input.""" - valid_results: List[ValidationResult] = [] - invalid_results: List[ValidationResult] = [] - warnings: List[StrictStr] = [] + valid_results: list[ValidationResult] = [] + invalid_results: list[ValidationResult] = [] + warnings: list[StrictStr] = [] diff --git a/src/variation/schemas/variation_schema.py b/src/variation/schemas/variation_schema.py index b384df72..d12f0b9b 100644 --- a/src/variation/schemas/variation_schema.py +++ b/src/variation/schemas/variation_schema.py @@ -1,5 +1,6 @@ """Define supported variation types""" -from typing import Literal, Optional, Union + +from typing import Literal from pydantic import BaseModel, StrictInt, StrictStr @@ -22,15 +23,15 @@ class Deletion(BaseModel): """Define model for deletion variation""" pos0: StrictInt - pos1: Optional[StrictInt] = None - deleted_sequence: Optional[StrictStr] = None + pos1: StrictInt | None = None + deleted_sequence: StrictStr | None = None class ProteinDeletion(Deletion): """Define model for protein deletion""" aa0: StrictStr - aa1: Optional[StrictStr] = None + aa1: StrictStr | None = None class Insertion(BaseModel): @@ -64,7 +65,7 @@ class 
DelIns(BaseModel): """Define model for delins variation""" pos0: StrictInt - pos1: Optional[StrictInt] = None + pos1: StrictInt | None = None inserted_sequence: StrictStr @@ -72,20 +73,20 @@ class ProteinDelIns(DelIns): """Define model for protein delins variation""" aa0: StrictStr - aa1: Optional[StrictStr] = None + aa1: StrictStr | None = None class Duplication(BaseModel): """Define model for duplication variation""" pos0: StrictInt - pos1: Optional[StrictInt] = None + pos1: StrictInt | None = None class DupDelAmbiguous(BaseModel): """Define model for duplication/deletion ambiguous variation""" - pos0: Union[StrictInt, Literal["?"]] - pos1: Optional[Union[StrictInt, Literal["?"]]] = None - pos2: Union[StrictInt, Literal["?"]] - pos3: Optional[Union[StrictInt, Literal["?"]]] = None + pos0: StrictInt | Literal["?"] + pos1: StrictInt | Literal["?"] | None = None + pos2: StrictInt | Literal["?"] + pos3: StrictInt | Literal["?"] | None = None diff --git a/src/variation/schemas/vrs_python_translator_schema.py b/src/variation/schemas/vrs_python_translator_schema.py index 844f4b6c..85fbbb5d 100644 --- a/src/variation/schemas/vrs_python_translator_schema.py +++ b/src/variation/schemas/vrs_python_translator_schema.py @@ -1,6 +1,7 @@ """Module for vrs-python translator endpoint response schema""" + from enum import Enum -from typing import List, Literal, Optional, Union +from typing import Literal from ga4gh.vrs import models from pydantic import BaseModel, ConfigDict, StrictStr @@ -13,9 +14,9 @@ class VrsPythonMeta(BaseModel): name: Literal["vrs-python"] = "vrs-python" version: StrictStr - url: Literal[ + url: Literal["https://github.com/ga4gh/vrs-python"] = ( "https://github.com/ga4gh/vrs-python" - ] = "https://github.com/ga4gh/vrs-python" + ) class TranslateFromFormat(str, Enum): @@ -70,7 +71,7 @@ class TranslateToHGVSQuery(BaseModel): """Query fields for Translate To HGVS Service""" variation: models.Allele - namespace: Optional[str] = None + namespace: str | None = 
None model_config = ConfigDict( json_schema_extra={ @@ -102,14 +103,14 @@ class TranslateFromQuery(BaseModel): """Query fields for Translate From Service""" variation: StrictStr - fmt: Optional[TranslateFromFormat] = None + fmt: TranslateFromFormat | None = None class TranslateService(BaseModel): """Response schema for vrs-python translator endpoints""" - query: Union[TranslateFromQuery, TranslateToQuery, TranslateToHGVSQuery] - warnings: List[StrictStr] = [] + query: TranslateFromQuery | TranslateToQuery | TranslateToHGVSQuery + warnings: list[StrictStr] = [] service_meta_: ServiceMeta vrs_python_meta_: VrsPythonMeta @@ -117,10 +118,10 @@ class TranslateService(BaseModel): class TranslateFromService(TranslateService): """Response schema for vrs-python translate from endpoint""" - variation: Optional[models.Allele] = None + variation: models.Allele | None = None class TranslateToService(TranslateService): """Response schema for vrs-python translate to endpoint""" - variations: List[StrictStr] + variations: list[StrictStr] diff --git a/src/variation/to_copy_number_variation.py b/src/variation/to_copy_number_variation.py index 60745779..8eaba668 100644 --- a/src/variation/to_copy_number_variation.py +++ b/src/variation/to_copy_number_variation.py @@ -1,9 +1,12 @@ """Module for to copy number variation translation""" + import datetime -from typing import Dict, List, NamedTuple, Optional, Tuple, Union +from typing import NamedTuple from urllib.parse import unquote from cool_seq_tool.handlers import SeqRepoAccess +from cool_seq_tool.mappers import LiftOver +from cool_seq_tool.schemas import Assembly from cool_seq_tool.sources import UtaDatabase from ga4gh.core import ga4gh_identify from ga4gh.vrs import models @@ -11,6 +14,7 @@ from gene.schemas import MatchType as GeneMatchType from pydantic import ValidationError +from variation import __version__ from variation.classify import Classify from variation.schemas.app_schemas import Endpoint from 
variation.schemas.classification_response_schema import ClassificationType @@ -38,9 +42,8 @@ from variation.to_vrs import ToVRS from variation.tokenize import Tokenize from variation.translate import Translate -from variation.utils import get_priority_sequence_location +from variation.utils import get_priority_sequence_location, get_vrs_loc_seq from variation.validate import Validate -from variation.version import __version__ VALID_CLASSIFICATION_TYPES = [ ClassificationType.GENOMIC_DUPLICATION, @@ -81,6 +84,7 @@ def __init__( translator: Translate, gene_normalizer: GeneQueryHandler, uta: UtaDatabase, + liftover: LiftOver, ) -> None: """Initialize theToCopyNumberVariation class @@ -91,12 +95,14 @@ def __init__( :param translator: Instance for translating valid results to VRS representations :param gene_normalizer: Client for normalizing gene concepts :param uta: Access to UTA queries + :param liftover: Instance to provide mapping between human genome assemblies """ super().__init__(seqrepo_access, tokenizer, classifier, validator, translator) self.gene_normalizer = gene_normalizer self.uta = uta + self.liftover = liftover - async def _get_valid_results(self, q: str) -> Tuple[List[ValidationResult], List]: + async def _get_valid_results(self, q: str) -> tuple[list[ValidationResult], list]: """Get valid results for to copy number variation endpoint :param q: Input query string @@ -146,13 +152,11 @@ async def _hgvs_to_cnv_resp( self, copy_number_type: HGVSDupDelModeOption, do_liftover: bool, - valid_results: Tuple[List[ValidationResult], List[str]], - warnings: List[str], - baseline_copies: Optional[int] = None, - copy_change: Optional[models.CopyChange] = None, - ) -> Tuple[ - Optional[Union[models.CopyNumberCount, models.CopyNumberChange]], List[str] - ]: + valid_results: tuple[list[ValidationResult], list[str]], + warnings: list[str], + baseline_copies: int | None = None, + copy_change: models.CopyChange | None = None, + ) -> tuple[models.CopyNumberCount | 
models.CopyNumberChange | None, list[str]]: """Return copy number variation and warnings response :param copy_number_type: The type of copy number variation. Must be either @@ -180,7 +184,14 @@ async def _hgvs_to_cnv_resp( do_liftover=do_liftover, ) if translations: - variation = translations[0].vrs_variation + translation_result = translations[0] + variation = translation_result.vrs_variation + variation["location"]["sequence"] = get_vrs_loc_seq( + self.seqrepo_access, + translation_result.vrs_seq_loc_ac, + variation["location"]["start"], + variation["location"]["end"], + ) if variation: if copy_number_type == HGVSDupDelModeOption.COPY_NUMBER_COUNT: @@ -225,7 +236,7 @@ async def hgvs_to_copy_number_count( async def hgvs_to_copy_number_change( self, hgvs_expr: str, - copy_change: Optional[models.CopyChange], + copy_change: models.CopyChange | None, do_liftover: bool = False, ) -> HgvsToCopyNumberChangeService: """Given hgvs, return copy number change variation @@ -372,9 +383,9 @@ def _get_vrs_loc_start_or_end( pos0: int, pos_type: ParsedPosType, is_start: bool = True, - pos1: Optional[int] = None, - comparator: Optional[Comparator] = None, - ) -> Union[int, models.Range]: + pos1: int | None = None, + comparator: Comparator | None = None, + ) -> int | models.Range: """Get VRS Sequence Location start and end values :param accession: Genomic accession for sequence @@ -415,12 +426,12 @@ def _get_parsed_seq_loc( start_pos_type: ParsedPosType, end0: int, end_pos_type: ParsedPosType, - start1: Optional[int] = None, - end1: Optional[int] = None, + start1: int | None = None, + end1: int | None = None, liftover_pos: bool = False, - start_pos_comparator: Optional[Comparator] = None, - end_pos_comparator: Optional[Comparator] = None, - ) -> Tuple[Optional[Dict], Optional[str]]: + start_pos_comparator: Comparator | None = None, + end_pos_comparator: Comparator | None = None, + ) -> tuple[dict | None, str | None]: """Get sequence location for parsed components. 
Accession will be validated. :param accession: Genomic accession for sequence @@ -489,6 +500,9 @@ def _get_parsed_seq_loc( sequenceReference=models.SequenceReference(refgetAccession=sequence), start=start_vrs, end=end_vrs, + sequence=get_vrs_loc_seq( + self.seqrepo_access, accession, start_vrs, end_vrs + ), ) seq_loc.id = ga4gh_identify(seq_loc) @@ -499,9 +513,9 @@ def _liftover_pos( chromosome: str, start0: int, end0: int, - start1: Optional[int], - end1: Optional[int], - ) -> Dict: + start1: int | None, + end1: int | None, + ) -> dict: """Liftover GRCh37 positions to GRCh38 positions :param chromosome: Chromosome. Must be contain 'chr' prefix, i.e 'chr7'. @@ -526,20 +540,18 @@ def _liftover_pos( ("end1", end1), ]: if pos is not None: - liftover = self.uta.liftover_37_to_38.convert_coordinate( - chromosome, pos - ) + liftover = self.liftover.get_liftover(chromosome, pos, Assembly.GRCH38) if not liftover: msg = f"Unable to liftover: {chromosome} with pos {pos}" raise ToCopyNumberError(msg) - liftover_pos[k] = liftover[0][1] + liftover_pos[k] = liftover[1] return liftover_pos def parsed_to_copy_number( - self, request_body: Union[ParsedToCnVarQuery, ParsedToCxVarQuery] - ) -> Union[ParsedToCnVarService, ParsedToCxVarService]: + self, request_body: ParsedToCnVarQuery | ParsedToCxVarQuery + ) -> ParsedToCnVarService | ParsedToCxVarService: """Given parsed genomic components, return Copy Number Count or Copy Number Change Variation @@ -629,9 +641,9 @@ def parsed_to_copy_number( def amplification_to_cx_var( self, gene: str, - sequence_id: Optional[str] = None, - start: Optional[int] = None, - end: Optional[int] = None, + sequence_id: str | None = None, + start: int | None = None, + end: int | None = None, ) -> AmplificationToCxVarService: """Return Copy Number Change Variation for Amplification query Parameter priority: diff --git a/src/variation/to_vrs.py b/src/variation/to_vrs.py index eca7b347..bfed644b 100644 --- a/src/variation/to_vrs.py +++ 
b/src/variation/to_vrs.py @@ -1,11 +1,12 @@ """Module for to_vrs endpoint.""" + import datetime -from typing import List, Optional, Tuple from urllib.parse import unquote from cool_seq_tool.handlers import SeqRepoAccess from ga4gh.vrs import models +from variation import __version__ from variation.classify import Classify from variation.schemas.app_schemas import Endpoint from variation.schemas.normalize_response_schema import ( @@ -17,8 +18,8 @@ from variation.schemas.validation_response_schema import ValidationResult from variation.tokenize import Tokenize from variation.translate import Translate +from variation.utils import get_vrs_loc_seq from variation.validate import Validate -from variation.version import __version__ from variation.vrs_representation import VRSRepresentation @@ -49,14 +50,14 @@ def __init__( async def get_translations( self, - valid_results: List[ValidationResult], - warnings: List, - endpoint_name: Optional[Endpoint] = None, + valid_results: list[ValidationResult], + warnings: list, + endpoint_name: Endpoint | None = None, hgvs_dup_del_mode: HGVSDupDelModeOption = HGVSDupDelModeOption.DEFAULT, - baseline_copies: Optional[int] = None, - copy_change: Optional[models.CopyChange] = None, + baseline_copies: int | None = None, + copy_change: models.CopyChange | None = None, do_liftover: bool = False, - ) -> Tuple[List[TranslationResult], List[str]]: + ) -> tuple[list[TranslationResult], list[str]]: """Get translation results :param valid_results: List of valid results for a given input @@ -88,6 +89,31 @@ async def get_translations( return translations, warnings + def _get_vrs_variations(self, translations: list[TranslationResult]) -> list[dict]: + """Get translated VRS Variations. 
+ + This method will also add ``sequence`` to the variation's location + + :param translations: List of translation results + :return: List of unique VRS Variations + """ + variations = [] + _added_variation_ids = set() + + # Ensure only unique VRS variations are in the list of variations returned + for tr in translations: + if tr.vrs_variation["id"] not in _added_variation_ids: + vrs_variation = tr.vrs_variation + vrs_variation["location"]["sequence"] = get_vrs_loc_seq( + self.seqrepo_access, + tr.vrs_seq_loc_ac, + vrs_variation["location"]["start"], + vrs_variation["location"]["end"], + ) + variations.append(vrs_variation) + _added_variation_ids.add(vrs_variation["id"]) + return variations + async def to_vrs(self, q: str) -> ToVRSService: """Return a VRS-like representation of all validated variations for a query. @@ -134,15 +160,6 @@ async def to_vrs(self, q: str) -> ToVRSService: translations = [] warnings = validation_summary.warnings - if not translations: - variations = [] - else: - variations = [] - # Ensure only unique VRS variations are in the list of variations returned - for tr in translations: - if tr.vrs_variation not in variations: - variations.append(tr.vrs_variation) - params["warnings"] = warnings - params["variations"] = variations + params["variations"] = self._get_vrs_variations(translations) return ToVRSService(**params) diff --git a/src/variation/tokenize.py b/src/variation/tokenize.py index 26d2f494..c3ad87ab 100644 --- a/src/variation/tokenize.py +++ b/src/variation/tokenize.py @@ -1,5 +1,4 @@ """A module for tokenization.""" -from typing import List from variation.schemas.token_response_schema import Token, TokenType from variation.tokenizers import ( @@ -34,7 +33,7 @@ class Tokenize: def __init__(self, gene_symbol: GeneSymbol) -> None: """Initialize the tokenize class.""" self.gene_symbol = gene_symbol - self.tokenizers: List[Tokenizer] = [ + self.tokenizers: list[Tokenizer] = [ HGVS(), GnomadVCF(), self.gene_symbol, @@ -62,7 +61,7 @@ def 
__init__(self, gene_symbol: GeneSymbol) -> None: GenomicDuplication(), ] - def perform(self, search_string: str, warnings: List[str]) -> List[Token]: + def perform(self, search_string: str, warnings: list[str]) -> list[Token]: """Return a list of tokens for a given search string :param search_string: The input string to search on @@ -71,7 +70,7 @@ def perform(self, search_string: str, warnings: List[str]) -> List[Token]: """ terms = search_string.split() - tokens: List[Token] = [] + tokens: list[Token] = [] for term in terms: if not term: continue @@ -80,7 +79,7 @@ def perform(self, search_string: str, warnings: List[str]) -> List[Token]: for tokenizer in self.tokenizers: res = tokenizer.match(term) if res: - if isinstance(res, List): + if isinstance(res, list): for r in res: tokens.append(r) if not matched: diff --git a/src/variation/tokenizers/__init__.py b/src/variation/tokenizers/__init__.py index 992e698c..7962d8d3 100644 --- a/src/variation/tokenizers/__init__.py +++ b/src/variation/tokenizers/__init__.py @@ -1,4 +1,5 @@ """Module to load and init namespace at package level.""" + from .cdna_and_genomic_reference_agree import CdnaGenomicReferenceAgree from .cdna_deletion import CdnaDeletion from .cdna_delins import CdnaDelIns diff --git a/src/variation/tokenizers/cdna_and_genomic_reference_agree.py b/src/variation/tokenizers/cdna_and_genomic_reference_agree.py index ab69d65c..9eaa42d9 100644 --- a/src/variation/tokenizers/cdna_and_genomic_reference_agree.py +++ b/src/variation/tokenizers/cdna_and_genomic_reference_agree.py @@ -1,5 +1,4 @@ """A module for Reference Agree Tokenization on cDNA and genomic reference sequence.""" -from typing import Optional, Union from cool_seq_tool.schemas import AnnotationLayer @@ -16,7 +15,7 @@ class CdnaGenomicReferenceAgree(Tokenizer): def match( self, input_string: str - ) -> Optional[Union[CdnaReferenceAgreeToken, GenomicReferenceAgreeToken]]: + ) -> CdnaReferenceAgreeToken | GenomicReferenceAgreeToken | None: """Return a 
CdnaReferenceAgreeToken or GenomicReferenceAgreeToken match if one exists. diff --git a/src/variation/tokenizers/cdna_deletion.py b/src/variation/tokenizers/cdna_deletion.py index 9b949201..dfd13461 100644 --- a/src/variation/tokenizers/cdna_deletion.py +++ b/src/variation/tokenizers/cdna_deletion.py @@ -1,5 +1,4 @@ """A module for Cdna Deletion Tokenization.""" -from typing import Optional from cool_seq_tool.schemas import AnnotationLayer @@ -11,7 +10,7 @@ class CdnaDeletion(Tokenizer): """Class for tokenizing Deletion at the cdna reference sequence.""" - def match(self, input_string: str) -> Optional[CdnaDeletionToken]: + def match(self, input_string: str) -> CdnaDeletionToken | None: """Return a CdnaDeletionToken match if one exists. :param input_string: The input string to match diff --git a/src/variation/tokenizers/cdna_delins.py b/src/variation/tokenizers/cdna_delins.py index beda5786..1b618761 100644 --- a/src/variation/tokenizers/cdna_delins.py +++ b/src/variation/tokenizers/cdna_delins.py @@ -1,5 +1,4 @@ """A module for Cdna Deletion Tokenization.""" -from typing import Optional from cool_seq_tool.schemas import AnnotationLayer @@ -11,7 +10,7 @@ class CdnaDelIns(Tokenizer): """Class for tokenizing delins at the cdna reference sequence.""" - def match(self, input_string: str) -> Optional[CdnaDelInsToken]: + def match(self, input_string: str) -> CdnaDelInsToken | None: """Return a CdnaDelInsToken match if one exists. 
:param input_string: The input string to match diff --git a/src/variation/tokenizers/cdna_insertion.py b/src/variation/tokenizers/cdna_insertion.py index 2a0d5417..3ddb18d6 100644 --- a/src/variation/tokenizers/cdna_insertion.py +++ b/src/variation/tokenizers/cdna_insertion.py @@ -1,5 +1,4 @@ """A module for Cdna Insertion Tokenization.""" -from typing import Optional from cool_seq_tool.schemas import AnnotationLayer @@ -11,7 +10,7 @@ class CdnaInsertion(Tokenizer): """Class for tokenizing Insertion at the cdna reference sequence.""" - def match(self, input_string: str) -> Optional[CdnaInsertionToken]: + def match(self, input_string: str) -> CdnaInsertionToken | None: """Return a CdnaInsertionToken match if one exists. :param input_string: The input string to match diff --git a/src/variation/tokenizers/cdna_substitution.py b/src/variation/tokenizers/cdna_substitution.py index 59f36158..7ed1f26a 100644 --- a/src/variation/tokenizers/cdna_substitution.py +++ b/src/variation/tokenizers/cdna_substitution.py @@ -1,5 +1,4 @@ """A module for Cdna Substitution Tokenization.""" -from typing import Optional from cool_seq_tool.schemas import AnnotationLayer @@ -11,7 +10,7 @@ class CdnaSubstitution(Tokenizer): """Class for tokenizing Substitution at the cdna reference sequence.""" - def match(self, input_string: str) -> Optional[CdnaSubstitutionToken]: + def match(self, input_string: str) -> CdnaSubstitutionToken | None: """Return a CdnaSubstitutionToken match if one exists. 
:param input_string: The input string to match diff --git a/src/variation/tokenizers/free_text_categorical.py b/src/variation/tokenizers/free_text_categorical.py index 7edec067..a190253d 100644 --- a/src/variation/tokenizers/free_text_categorical.py +++ b/src/variation/tokenizers/free_text_categorical.py @@ -1,5 +1,4 @@ """A module for free text categorical variation tokenization""" -from typing import Optional from variation.schemas.token_response_schema import AmplificationToken from variation.tokenizers.tokenizer import Tokenizer @@ -8,7 +7,7 @@ class FreeTextCategorical(Tokenizer): """The Free Text Categorical tokenizer class""" - def match(self, input_string: str) -> Optional[AmplificationToken]: + def match(self, input_string: str) -> AmplificationToken | None: """Return tokens that match the input string. Only supports amplification for now diff --git a/src/variation/tokenizers/gene_symbol.py b/src/variation/tokenizers/gene_symbol.py index 04e98c02..df07af4c 100644 --- a/src/variation/tokenizers/gene_symbol.py +++ b/src/variation/tokenizers/gene_symbol.py @@ -1,5 +1,4 @@ """Module for Gene Symbol tokenization.""" -from typing import Optional from gene.query import QueryHandler as GeneQueryHandler @@ -17,7 +16,7 @@ def __init__(self, gene_normalizer: GeneQueryHandler) -> None: """ self.gene_normalizer = gene_normalizer - def match(self, input_string: str) -> Optional[GeneToken]: + def match(self, input_string: str) -> GeneToken | None: """Return tokens that are genes :param input_string: Input string diff --git a/src/variation/tokenizers/genomic_deletion.py b/src/variation/tokenizers/genomic_deletion.py index 8d9c1630..a15a66e4 100644 --- a/src/variation/tokenizers/genomic_deletion.py +++ b/src/variation/tokenizers/genomic_deletion.py @@ -1,5 +1,4 @@ """A module for Genomic Deletion Tokenization.""" -from typing import Optional from cool_seq_tool.schemas import AnnotationLayer @@ -20,7 +19,7 @@ class GenomicDeletion(Tokenizer): """Class for tokenizing 
Deletion at the genomic reference sequence.""" - def match(self, input_string: str) -> Optional[GenomicDeletionToken]: + def match(self, input_string: str) -> GenomicDeletionToken | None: """Return a GenomicDeletionToken match if one exists. :param input_string: The input string to match diff --git a/src/variation/tokenizers/genomic_delins.py b/src/variation/tokenizers/genomic_delins.py index 888d3145..a3f94676 100644 --- a/src/variation/tokenizers/genomic_delins.py +++ b/src/variation/tokenizers/genomic_delins.py @@ -1,5 +1,4 @@ """A module for Genomic DelIns Tokenization.""" -from typing import Optional from cool_seq_tool.schemas import AnnotationLayer @@ -13,7 +12,7 @@ class GenomicDelIns(Tokenizer): genomic reference sequence. """ - def match(self, input_string: str) -> Optional[GenomicDelInsToken]: + def match(self, input_string: str) -> GenomicDelInsToken | None: """Return a GenomicDelInsToken match if one exists. :param input_string: The input string to match diff --git a/src/variation/tokenizers/genomic_duplication.py b/src/variation/tokenizers/genomic_duplication.py index a20e1f6d..4079604a 100644 --- a/src/variation/tokenizers/genomic_duplication.py +++ b/src/variation/tokenizers/genomic_duplication.py @@ -1,5 +1,4 @@ """A module for Genomic Duplication Tokenization.""" -from typing import Optional from cool_seq_tool.schemas import AnnotationLayer @@ -20,7 +19,7 @@ class GenomicDuplication(Tokenizer): """Class for tokenizing duplications on the genomic coordinate.""" - def match(self, input_string: str) -> Optional[GenomicDuplicationToken]: + def match(self, input_string: str) -> GenomicDuplicationToken | None: """Return a GenomicDelInsToken match if one exists. 
:param input_string: The input string to match diff --git a/src/variation/tokenizers/genomic_insertion.py b/src/variation/tokenizers/genomic_insertion.py index 3319fd50..44094076 100644 --- a/src/variation/tokenizers/genomic_insertion.py +++ b/src/variation/tokenizers/genomic_insertion.py @@ -1,5 +1,4 @@ """A module for Genomic Insertion Tokenization.""" -from typing import Optional from cool_seq_tool.schemas import AnnotationLayer @@ -13,7 +12,7 @@ class GenomicInsertion(Tokenizer): """Class for tokenizing Insertion at the genomic reference sequence.""" - def match(self, input_string: str) -> Optional[GenomicInsertionToken]: + def match(self, input_string: str) -> GenomicInsertionToken | None: """Return a GenomicInsertionToken match if one exists. :param input_string: The input string to match diff --git a/src/variation/tokenizers/genomic_substitution.py b/src/variation/tokenizers/genomic_substitution.py index 3cafed25..42bf2058 100644 --- a/src/variation/tokenizers/genomic_substitution.py +++ b/src/variation/tokenizers/genomic_substitution.py @@ -1,5 +1,4 @@ """A module for Genomic Substitution Tokenization.""" -from typing import Optional from cool_seq_tool.schemas import AnnotationLayer @@ -15,7 +14,7 @@ class GenomicSubstitution(Tokenizer): reference sequence. """ - def match(self, input_string: str) -> Optional[GenomicSubstitutionToken]: + def match(self, input_string: str) -> GenomicSubstitutionToken | None: """Return a GenomicSubstitutionToken match if one exists. 
:param input_string: The input string to match diff --git a/src/variation/tokenizers/gnomad_vcf.py b/src/variation/tokenizers/gnomad_vcf.py index 6cf309b0..6fc228ac 100644 --- a/src/variation/tokenizers/gnomad_vcf.py +++ b/src/variation/tokenizers/gnomad_vcf.py @@ -1,6 +1,6 @@ """A module for gnomad VCF tokenization""" + import re -from typing import Optional from variation.schemas.token_response_schema import GnomadVcfToken from variation.tokenizers.tokenizer import Tokenizer @@ -15,7 +15,7 @@ class GnomadVCF(Tokenizer): re.IGNORECASE, ) - def match(self, input_string: str) -> Optional[GnomadVcfToken]: + def match(self, input_string: str) -> GnomadVcfToken | None: """Return a GnomadVCFToken if a match exists. :param input_string: The input string to match diff --git a/src/variation/tokenizers/hgvs.py b/src/variation/tokenizers/hgvs.py index c98e33a9..66f89829 100644 --- a/src/variation/tokenizers/hgvs.py +++ b/src/variation/tokenizers/hgvs.py @@ -1,6 +1,6 @@ """Module for HGVS tokenization.""" + import re -from typing import Optional from cool_seq_tool.schemas import AnnotationLayer @@ -15,7 +15,7 @@ class HGVS(Tokenizer): r"^(?P(NC_|NM_|NP_|ENSP|ENST)[^:\s]+):(?P[cgnpr])\.(?P\S+)$" ) - def match(self, input_string: str) -> Optional[HgvsToken]: + def match(self, input_string: str) -> HgvsToken | None: """Return HGVS token matches from input string. 
:param input_string: The input string to match diff --git a/src/variation/tokenizers/protein_deletion.py b/src/variation/tokenizers/protein_deletion.py index eb459a8b..bfb42fba 100644 --- a/src/variation/tokenizers/protein_deletion.py +++ b/src/variation/tokenizers/protein_deletion.py @@ -1,6 +1,6 @@ """A module for tokenizing Protein Deletions.""" + import re -from typing import Optional from bioutils.sequences import aa1_to_aa3, aa3_to_aa1 @@ -18,7 +18,7 @@ class ProteinDeletion(Tokenizer): ) splitter = re.compile(pattern) - def match(self, input_string: str) -> Optional[ProteinDeletionToken]: + def match(self, input_string: str) -> ProteinDeletionToken | None: """Return a ProteinDeletionToken match if one exists. :param input_string: The input string to match diff --git a/src/variation/tokenizers/protein_delins.py b/src/variation/tokenizers/protein_delins.py index 5bfe086e..15aebfee 100644 --- a/src/variation/tokenizers/protein_delins.py +++ b/src/variation/tokenizers/protein_delins.py @@ -1,5 +1,4 @@ """A module for Protein DelIns Tokenization Class.""" -from typing import Optional from bioutils.sequences import aa1_to_aa3, aa3_to_aa1 @@ -11,7 +10,7 @@ class ProteinDelIns(Tokenizer): """Class for tokenizing DelIns on the protein reference sequence.""" - def match(self, input_string: str) -> Optional[ProteinDelInsToken]: + def match(self, input_string: str) -> ProteinDelInsToken | None: """Return a ProteinDelInsToken match if one exists. 
:param input_string: The input string to match diff --git a/src/variation/tokenizers/protein_insertion.py b/src/variation/tokenizers/protein_insertion.py index 6b482eeb..3dada17b 100644 --- a/src/variation/tokenizers/protein_insertion.py +++ b/src/variation/tokenizers/protein_insertion.py @@ -1,5 +1,4 @@ """A module for Protein Insertion Tokenization Class.""" -from typing import Optional from bioutils.sequences import aa1_to_aa3, aa3_to_aa1 @@ -11,7 +10,7 @@ class ProteinInsertion(Tokenizer): """Class for tokenizing Insertions on the protein reference sequence.""" - def match(self, input_string: str) -> Optional[ProteinInsertionToken]: + def match(self, input_string: str) -> ProteinInsertionToken | None: """Return token that match the input string.""" og_input_string = input_string diff --git a/src/variation/tokenizers/protein_reference_agree.py b/src/variation/tokenizers/protein_reference_agree.py index 89b74e75..ca29decc 100644 --- a/src/variation/tokenizers/protein_reference_agree.py +++ b/src/variation/tokenizers/protein_reference_agree.py @@ -1,6 +1,6 @@ """A module for Reference Agree Tokenization.""" + import contextlib -from typing import Optional from bioutils.sequences import aa1_to_aa3, aa3_to_aa1 @@ -12,7 +12,7 @@ class ProteinReferenceAgree(Tokenizer): """Class for tokenizing Reference Agree on protein reference sequence.""" - def match(self, input_string: str) -> Optional[ProteinReferenceAgreeToken]: + def match(self, input_string: str) -> ProteinReferenceAgreeToken | None: """Return a ProteinReferenceAgreeToken match if one exists. 
:param str input_string: The input string to match diff --git a/src/variation/tokenizers/protein_substitution.py b/src/variation/tokenizers/protein_substitution.py index dcc0fccd..6f230681 100644 --- a/src/variation/tokenizers/protein_substitution.py +++ b/src/variation/tokenizers/protein_substitution.py @@ -1,5 +1,4 @@ """A module for Protein Substitution Tokenization.""" -from typing import Optional, Union from bioutils.sequences import aa1_to_aa3, aa3_to_aa1 @@ -16,7 +15,7 @@ class ProteinSubstitution(Tokenizer): def match( self, input_string: str - ) -> Optional[Union[ProteinSubstitutionToken, ProteinStopGainToken]]: + ) -> ProteinSubstitutionToken | ProteinStopGainToken | None: """Return a ProteinSubstitutionToken or ProteinStopGainToken match if one exists. diff --git a/src/variation/tokenizers/tokenizer.py b/src/variation/tokenizers/tokenizer.py index 3175d4d2..cfb443d1 100644 --- a/src/variation/tokenizers/tokenizer.py +++ b/src/variation/tokenizers/tokenizer.py @@ -1,6 +1,7 @@ """Module for Tokenization.""" + from abc import ABC, abstractmethod -from typing import ClassVar, Dict, Optional, Tuple +from typing import ClassVar from cool_seq_tool.schemas import AnnotationLayer @@ -10,12 +11,12 @@ class Tokenizer(ABC): """The tokenizer class.""" - coord_types: ClassVar[Dict[str, str]] = { + coord_types: ClassVar[dict[str, str]] = { k: v.value for k, v in AnnotationLayer.__members__.items() } @abstractmethod - def match(self, input_string: str) -> Optional[Token]: + def match(self, input_string: str) -> Token | None: """Return tokens that match the input string. 
:param input_string: Input string @@ -24,8 +25,8 @@ def match(self, input_string: str) -> Optional[Token]: raise NotImplementedError def strip_coord_prefix( - self, input_string: str, match_coord_type: Optional[AnnotationLayer] = None - ) -> Tuple[Optional[AnnotationLayer], Optional[str]]: + self, input_string: str, match_coord_type: AnnotationLayer | None = None + ) -> tuple[AnnotationLayer | None, str | None]: """Strip parentheses and coordinate type from string :param input_string: Input string @@ -41,7 +42,7 @@ def strip_coord_prefix( def _strip( coord_type: str, string: str, - match_coord_type: Optional[AnnotationLayer] = None, + match_coord_type: AnnotationLayer | None = None, ) -> str: """Strip parentheses and coordinate type from string diff --git a/src/variation/translate.py b/src/variation/translate.py index 789d6f37..7d13cbd5 100644 --- a/src/variation/translate.py +++ b/src/variation/translate.py @@ -1,5 +1,4 @@ """Module for translation.""" -from typing import List, Optional from cool_seq_tool.handlers import SeqRepoAccess from cool_seq_tool.mappers import ManeTranscript @@ -60,7 +59,7 @@ def __init__( """ params = [seqrepo_access, mane_transcript, uta, vrs, hgvs_dup_del_mode] - self.translators: List[Translator] = [ + self.translators: list[Translator] = [ ProteinSubstitution(*params), CdnaSubstitution(*params), GenomicSubstitution(*params), @@ -86,13 +85,13 @@ def __init__( async def perform( self, validation_result: ValidationResult, # this is always valid - warnings: List[str], - endpoint_name: Optional[Endpoint] = None, + warnings: list[str], + endpoint_name: Endpoint | None = None, hgvs_dup_del_mode: HGVSDupDelModeOption = HGVSDupDelModeOption.DEFAULT, - baseline_copies: Optional[int] = None, - copy_change: Optional[models.CopyChange] = None, + baseline_copies: int | None = None, + copy_change: models.CopyChange | None = None, do_liftover: bool = False, - ) -> Optional[TranslationResult]: + ) -> TranslationResult | None: """Translate validation 
result to VRS representation :param validation_result: Validation result for a classification diff --git a/src/variation/translators/__init__.py b/src/variation/translators/__init__.py index 703976cc..00dd1c78 100644 --- a/src/variation/translators/__init__.py +++ b/src/variation/translators/__init__.py @@ -1,4 +1,5 @@ """Translator package import.""" + from .amplification import Amplification from .cdna_deletion import CdnaDeletion from .cdna_delins import CdnaDelIns diff --git a/src/variation/translators/ambiguous_translator_base.py b/src/variation/translators/ambiguous_translator_base.py index f287f5c2..7a058431 100644 --- a/src/variation/translators/ambiguous_translator_base.py +++ b/src/variation/translators/ambiguous_translator_base.py @@ -1,5 +1,6 @@ """Module for translating genomic ambiguous deletions and duplications""" -from typing import Dict, List, Literal, NamedTuple, Optional, Union + +from typing import Literal, NamedTuple from ga4gh.vrs import models from pydantic import StrictInt, StrictStr, ValidationError @@ -24,10 +25,10 @@ class AmbiguousData(NamedTuple): """Represents Ambiguous data""" ac: StrictStr - pos0: Union[StrictInt, Literal["?"]] - pos1: Optional[Union[StrictInt, Literal["?"]]] - pos2: Union[StrictInt, Literal["?"]] - pos3: Optional[Union[StrictInt, Literal["?"]]] + pos0: StrictInt | Literal["?"] + pos1: StrictInt | Literal["?"] | None + pos2: StrictInt | Literal["?"] + pos3: StrictInt | Literal["?"] | None class AmbiguousTranslator(Translator): @@ -37,13 +38,11 @@ class AmbiguousTranslator(Translator): async def get_grch38_data_ambiguous( self, - classification: Union[ - GenomicDeletionAmbiguousClassification, - GenomicDuplicationAmbiguousClassification, - ], - errors: List[str], + classification: GenomicDeletionAmbiguousClassification + | GenomicDuplicationAmbiguousClassification, + errors: list[str], ac: str, - ) -> Optional[AmbiguousData]: + ) -> AmbiguousData | None: """Get GRCh38 data for genomic ambiguous duplication or 
deletion classification :param classification: Classification to get translation for @@ -111,12 +110,12 @@ def get_dup_del_ambiguous_seq_loc( self, ambiguous_type: AmbiguousType, ac: str, - pos0: Union[int, Literal["?"]], - pos1: Optional[Union[int, Literal["?"]]], - pos2: Union[int, Literal["?"]], - pos3: Optional[Union[int, Literal["?"]]], - warnings: List[str], - ) -> Dict: + pos0: int | Literal["?"], + pos1: int | Literal["?"] | None, + pos2: int | Literal["?"], + pos3: int | Literal["?"] | None, + warnings: list[str], + ) -> dict: """Get VRS Sequence Location :param ambiguous_type: Type of ambiguous expression used @@ -154,13 +153,13 @@ def get_dup_del_ambiguous_seq_loc( async def translate( self, validation_result: ValidationResult, - warnings: List[str], - endpoint_name: Optional[Endpoint] = None, + warnings: list[str], + endpoint_name: Endpoint | None = None, hgvs_dup_del_mode: HGVSDupDelModeOption = HGVSDupDelModeOption.DEFAULT, - baseline_copies: Optional[int] = None, - copy_change: Optional[models.CopyChange] = None, + baseline_copies: int | None = None, + copy_change: models.CopyChange | None = None, do_liftover: bool = False, - ) -> Optional[TranslationResult]: + ) -> TranslationResult | None: """Translate validation result to VRS representation :param validation_result: Validation result for a classification diff --git a/src/variation/translators/amplification.py b/src/variation/translators/amplification.py index cbd4034a..765d79af 100644 --- a/src/variation/translators/amplification.py +++ b/src/variation/translators/amplification.py @@ -1,5 +1,4 @@ """Module for Amplification Translation.""" -from typing import List, Optional from ga4gh.core import ga4gh_identify from ga4gh.vrs import models @@ -28,13 +27,13 @@ def can_translate(self, classification_type: ClassificationType) -> bool: async def translate( self, validation_result: ValidationResult, - warnings: List[str], - endpoint_name: Optional[Endpoint] = None, + warnings: list[str], + 
endpoint_name: Endpoint | None = None, hgvs_dup_del_mode: HGVSDupDelModeOption = HGVSDupDelModeOption.DEFAULT, - baseline_copies: Optional[int] = None, - copy_change: Optional[models.CopyChange] = None, + baseline_copies: int | None = None, + copy_change: models.CopyChange | None = None, do_liftover: bool = False, - ) -> Optional[TranslationResult]: + ) -> TranslationResult | None: """Translate validation result to VRS representation :param validation_result: Validation result for a classification diff --git a/src/variation/translators/cdna_deletion.py b/src/variation/translators/cdna_deletion.py index 96fdb5a9..c7da6eee 100644 --- a/src/variation/translators/cdna_deletion.py +++ b/src/variation/translators/cdna_deletion.py @@ -1,5 +1,4 @@ """Module for cDNA Deletion Translation.""" -from typing import List, Optional from cool_seq_tool.schemas import AnnotationLayer from ga4gh.vrs import models @@ -31,13 +30,13 @@ def can_translate(self, classification_type: ClassificationType) -> bool: async def translate( self, validation_result: ValidationResult, - warnings: List[str], - endpoint_name: Optional[Endpoint] = None, + warnings: list[str], + endpoint_name: Endpoint | None = None, hgvs_dup_del_mode: HGVSDupDelModeOption = HGVSDupDelModeOption.DEFAULT, - baseline_copies: Optional[int] = None, - copy_change: Optional[models.CopyChange] = None, + baseline_copies: int | None = None, + copy_change: models.CopyChange | None = None, do_liftover: bool = False, - ) -> Optional[TranslationResult]: + ) -> TranslationResult | None: """Translate validation result to VRS representation :param validation_result: Validation result for a classification diff --git a/src/variation/translators/cdna_delins.py b/src/variation/translators/cdna_delins.py index 1498b93c..0659b01b 100644 --- a/src/variation/translators/cdna_delins.py +++ b/src/variation/translators/cdna_delins.py @@ -1,5 +1,4 @@ """Module for Cdna DelIns Translation.""" -from typing import List, Optional from 
cool_seq_tool.schemas import AnnotationLayer from ga4gh.vrs import models @@ -31,13 +30,13 @@ def can_translate(self, classification_type: ClassificationType) -> bool: async def translate( self, validation_result: ValidationResult, - warnings: List[str], - endpoint_name: Optional[Endpoint] = None, + warnings: list[str], + endpoint_name: Endpoint | None = None, hgvs_dup_del_mode: HGVSDupDelModeOption = HGVSDupDelModeOption.DEFAULT, - baseline_copies: Optional[int] = None, - copy_change: Optional[models.CopyChange] = None, + baseline_copies: int | None = None, + copy_change: models.CopyChange | None = None, do_liftover: bool = False, - ) -> Optional[TranslationResult]: + ) -> TranslationResult | None: """Translate validation result to VRS representation :param validation_result: Validation result for a classification diff --git a/src/variation/translators/cdna_insertion.py b/src/variation/translators/cdna_insertion.py index f0d5d029..40fdd844 100644 --- a/src/variation/translators/cdna_insertion.py +++ b/src/variation/translators/cdna_insertion.py @@ -1,5 +1,4 @@ """Module for Cdna insertion Translation.""" -from typing import List, Optional from cool_seq_tool.schemas import AnnotationLayer from ga4gh.vrs import models @@ -31,13 +30,13 @@ def can_translate(self, classification_type: ClassificationType) -> bool: async def translate( self, validation_result: ValidationResult, - warnings: List[str], - endpoint_name: Optional[Endpoint] = None, + warnings: list[str], + endpoint_name: Endpoint | None = None, hgvs_dup_del_mode: HGVSDupDelModeOption = HGVSDupDelModeOption.DEFAULT, - baseline_copies: Optional[int] = None, - copy_change: Optional[models.CopyChange] = None, + baseline_copies: int | None = None, + copy_change: models.CopyChange | None = None, do_liftover: bool = False, - ) -> Optional[TranslationResult]: + ) -> TranslationResult | None: """Translate validation result to VRS representation :param validation_result: Validation result for a classification diff 
--git a/src/variation/translators/cdna_reference_agree.py b/src/variation/translators/cdna_reference_agree.py index 92fe1a09..b76eb7c2 100644 --- a/src/variation/translators/cdna_reference_agree.py +++ b/src/variation/translators/cdna_reference_agree.py @@ -1,5 +1,4 @@ """Module for Cdna Reference Agree Translation.""" -from typing import List, Optional from cool_seq_tool.schemas import AnnotationLayer from ga4gh.vrs import models @@ -31,13 +30,13 @@ def can_translate(self, classification_type: ClassificationType) -> bool: async def translate( self, validation_result: ValidationResult, - warnings: List[str], - endpoint_name: Optional[Endpoint] = None, + warnings: list[str], + endpoint_name: Endpoint | None = None, hgvs_dup_del_mode: HGVSDupDelModeOption = HGVSDupDelModeOption.DEFAULT, - baseline_copies: Optional[int] = None, - copy_change: Optional[models.CopyChange] = None, + baseline_copies: int | None = None, + copy_change: models.CopyChange | None = None, do_liftover: bool = False, - ) -> Optional[TranslationResult]: + ) -> TranslationResult | None: """Translate validation result to VRS representation :param validation_result: Validation result for a classification diff --git a/src/variation/translators/cdna_substitution.py b/src/variation/translators/cdna_substitution.py index 755bdf0e..23bda0c2 100644 --- a/src/variation/translators/cdna_substitution.py +++ b/src/variation/translators/cdna_substitution.py @@ -1,5 +1,4 @@ """Module for cDNA Substitution Translation.""" -from typing import List, Optional from cool_seq_tool.schemas import AnnotationLayer from ga4gh.vrs import models @@ -31,13 +30,13 @@ def can_translate(self, classification_type: ClassificationType) -> bool: async def translate( self, validation_result: ValidationResult, - warnings: List[str], - endpoint_name: Optional[Endpoint] = None, + warnings: list[str], + endpoint_name: Endpoint | None = None, hgvs_dup_del_mode: HGVSDupDelModeOption = HGVSDupDelModeOption.DEFAULT, - baseline_copies: 
Optional[int] = None, - copy_change: Optional[models.CopyChange] = None, + baseline_copies: int | None = None, + copy_change: models.CopyChange | None = None, do_liftover: bool = False, - ) -> Optional[TranslationResult]: + ) -> TranslationResult | None: """Translate validation result to VRS representation :param validation_result: Validation result for a classification diff --git a/src/variation/translators/genomic_del_dup_base.py b/src/variation/translators/genomic_del_dup_base.py index 80c879de..82d1ad01 100644 --- a/src/variation/translators/genomic_del_dup_base.py +++ b/src/variation/translators/genomic_del_dup_base.py @@ -1,5 +1,6 @@ """Module for Genomic Deletion Translation.""" -from typing import List, NamedTuple, Optional, Union + +from typing import NamedTuple from cool_seq_tool.schemas import ResidueMode from ga4gh.vrs import models @@ -28,7 +29,7 @@ class DelDupData(NamedTuple): ac: StrictStr pos0: StrictInt - pos1: Optional[StrictInt] + pos1: StrictInt | None class GenomicDelDupTranslator(Translator): @@ -38,10 +39,9 @@ class GenomicDelDupTranslator(Translator): async def get_grch38_data( self, - classification: Union[ - GenomicDeletionClassification, GenomicDuplicationClassification - ], - errors: List[str], + classification: GenomicDeletionClassification + | GenomicDuplicationClassification, + errors: list[str], ac: str, ) -> DelDupData: """Get GRCh38 data for genomic duplication or deletion classification @@ -80,13 +80,13 @@ async def get_grch38_data( async def translate( self, validation_result: ValidationResult, - warnings: List[str], - endpoint_name: Optional[Endpoint] = None, + warnings: list[str], + endpoint_name: Endpoint | None = None, hgvs_dup_del_mode: HGVSDupDelModeOption = HGVSDupDelModeOption.DEFAULT, - baseline_copies: Optional[int] = None, - copy_change: Optional[models.CopyChange] = None, + baseline_copies: int | None = None, + copy_change: models.CopyChange | None = None, do_liftover: bool = False, - ) -> 
Optional[TranslationResult]: + ) -> TranslationResult | None: """Translate validation result to VRS representation :param validation_result: Validation result for a classification diff --git a/src/variation/translators/genomic_deletion.py b/src/variation/translators/genomic_deletion.py index 7960b8aa..8656a868 100644 --- a/src/variation/translators/genomic_deletion.py +++ b/src/variation/translators/genomic_deletion.py @@ -1,4 +1,5 @@ """Module for Genomic Deletion Translation.""" + from variation.schemas.classification_response_schema import ClassificationType from variation.translators.genomic_del_dup_base import GenomicDelDupTranslator diff --git a/src/variation/translators/genomic_deletion_ambiguous.py b/src/variation/translators/genomic_deletion_ambiguous.py index 666451e9..d501551d 100644 --- a/src/variation/translators/genomic_deletion_ambiguous.py +++ b/src/variation/translators/genomic_deletion_ambiguous.py @@ -1,4 +1,5 @@ """Module for Genomic Deletion Ambiguous Translation.""" + from variation.schemas.classification_response_schema import ClassificationType from variation.translators.ambiguous_translator_base import AmbiguousTranslator diff --git a/src/variation/translators/genomic_delins.py b/src/variation/translators/genomic_delins.py index 9a6d6dcc..0ac27857 100644 --- a/src/variation/translators/genomic_delins.py +++ b/src/variation/translators/genomic_delins.py @@ -1,5 +1,4 @@ """Module for Genomic DelIns Translation.""" -from typing import List, Optional from cool_seq_tool.schemas import AnnotationLayer, ResidueMode from ga4gh.vrs import models @@ -35,13 +34,13 @@ def can_translate(self, classification_type: ClassificationType) -> bool: async def translate( self, validation_result: ValidationResult, - warnings: List[str], - endpoint_name: Optional[Endpoint] = None, + warnings: list[str], + endpoint_name: Endpoint | None = None, hgvs_dup_del_mode: HGVSDupDelModeOption = HGVSDupDelModeOption.DEFAULT, - baseline_copies: Optional[int] = None, - 
copy_change: Optional[models.CopyChange] = None, + baseline_copies: int | None = None, + copy_change: models.CopyChange | None = None, do_liftover: bool = False, - ) -> Optional[TranslationResult]: + ) -> TranslationResult | None: """Translate validation result to VRS representation :param validation_result: Validation result for a classification diff --git a/src/variation/translators/genomic_duplication.py b/src/variation/translators/genomic_duplication.py index 79a7daec..619ff942 100644 --- a/src/variation/translators/genomic_duplication.py +++ b/src/variation/translators/genomic_duplication.py @@ -1,4 +1,5 @@ """Module for Genomic Duplication Translation.""" + from variation.schemas.classification_response_schema import ClassificationType from variation.translators.genomic_del_dup_base import GenomicDelDupTranslator diff --git a/src/variation/translators/genomic_duplication_ambiguous.py b/src/variation/translators/genomic_duplication_ambiguous.py index 254712b4..4cfec940 100644 --- a/src/variation/translators/genomic_duplication_ambiguous.py +++ b/src/variation/translators/genomic_duplication_ambiguous.py @@ -1,4 +1,5 @@ """Module for Genomic Duplication Ambiguous Translation.""" + from variation.schemas.classification_response_schema import ClassificationType from variation.translators.ambiguous_translator_base import AmbiguousTranslator diff --git a/src/variation/translators/genomic_insertion.py b/src/variation/translators/genomic_insertion.py index 03d636bc..8f551a40 100644 --- a/src/variation/translators/genomic_insertion.py +++ b/src/variation/translators/genomic_insertion.py @@ -1,5 +1,4 @@ """Module for Genomic Insertion Translation.""" -from typing import List, Optional from cool_seq_tool.schemas import AnnotationLayer, ResidueMode from ga4gh.vrs import models @@ -35,13 +34,13 @@ def can_translate(self, classification_type: ClassificationType) -> bool: async def translate( self, validation_result: ValidationResult, - warnings: List[str], - endpoint_name: 
Optional[Endpoint] = None, + warnings: list[str], + endpoint_name: Endpoint | None = None, hgvs_dup_del_mode: HGVSDupDelModeOption = HGVSDupDelModeOption.DEFAULT, - baseline_copies: Optional[int] = None, - copy_change: Optional[models.CopyChange] = None, + baseline_copies: int | None = None, + copy_change: models.CopyChange | None = None, do_liftover: bool = False, - ) -> Optional[TranslationResult]: + ) -> TranslationResult | None: """Translate validation result to VRS representation :param validation_result: Validation result for a classification diff --git a/src/variation/translators/genomic_reference_agree.py b/src/variation/translators/genomic_reference_agree.py index f1b7d4a9..cffa7123 100644 --- a/src/variation/translators/genomic_reference_agree.py +++ b/src/variation/translators/genomic_reference_agree.py @@ -1,5 +1,4 @@ """Module for Genomic Reference Agree Translation.""" -from typing import List, Optional from cool_seq_tool.schemas import AnnotationLayer, ResidueMode from ga4gh.vrs import models @@ -35,13 +34,13 @@ def can_translate(self, classification_type: ClassificationType) -> bool: async def translate( self, validation_result: ValidationResult, - warnings: List[str], - endpoint_name: Optional[Endpoint] = None, + warnings: list[str], + endpoint_name: Endpoint | None = None, hgvs_dup_del_mode: HGVSDupDelModeOption = HGVSDupDelModeOption.DEFAULT, - baseline_copies: Optional[int] = None, - copy_change: Optional[models.CopyChange] = None, + baseline_copies: int | None = None, + copy_change: models.CopyChange | None = None, do_liftover: bool = False, - ) -> Optional[TranslationResult]: + ) -> TranslationResult | None: """Translate validation result to VRS representation :param validation_result: Validation result for a classification diff --git a/src/variation/translators/genomic_substitution.py b/src/variation/translators/genomic_substitution.py index 6b273337..25ca15a9 100644 --- a/src/variation/translators/genomic_substitution.py +++ 
b/src/variation/translators/genomic_substitution.py @@ -1,5 +1,4 @@ """Module for Genomic Substitution Translation.""" -from typing import List, Optional from cool_seq_tool.schemas import AnnotationLayer, ResidueMode, Strand from ga4gh.vrs import models @@ -35,13 +34,13 @@ def can_translate(self, classification_type: ClassificationType) -> bool: async def translate( self, validation_result: ValidationResult, - warnings: List[str], - endpoint_name: Optional[Endpoint] = None, + warnings: list[str], + endpoint_name: Endpoint | None = None, hgvs_dup_del_mode: HGVSDupDelModeOption = HGVSDupDelModeOption.DEFAULT, - baseline_copies: Optional[int] = None, - copy_change: Optional[models.CopyChange] = None, + baseline_copies: int | None = None, + copy_change: models.CopyChange | None = None, do_liftover: bool = False, - ) -> Optional[TranslationResult]: + ) -> TranslationResult | None: """Translate validation result to VRS representation :param validation_result: Validation result for a classification diff --git a/src/variation/translators/protein_deletion.py b/src/variation/translators/protein_deletion.py index 70910d44..bf95eaec 100644 --- a/src/variation/translators/protein_deletion.py +++ b/src/variation/translators/protein_deletion.py @@ -1,5 +1,4 @@ """Module for Protein Deletion Translation.""" -from typing import List, Optional from cool_seq_tool.schemas import AnnotationLayer from ga4gh.vrs import models @@ -31,13 +30,13 @@ def can_translate(self, classification_type: ClassificationType) -> bool: async def translate( self, validation_result: ValidationResult, - warnings: List[str], - endpoint_name: Optional[Endpoint] = None, + warnings: list[str], + endpoint_name: Endpoint | None = None, hgvs_dup_del_mode: HGVSDupDelModeOption = HGVSDupDelModeOption.DEFAULT, - baseline_copies: Optional[int] = None, - copy_change: Optional[models.CopyChange] = None, + baseline_copies: int | None = None, + copy_change: models.CopyChange | None = None, do_liftover: bool = False, - ) -> 
Optional[TranslationResult]: + ) -> TranslationResult | None: """Translate validation result to VRS representation :param validation_result: Validation result for a classification diff --git a/src/variation/translators/protein_delins.py b/src/variation/translators/protein_delins.py index 11f1d77f..f633c750 100644 --- a/src/variation/translators/protein_delins.py +++ b/src/variation/translators/protein_delins.py @@ -1,5 +1,4 @@ """Module for Protein DelIns Translation.""" -from typing import List, Optional from cool_seq_tool.schemas import AnnotationLayer from ga4gh.vrs import models @@ -31,13 +30,13 @@ def can_translate(self, classification_type: ClassificationType) -> bool: async def translate( self, validation_result: ValidationResult, - warnings: List[str], - endpoint_name: Optional[Endpoint] = None, + warnings: list[str], + endpoint_name: Endpoint | None = None, hgvs_dup_del_mode: HGVSDupDelModeOption = HGVSDupDelModeOption.DEFAULT, - baseline_copies: Optional[int] = None, - copy_change: Optional[models.CopyChange] = None, + baseline_copies: int | None = None, + copy_change: models.CopyChange | None = None, do_liftover: bool = False, - ) -> Optional[TranslationResult]: + ) -> TranslationResult | None: """Translate validation result to VRS representation :param validation_result: Validation result for a classification diff --git a/src/variation/translators/protein_insertion.py b/src/variation/translators/protein_insertion.py index 0dfb3983..925678c0 100644 --- a/src/variation/translators/protein_insertion.py +++ b/src/variation/translators/protein_insertion.py @@ -1,5 +1,4 @@ """Module for Protein Insertion Translation.""" -from typing import List, Optional from cool_seq_tool.schemas import AnnotationLayer from ga4gh.vrs import models @@ -31,13 +30,13 @@ def can_translate(self, classification_type: ClassificationType) -> bool: async def translate( self, validation_result: ValidationResult, - warnings: List[str], - endpoint_name: Optional[Endpoint] = None, + 
warnings: list[str], + endpoint_name: Endpoint | None = None, hgvs_dup_del_mode: HGVSDupDelModeOption = HGVSDupDelModeOption.DEFAULT, - baseline_copies: Optional[int] = None, - copy_change: Optional[models.CopyChange] = None, + baseline_copies: int | None = None, + copy_change: models.CopyChange | None = None, do_liftover: bool = False, - ) -> Optional[TranslationResult]: + ) -> TranslationResult | None: """Translate validation result to VRS representation :param validation_result: Validation result for a classification diff --git a/src/variation/translators/protein_reference_agree.py b/src/variation/translators/protein_reference_agree.py index 05e9f3d8..ed368afd 100644 --- a/src/variation/translators/protein_reference_agree.py +++ b/src/variation/translators/protein_reference_agree.py @@ -1,5 +1,4 @@ """Module for Protein Reference Agree Translation.""" -from typing import List, Optional from cool_seq_tool.schemas import AnnotationLayer from ga4gh.vrs import models @@ -31,13 +30,13 @@ def can_translate(self, classification_type: ClassificationType) -> bool: async def translate( self, validation_result: ValidationResult, - warnings: List[str], - endpoint_name: Optional[Endpoint] = None, + warnings: list[str], + endpoint_name: Endpoint | None = None, hgvs_dup_del_mode: HGVSDupDelModeOption = HGVSDupDelModeOption.DEFAULT, - baseline_copies: Optional[int] = None, - copy_change: Optional[models.CopyChange] = None, + baseline_copies: int | None = None, + copy_change: models.CopyChange | None = None, do_liftover: bool = False, - ) -> Optional[TranslationResult]: + ) -> TranslationResult | None: """Translate validation result to VRS representation :param validation_result: Validation result for a classification diff --git a/src/variation/translators/protein_stop_gain.py b/src/variation/translators/protein_stop_gain.py index 88144412..d5d3da2d 100644 --- a/src/variation/translators/protein_stop_gain.py +++ b/src/variation/translators/protein_stop_gain.py @@ -1,5 +1,4 @@ 
"""Module for Protein Stop Gain Translation.""" -from typing import List, Optional from cool_seq_tool.schemas import AnnotationLayer from ga4gh.vrs import models @@ -31,13 +30,13 @@ def can_translate(self, classification_type: ClassificationType) -> bool: async def translate( self, validation_result: ValidationResult, - warnings: List[str], - endpoint_name: Optional[Endpoint] = None, + warnings: list[str], + endpoint_name: Endpoint | None = None, hgvs_dup_del_mode: HGVSDupDelModeOption = HGVSDupDelModeOption.DEFAULT, - baseline_copies: Optional[int] = None, - copy_change: Optional[models.CopyChange] = None, + baseline_copies: int | None = None, + copy_change: models.CopyChange | None = None, do_liftover: bool = False, - ) -> Optional[TranslationResult]: + ) -> TranslationResult | None: """Translate validation result to VRS representation :param validation_result: Validation result for a classification diff --git a/src/variation/translators/protein_substitution.py b/src/variation/translators/protein_substitution.py index a1933090..d1b4925d 100644 --- a/src/variation/translators/protein_substitution.py +++ b/src/variation/translators/protein_substitution.py @@ -1,5 +1,4 @@ """Module for Protein Substitution Translation.""" -from typing import List, Optional from cool_seq_tool.schemas import AnnotationLayer from ga4gh.vrs import models @@ -31,13 +30,13 @@ def can_translate(self, classification_type: ClassificationType) -> bool: async def translate( self, validation_result: ValidationResult, - warnings: List[str], - endpoint_name: Optional[Endpoint] = None, + warnings: list[str], + endpoint_name: Endpoint | None = None, hgvs_dup_del_mode: HGVSDupDelModeOption = HGVSDupDelModeOption.DEFAULT, - baseline_copies: Optional[int] = None, - copy_change: Optional[models.CopyChange] = None, + baseline_copies: int | None = None, + copy_change: models.CopyChange | None = None, do_liftover: bool = False, - ) -> Optional[TranslationResult]: + ) -> TranslationResult | None: 
"""Translate validation result to VRS representation :param validation_result: Validation result for a classification diff --git a/src/variation/translators/translator.py b/src/variation/translators/translator.py index 9d8bb661..e8ddb481 100644 --- a/src/variation/translators/translator.py +++ b/src/variation/translators/translator.py @@ -1,6 +1,6 @@ """Module for translation.""" + from abc import ABC, abstractmethod -from typing import List, Optional, Union from cool_seq_tool.handlers import SeqRepoAccess from cool_seq_tool.mappers import ManeTranscript @@ -61,12 +61,12 @@ def can_translate(self, classification_type: ClassificationType) -> bool: async def translate( self, validation_result: ValidationResult, - endpoint_name: Optional[Endpoint] = None, + endpoint_name: Endpoint | None = None, hgvs_dup_del_mode: HGVSDupDelModeOption = HGVSDupDelModeOption.DEFAULT, - baseline_copies: Optional[int] = None, - copy_change: Optional[models.CopyChange] = None, + baseline_copies: int | None = None, + copy_change: models.CopyChange | None = None, do_liftover: bool = False, - ) -> Optional[TranslationResult]: + ) -> TranslationResult | None: """Translate validation result to VRS representation :param validation_result: Validation result for a classification @@ -86,9 +86,9 @@ def is_valid( alt_ac: str, pos0: int, pos1: int, - errors: List[str], - pos2: Optional[int] = None, - pos3: Optional[int] = None, + errors: list[str], + pos2: int | None = None, + pos3: int | None = None, residue_mode: ResidueMode = ResidueMode.RESIDUE, ) -> None: """Check that positions are valid on a gene. Will mutate `errors` if invalid. @@ -133,7 +133,7 @@ def validate_reference_sequence( end_pos: int, expected_ref: str, residue_mode: ResidueMode = ResidueMode.RESIDUE, - ) -> Optional[str]: + ) -> str | None: """Validate that expected reference sequence matches actual reference sequence This is also in validator, but there is a ticket to have this method be moved to cool-seq-tool. 
Once added, will be removed @@ -164,12 +164,12 @@ async def get_p_or_cdna_translation_result( start_pos: int, end_pos: int, alt_type: AltType, - coordinate_type: Union[AnnotationLayer.PROTEIN, AnnotationLayer.CDNA], - errors: List[str], - cds_start: Optional[int] = None, - ref: Optional[str] = None, - alt: Optional[str] = None, - ) -> Optional[TranslationResult]: + coordinate_type: AnnotationLayer, + errors: list[str], + cds_start: int | None = None, + ref: str | None = None, + alt: str | None = None, + ) -> TranslationResult | None: """Get translation result for validation result. Used for unambiguous variations on protein or cDNA coordinate types @@ -184,8 +184,15 @@ async def get_p_or_cdna_translation_result( `coordinate_type == AnnotationLayer.CDNA`. :param ref: Expected reference sequence :param alt: Expected change + :raises ValueError: If ``coordinate_type`` is not one of + ``AnnotationLayer.PROTEIN`` or ``AnnotationLayer.CDNA`` :return: Translation result if successful. Else, `None` """ + supported_coordinate_types = {AnnotationLayer.PROTEIN, AnnotationLayer.CDNA} + if coordinate_type not in supported_coordinate_types: + err_msg = f"`coordinate_type` must be one of {supported_coordinate_types}" + raise ValueError(err_msg) + vrs_allele = None vrs_seq_loc_ac = None vrs_seq_loc_ac_status = VrsSeqLocAcStatus.NA diff --git a/src/variation/utils.py b/src/variation/utils.py index f76d44ab..b669af48 100644 --- a/src/variation/utils.py +++ b/src/variation/utils.py @@ -1,19 +1,22 @@ """Module for general functionality throughout the app""" + import contextlib import re -from typing import Dict, List, Literal, Optional, Tuple, Union +from typing import Literal from bioutils.sequences import aa1_to_aa3 as _aa1_to_aa3 from bioutils.sequences import aa3_to_aa1 as _aa3_to_aa1 from cool_seq_tool.handlers import SeqRepoAccess -from ga4gh.core import core_models +from cool_seq_tool.schemas import ResidueMode +from ga4gh.core import domain_models +from ga4gh.vrs import models
from variation.schemas.app_schemas import AmbiguousRegexType from variation.schemas.classification_response_schema import AmbiguousType from variation.schemas.service_schema import ClinVarAssembly -def update_warnings_for_no_resp(label: str, warnings: List[str]) -> None: +def update_warnings_for_no_resp(label: str, warnings: list[str]) -> None: """Mutate `warnings` when unable to return a response :param label: Initial input query @@ -24,8 +27,8 @@ def update_warnings_for_no_resp(label: str, warnings: List[str]) -> None: def _get_priority_sequence_location( - locations: List[Dict], seqrepo_access: SeqRepoAccess -) -> Optional[Dict]: + locations: list[dict], seqrepo_access: SeqRepoAccess +) -> dict | None: """Get prioritized sequence location from list of locations Will prioritize GRCh8 over GRCh37. Will also only support chromosomes. @@ -64,8 +67,8 @@ def _get_priority_sequence_location( def get_priority_sequence_location( - gene: core_models.Gene, seqrepo_access: SeqRepoAccess -) -> Optional[Dict]: + gene: domain_models.Gene, seqrepo_access: SeqRepoAccess +) -> dict | None: """Get prioritized sequence location from a gene Will prioritize NCBI and then Ensembl. GRCh38 will be chosen over GRCh37. @@ -85,7 +88,7 @@ def get_priority_sequence_location( return ncbi_loc or ensembl_loc -def get_aa1_codes(aa: str) -> Optional[str]: +def get_aa1_codes(aa: str) -> str | None: """Get 1 letter AA codes given possible AA string (either 1 or 3 letter). Will also validate the input AA string. 
@@ -111,12 +114,12 @@ def get_aa1_codes(aa: str) -> Optional[str]: def get_ambiguous_type( - pos0: Union[int, Literal["?"]], - pos1: Optional[Union[int, Literal["?"]]], - pos2: Union[int, Literal["?"]], - pos3: Optional[Union[int, Literal["?"]]], + pos0: int | Literal["?"], + pos1: int | Literal["?"] | None, + pos2: int | Literal["?"], + pos3: int | Literal["?"] | None, ambiguous_regex_type: AmbiguousRegexType, -) -> Optional[AmbiguousType]: +) -> AmbiguousType | None: """Get the ambiguous type given positions and regex used :param pos0: Position 0 @@ -162,7 +165,7 @@ def get_ambiguous_type( def get_assembly( seqrepo_access: SeqRepoAccess, alt_ac: str -) -> Tuple[Optional[ClinVarAssembly], Optional[str]]: +) -> tuple[ClinVarAssembly | None, str | None]: """Get GRCh assembly for given genomic RefSeq accession :param seqrepo_access: Access to SeqRepo client @@ -188,8 +191,8 @@ def get_assembly( def get_refget_accession( - seqrepo_access: SeqRepoAccess, alias: str, errors: List[str] -) -> Optional[str]: + seqrepo_access: SeqRepoAccess, alias: str, errors: list[str] +) -> str | None: """Get refget accession for a given alias :param seqrepo_access: Access to SeqRepo client @@ -208,3 +211,29 @@ def get_refget_accession( refget_accession = ids[0].split("ga4gh:")[-1] return refget_accession + + +def get_vrs_loc_seq( + seqrepo_access: SeqRepoAccess, + identifier: str, + start: int | models.Range | None, + end: int | models.Range | None, +) -> str | None: + """Get the literal sequence encoded by the ``identifier`` at the start and end + coordinates. 
+ + Does not support locations that do not have both start/end as ints + + :param seqrepo_access: Access to SeqRepo client + :param identifier: Accession for VRS Location (not ga4gh) + :param start: Start position (inter-residue) + :param end: End position (inter-residue) + :return: Literal sequence at the given location, or ``None`` if ``start`` and ``end`` are not both ints, are equal, or the retrieved sequence is empty + """ + if isinstance(start, int) and isinstance(end, int) and (start != end): + ref, _ = seqrepo_access.get_reference_sequence( + identifier, start, end, residue_mode=ResidueMode.INTER_RESIDUE + ) + else: + ref = None + return ref or None # get_reference_sequence can return empty str diff --git a/src/variation/validate.py b/src/variation/validate.py index 15781ea6..80a269e0 100644 --- a/src/variation/validate.py +++ b/src/variation/validate.py @@ -1,7 +1,7 @@ """Module for Validation.""" -from typing import List from cool_seq_tool.handlers import SeqRepoAccess +from cool_seq_tool.mappers import LiftOver from cool_seq_tool.sources import TranscriptMappings, UtaDatabase from gene.query import QueryHandler as GeneQueryHandler @@ -41,6 +41,7 @@ def __init__( transcript_mappings: TranscriptMappings, uta: UtaDatabase, gene_normalizer: GeneQueryHandler, + liftover: LiftOver, ) -> None: """Initialize the validate class. Will create an instance variable, `validators`, which is a list of Validators for supported variation types.
@@ -49,9 +50,10 @@ def __init__( :param transcript_mappings: Access to transcript mappings :param uta: Access to UTA queries :param gene_normalizer: Access to gene-normalizer + :param liftover: Instance to provide mapping between human genome assemblies """ - params = [seqrepo_access, transcript_mappings, uta, gene_normalizer] - self.validators: List[Validator] = [ + params = [seqrepo_access, transcript_mappings, uta, gene_normalizer, liftover] + self.validators: list[Validator] = [ ProteinSubstitution(*params), CdnaSubstitution(*params), GenomicSubstitution(*params), diff --git a/src/variation/validators/__init__.py b/src/variation/validators/__init__.py index 33f8f2e1..70cd17ad 100644 --- a/src/variation/validators/__init__.py +++ b/src/variation/validators/__init__.py @@ -1,4 +1,5 @@ """Validator package level import.""" + from .amplification import Amplification from .cdna_deletion import CdnaDeletion from .cdna_delins import CdnaDelIns diff --git a/src/variation/validators/amplification.py b/src/variation/validators/amplification.py index 70cd7a39..7a01644b 100644 --- a/src/variation/validators/amplification.py +++ b/src/variation/validators/amplification.py @@ -1,5 +1,4 @@ """Module for Amplification validation""" -from typing import List from variation.schemas.classification_response_schema import ( AmplificationClassification, @@ -14,8 +13,8 @@ class Amplification(Validator): """The Insertion Validator Base class.""" async def get_valid_invalid_results( - self, classification: AmplificationClassification, accessions: List - ) -> List[ValidationResult]: + self, classification: AmplificationClassification, accessions: list + ) -> list[ValidationResult]: """Get list of validation results for a given classification and accessions :param classification: A classification for a list of tokens @@ -41,8 +40,8 @@ def validates_classification_type( return classification_type == ClassificationType.AMPLIFICATION async def get_accessions( - self, classification: 
Classification, errors: List - ) -> List: + self, classification: Classification, errors: list + ) -> list: """Return empty list since amplification does not require accessions :param classification: The classification for list of tokens diff --git a/src/variation/validators/cdna_deletion.py b/src/variation/validators/cdna_deletion.py index c93ed452..c3055041 100644 --- a/src/variation/validators/cdna_deletion.py +++ b/src/variation/validators/cdna_deletion.py @@ -1,5 +1,4 @@ """The module for cDNA Deletion Validation.""" -from typing import List from variation.schemas.classification_response_schema import ( CdnaDeletionClassification, @@ -15,8 +14,8 @@ class CdnaDeletion(Validator): """The cDNA Deletion Validator class.""" async def get_valid_invalid_results( - self, classification: CdnaDeletionClassification, accessions: List[str] - ) -> List[ValidationResult]: + self, classification: CdnaDeletionClassification, accessions: list[str] + ) -> list[ValidationResult]: """Get list of validation results for a given classification and accessions :param classification: A classification for a list of tokens @@ -96,8 +95,8 @@ def validates_classification_type( return classification_type == ClassificationType.CDNA_DELETION async def get_accessions( - self, classification: Classification, errors: List - ) -> List[str]: + self, classification: Classification, errors: list + ) -> list[str]: """Get accessions for a given classification. If `classification.nomenclature == Nomenclature.HGVS`, will return the accession in the HGVS expression. 
diff --git a/src/variation/validators/cdna_delins.py b/src/variation/validators/cdna_delins.py index a939394f..3cd75ed8 100644 --- a/src/variation/validators/cdna_delins.py +++ b/src/variation/validators/cdna_delins.py @@ -1,5 +1,4 @@ """The module for Cdna DelIns Validation.""" -from typing import List from variation.schemas.classification_response_schema import ( CdnaDelInsClassification, @@ -15,8 +14,8 @@ class CdnaDelIns(Validator): """The Cdna DelIns Validator class.""" async def get_valid_invalid_results( - self, classification: CdnaDelInsClassification, accessions: List[str] - ) -> List[ValidationResult]: + self, classification: CdnaDelInsClassification, accessions: list[str] + ) -> list[ValidationResult]: """Get list of validation results for a given classification and accessions :param classification: A classification for a list of tokens @@ -75,8 +74,8 @@ def validates_classification_type( return classification_type == ClassificationType.CDNA_DELINS async def get_accessions( - self, classification: Classification, errors: List - ) -> List[str]: + self, classification: Classification, errors: list + ) -> list[str]: """Get accessions for a given classification. If `classification.nomenclature == Nomenclature.HGVS`, will return the accession in the HGVS expression. 
diff --git a/src/variation/validators/cdna_insertion.py b/src/variation/validators/cdna_insertion.py index d285d417..9445b332 100644 --- a/src/variation/validators/cdna_insertion.py +++ b/src/variation/validators/cdna_insertion.py @@ -1,5 +1,4 @@ """The module for Cdna Insertion Validation.""" -from typing import List from variation.schemas.classification_response_schema import ( CdnaInsertionClassification, @@ -15,8 +14,8 @@ class CdnaInsertion(Validator): """The Cdna Insertion Validator class.""" async def get_valid_invalid_results( - self, classification: CdnaInsertionClassification, accessions: List[str] - ) -> List[ValidationResult]: + self, classification: CdnaInsertionClassification, accessions: list[str] + ) -> list[ValidationResult]: """Get list of validation results for a given classification and accessions :param classification: A classification for a list of tokens @@ -73,8 +72,8 @@ def validates_classification_type( return classification_type == ClassificationType.CDNA_INSERTION async def get_accessions( - self, classification: Classification, errors: List - ) -> List[str]: + self, classification: Classification, errors: list + ) -> list[str]: """Get accessions for a given classification. If `classification.nomenclature == Nomenclature.HGVS`, will return the accession in the HGVS expression. 
diff --git a/src/variation/validators/cdna_reference_agree.py b/src/variation/validators/cdna_reference_agree.py index 17d16572..72f11e4a 100644 --- a/src/variation/validators/cdna_reference_agree.py +++ b/src/variation/validators/cdna_reference_agree.py @@ -1,5 +1,4 @@ """The module for Cdna Substitution Validation.""" -from typing import List from variation.schemas.classification_response_schema import ( CdnaReferenceAgreeClassification, @@ -15,8 +14,8 @@ class CdnaReferenceAgree(Validator): """The Cdna Reference Agree Validator class.""" async def get_valid_invalid_results( - self, classification: CdnaReferenceAgreeClassification, accessions: List[str] - ) -> List[ValidationResult]: + self, classification: CdnaReferenceAgreeClassification, accessions: list[str] + ) -> list[ValidationResult]: """Get list of validation results for a given classification and accessions :param classification: A classification for a list of tokens @@ -57,8 +56,8 @@ def validates_classification_type( return classification_type == ClassificationType.CDNA_REFERENCE_AGREE async def get_accessions( - self, classification: Classification, errors: List - ) -> List[str]: + self, classification: Classification, errors: list + ) -> list[str]: """Get accessions for a given classification. If `classification.nomenclature == Nomenclature.HGVS`, will return the accession in the HGVS expression. 
diff --git a/src/variation/validators/cdna_substitution.py b/src/variation/validators/cdna_substitution.py index 5c887c4c..ebf63883 100644 --- a/src/variation/validators/cdna_substitution.py +++ b/src/variation/validators/cdna_substitution.py @@ -1,5 +1,4 @@ """The module for cDNA Substitution Validation.""" -from typing import List from variation.schemas.classification_response_schema import ( CdnaSubstitutionClassification, @@ -15,8 +14,8 @@ class CdnaSubstitution(Validator): """The cDNA Substitution Validator class.""" async def get_valid_invalid_results( - self, classification: CdnaSubstitutionClassification, accessions: List[str] - ) -> List[ValidationResult]: + self, classification: CdnaSubstitutionClassification, accessions: list[str] + ) -> list[ValidationResult]: """Get list of validation results for a given classification and accessions :param classification: A classification for a list of tokens @@ -60,8 +59,8 @@ def validates_classification_type( return classification_type == ClassificationType.CDNA_SUBSTITUTION async def get_accessions( - self, classification: Classification, errors: List - ) -> List[str]: + self, classification: Classification, errors: list + ) -> list[str]: """Get accessions for a given classification. If `classification.nomenclature == Nomenclature.HGVS`, will return the accession in the HGVS expression. 
diff --git a/src/variation/validators/genomic_base.py b/src/variation/validators/genomic_base.py index eafad2df..ef8146ed 100644 --- a/src/variation/validators/genomic_base.py +++ b/src/variation/validators/genomic_base.py @@ -1,6 +1,6 @@ """Module for Genomic Validation methods.""" + import logging -from typing import List, Optional from cool_seq_tool.handlers import SeqRepoAccess from cool_seq_tool.sources import UtaDatabase @@ -10,8 +10,7 @@ Nomenclature, ) -logger = logging.getLogger("variation") -logger.setLevel(logging.DEBUG) +_logger = logging.getLogger(__name__) class GenomicBase: @@ -28,7 +27,7 @@ def __init__(self, seqrepo_access: SeqRepoAccess, uta: UtaDatabase) -> None: """The Genomic Base class.""" - async def get_nc_accessions(self, classification: Classification) -> List[str]: + async def get_nc_accessions(self, classification: Classification) -> list[str]: """Get NC accession for a given classification.""" if classification.nomenclature == Nomenclature.HGVS: nc_accessions = [classification.ac] @@ -50,7 +49,7 @@ async def get_nc_accessions(self, classification: Classification) -> List[str]: return nc_accessions - def get_nc_accession(self, identifier: str) -> Optional[str]: + def get_nc_accession(self, identifier: str) -> str | None: """Given an identifier (assembly+chr), return nc accession.""" nc_accession = None try: @@ -58,7 +57,7 @@ def get_nc_accession(self, identifier: str) -> Optional[str]: identifier ) except KeyError: - logger.warning("Data Proxy unable to get metadata for %s", identifier) + _logger.warning("Data Proxy unable to get metadata for %s", identifier) else: aliases = [a for a in translated_identifiers if a.startswith("refseq:NC_")] if aliases: diff --git a/src/variation/validators/genomic_deletion.py b/src/variation/validators/genomic_deletion.py index cb3c9c4f..becd8721 100644 --- a/src/variation/validators/genomic_deletion.py +++ b/src/variation/validators/genomic_deletion.py @@ -1,5 +1,4 @@ """The module for Genomic Deletion 
Validation.""" -from typing import List from variation.schemas.classification_response_schema import ( Classification, @@ -15,8 +14,8 @@ class GenomicDeletion(Validator): """The Genomic Deletion Validator class.""" async def get_valid_invalid_results( - self, classification: GenomicDeletionClassification, accessions: List[str] - ) -> List[ValidationResult]: + self, classification: GenomicDeletionClassification, accessions: list[str] + ) -> list[ValidationResult]: """Get list of validation results for a given classification and accessions :param classification: A classification for a list of tokens @@ -111,8 +110,8 @@ def validates_classification_type( return classification_type == ClassificationType.GENOMIC_DELETION async def get_accessions( - self, classification: Classification, errors: List - ) -> List[str]: + self, classification: Classification, errors: list + ) -> list[str]: """Get accessions for a given classification. If `classification.nomenclature == Nomenclature.HGVS`, will return the accession in the HGVS expression. 
diff --git a/src/variation/validators/genomic_deletion_ambiguous.py b/src/variation/validators/genomic_deletion_ambiguous.py index ab999e53..26b3096f 100644 --- a/src/variation/validators/genomic_deletion_ambiguous.py +++ b/src/variation/validators/genomic_deletion_ambiguous.py @@ -1,5 +1,4 @@ """The module for Genomic Deletion Ambiguous Validation.""" -from typing import List from variation.schemas.classification_response_schema import ( AmbiguousType, @@ -18,8 +17,8 @@ class GenomicDeletionAmbiguous(Validator): async def get_valid_invalid_results( self, classification: GenomicDeletionAmbiguousClassification, - accessions: List[str], - ) -> List[ValidationResult]: + accessions: list[str], + ) -> list[ValidationResult]: """Get list of validation results for a given classification and accessions :param classification: A classification for a list of tokens @@ -103,8 +102,8 @@ def validates_classification_type( return classification_type == ClassificationType.GENOMIC_DELETION_AMBIGUOUS async def get_accessions( - self, classification: Classification, errors: List - ) -> List[str]: + self, classification: Classification, errors: list + ) -> list[str]: """Get accessions for a given classification. If `classification.nomenclature == Nomenclature.HGVS`, will return the accession in the HGVS expression. 
diff --git a/src/variation/validators/genomic_delins.py b/src/variation/validators/genomic_delins.py index 1e8f9b36..67b57f73 100644 --- a/src/variation/validators/genomic_delins.py +++ b/src/variation/validators/genomic_delins.py @@ -1,5 +1,4 @@ """The module for Genomic DelIns Validation.""" -from typing import List from variation.schemas.classification_response_schema import ( Classification, @@ -15,8 +14,8 @@ class GenomicDelIns(Validator): """The Genomic DelIns Validator class.""" async def get_valid_invalid_results( - self, classification: GenomicDelInsClassification, accessions: List[str] - ) -> List[ValidationResult]: + self, classification: GenomicDelInsClassification, accessions: list[str] + ) -> list[ValidationResult]: """Get list of validation results for a given classification and accessions :param classification: A classification for a list of tokens @@ -82,8 +81,8 @@ def validates_classification_type( return classification_type == ClassificationType.GENOMIC_DELINS async def get_accessions( - self, classification: Classification, errors: List - ) -> List[str]: + self, classification: Classification, errors: list + ) -> list[str]: """Get accessions for a given classification. If `classification.nomenclature == Nomenclature.HGVS`, will return the accession in the HGVS expression. 
diff --git a/src/variation/validators/genomic_duplication.py b/src/variation/validators/genomic_duplication.py index 89d82250..fc8d6858 100644 --- a/src/variation/validators/genomic_duplication.py +++ b/src/variation/validators/genomic_duplication.py @@ -1,5 +1,4 @@ """The module for Genomic Duplication Validation.""" -from typing import List from variation.schemas.classification_response_schema import ( Classification, @@ -15,8 +14,8 @@ class GenomicDuplication(Validator): """The Genomic Duplication Validator class.""" async def get_valid_invalid_results( - self, classification: GenomicDuplicationClassification, accessions: List[str] - ) -> List[ValidationResult]: + self, classification: GenomicDuplicationClassification, accessions: list[str] + ) -> list[ValidationResult]: """Get list of validation results for a given classification and accessions :param classification: A classification for a list of tokens @@ -77,8 +76,8 @@ def validates_classification_type( return classification_type == ClassificationType.GENOMIC_DUPLICATION async def get_accessions( - self, classification: Classification, errors: List - ) -> List[str]: + self, classification: Classification, errors: list + ) -> list[str]: """Get accessions for a given classification. If `classification.nomenclature == Nomenclature.HGVS`, will return the accession in the HGVS expression. 
diff --git a/src/variation/validators/genomic_duplication_ambiguous.py b/src/variation/validators/genomic_duplication_ambiguous.py index 9479d744..2fb53e40 100644 --- a/src/variation/validators/genomic_duplication_ambiguous.py +++ b/src/variation/validators/genomic_duplication_ambiguous.py @@ -1,5 +1,4 @@ """The module for Genomic Duplication Ambiguous Validation.""" -from typing import List from variation.schemas.classification_response_schema import ( AmbiguousType, @@ -18,8 +17,8 @@ class GenomicDuplicationAmbiguous(Validator): async def get_valid_invalid_results( self, classification: GenomicDuplicationAmbiguousClassification, - accessions: List[str], - ) -> List[ValidationResult]: + accessions: list[str], + ) -> list[ValidationResult]: """Get list of validation results for a given classification and accessions :param classification: A classification for a list of tokens @@ -103,8 +102,8 @@ def validates_classification_type( return classification_type == ClassificationType.GENOMIC_DUPLICATION_AMBIGUOUS async def get_accessions( - self, classification: Classification, errors: List - ) -> List[str]: + self, classification: Classification, errors: list + ) -> list[str]: """Get accessions for a given classification. If `classification.nomenclature == Nomenclature.HGVS`, will return the accession in the HGVS expression. 
diff --git a/src/variation/validators/genomic_insertion.py b/src/variation/validators/genomic_insertion.py index 262128ea..742437c2 100644 --- a/src/variation/validators/genomic_insertion.py +++ b/src/variation/validators/genomic_insertion.py @@ -1,5 +1,4 @@ """The module for Genomic Insertion Validation.""" -from typing import List from variation.schemas.classification_response_schema import ( Classification, @@ -15,8 +14,8 @@ class GenomicInsertion(Validator): """The Genomic Insertion Validator class.""" async def get_valid_invalid_results( - self, classification: GenomicInsertionClassification, accessions: List[str] - ) -> List[ValidationResult]: + self, classification: GenomicInsertionClassification, accessions: list[str] + ) -> list[ValidationResult]: """Get list of validation results for a given classification and accessions :param classification: A classification for a list of tokens @@ -82,8 +81,8 @@ def validates_classification_type( return classification_type == ClassificationType.GENOMIC_INSERTION async def get_accessions( - self, classification: Classification, errors: List - ) -> List[str]: + self, classification: Classification, errors: list + ) -> list[str]: """Get accessions for a given classification. If `classification.nomenclature == Nomenclature.HGVS`, will return the accession in the HGVS expression. 
diff --git a/src/variation/validators/genomic_reference_agree.py b/src/variation/validators/genomic_reference_agree.py index ec30c1da..d1f985e2 100644 --- a/src/variation/validators/genomic_reference_agree.py +++ b/src/variation/validators/genomic_reference_agree.py @@ -1,5 +1,4 @@ """The module for Genomic Reference Agree Validation.""" -from typing import List from variation.schemas.classification_response_schema import ( Classification, @@ -15,8 +14,8 @@ class GenomicReferenceAgree(Validator): """The Genomic Reference Agree Validator class.""" async def get_valid_invalid_results( - self, classification: GenomicReferenceAgreeClassification, accessions: List[str] - ) -> List[ValidationResult]: + self, classification: GenomicReferenceAgreeClassification, accessions: list[str] + ) -> list[ValidationResult]: """Get list of validation results for a given classification and accessions :param classification: A classification for a list of tokens @@ -63,8 +62,8 @@ def validates_classification_type( return classification_type == ClassificationType.GENOMIC_REFERENCE_AGREE async def get_accessions( - self, classification: Classification, errors: List - ) -> List[str]: + self, classification: Classification, errors: list + ) -> list[str]: """Get accessions for a given classification. If `classification.nomenclature == Nomenclature.HGVS`, will return the accession in the HGVS expression. 
diff --git a/src/variation/validators/genomic_substitution.py b/src/variation/validators/genomic_substitution.py index 898750b1..a2488cb8 100644 --- a/src/variation/validators/genomic_substitution.py +++ b/src/variation/validators/genomic_substitution.py @@ -1,5 +1,4 @@ """The module for Genomic Substitution Validation.""" -from typing import List from variation.schemas.classification_response_schema import ( Classification, @@ -15,8 +14,8 @@ class GenomicSubstitution(Validator): """The Genomic Substitution Validator class.""" async def get_valid_invalid_results( - self, classification: GenomicSubstitutionClassification, accessions: List[str] - ) -> List[ValidationResult]: + self, classification: GenomicSubstitutionClassification, accessions: list[str] + ) -> list[ValidationResult]: """Get list of validation results for a given classification and accessions :param classification: A classification for a list of tokens @@ -60,8 +59,8 @@ def validates_classification_type( return classification_type == ClassificationType.GENOMIC_SUBSTITUTION async def get_accessions( - self, classification: Classification, errors: List - ) -> List[str]: + self, classification: Classification, errors: list + ) -> list[str]: """Get accessions for a given classification. If `classification.nomenclature == Nomenclature.HGVS`, will return the accession in the HGVS expression. 
diff --git a/src/variation/validators/protein_deletion.py b/src/variation/validators/protein_deletion.py index c8fc9d0d..e515bcf5 100644 --- a/src/variation/validators/protein_deletion.py +++ b/src/variation/validators/protein_deletion.py @@ -1,5 +1,4 @@ """The module for Protein Deletion Validation.""" -from typing import List from variation.schemas.classification_response_schema import ( Classification, @@ -15,8 +14,8 @@ class ProteinDeletion(Validator): """The Protein Deletion Validator class.""" async def get_valid_invalid_results( - self, classification: ProteinDeletionClassification, accessions: List[str] - ) -> List[ValidationResult]: + self, classification: ProteinDeletionClassification, accessions: list[str] + ) -> list[ValidationResult]: """Get list of validation results for a given classification and accessions :param classification: A classification for a list of tokens @@ -112,8 +111,8 @@ def validates_classification_type( return classification_type == ClassificationType.PROTEIN_DELETION async def get_accessions( - self, classification: Classification, errors: List - ) -> List[str]: + self, classification: Classification, errors: list + ) -> list[str]: """Get accessions for a given classification. If `classification.nomenclature == Nomenclature.HGVS`, will return the accession in the HGVS expression. 
diff --git a/src/variation/validators/protein_delins.py b/src/variation/validators/protein_delins.py index 3580185d..4d38a39d 100644 --- a/src/variation/validators/protein_delins.py +++ b/src/variation/validators/protein_delins.py @@ -1,5 +1,4 @@ """The module for Protein DelIns Validation.""" -from typing import List from variation.schemas.classification_response_schema import ( Classification, @@ -15,8 +14,8 @@ class ProteinDelIns(Validator): """The Protein DelIns Validator class.""" async def get_valid_invalid_results( - self, classification: ProteinDelInsClassification, accessions: List[str] - ) -> List[ValidationResult]: + self, classification: ProteinDelInsClassification, accessions: list[str] + ) -> list[ValidationResult]: """Get list of validation results for a given classification and accessions :param classification: A classification for a list of tokens @@ -91,8 +90,8 @@ def validates_classification_type( return classification_type == ClassificationType.PROTEIN_DELINS async def get_accessions( - self, classification: Classification, errors: List - ) -> List[str]: + self, classification: Classification, errors: list + ) -> list[str]: """Get accessions for a given classification. If `classification.nomenclature == Nomenclature.HGVS`, will return the accession in the HGVS expression. 
diff --git a/src/variation/validators/protein_insertion.py b/src/variation/validators/protein_insertion.py index 9a7087dd..79e71d2c 100644 --- a/src/variation/validators/protein_insertion.py +++ b/src/variation/validators/protein_insertion.py @@ -1,5 +1,4 @@ """The module for Protein Insertion Validation.""" -from typing import List from variation.schemas.classification_response_schema import ( Classification, @@ -15,8 +14,8 @@ class ProteinInsertion(Validator): """The Protein Insertion Validator class.""" async def get_valid_invalid_results( - self, classification: ProteinInsertionClassification, accessions: List[str] - ) -> List[ValidationResult]: + self, classification: ProteinInsertionClassification, accessions: list[str] + ) -> list[ValidationResult]: """Get list of validation results for a given classification and accessions :param classification: A classification for a list of tokens @@ -91,8 +90,8 @@ def validates_classification_type( return classification_type == ClassificationType.PROTEIN_INSERTION async def get_accessions( - self, classification: Classification, errors: List - ) -> List[str]: + self, classification: Classification, errors: list + ) -> list[str]: """Get accessions for a given classification. If `classification.nomenclature == Nomenclature.HGVS`, will return the accession in the HGVS expression. 
diff --git a/src/variation/validators/protein_reference_agree.py b/src/variation/validators/protein_reference_agree.py index 23811143..e1739e63 100644 --- a/src/variation/validators/protein_reference_agree.py +++ b/src/variation/validators/protein_reference_agree.py @@ -1,5 +1,4 @@ """The module for Protein Reference Agree Validation.""" -from typing import List from variation.schemas.classification_response_schema import ( Classification, @@ -15,8 +14,8 @@ class ProteinReferenceAgree(Validator): """The Protein Reference Agree Validator class.""" async def get_valid_invalid_results( - self, classification: ProteinReferenceAgreeClassification, accessions: List[str] - ) -> List[ValidationResult]: + self, classification: ProteinReferenceAgreeClassification, accessions: list[str] + ) -> list[ValidationResult]: """Get list of validation results for a given classification and accessions :param classification: A classification for a list of tokens @@ -68,8 +67,8 @@ def validates_classification_type( return classification_type == ClassificationType.PROTEIN_REFERENCE_AGREE async def get_accessions( - self, classification: Classification, errors: List - ) -> List[str]: + self, classification: Classification, errors: list + ) -> list[str]: """Get accessions for a given classification. If `classification.nomenclature == Nomenclature.HGVS`, will return the accession in the HGVS expression. 
diff --git a/src/variation/validators/protein_stop_gain.py b/src/variation/validators/protein_stop_gain.py index 67bd9120..8c9e6f41 100644 --- a/src/variation/validators/protein_stop_gain.py +++ b/src/variation/validators/protein_stop_gain.py @@ -1,5 +1,4 @@ """The module for Protein Stop Gain Validation.""" -from typing import List from variation.schemas.classification_response_schema import ( Classification, @@ -15,8 +14,8 @@ class ProteinStopGain(Validator): """The Protein Stop Gain Validator class.""" async def get_valid_invalid_results( - self, classification: ProteinStopGainClassification, accessions: List[str] - ) -> List[ValidationResult]: + self, classification: ProteinStopGainClassification, accessions: list[str] + ) -> list[ValidationResult]: """Get list of validation results for a given classification and accessions :param classification: A classification for a list of tokens @@ -71,8 +70,8 @@ def validates_classification_type( return classification_type == ClassificationType.PROTEIN_STOP_GAIN async def get_accessions( - self, classification: Classification, errors: List - ) -> List[str]: + self, classification: Classification, errors: list + ) -> list[str]: """Get accessions for a given classification. If `classification.nomenclature == Nomenclature.HGVS`, will return the accession in the HGVS expression. 
diff --git a/src/variation/validators/protein_substitution.py b/src/variation/validators/protein_substitution.py index ba4df8a9..368f9713 100644 --- a/src/variation/validators/protein_substitution.py +++ b/src/variation/validators/protein_substitution.py @@ -1,5 +1,4 @@ """The module for Protein Substitution Validation.""" -from typing import List from variation.schemas.classification_response_schema import ( Classification, @@ -15,8 +14,8 @@ class ProteinSubstitution(Validator): """The Protein Substitution Validator class.""" async def get_valid_invalid_results( - self, classification: ProteinSubstitutionClassification, accessions: List[str] - ) -> List[ValidationResult]: + self, classification: ProteinSubstitutionClassification, accessions: list[str] + ) -> list[ValidationResult]: """Get list of validation results for a given classification and accessions :param classification: A classification for a list of tokens @@ -68,8 +67,8 @@ def validates_classification_type( return classification_type == ClassificationType.PROTEIN_SUBSTITUTION async def get_accessions( - self, classification: Classification, errors: List - ) -> List[str]: + self, classification: Classification, errors: list + ) -> list[str]: """Get accessions for a given classification. If `classification.nomenclature == Nomenclature.HGVS`, will return the accession in the HGVS expression. 
diff --git a/src/variation/validators/validator.py b/src/variation/validators/validator.py index f7fb8b0d..aee55ca6 100644 --- a/src/variation/validators/validator.py +++ b/src/variation/validators/validator.py @@ -1,9 +1,11 @@ """Module for Validation.""" + from abc import ABC, abstractmethod -from typing import List, Literal, Optional, Tuple, Union +from typing import Literal from cool_seq_tool.handlers import SeqRepoAccess -from cool_seq_tool.schemas import ResidueMode +from cool_seq_tool.mappers import LiftOver +from cool_seq_tool.schemas import Assembly, ResidueMode from cool_seq_tool.sources import TranscriptMappings, UtaDatabase from gene.query import QueryHandler as GeneQueryHandler from gene.schemas import SourceName @@ -36,6 +38,7 @@ def __init__( transcript_mappings: TranscriptMappings, uta: UtaDatabase, gene_normalizer: GeneQueryHandler, + liftover: LiftOver, ) -> None: """Initialize the DelIns validator. @@ -43,17 +46,19 @@ def __init__( :param transcript_mappings: Access to transcript mappings :param uta: Access to UTA queries :param gene_normalizer: Access to gene-normalizer + :param liftover: Instance to provide mapping between human genome assemblies """ self.transcript_mappings = transcript_mappings self.seqrepo_access = seqrepo_access self.uta = uta self.genomic_base = GenomicBase(self.seqrepo_access, self.uta) self.gene_normalizer = gene_normalizer + self.liftover = liftover @abstractmethod async def get_accessions( - self, classification: Classification, errors: List - ) -> List[str]: + self, classification: Classification, errors: list + ) -> list[str]: """Get accessions for a given classification. If `classification.nomenclature == Nomenclature.HGVS`, will return the accession in the HGVS expression. 
@@ -77,8 +82,8 @@ def validates_classification_type( @abstractmethod async def get_valid_invalid_results( - self, classification: Classification, accessions: List - ) -> List[ValidationResult]: + self, classification: Classification, accessions: list + ) -> list[ValidationResult]: """Get list of validation results for a given classification and accessions :param classification: A classification for a list of tokens @@ -86,7 +91,7 @@ async def get_valid_invalid_results( :return: List of validation results containing invalid and valid results """ - async def validate(self, classification: Classification) -> List[ValidationResult]: + async def validate(self, classification: Classification) -> list[ValidationResult]: """Get list of associated accessions for a classification. Use these accessions to perform validation checks (pos exists, accession is valid, reference sequence matches expected, etc). Gets list of validation results for a given @@ -114,7 +119,7 @@ async def validate(self, classification: Classification) -> List[ValidationResul ] return await self.get_valid_invalid_results(classification, accessions) - def get_protein_accessions(self, gene_token: GeneToken, errors: List) -> List[str]: + def get_protein_accessions(self, gene_token: GeneToken, errors: list) -> list[str]: """Get accessions for variations with protein reference sequence. :param gene_token: Gene token for a classification @@ -128,7 +133,7 @@ def get_protein_accessions(self, gene_token: GeneToken, errors: List) -> List[st ) return accessions - def get_cdna_accessions(self, gene_token: GeneToken, errors: List) -> List[str]: + def get_cdna_accessions(self, gene_token: GeneToken, errors: list) -> list[str]: """Get accessions for variations with cDNA reference sequence. 
:param gene_token: Gene token for a classification @@ -143,8 +148,8 @@ def get_cdna_accessions(self, gene_token: GeneToken, errors: List) -> List[str]: return accessions async def get_genomic_accessions( - self, classification: Classification, errors: List - ) -> List[str]: + self, classification: Classification, errors: list + ) -> list[str]: """Get genomic RefSeq accessions for variations with genomic reference sequence. :param classification: Classification for a list of tokens @@ -161,11 +166,11 @@ async def _validate_gene_pos( gene: str, alt_ac: str, pos0: int, - pos1: Optional[int], - pos2: Optional[int] = None, - pos3: Optional[int] = None, + pos1: int | None, + pos2: int | None = None, + pos3: int | None = None, residue_mode: ResidueMode = ResidueMode.RESIDUE, - ) -> Optional[str]: + ) -> str | None: """Validate whether free text genomic query is valid input. If invalid input, add error to list of errors @@ -199,13 +204,12 @@ async def _validate_gene_pos( chromosome, assembly = assembly for key in gene_start_end: gene_pos = gene_start_end[key] - gene_pos_liftover = self.uta.liftover_38_to_37.convert_coordinate( - chromosome, gene_pos + gene_pos_liftover = self.liftover.get_liftover( + chromosome, gene_pos, Assembly.GRCH37 ) if gene_pos_liftover is None or len(gene_pos_liftover) == 0: return f"{gene_pos} does not exist on {chromosome}" - - gene_start_end[key] = gene_pos_liftover[0][1] + gene_start_end[key] = gene_pos_liftover[1] gene_start = gene_start_end["start"] gene_end = gene_start_end["end"] @@ -226,7 +230,7 @@ def validate_reference_sequence( end_pos: int, expected_ref: str, residue_mode: ResidueMode = ResidueMode.RESIDUE, - ) -> Optional[str]: + ) -> str | None: """Validate that expected reference sequence matches actual reference sequence. This is also in translator, but there is a ticket to have this method be moved to cool-seq-tool. 
Once added, will be removed @@ -250,7 +254,7 @@ def validate_reference_sequence( return err_msg - async def get_cds_start(self, ac: str) -> Tuple[Optional[int], Optional[str]]: + async def get_cds_start(self, ac: str) -> tuple[int | None, str | None]: """Get coding start site for accession :param ac: Accession to get coding start site for @@ -272,9 +276,9 @@ def validate_ac_and_pos( self, ac: str, start_pos: int, - end_pos: Optional[int] = None, + end_pos: int | None = None, residue_mode: ResidueMode = ResidueMode.RESIDUE, - ) -> Optional[str]: + ) -> str | None: """Validate that accession exists and that position(s) exist on accession :param ac: Accession @@ -311,10 +315,10 @@ def validate_ac_and_pos( @staticmethod def validate_5_prime_to_3_prime( pos0: int, - pos1: Optional[Union[int, Literal["?"]]], - pos2: Optional[Union[int, Literal["?"]]] = None, - pos3: Optional[Union[int, Literal["?"]]] = None, - ) -> Optional[str]: + pos1: int | Literal["?"] | None, + pos2: int | Literal["?"] | None = None, + pos3: int | Literal["?"] | None = None, + ) -> str | None: """Validate that positions are unique and listed from 5' to 3' :param pos0: Position 0 @@ -343,11 +347,9 @@ def validate_5_prime_to_3_prime( def validate_ambiguous_classification( self, - classification: Union[ - GenomicDeletionAmbiguousClassification, - GenomicDuplicationAmbiguousClassification, - ], - ) -> Optional[str]: + classification: GenomicDeletionAmbiguousClassification + | GenomicDuplicationAmbiguousClassification, + ) -> str | None: """Validate that ambiguous type is supported and that positions are unique and listed from 5' to 3' @@ -374,15 +376,13 @@ def validate_ambiguous_classification( def validate_protein_hgvs_classification( self, - classification: Union[ - ProteinDelInsClassification, - ProteinDeletionClassification, - ProteinInsertionClassification, - ProteinReferenceAgreeClassification, - ProteinStopGainClassification, - ProteinSubstitutionClassification, - ], - ) -> List[str]: + 
classification: ProteinDelInsClassification + | ProteinDeletionClassification + | ProteinInsertionClassification + | ProteinReferenceAgreeClassification + | ProteinStopGainClassification + | ProteinSubstitutionClassification, + ) -> list[str]: """Validate protein HGVS classification :param classification: Classification diff --git a/src/variation/version.py b/src/variation/version.py deleted file mode 100644 index 2f615d6c..00000000 --- a/src/variation/version.py +++ /dev/null @@ -1,2 +0,0 @@ -"""Module for version of app""" -__version__ = "0.8.2" diff --git a/src/variation/vrs_representation.py b/src/variation/vrs_representation.py index 1d12c4b3..30ab1398 100644 --- a/src/variation/vrs_representation.py +++ b/src/variation/vrs_representation.py @@ -1,5 +1,4 @@ """Module for generating VRS objects""" -from typing import Dict, List, Optional, Tuple, Union from cool_seq_tool.handlers import SeqRepoAccess from cool_seq_tool.schemas import AnnotationLayer, ResidueMode @@ -26,8 +25,8 @@ def __init__(self, seqrepo_access: SeqRepoAccess) -> None: @staticmethod def get_start_end( - coordinate: str, start: int, end: int, cds_start: int, errors: List - ) -> Optional[Tuple[int, int]]: + coordinate: str, start: int, end: int, cds_start: int, errors: list + ) -> tuple[int, int] | None: """Get start and end coordinates. :param str coordinate: Coordinate used. 
Must be either `p`, `c`, or `g` @@ -72,8 +71,8 @@ def get_end_indef_range(end: int) -> models.Range: @staticmethod def get_sequence_loc( refget_accession: str, - start: Union[int, models.Range], - end: Union[int, models.Range], + start: int | models.Range, + end: int | models.Range, ) -> models.Location: """Return VRS location @@ -93,14 +92,12 @@ def get_sequence_loc( def vrs_allele( self, ac: str, - start: Union[int, models.Range], - end: Union[int, models.Range], - sstate: Union[ - models.LiteralSequenceExpression, models.ReferenceLengthExpression - ], + start: int | models.Range, + end: int | models.Range, + sstate: models.LiteralSequenceExpression | models.ReferenceLengthExpression, alt_type: AltType, - errors: List[str], - ) -> Optional[Dict]: + errors: list[str], + ) -> dict | None: """Create a VRS Allele object. :param ac: Accession @@ -151,11 +148,11 @@ def to_vrs_allele( end: int, coordinate: AnnotationLayer, alt_type: AltType, - errors: List[str], - cds_start: Optional[int] = None, - alt: Optional[str] = None, + errors: list[str], + cds_start: int | None = None, + alt: str | None = None, residue_mode: ResidueMode = ResidueMode.RESIDUE, - ) -> Optional[Dict]: + ) -> dict | None: """Translate accession and position to VRS Allele Object. :param ac: Accession diff --git a/tests/__init__.py b/tests/__init__.py index dfe3a3f7..a17fdde4 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -1,4 +1,5 @@ """Test package.""" + from pathlib import Path PROJECT_ROOT = Path(__file__).resolve().parents[1] diff --git a/tests/conftest.py b/tests/conftest.py index 09865eed..67bae951 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,6 +1,8 @@ """Create methods used throughout tests.""" + import asyncio import contextlib +import logging import pytest from cool_seq_tool.app import CoolSeqTool @@ -14,6 +16,33 @@ from variation.tokenizers import GeneSymbol +def pytest_addoption(parser): + """Add custom commands to pytest invocation. 
+ See https://docs.pytest.org/en/7.1.x/reference/reference.html#parser + """ + parser.addoption( + "--verbose-logs", + action="store_true", + default=False, + help="show noisy module logs", + ) + + +def pytest_configure(config): + """Configure pytest setup.""" + if not config.getoption("--verbose-logs"): + logging.getLogger("cool_seq_tool").setLevel(logging.INFO) + logging.getLogger("boto3").setLevel(logging.ERROR) + logging.getLogger("botocore").setLevel(logging.ERROR) + logging.getLogger("urllib3.connectionpool").setLevel(logging.ERROR) + logging.getLogger("hgvs.parser").setLevel(logging.INFO) + logging.getLogger("biocommons.seqrepo.seqaliasdb.seqaliasdb").setLevel( + logging.INFO + ) + logging.getLogger("biocommons.seqrepo.fastadir.fastadir").setLevel(logging.INFO) + logging.getLogger("asyncio").setLevel(logging.INFO) + + @pytest.fixture(scope="session") def event_loop(): """Create an instance of the default event loop for each test case.""" @@ -54,6 +83,7 @@ def val_params(test_cool_seq_tool, test_gene_normalizer): test_cool_seq_tool.transcript_mappings, test_cool_seq_tool.uta_db, test_gene_normalizer, + test_cool_seq_tool.liftover, ] @@ -113,6 +143,7 @@ def braf_600loc(): "type": "SequenceReference", "refgetAccession": "SQ.cQvw4UsHHRRlogxbWCB8W-mKD4AraM9y", }, + "sequence": "V", "type": "SequenceLocation", } @@ -142,6 +173,7 @@ def vhl_reference_agree(): "type": "SequenceReference", "refgetAccession": "SQ.z-Oa0pZkJ6GHJHOYM7h5mY_umc0SJzTu", }, + "sequence": "P", "type": "SequenceLocation", }, "state": {"sequence": "P", "type": "LiteralSequenceExpression"}, @@ -182,6 +214,7 @@ def protein_deletion_np_range(): "type": "SequenceReference", "refgetAccession": "SQ.AF1UFydIo02-bMplonKSfxlWY2q6ze3m", }, + "sequence": "LRENT", "type": "SequenceLocation", }, "state": { @@ -206,6 +239,7 @@ def braf_v600e_genomic_sub(): "type": "SequenceReference", "refgetAccession": "SQ.F-LrLMe1SRpfUZHkQmvkVKFEGaoDeHul", }, + "sequence": "A", "type": "SequenceLocation", }, "state": 
{"sequence": "T", "type": "LiteralSequenceExpression"}, @@ -225,6 +259,7 @@ def genomic_dup1_seq_loc_normalized(): "type": "SequenceReference", "refgetAccession": "SQ.Zu7h9AggXxhTaGVsy7h_EZSChSZGcmgX", }, + "sequence": "GG", "start": 49531260, "end": 49531262, "type": "SequenceLocation", @@ -246,6 +281,7 @@ def genomic_dup1_seq_loc_not_normalized(): }, "start": 49531261, "end": 49531262, + "sequence": "G", "type": "SequenceLocation", } @@ -274,6 +310,7 @@ def genomic_dup2_seq_loc_normalized(): }, "start": 33211289, "end": 33211293, + "sequence": "TCTA", "type": "SequenceLocation", } @@ -355,6 +392,7 @@ def genomic_del1_seq_loc(): }, "start": 10149810, "end": 10149811, + "sequence": "T", "type": "SequenceLocation", } @@ -394,6 +432,7 @@ def genomic_del2_seq_loc(): "type": "SequenceReference", "refgetAccession": "SQ.Zu7h9AggXxhTaGVsy7h_EZSChSZGcmgX", }, + "sequence": "ATGTTGACGGACAGCCTAT", "start": 10146594, "end": 10146613, "type": "SequenceLocation", @@ -479,6 +518,7 @@ def grch38_genomic_insertion_seq_loc(): "type": "SequenceReference", "refgetAccession": "SQ.dLZ15tNO1Ur0IcGjwc3Sdi_0A6Yf4zm7", }, + "sequence": "TACGTGATGGCT", "type": "SequenceLocation", } @@ -572,7 +612,7 @@ def assertion_checks(normalize_response, test_variation, check_vrs_id=False): if not check_vrs_id: _vrs_id_and_digest_existence_checks(actual) - expected = test_variation.copy().model_dump(exclude_none=True) + expected = test_variation.model_copy().model_dump(exclude_none=True) if not check_vrs_id: _delete_id_and_digest(expected) _delete_id_and_digest(expected["location"]) @@ -594,7 +634,7 @@ def cnv_assertion_checks(resp, test_fixture, check_vrs_id=False): if not check_vrs_id: _vrs_id_and_digest_existence_checks(actual, prefix=prefix) - expected = test_fixture.copy().model_dump(exclude_none=True) + expected = test_fixture.model_copy().model_dump(exclude_none=True) if not check_vrs_id: _delete_id_and_digest(expected) _delete_id_and_digest(expected["location"]) diff --git 
a/tests/test_classifier.py b/tests/test_classifier.py index e0ca546f..46c9acd5 100644 --- a/tests/test_classifier.py +++ b/tests/test_classifier.py @@ -1,4 +1,5 @@ """Module for testing classifiers""" + from pathlib import Path import pytest diff --git a/tests/test_gnomad_vcf_to_protein.py b/tests/test_gnomad_vcf_to_protein.py index aadf9096..0ff42edf 100644 --- a/tests/test_gnomad_vcf_to_protein.py +++ b/tests/test_gnomad_vcf_to_protein.py @@ -1,4 +1,5 @@ """Module for testing gnomad_vcf_to_protein works correctly""" + import pytest from ga4gh.vrs import models @@ -22,6 +23,7 @@ def mmel1_l30m(): "type": "SequenceReference", "refgetAccession": "SQ.iQ8F_pnsiQOLohiV2qh3OWRZiftUt8jZ", }, + "sequence": "L", "type": "SequenceLocation", }, "state": {"sequence": "M", "type": "LiteralSequenceExpression"}, @@ -41,6 +43,7 @@ def cdk11a_e314del(): "type": "SequenceReference", "refgetAccession": "SQ.N728VSRRMHJ1SrhJgKqJOCaa3l5Z4sqm", }, + "sequence": "EEEEEEEEEEEEE", "type": "SequenceLocation", }, "state": { @@ -65,6 +68,7 @@ def protein_insertion2(): "type": "SequenceReference", "refgetAccession": "SQ.qgIh8--4F6IpxRwX_lVtD2BhepH5B5Ef", }, + "Sequence": "Q", "type": "SequenceLocation", }, "state": {"sequence": "R", "type": "LiteralSequenceExpression"}, @@ -83,6 +87,7 @@ def atad3a_loc(): "type": "SequenceReference", "refgetAccession": "SQ.MHPOY_7fv8V9SktyvaTxulVFSK6XCxM8", }, + "sequence": "I", "type": "SequenceLocation", } @@ -155,6 +160,7 @@ def kras_g12d(): }, "start": 11, "end": 12, + "sequence": "G", }, "state": {"type": "LiteralSequenceExpression", "sequence": "D"}, } @@ -176,6 +182,7 @@ def multi_nuc_sub_pos(): }, "start": 242, "end": 244, + "sequence": "LP", }, "state": {"type": "LiteralSequenceExpression", "sequence": "PS"}, } @@ -197,6 +204,7 @@ def multi_nuc_sub_neg(): }, "start": 235, "end": 236, + "sequence": "S", }, "state": {"type": "LiteralSequenceExpression", "sequence": "G"}, } @@ -216,6 +224,7 @@ def delins_pos(): }, "start": 746, "end": 752, + "sequence": 
"LREATS", }, "state": {"type": "LiteralSequenceExpression", "sequence": "Q"}, } @@ -235,6 +244,7 @@ def delins_neg(): }, "start": 239, "end": 259, + "sequence": "PRLLFPTNSSSHLVALQGQP", }, "state": {"type": "LiteralSequenceExpression", "sequence": "TLTA"}, } @@ -265,7 +275,6 @@ async def test_substitution( resp = await test_handler.gnomad_vcf_to_protein("7-140753336-A-T") assertion_checks(resp, braf_v600e, check_vrs_id=True) assert resp.gene_context - assert resp.vrs_ref_allele_seq == "V" assert resp.warnings == [] # Reading Frame 3, Negative Strand @@ -311,7 +320,6 @@ async def test_reference_agree(test_handler, vhl_reference_agree): # https://www.ncbi.nlm.nih.gov/clinvar/variation/379039/?new_evidence=true resp = await test_handler.gnomad_vcf_to_protein("3-10142030-C-T") assertion_checks(resp, vhl_reference_agree) - assert resp.vrs_ref_allele_seq == "P" assert resp.gene_context assert resp.warnings == [] @@ -322,14 +330,14 @@ async def test_insertion(test_handler, protein_insertion, protein_insertion2): # positive strand (CA645561585) resp = await test_handler.gnomad_vcf_to_protein("7-55181319-C-CGGGTTA") assertion_checks(resp, protein_insertion) - assert resp.vrs_ref_allele_seq is None + assert resp.variation.location.sequence is None assert resp.gene_context assert resp.warnings == [] # negative strand (CA860540) resp = await test_handler.gnomad_vcf_to_protein("1-53327836-A-AGCC") assertion_checks(resp, protein_insertion2) - assert resp.vrs_ref_allele_seq is None + assert resp.variation.location.sequence is None assert resp.gene_context assert resp.warnings == [] @@ -339,7 +347,6 @@ async def test_deletion(test_handler, protein_deletion_np_range, cdk11a_e314del) """Test that deletion queries return correct response""" resp = await test_handler.gnomad_vcf_to_protein("17-39723966-TTGAGGGAAAACACAT-T") assertion_checks(resp, protein_deletion_np_range) - assert resp.vrs_ref_allele_seq == "LRENT" assert resp.gene_context assert resp.warnings == [] @@ -355,7 +362,6 @@ 
async def test_delins(test_handler, delins_pos, delins_neg): # CA645561524, Positive Strand resp = await test_handler.gnomad_vcf_to_protein("7-55174776-TTAAGAGAAGCAACATCT-CAA") assertion_checks(resp, delins_pos) - assert resp.vrs_ref_allele_seq == "LREATS" assert resp.gene_context # ClinVar ID 1217291, Negative Strand @@ -363,7 +369,6 @@ async def test_delins(test_handler, delins_pos, delins_neg): "X-153870419-GCTGCCCCTGCAAGGCCACCAGGTGGCTGCTGGAGTTGGTGGGGAAGAGCAGGCGCGG-CTGTCAATGT" ) assertion_checks(resp, delins_neg) - assert resp.vrs_ref_allele_seq == "PRLLFPTNSSSHLVALQGQP" assert resp.gene_context # CA16602420. Example where protein gene not found, but cDNA gene found @@ -378,7 +383,6 @@ async def test_invalid(test_handler): """Test that invalid queries return correct response""" resp = await test_handler.gnomad_vcf_to_protein("BRAF V600E") assert resp.variation is None - assert resp.vrs_ref_allele_seq is None assert resp.gene_context is None assert resp.warnings == [ "BRAF V600E is not a gnomAD VCF-like query (`chr-pos-ref-alt`)" @@ -386,12 +390,10 @@ async def test_invalid(test_handler): resp = await test_handler.gnomad_vcf_to_protein("7-140753336-T-G") assert resp.variation is None - assert resp.vrs_ref_allele_seq is None assert resp.gene_context is None assert set(resp.warnings) == {"Unable to get cDNA and protein representation"} resp = await test_handler.gnomad_vcf_to_protein("20-2-TC-TG") assert resp.variation is None - assert resp.vrs_ref_allele_seq is None assert resp.gene_context is None assert resp.warnings == ["20-2-TC-TG is not a valid gnomad vcf query"] diff --git a/tests/test_hgvs_dup_del_mode.py b/tests/test_hgvs_dup_del_mode.py index 14a48927..b018ec6e 100644 --- a/tests/test_hgvs_dup_del_mode.py +++ b/tests/test_hgvs_dup_del_mode.py @@ -1,4 +1,5 @@ """Module for testing HGVS Dup Del mode.""" + import pytest from ga4gh.vrs import models @@ -55,6 +56,7 @@ def genomic_dup1_free_text_seq_loc_normalized(): }, "start": 1032, "end": 1034, + "sequence": 
"GG", "type": "SequenceLocation", } @@ -69,6 +71,7 @@ def genomic_dup1_free_text_seq_loc_not_normalized(): }, "start": 1033, "end": 1034, + "sequence": "G", "type": "SequenceLocation", } @@ -137,6 +140,7 @@ def seq_loc_gt_100_bp(): }, "start": 33211289, "end": 33211490, + "sequence": "TCTACTTCTTCCCACCAAAGCATTTTGAAAAGTGTATATCAAGGCAGCGATAAAAAAAACCTGGTAAAAGTTCTTCAAACTTTATTGCTCCAGTAGGCTTAAAAACAATGAGAAACCAACAAACTTCAGCAGCTTTAAAAAAAGTAACACTTCAGTTTTTCCTATTCGTTTTTCTCCGAAGGTAATTGCCTCCCAGATCTG", "type": "SequenceLocation", } @@ -166,6 +170,7 @@ def genomic_dup2_free_text_seq_loc(): }, "start": 256, "end": 260, + "sequence": "TAGA", "type": "SequenceLocation", } @@ -398,6 +403,7 @@ def genomic_del1_free_text_seq_loc(): }, "start": 557, "end": 558, + "sequence": "T", "type": "SequenceLocation", } @@ -512,6 +518,7 @@ def genomic_del2_free_text_seq_loc(): }, "start": 491, "end": 510, + "sequence": "ATGTTGACGGACAGCCTAT", "type": "SequenceLocation", } diff --git a/tests/test_normalize.py b/tests/test_normalize.py index 107bad6c..4e0f65a9 100644 --- a/tests/test_normalize.py +++ b/tests/test_normalize.py @@ -1,4 +1,5 @@ """Module for testing the normalize endpoint.""" + from datetime import datetime import pytest @@ -27,6 +28,7 @@ def dis3_p63a(): "type": "SequenceReference", "refgetAccession": "SQ.mlWsxfPKINN3o300stAI8oqN5U7P6kEu", }, + "sequence": "P", "type": "SequenceLocation", }, "state": {"sequence": "A", "type": "LiteralSequenceExpression"}, @@ -65,6 +67,7 @@ def vhl(): "type": "SequenceReference", "refgetAccession": "SQ.z-Oa0pZkJ6GHJHOYM7h5mY_umc0SJzTu", }, + "sequence": "Y", "type": "SequenceLocation", }, "state": {"sequence": "*", "type": "LiteralSequenceExpression"}, @@ -84,6 +87,7 @@ def nm_004448_cdna_delins(): "type": "SequenceReference", "refgetAccession": "SQ.y9b4LVMiCXpZxOg9Xt1NwRtssA03MwWM", }, + "sequence": "GG", "type": "SequenceLocation", }, "state": {"sequence": "CT", "type": "LiteralSequenceExpression"}, @@ -103,6 +107,7 @@ def nm_000551(): "type": 
"SequenceReference", "refgetAccession": "SQ.xBKOKptLLDr-k4hTyCetvARn16pDS_rW", }, + "sequence": "C", "type": "SequenceLocation", }, "state": {"sequence": "AA", "type": "LiteralSequenceExpression"}, @@ -121,6 +126,7 @@ def braf_cdna_seq_loc(): "type": "SequenceReference", "refgetAccession": "SQ.aKMPEJgmlZXt_F6gRY5cUG3THH2n-GUa", }, + "sequence": "T", "type": "SequenceLocation", } @@ -158,6 +164,7 @@ def protein_delins(): "type": "SequenceReference", "refgetAccession": "SQ.vyo55F6mA6n2LgN4cagcdRzOuh38V4mE", }, + "sequence": "LREAT", "type": "SequenceLocation", }, "state": {"sequence": "P", "type": "LiteralSequenceExpression"}, @@ -179,6 +186,7 @@ def cdna_deletion(): "type": "SequenceReference", "refgetAccession": "SQ.y9b4LVMiCXpZxOg9Xt1NwRtssA03MwWM", }, + "sequence": "TTGAGGGAAAACACAT", "type": "SequenceLocation", }, "state": { @@ -205,6 +213,7 @@ def genomic_deletion(): "type": "SequenceReference", "refgetAccession": "SQ.Zu7h9AggXxhTaGVsy7h_EZSChSZGcmgX", }, + "sequence": "CTCT", "type": "SequenceLocation", }, "state": { @@ -248,6 +257,7 @@ def genomic_insertion(): "type": "SequenceReference", "refgetAccession": "SQ.y9b4LVMiCXpZxOg9Xt1NwRtssA03MwWM", }, + "sequence": "TACGTGATGGCT", "type": "SequenceLocation", }, "state": { @@ -272,6 +282,7 @@ def genomic_substitution(): "type": "SequenceReference", "refgetAccession": "SQ.d_QsP29RWJi6bac7GOC9cJ9AO7s_HUMN", }, + "sequence": "C", "type": "SequenceLocation", }, "state": {"sequence": "T", "type": "LiteralSequenceExpression"}, @@ -291,6 +302,7 @@ def gnomad_vcf_genomic_sub_mnv(): "type": "SequenceReference", "refgetAccession": "SQ.aUiQCzCPZ2d0csHbMSbh2NzInhonSXwI", }, + "sequence": "G", "type": "SequenceLocation", }, "state": {"sequence": "A", "type": "LiteralSequenceExpression"}, @@ -310,6 +322,7 @@ def genomic_sub_grch38(): "type": "SequenceReference", "refgetAccession": "SQ.F-LrLMe1SRpfUZHkQmvkVKFEGaoDeHul", }, + "sequence": "C", "type": "SequenceLocation", }, "state": {"sequence": "T", "type": 
"LiteralSequenceExpression"}, @@ -329,6 +342,7 @@ def grch38_braf_genom_reference_agree(): "type": "SequenceReference", "refgetAccession": "SQ.F-LrLMe1SRpfUZHkQmvkVKFEGaoDeHul", }, + "sequence": "A", "type": "SequenceLocation", }, "state": {"sequence": "A", "type": "LiteralSequenceExpression"}, @@ -348,6 +362,7 @@ def grch38_genomic_delins1(): "type": "SequenceReference", "refgetAccession": "SQ.F-LrLMe1SRpfUZHkQmvkVKFEGaoDeHul", }, + "sequence": "CA", "type": "SequenceLocation", }, "state": {"sequence": "AT", "type": "LiteralSequenceExpression"}, @@ -367,6 +382,7 @@ def grch38_genomic_delins2(): "type": "SequenceReference", "refgetAccession": "SQ.Zu7h9AggXxhTaGVsy7h_EZSChSZGcmgX", }, + "sequence": "C", "type": "SequenceLocation", }, "state": {"sequence": "AA", "type": "LiteralSequenceExpression"}, @@ -386,6 +402,7 @@ def genomic_delins_gene(): "type": "SequenceReference", "refgetAccession": "SQ.aKMPEJgmlZXt_F6gRY5cUG3THH2n-GUa", }, + "sequence": "TG", "type": "SequenceLocation", }, "state": {"sequence": "AT", "type": "LiteralSequenceExpression"}, @@ -409,6 +426,7 @@ def gnomad_vcf_genomic_delins1(): "type": "SequenceReference", "refgetAccession": "SQ.Zu7h9AggXxhTaGVsy7h_EZSChSZGcmgX", }, + "sequence": "AAAAGCTTTA", "type": "SequenceLocation", }, "state": {"sequence": "GAGGCTTT", "type": "LiteralSequenceExpression"}, @@ -428,6 +446,7 @@ def gnomad_vcf_genomic_delins2(): "type": "SequenceReference", "refgetAccession": "SQ.yC_0RBj3fgBlvgyAuycbzdubtLxq-rE0", }, + "sequence": "AG", "type": "SequenceLocation", }, "state": {"sequence": "TGAGTTT", "type": "LiteralSequenceExpression"}, @@ -447,6 +466,7 @@ def gnomad_vcf_genomic_delins3(): "type": "SequenceReference", "refgetAccession": "SQ.w0WZEvgJF0zf_P4yyTzjjv9oW1z61HHP", }, + "sequence": "GG", "type": "SequenceLocation", }, "state": { @@ -469,6 +489,7 @@ def gnomad_vcf_genomic_delins4(): "type": "SequenceReference", "refgetAccession": "SQ.Ya6Rs7DHhDeg7YaOSg1EoNi3U_nQ9SvO", }, + "sequence": "CCC", "type": 
"SequenceLocation", }, "state": { @@ -495,6 +516,7 @@ def gnomad_vcf_genomic_delins5(): }, "start": 7675139, "end": 7675141, + "sequence": "GG", }, "state": { "type": "ReferenceLengthExpression", @@ -507,7 +529,7 @@ def gnomad_vcf_genomic_delins5(): @pytest.mark.asyncio() -async def test_protein_substitution(test_handler, braf_v600e, dis3_p63a, tp53_g262c): +async def test_protein_substitution(test_handler, braf_v600e, dis3_p63a): """Test that protein substitutions normalize correctly.""" resp = await test_handler.normalize(" BRAF V600E ") assertion_checks(resp, braf_v600e, check_vrs_id=True) @@ -928,31 +950,28 @@ async def test_no_matches(test_handler): async def test_service_meta(): """Test that service meta info populates correctly.""" response = await normalize_get_response("BRAF v600e", "default") - service_meta = response["service_meta_"] - assert service_meta["name"] == "variation-normalizer" - assert service_meta["version"] - assert isinstance(service_meta["response_datetime"], datetime) + service_meta = response.service_meta_ + assert service_meta.name == "variation-normalizer" + assert service_meta.version + assert isinstance(service_meta.response_datetime, datetime) assert ( - service_meta["url"] - == "https://github.com/cancervariants/variation-normalization" + service_meta.url == "https://github.com/cancervariants/variation-normalization" ) response = await normalize_get_response("this-wont-normalize", "default") - service_meta = response["service_meta_"] - assert service_meta["name"] == "variation-normalizer" - assert service_meta["version"] - assert isinstance(service_meta["response_datetime"], datetime) + service_meta = response.service_meta_ + assert service_meta.name == "variation-normalizer" + assert service_meta.version + assert isinstance(service_meta.response_datetime, datetime) assert ( - service_meta["url"] - == "https://github.com/cancervariants/variation-normalization" + service_meta.url == 
"https://github.com/cancervariants/variation-normalization" ) response = await to_vrs_get_response("this-wont-normalize") - service_meta = response["service_meta_"] - assert service_meta["name"] == "variation-normalizer" - assert service_meta["version"] - assert isinstance(service_meta["response_datetime"], datetime) + service_meta = response.service_meta_ + assert service_meta.name == "variation-normalizer" + assert service_meta.version + assert isinstance(service_meta.response_datetime, datetime) assert ( - service_meta["url"] - == "https://github.com/cancervariants/variation-normalization" + service_meta.url == "https://github.com/cancervariants/variation-normalization" ) diff --git a/tests/test_tokenizer.py b/tests/test_tokenizer.py index bf3132e8..234dbb3a 100644 --- a/tests/test_tokenizer.py +++ b/tests/test_tokenizer.py @@ -1,4 +1,5 @@ """Module for testing tokenizers""" + from pathlib import Path import pytest diff --git a/tests/test_translator.py b/tests/test_translator.py index 1d6d0dcc..6beb743b 100644 --- a/tests/test_translator.py +++ b/tests/test_translator.py @@ -1,4 +1,5 @@ """Module for testing translators""" + from pathlib import Path import pytest diff --git a/tests/test_validator.py b/tests/test_validator.py index ee07e501..e2c227e9 100644 --- a/tests/test_validator.py +++ b/tests/test_validator.py @@ -1,4 +1,5 @@ """Module for testing validators""" + from pathlib import Path import pytest diff --git a/tests/to_copy_number_variation/test_amplification_to_cx_var.py b/tests/to_copy_number_variation/test_amplification_to_cx_var.py index e9e06e2b..e428b289 100644 --- a/tests/to_copy_number_variation/test_amplification_to_cx_var.py +++ b/tests/to_copy_number_variation/test_amplification_to_cx_var.py @@ -1,4 +1,5 @@ """Module for testing Amplification to Copy Number Change""" + import pytest from ga4gh.vrs import models from tests.conftest import cnv_assertion_checks diff --git a/tests/to_copy_number_variation/test_hgvs_to_copy_number.py 
b/tests/to_copy_number_variation/test_hgvs_to_copy_number.py index 309a491a..4e0bbd90 100644 --- a/tests/to_copy_number_variation/test_hgvs_to_copy_number.py +++ b/tests/to_copy_number_variation/test_hgvs_to_copy_number.py @@ -29,6 +29,7 @@ def genomic_dup1_37_loc(): }, "start": 49568694, "end": 49568695, + "sequence": "G", "type": "SequenceLocation", } @@ -76,6 +77,7 @@ def genomic_dup2_37_loc(): }, "start": 33229406, "end": 33229410, + "sequence": "TCTA", "type": "SequenceLocation", } @@ -344,6 +346,7 @@ def genomic_del1_37_loc(): }, "start": 10191494, "end": 10191495, + "sequence": "T", "type": "SequenceLocation", } @@ -391,6 +394,7 @@ def genomic_del2_37_loc(): }, "start": 10188278, "end": 10188297, + "sequence": "ATGTTGACGGACAGCCTAT", "type": "SequenceLocation", } diff --git a/tests/to_copy_number_variation/test_parsed_to_copy_number.py b/tests/to_copy_number_variation/test_parsed_to_copy_number.py index b4091475..7ce36c29 100644 --- a/tests/to_copy_number_variation/test_parsed_to_copy_number.py +++ b/tests/to_copy_number_variation/test_parsed_to_copy_number.py @@ -1,4 +1,5 @@ """Test that parsed_to_copy_number works correctly""" + from copy import deepcopy import pytest