Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Set up proper error handling throughout the application #94

Merged
merged 1 commit into from
May 2, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions src/auto_acmg.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
"""Implementations of the PVS1 algorithm."""

import typer
from loguru import logger

from src.defs.autopvs1 import (
Expand Down Expand Up @@ -71,7 +70,7 @@ def resolve_variant(self) -> SeqVar | StrucVar | None:
)
logger.debug("Resolved structural variant: {}", strucvar)
return strucvar
except (InvalidPos, ParseError) as e:
except ParseError as e:
logger.error("Failed to resolve structural variant: {}", e)
return None
except Exception as e:
Expand Down
92 changes: 51 additions & 41 deletions src/defs/seqvar.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,18 +87,17 @@ def __eq__(self, other):
class SeqVarResolver:
"""The class to resolve sequence variants."""

def __init__(self):
    """Create the resolver; the body sets up no instance state."""
    pass

def _validate_seqvar(self, variant: SeqVar) -> SeqVar:
"""
Validate the sequence variant position.
"""Validate the sequence variant position.

Args:
variant (SeqVar): Sequence variant

:param variant: Sequence variant
:type variant: SeqVar
:return: Sequence variant
:rtype: SeqVar
:raises InvalidPos: If the position is invalid
Returns:
SeqVar: Sequence variant

Raises:
InvalidPos: If the position is invalid
"""
if variant.pos < 1:
raise InvalidPos(f"Invalid position: {variant.pos}")
Expand All @@ -114,22 +113,26 @@ def _validate_seqvar(self, variant: SeqVar) -> SeqVar:
return variant

def _normalize_chrom(self, value: str) -> str:
"""Normalize the chromosome name: replace 'chr' with '' and 'm' with 'mt'."""
"""Normalize the chromosome name.

Replaces 'chr' with '' and 'm' with 'mt'.
"""
return value.lower().replace("chr", "").replace("m", "mt").upper()

def _parse_separated_seqvar(
self, value: str, default_genome_release: GenomeRelease = GenomeRelease.GRCh38
) -> SeqVar:
"""
Parse a colon/hyphen separated sequence variant representation.

:param value: Sequence variant representation
:type value: str
:param default_genome_build: Default genome build
:type default_genome_build: GenomeRelease
:return: Sequence variant
:rtype: SeqVar
:raises ParseError: If the variant representation is invalid
"""Parse a colon/hyphen separated sequence variant representation.

Args:
value (str): Sequence variant representation
default_genome_release (GenomeRelease): Default genome release

Returns:
SeqVar: Sequence variant

Raises:
ParseError: If the variant representation is invalid
"""
match = REGEX_GNOMAD_VARIANT.match(value) or REGEX_RELAXED_SPDI.match(value)
if not match:
Expand All @@ -155,14 +158,16 @@ def _parse_separated_seqvar(
return self._validate_seqvar(variant)

def _parse_canonical_spdi_seqvar(self, value: str) -> SeqVar:
"""
Parse a canonical SPDI sequence variant representation.
"""Parse a canonical SPDI sequence variant representation.

Args:
value (str): Sequence variant representation

Returns:
SeqVar: Sequence variant

:param value: Sequence variant representation
:type value: str
:return: Sequence variant
:rtype: SeqVar
:raises ParseError: If the variant representation is invalid
Raises:
ParseError: If the variant representation is invalid
"""
match = REGEX_CANONICAL_SPDI.match(value)
if not match:
Expand Down Expand Up @@ -193,27 +198,34 @@ def _parse_canonical_spdi_seqvar(self, value: str) -> SeqVar:
return self._validate_seqvar(variant)

def resolve_seqvar(self, value: str, genome_release: GenomeRelease) -> SeqVar:
"""
Resolve a sequence variant. Supports gnomAD-style, SPDI and dbSNP representations.
"""Resolve a sequence variant.

Supports gnomAD-style, SPDI and dbSNP representations.
ClinVar IDs are not supported at the moment.

:param value: Sequence variant representation
:type value: str
:param genome_release: Genome release
:type genome_release: GenomeRelease
:return: Sequence variant
:rtype: SeqVar
:raises ParseError: If the variant representation is invalid
Args:
value (str): Sequence variant representation
genome_release (GenomeRelease): Genome release version

Returns:
SeqVar: Sequence variant

Raises:
ParseError: If the variant representation is invalid or cannot be resolved
"""
try:
return self._parse_separated_seqvar(value, default_genome_release=genome_release)
except ParseError:
pass
except InvalidPos as e:
raise ParseError(f"Invalid position: {e}")

try:
return self._parse_canonical_spdi_seqvar(value)
except ParseError:
pass
except InvalidPos as e:
raise ParseError(f"Invalid position: {e}")

try:
dotty_client = DottyClient()
Expand All @@ -232,7 +244,5 @@ def resolve_seqvar(self, value: str, genome_release: GenomeRelease) -> SeqVar:
)
else:
raise ParseError(f"Unable to resolve seqvar: {value}")
except ParseError:
pass

raise ParseError(f"Unable to resolve seqvar: {value}")
except Exception as e:
raise ParseError(f"Unable to resolve seqvar. The error was: {e}")
67 changes: 36 additions & 31 deletions src/defs/strucvar.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,18 +84,17 @@ def __eq__(self, other):
class StrucVarResolver:
"""The class to resolve structural variant representations."""

def __init__(self):
    """Create the resolver; the body sets up no instance state."""
    pass

def _validate_strucvar(self, variant: StrucVar) -> StrucVar:
"""
Validate the structural variant position.
"""Validate the structural variant position.

Args:
variant (StrucVar): Structural variant

Returns:
StrucVar: Validated structural variant

:param variant: Structural variant
:type variant: StrucVar
:return: Structural variant
:rtype: StrucVar
:raises InvalidPos: If the structural variant position is invalid
Raises:
InvalidPos: If the variant position is invalid
"""
if variant.start > variant.stop or variant.start < 1:
raise InvalidPos(f"Invalid positions: start={variant.start}, stop={variant.stop}")
Expand All @@ -111,22 +110,26 @@ def _validate_strucvar(self, variant: StrucVar) -> StrucVar:
return variant

def _normalize_chromosome(self, chrom: str) -> str:
"""Normalize the chromosome name."""
"""Normalize the chromosome name.

Replace 'chr' with an empty string and 'm' with 'MT'.
"""
return chrom.lower().replace("chr", "").replace("m", "mt").upper()

def _parse_separated_strucvar(
self, value: str, default_genome_release: GenomeRelease = GenomeRelease.GRCh38
) -> StrucVar:
"""
Parse a separated structural variant representation.

:param value: Structural variant representation
:type value: str
:param default_genome_release: Default genome release
:type default_genome_release: GenomeRelease
:return: Structural variant
:rtype: StrucVar
:raises ParseError: If the structural variant representation is invalid
"""Parse a separated structural variant representation.

Args:
value: Structural variant representation
default_genome_release: Default genome release

Returns:
StrucVar: Structural variant

Raises:
ParseError: If the structural variant representation is invalid
"""
match_colon = REGEX_CNV_COLON.match(value)
match_hyphen = REGEX_CNV_HYPHEN.match(value)
Expand All @@ -147,16 +150,18 @@ def _parse_separated_strucvar(
return self._validate_strucvar(variant)

def resolve_strucvar(self, value: str, genome_release: GenomeRelease) -> StrucVar:
"""
Resolve the structural variant representation.

:param value: Structural variant representation
:type value: str
:param genome_release: Genome release
:type genome_release: GenomeRelease
:return: Structural variant
:rtype: StrucVar
:raises ParseError: If the structural variant representation is invalid
"""Resolve the structural variant representation.

Args:
value: Structural variant representation
genome_release: Genome release

Returns:
StrucVar: Resolved structural variant

Raises:
ParseError: If the structural variant representation is invalid.
Captures the InvalidPos exception and raises a ParseError as well.
"""
try:
return self._parse_separated_strucvar(value, genome_release)
Expand Down
60 changes: 33 additions & 27 deletions src/pvs1/seqvar_pvs1.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,38 @@
class SeqVarPVS1Helper:
"""Helper methods for PVS1 criteria for sequence variants."""

@staticmethod
def _choose_hgvs_p(
    hgvs: str, seqvar_ts: TranscriptSeqvar, seqvar_transcripts: List[TranscriptSeqvar]
) -> str:
    """Pick the protein HGVS notation to report for a sequence variant.

    The protein notation of the main transcript is preferred. When it is
    missing or uninformative ('' or 'p.?'), the first entry of
    ``seqvar_transcripts`` with an informative notation is used instead. If no
    transcript qualifies, 'p.?' is reported. The result is always ``hgvs``,
    a colon, and the chosen protein notation.

    Note:
        Use this method only in SeqVarPVS1 initialization.

    Args:
        hgvs: The transcript HGVS notation.
        seqvar_ts: The sequence variant transcript.
        seqvar_transcripts: A list of all sequence variant transcripts.

    Returns:
        str: The most suitable protein HGVS notation.
    """
    placeholders = ["", "p.?"]
    logger.debug("Choosing the most suitable protein HGVS notation.")

    # Preferred source: the main transcript itself.
    main_notation = seqvar_ts.hgvs_p
    if main_notation and main_notation not in placeholders:
        logger.debug("Protein HGVS found in the main transcript {}.", hgvs)
        return hgvs + ":" + main_notation

    # Fallback: the first transcript that carries an informative notation.
    fallback = next(
        (ts for ts in seqvar_transcripts if ts.hgvs_p and ts.hgvs_p not in placeholders),
        None,
    )
    if fallback is None:
        return hgvs + ":p.?"

    logger.debug("Protein HGVS found in the transcript {}.", fallback.feature_id)
    return hgvs + ":" + fallback.hgvs_p

@staticmethod
def _get_pHGVS_termination(pHGVS: str) -> int:
"""Gets the termination position from a protein HGVS (p.HGVS) notation.
Expand Down Expand Up @@ -682,32 +714,6 @@ def __init__(self, seqvar: SeqVar):
self.prediction: PVS1Prediction = PVS1Prediction.NotPVS1
self.prediction_path: PVS1PredictionSeqVarPath = PVS1PredictionSeqVarPath.NotSet

@staticmethod
def choose_hgvs_p(
    hgvs: str, seqvar_ts: TranscriptSeqvar, seqvar_transcripts: List[TranscriptSeqvar]
) -> str:
    """Select the protein HGVS notation to report.

    The main transcript is checked first, then the remaining transcripts in
    order. A notation counts as usable when it is neither empty nor 'p.?'.
    When nothing usable is found, 'p.?' is reported.

    Args:
        hgvs: The transcript HGVS notation.
        seqvar_ts: The sequence variant transcript.
        seqvar_transcripts: A list of all sequence variant transcripts.

    Returns:
        str: The most suitable protein HGVS notation.
    """

    def usable(notation) -> bool:
        # Same rule for the main transcript and the fallbacks.
        return bool(notation) and notation not in ["", "p.?"]

    logger.debug("Choosing the most suitable protein HGVS notation.")

    # Main transcript wins whenever its notation is usable.
    if usable(seqvar_ts.hgvs_p):
        logger.debug("Protein HGVS found in the main transcript {}.", hgvs)
        return hgvs + ":" + seqvar_ts.hgvs_p

    # Otherwise take the first other transcript with a usable notation.
    for candidate in seqvar_transcripts:
        if not usable(candidate.hgvs_p):
            continue
        logger.debug("Protein HGVS found in the transcript {}.", candidate.feature_id)
        return hgvs + ":" + candidate.hgvs_p

    return hgvs + ":p.?"

def initialize(self):
"""Setup the PVS1 class.

Expand Down Expand Up @@ -737,7 +743,7 @@ def initialize(self):
# Set attributes
logger.debug("Setting up the attributes for the PVS1 class.")
self.HGVS = self._gene_transcript.id
self.pHGVS = self.choose_hgvs_p(self.HGVS, self._seqvar_transcript, self._all_seqvar_ts)
self.pHGVS = self._choose_hgvs_p(self.HGVS, self._seqvar_transcript, self._all_seqvar_ts)
self.tHGVS = self.HGVS + ":" + (self._seqvar_transcript.hgvs_t or "")
self.HGNC_id = self._seqvar_transcript.gene_id
self.transcript_tags = self._seqvar_transcript.feature_tag
Expand Down
34 changes: 33 additions & 1 deletion tests/pvs1/test_seqvar_pvs1.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,39 @@ def __init__(self, start_codon, stop_codon, cds_start, cds_end, exons):
self.exons = exons


# === SeqVarPVS1Helpers ===
# === SeqVarPVS1Helper ===


@pytest.mark.parametrize(
    "main_hgvs, main_hgvs_p, transcripts_data, expected_result",
    [
        # Main transcript has a valid protein HGVS
        ("NM_000001.1", "p.Gly100Ser", [], "NM_000001.1:p.Gly100Ser"),
        # Main transcript protein HGVS is empty, but another transcript has one
        ("NM_000001.1", "", [("NM_000002.1", "p.Arg200Gln")], "NM_000001.1:p.Arg200Gln"),
        # Main transcript protein HGVS is None and there are no other transcripts
        ("NM_000001.1", None, [], "NM_000001.1:p.?"),
        # Neither the main transcript nor the others have a valid protein HGVS
        ("NM_000001.1", "", [("NM_000002.1", ""), ("NM_000003.1", "p.?")], "NM_000001.1:p.?"),
        # Only placeholder notations everywhere, including the main transcript
        ("NM_000001.1", "p.?", [("NM_000002.1", "p.?"), ("NM_000003.1", "")], "NM_000001.1:p.?"),
        # Several transcripts have a valid protein HGVS: the first valid one is chosen
        (
            "NM_000001.1",
            "",
            [
                ("NM_000002.1", ""),
                ("NM_000003.1", "p.Lys300Thr"),
                ("NM_000004.1", "p.Ala400Val"),
            ],
            "NM_000001.1:p.Lys300Thr",
        ),
    ],
)
def test_choose_hgvs_p(main_hgvs, main_hgvs_p, transcripts_data, expected_result):
    """Check which protein HGVS notation ``_choose_hgvs_p`` selects.

    Each case supplies the main transcript's protein notation plus
    ``(feature_id, hgvs_p)`` pairs for the other transcripts, and the expected
    ``<transcript>:<protein>`` result.
    """
    # Mock the main and the other transcripts; only hgvs_p / feature_id are read.
    main_transcript = MagicMock(hgvs_p=main_hgvs_p)
    transcripts = [
        MagicMock(feature_id=feature_id, hgvs_p=hgvs_p) for feature_id, hgvs_p in transcripts_data
    ]

    # Invoke the method under test
    result = SeqVarPVS1Helper._choose_hgvs_p(main_hgvs, main_transcript, transcripts)  # type: ignore

    # Verify the result
    assert result == expected_result


# TODO: Check if the termination number is correct
Expand Down