Skip to content

Commit

Permalink
Work on exceptions and tests (#94)
Browse files Browse the repository at this point in the history
  • Loading branch information
gromdimon authored May 2, 2024
1 parent 4b2303e commit 24fcc7c
Show file tree
Hide file tree
Showing 5 changed files with 154 additions and 102 deletions.
3 changes: 1 addition & 2 deletions src/auto_acmg.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
"""Implementations of the PVS1 algorithm."""

import typer
from loguru import logger

from src.defs.autopvs1 import (
Expand Down Expand Up @@ -71,7 +70,7 @@ def resolve_variant(self) -> SeqVar | StrucVar | None:
)
logger.debug("Resolved structural variant: {}", strucvar)
return strucvar
except (InvalidPos, ParseError) as e:
except ParseError as e:
logger.error("Failed to resolve structural variant: {}", e)
return None
except Exception as e:
Expand Down
92 changes: 51 additions & 41 deletions src/defs/seqvar.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,18 +87,17 @@ def __eq__(self, other):
class SeqVarResolver:
"""The class to resolve sequence variants."""

def __init__(self):
pass

def _validate_seqvar(self, variant: SeqVar) -> SeqVar:
"""
Validate the sequence variant position.
"""Validate the sequence variant position.
Args:
variant (SeqVar): Sequence variant
:param variant: Sequence variant
:type variant: SeqVar
:return: Sequence variant
:rtype: SeqVar
:raises InvalidPos: If the position is invalid
Returns:
SeqVar: Sequence variant
Raises:
InvalidPos: If the position is invalid
"""
if variant.pos < 1:
raise InvalidPos(f"Invalid position: {variant.pos}")
Expand All @@ -114,22 +113,26 @@ def _validate_seqvar(self, variant: SeqVar) -> SeqVar:
return variant

def _normalize_chrom(self, value: str) -> str:
"""Normalize the chromosome name: replace 'chr' with '' and 'm' with 'mt'."""
"""Normalize the chromosome name.
Replaces 'chr' with '' and 'm' with 'mt'.
"""
return value.lower().replace("chr", "").replace("m", "mt").upper()

def _parse_separated_seqvar(
self, value: str, default_genome_release: GenomeRelease = GenomeRelease.GRCh38
) -> SeqVar:
"""
Parse a colon/hyphen separated sequence variant representation.
:param value: Sequence variant representation
:type value: str
:param default_genome_build: Default genome build
:type default_genome_build: GenomeRelease
:return: Sequence variant
:rtype: SeqVar
:raises ParseError: If the variant representation is invalid
"""Parse a colon/hyphen separated sequence variant representation.
Args:
value (str): Sequence variant representation
default_genome_release (GenomeRelease): Default genome release
Returns:
SeqVar: Sequence variant
Raises:
ParseError: If the variant representation is invalid
"""
match = REGEX_GNOMAD_VARIANT.match(value) or REGEX_RELAXED_SPDI.match(value)
if not match:
Expand All @@ -155,14 +158,16 @@ def _parse_separated_seqvar(
return self._validate_seqvar(variant)

def _parse_canonical_spdi_seqvar(self, value: str) -> SeqVar:
"""
Parse a canonical SPDI sequence variant representation.
"""Parse a canonical SPDI sequence variant representation.
Args:
value (str): Sequence variant representation
Returns:
SeqVar: Sequence variant
:param value: Sequence variant representation
:type value: str
:return: Sequence variant
:rtype: SeqVar
:raises ParseError: If the variant representation is invalid
Raises:
ParseError: If the variant representation is invalid
"""
match = REGEX_CANONICAL_SPDI.match(value)
if not match:
Expand Down Expand Up @@ -193,27 +198,34 @@ def _parse_canonical_spdi_seqvar(self, value: str) -> SeqVar:
return self._validate_seqvar(variant)

def resolve_seqvar(self, value: str, genome_release: GenomeRelease) -> SeqVar:
"""
Resolve a sequence variant. Supports gnomAD-style, SPDI and dbSNP representations.
"""Resolve a sequence variant.
Supports gnomAD-style, SPDI and dbSNP representations.
ClinVar IDs are not supported at the moment.
:param value: Sequence variant representation
:type value: str
:param genome_release: Genome release
:type genome_release: GenomeRelease
:return: Sequence variant
:rtype: SeqVar
:raises ParseError: If the variant representation is invalid
Args:
value (str): Sequence variant representation
genome_release (GenomeRelease): Genome release version
Returns:
SeqVar: Sequence variant
Raises:
ParseError: If the variant representation is invalid or cannot be resolved
"""
try:
return self._parse_separated_seqvar(value, default_genome_release=genome_release)
except ParseError:
pass
except InvalidPos as e:
raise ParseError(f"Invalid position: {e}")

try:
return self._parse_canonical_spdi_seqvar(value)
except ParseError:
pass
except InvalidPos as e:
raise ParseError(f"Invalid position: {e}")

try:
dotty_client = DottyClient()
Expand All @@ -232,7 +244,5 @@ def resolve_seqvar(self, value: str, genome_release: GenomeRelease) -> SeqVar:
)
else:
raise ParseError(f"Unable to resolve seqvar: {value}")
except ParseError:
pass

raise ParseError(f"Unable to resolve seqvar: {value}")
except Exception as e:
raise ParseError(f"Unable to resolve seqvar. The error was: {e}")
67 changes: 36 additions & 31 deletions src/defs/strucvar.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,18 +84,17 @@ def __eq__(self, other):
class StrucVarResolver:
"""The class to resolve structural variant representations."""

def __init__(self):
pass

def _validate_strucvar(self, variant: StrucVar) -> StrucVar:
"""
Validate the structural variant position.
"""Validate the structural variant position.
Args:
variant (StrucVar): Structural variant
Returns:
StrucVar: Validated structural variant
:param variant: Structural variant
:type variant: StrucVar
:return: Structural variant
:rtype: StrucVar
:raises InvalidPos: If the structural variant position is invalid
Raises:
InvalidPos: If the variant position is invalid
"""
if variant.start > variant.stop or variant.start < 1:
raise InvalidPos(f"Invalid positions: start={variant.start}, stop={variant.stop}")
Expand All @@ -111,22 +110,26 @@ def _validate_strucvar(self, variant: StrucVar) -> StrucVar:
return variant

def _normalize_chromosome(self, chrom: str) -> str:
"""Normalize the chromosome name."""
"""Normalize the chromosome name.
Replace 'chr' with an empty string and 'm' with 'MT'.
"""
return chrom.lower().replace("chr", "").replace("m", "mt").upper()

def _parse_separated_strucvar(
self, value: str, default_genome_release: GenomeRelease = GenomeRelease.GRCh38
) -> StrucVar:
"""
Parse a separated structural variant representation.
:param value: Structural variant representation
:type value: str
:param default_genome_release: Default genome release
:type default_genome_release: GenomeRelease
:return: Structural variant
:rtype: StrucVar
:raises ParseError: If the structural variant representation is invalid
"""Parse a separated structural variant representation.
Args:
value: Structural variant representation
default_genome_release: Default genome release
Returns:
StrucVar: Structural variant
Raises:
ParseError: If the structural variant representation is invalid
"""
match_colon = REGEX_CNV_COLON.match(value)
match_hyphen = REGEX_CNV_HYPHEN.match(value)
Expand All @@ -147,16 +150,18 @@ def _parse_separated_strucvar(
return self._validate_strucvar(variant)

def resolve_strucvar(self, value: str, genome_release: GenomeRelease) -> StrucVar:
"""
Resolve the structural variant representation.
:param value: Structural variant representation
:type value: str
:param genome_release: Genome release
:type genome_release: GenomeRelease
:return: Structural variant
:rtype: StrucVar
:raises ParseError: If the structural variant representation is invalid
"""Resolve the structural variant representation.
Args:
value: Structural variant representation
genome_release: Genome release
Returns:
StrucVar: Resolved structural variant
Raises:
ParseError: If the structural variant representation is invalid.
Captures the InvalidPos exception and raises a ParseError as well.
"""
try:
return self._parse_separated_strucvar(value, genome_release)
Expand Down
60 changes: 33 additions & 27 deletions src/pvs1/seqvar_pvs1.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,38 @@
class SeqVarPVS1Helper:
"""Helper methods for PVS1 criteria for sequence variants."""

@staticmethod
def _choose_hgvs_p(
    hgvs: str, seqvar_ts: TranscriptSeqvar, seqvar_transcripts: List[TranscriptSeqvar]
) -> str:
    """Pick the protein HGVS notation to report for a sequence variant.

    The main transcript's protein change is preferred. When it is empty or
    the ``p.?`` placeholder, the first entry of ``seqvar_transcripts`` that
    carries a usable protein change is taken instead. If no transcript
    qualifies, ``p.?`` is reported. The result is always prefixed with
    ``hgvs`` and a colon.

    Note:
        Use this method only in SeqVarPVS1 initialization.

    Args:
        hgvs: The transcript HGVS notation used as prefix of the result.
        seqvar_ts: The sequence variant transcript (main transcript).
        seqvar_transcripts: A list of all sequence variant transcripts.

    Returns:
        str: ``"<hgvs>:<protein change>"``, or ``"<hgvs>:p.?"`` as fallback.
    """
    logger.debug("Choosing the most suitable protein HGVS notation.")
    uninformative = ("", "p.?")

    # Preferred source: the main transcript itself.
    main_protein = seqvar_ts.hgvs_p
    if main_protein and main_protein not in uninformative:
        logger.debug("Protein HGVS found in the main transcript {}.", hgvs)
        return ":".join((hgvs, main_protein))

    # Otherwise take the first transcript that has a usable protein change.
    fallback = next(
        (ts for ts in seqvar_transcripts if ts.hgvs_p and ts.hgvs_p not in uninformative),
        None,
    )
    if fallback is None:
        return hgvs + ":p.?"
    logger.debug("Protein HGVS found in the transcript {}.", fallback.feature_id)
    return ":".join((hgvs, fallback.hgvs_p))

@staticmethod
def _get_pHGVS_termination(pHGVS: str) -> int:
"""Gets the termination position from a protein HGVS (p.HGVS) notation.
Expand Down Expand Up @@ -682,32 +714,6 @@ def __init__(self, seqvar: SeqVar):
self.prediction: PVS1Prediction = PVS1Prediction.NotPVS1
self.prediction_path: PVS1PredictionSeqVarPath = PVS1PredictionSeqVarPath.NotSet

@staticmethod
def choose_hgvs_p(
    hgvs: str, seqvar_ts: TranscriptSeqvar, seqvar_transcripts: List[TranscriptSeqvar]
) -> str:
    """Select the protein HGVS notation for the variant.

    Preference order: the main transcript's protein change, then the first
    transcript in ``seqvar_transcripts`` whose protein change is neither
    empty nor ``p.?``, then the ``p.?`` placeholder.

    Args:
        hgvs: The transcript HGVS notation used as prefix of the result.
        seqvar_ts: The sequence variant transcript (main transcript).
        seqvar_transcripts: A list of all sequence variant transcripts.

    Returns:
        str: The chosen protein HGVS notation, prefixed with ``hgvs`` and ``:``.
    """
    logger.debug("Choosing the most suitable protein HGVS notation.")
    prefix = hgvs + ":"
    placeholders = ["", "p.?"]

    # The main transcript wins whenever it has a real protein change.
    if seqvar_ts.hgvs_p and seqvar_ts.hgvs_p not in placeholders:
        logger.debug("Protein HGVS found in the main transcript {}.", hgvs)
        return prefix + seqvar_ts.hgvs_p

    # Scan the remaining transcripts in order and stop at the first usable one.
    for candidate in seqvar_transcripts:
        if not candidate.hgvs_p or candidate.hgvs_p in placeholders:
            continue
        logger.debug("Protein HGVS found in the transcript {}.", candidate.feature_id)
        return prefix + candidate.hgvs_p

    return prefix + "p.?"

def initialize(self):
"""Setup the PVS1 class.
Expand Down Expand Up @@ -737,7 +743,7 @@ def initialize(self):
# Set attributes
logger.debug("Setting up the attributes for the PVS1 class.")
self.HGVS = self._gene_transcript.id
self.pHGVS = self.choose_hgvs_p(self.HGVS, self._seqvar_transcript, self._all_seqvar_ts)
self.pHGVS = self._choose_hgvs_p(self.HGVS, self._seqvar_transcript, self._all_seqvar_ts)
self.tHGVS = self.HGVS + ":" + (self._seqvar_transcript.hgvs_t or "")
self.HGNC_id = self._seqvar_transcript.gene_id
self.transcript_tags = self._seqvar_transcript.feature_tag
Expand Down
34 changes: 33 additions & 1 deletion tests/pvs1/test_seqvar_pvs1.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,39 @@ def __init__(self, start_codon, stop_codon, cds_start, cds_end, exons):
self.exons = exons


# === SeqVarPVS1Helpers ===
# === SeqVarPVS1Helper ===


@pytest.mark.parametrize(
    "main_hgvs, main_hgvs_p, transcripts_data, expected_result",
    [
        # Main transcript carries a real protein change; no other transcripts.
        ("NM_000001.1", "p.Gly100Ser", [], "NM_000001.1:p.Gly100Ser"),
        # Main transcript is empty; the only other transcript supplies the change.
        ("NM_000001.1", "", [("NM_000002.1", "p.Arg200Gln")], "NM_000001.1:p.Arg200Gln"),
        # Main transcript is empty and the others are empty / "p.?": fallback.
        ("NM_000001.1", "", [("NM_000002.1", ""), ("NM_000003.1", "p.?")], "NM_000001.1:p.?"),
        # Main transcript is "p.?" and the others are "p.?" / empty: fallback.
        ("NM_000001.1", "p.?", [("NM_000002.1", "p.?"), ("NM_000003.1", "")], "NM_000001.1:p.?"),
        # First other transcript is empty, so the second (valid) one is picked.
        (
            "NM_000001.1",
            "",
            [("NM_000002.1", ""), ("NM_000003.1", "p.Lys300Thr")],
            "NM_000001.1:p.Lys300Thr",
        ),
    ],
)
def test_choose_hgvs_p(main_hgvs, main_hgvs_p, transcripts_data, expected_result):
    """Check which protein HGVS notation ``_choose_hgvs_p`` selects."""
    # Stand-ins for the main transcript and for every additional transcript.
    primary = MagicMock(hgvs_p=main_hgvs_p)
    alternatives = []
    for feature_id, protein_change in transcripts_data:
        alternatives.append(MagicMock(feature_id=feature_id, hgvs_p=protein_change))

    # Call the helper under test.
    chosen = SeqVarPVS1Helper._choose_hgvs_p(main_hgvs, primary, alternatives)  # type: ignore

    # The selected notation must match the expectation for this case.
    assert chosen == expected_result


# TODO: Check if the termination number is correct
Expand Down

0 comments on commit 24fcc7c

Please sign in to comment.