Mikado will now crash if the scoring file exists and is malformed. Also, added a test for the junction loading.
lucventurini committed Mar 12, 2021
1 parent 8724ee6 commit 2955ea4
Showing 5 changed files with 145 additions and 29 deletions.
3 changes: 2 additions & 1 deletion CHANGELOG.md
@@ -13,7 +13,8 @@ Other changes:
in BED12 format as sources of valid junctions.
- Slightly increased the unit-test coverage for the locus classes, e.g. properly covering the `as_dict` and `load_dict`
methods. Minor bugfixes related to the introduction of these unit-tests.
- `Mikado.parsers.to_gff` has been renamed to `Mikado.parsers.parser_factory`.
- `Mikado.parsers.to_gff` has been renamed to `Mikado.parsers.parser_factory`.
- Mikado will now raise an informative error if the scoring configuration file is malformed.

# Version 2.1.1

12 changes: 5 additions & 7 deletions Mikado/configuration/configuration.py
@@ -97,7 +97,7 @@ def load_scoring(self, logger=None):
logger = create_null_logger("check_scoring")
if self.pick.scoring_file is None:
if self._loaded_scoring != self.pick.scoring_file:
logger.debug("Resetting the scoring to its previous value")
logger.warning(f"Resetting the scoring to its previous value ({self._loaded_scoring})")
self.pick.scoring_file = self._loaded_scoring
elif self._loaded_scoring != self.pick.scoring_file:
logger.debug("Overwriting the scoring self using '%s' as scoring file", self.pick.scoring_file)
@@ -137,13 +137,11 @@ def load_scoring(self, logger=None):
checked = ScoringFile.Schema().load(scoring)
checked.check(minimal_orf_length=self.pick.orf_loading.minimal_orf_length)
self._loaded_scoring = self.pick.scoring_file
# self = check_scoring(self)
# self = check_all_requirements(self)
except (InvalidConfiguration, ValidationError) as exc:
logger.debug("Invalid option: %s", option)
logger.warning(exc)
continue
# self.scoring = dataclasses.asdict(checked.scoring)
msg = f"The configuration file {option} is invalid:\n"
msg += str(exc)
logger.critical(msg)
raise InvalidConfiguration(msg)
self.scoring = checked
found = True
self.pick.scoring_file = option
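The hunk above changes the error handling in load_scoring from warn-and-continue to fail-fast. Below is a minimal, self-contained sketch of that pattern (illustrative only, not Mikado code: the InvalidConfiguration stand-in and the validate callable are assumptions), showing a validation failure being reported through the logger and then re-raised instead of being swallowed.

import logging

class InvalidConfiguration(ValueError):
    """Stand-in for Mikado's InvalidConfiguration exception (assumed name)."""

def load_scoring_candidate(option, raw_scoring, validate, logger=None):
    """Validate one candidate scoring file, aborting loudly on failure."""
    logger = logger or logging.getLogger("check_scoring")
    try:
        checked = validate(raw_scoring)  # e.g. a marshmallow Schema().load(...)
    except (InvalidConfiguration, ValueError) as exc:
        # Old behaviour: log a warning and try the next candidate.
        # New behaviour: report the offending file and stop immediately.
        msg = f"The configuration file {option} is invalid:\n{exc}"
        logger.critical(msg)
        raise InvalidConfiguration(msg) from exc
    return checked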
28 changes: 15 additions & 13 deletions Mikado/loci/superlocus.py
@@ -527,20 +527,18 @@ async def _load_introns(self):
"""

if len(self.introns) == 0:
if self.monoexonic is False:
raise ValueError("%s is multiexonic but has no introns defined!",
self.id)
assert self.monoexonic is True, f"{self.id} is multiexonic but has no introns defined!"
self.logger.debug("No introns for %s", self.id)
return

self.logger.debug("Querying the DB for introns, %d total", len(self.introns))
if not self.configuration.db_settings.db:
return # No data to load

ver_introns = dict(((junc.junction_start, junc.junction_end), junc.strand)
for junc in junction_baked(self.session).params(
chrom=self.chrom, junctionStart=self.start, junctionEnd=self.end
))
ver_introns = collections.defaultdict(set)
for junc in junction_baked(self.session).params(chrom=self.chrom,
junctionStart=self.start, junctionEnd=self.end):
ver_introns[(junc.junction_start, junc.junction_end)].add(junc.strand)

self.logger.debug("Found %d verifiable introns for %s",
len(ver_introns), self.id)
@@ -549,11 +547,15 @@ async def _load_introns(self):
self.logger.debug("Checking %s%s:%d-%d",
self.chrom, self.strand, intron[0], intron[1])
if (intron[0], intron[1]) in ver_introns:
self.logger.debug("Verified intron %s:%d-%d",
self.chrom, intron[0], intron[1])
self.locus_verified_introns.add((intron[0],
intron[1],
ver_introns[(intron[0], intron[1])]))
if self.stranded is False:
for strand in ver_introns[(intron[0], intron[1])]:
self.locus_verified_introns.add((intron[0],
intron[1],
strand))
elif self.strand in ver_introns[(intron[0], intron[1])]:
self.locus_verified_introns.add((intron[0],
intron[1],
self.strand))

async def get_sources(self):
if self.configuration.pick.output_format.report_all_external_metrics is True:
@@ -566,7 +568,7 @@ async def get_sources(self):
if param.startswith("external")})
sources.update({param for param in self.configuration.scoring.cds_requirements.parameters.keys()
if param.startswith("external")})
sources.update({param for param in self.configuration.scoring.cds_requirements.parameters.keys()
sources.update({param for param in self.configuration.scoring.as_requirements.parameters.keys()
if param.startswith("external")})
sources.update({param for param in self.configuration.scoring.scoring.keys()
if param.startswith("external")})
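The _load_introns hunk above replaces a plain coordinate-to-strand dict with a coordinate-to-set-of-strands mapping, so junctions verified on both strands are handled correctly for stranded and unstranded superloci. A small, self-contained sketch of that lookup logic follows (illustrative only, not Mikado code; coordinates mirror the new unit test):

import collections

# Strand-aware junctions as they might come back from the junctions table:
# the same coordinates can be verified on both strands.
db_junctions = [(47704670, 47762670, "+"), (47704670, 47762670, "-")]

# Coordinates -> set of strands, mirroring the defaultdict(set) built above.
ver_introns = collections.defaultdict(set)
for start, end, strand in db_junctions:
    ver_introns[(start, end)].add(strand)

def verified_introns(introns, strand, stranded):
    """Return (start, end, strand) tuples for introns verified in the DB."""
    result = set()
    for intron in introns:
        if intron not in ver_introns:
            continue
        if not stranded:
            # Unstranded locus: keep the junction on every strand it was seen on.
            result.update((intron[0], intron[1], s) for s in ver_introns[intron])
        elif strand in ver_introns[intron]:
            # Stranded locus: only keep the junction if it matches our own strand.
            result.add((intron[0], intron[1], strand))
    return result

print(verified_introns({(47704670, 47762670)}, "+", stranded=True))   # one strand
print(verified_introns({(47704670, 47762670)}, "+", stranded=False))  # both strands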
13 changes: 11 additions & 2 deletions Mikado/serializers/junction.py
@@ -99,10 +99,19 @@ def __init__(self, junction_start, junction_end, name, strand, score, chrom_id):
self.score = score

def __str__(self):
return "{chrom}\t{start}\t{end}".format(
return "{chrom}\t{start}\t{end}\t{strand}".format(
chrom=self.chrom,
start=self.start,
end=self.end
end=self.end,
strand=self.strand
)

def __repr__(self):
return "{chrom}\t{start}\t{end}\t{strand}".format(
chrom=self.chrom,
start=self.start,
end=self.end,
strand=self.strand
)

@hybrid_method
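The junction.py change above makes the printable form of a junction include the strand and gives __repr__ the same output as __str__. A tiny stand-in sketch (not the actual Mikado class) of the resulting four-column, BED-like record:

class JunctionStub:
    """Stand-in for Mikado's Junction class, string output only."""

    def __init__(self, chrom, start, end, strand):
        self.chrom, self.start, self.end, self.strand = chrom, start, end, strand

    def __str__(self):
        # chrom <tab> start <tab> end <tab> strand, matching the diff above
        return f"{self.chrom}\t{self.start}\t{self.end}\t{self.strand}"

    __repr__ = __str__

print(JunctionStub("15", 47704670, 47762670, "+"))
# prints: 15<TAB>47704670<TAB>47762670<TAB>+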
118 changes: 112 additions & 6 deletions Mikado/tests/test_external_async.py
@@ -1,15 +1,17 @@
import asyncio
from Mikado._transcripts.scoring_configuration import MinMaxScore
import itertools
from Mikado._transcripts.scoring_configuration import MinMaxScore, SizeFilter
from Mikado.configuration.configurator import load_and_validate_config
from Mikado.loci import Superlocus
from Mikado.serializers.external import External, ExternalSource
from Mikado.serializers.blast_serializer.query import Query
from Mikado.serializers.orf import Orf
from Mikado.serializers.junction import Chrom, Junction
from Mikado.transcripts import Transcript
from Mikado.utilities.dbutils import DBBASE
from Mikado.utilities.dbutils import DBBASE as db
import unittest
from sqlalchemy.orm import sessionmaker
from sqlalchemy import create_engine
import tempfile


class AsyncExternalTest(unittest.TestCase):
@@ -41,7 +43,6 @@ def test_get_external(self):
'use_raw': True, 'percentage': True})
transcript.attributes["tpm"] = 10

db = DBBASE
int_source = ExternalSource('int', 'int', 0)
float_source = ExternalSource('float', 'float', 0)
bool_source = ExternalSource('bool', 'bool', 0)
@@ -61,10 +62,8 @@ def test_get_external(self):
query = Query(transcript.id, transcript.cdna_length)
query2 = Query(transcript2.id, transcript2.cdna_length)

dbname = tempfile.NamedTemporaryFile(suffix=".db", delete=False)
engine = create_engine("sqlite:///:memory:")
db.metadata.create_all(engine)
print(engine.url, dbname.name)
SessionMaker = sessionmaker(bind=engine)
session = SessionMaker()
session.add_all([int_source, float_source, bool_source, raw_int_source, raw_float_source, raw_bool_source])
@@ -90,3 +89,110 @@ def test_get_external(self):
'raw_bool': (True, True)
}
})

sup.configuration.pick.output_format.report_all_external_metrics = False
external = asyncio.run(sup.get_external(qobj, [1]))
self.assertEqual(len(external), 0)
# These values are meaningless; they are just here to verify that we load *only* these metrics.
# We should *NOT* have 'float' as it is not present in any section.
sup.configuration.scoring.scoring["external.int"] = MinMaxScore(rescaling="max", filter=None)
sup.configuration.scoring.requirements.parameters["external.raw_float"] = SizeFilter(operator="gt",
value=100)
sup.configuration.scoring.cds_requirements.parameters["external.raw_int"] = SizeFilter(operator="lt",
value=1)
sup.configuration.scoring.as_requirements.parameters["external.raw_bool"] = SizeFilter(operator="lt",
value=1)
sup.configuration.scoring.not_fragmentary.parameters["external.bool"] = SizeFilter(operator="ne",
value=False)
external = asyncio.run(sup.get_external(qobj, [1]))
self.assertEqual(external, {
'ENST00000560636': {
'int': (10, False),
'raw_float': (8.0, True),
'bool': (False, False),
'raw_int': (8, True),
'raw_bool': (True, True)
}
})


class AsyncJunctionTest(unittest.TestCase):

def test_retrieval(self):
engine = create_engine("sqlite:///:memory:")
db.metadata.create_all(engine)
SessionMaker = sessionmaker(bind=engine)
session = SessionMaker()

transcript = Transcript(accept_undefined_multi=True)
transcript.chrom = "15"
transcript.source = "protein_coding"
transcript.start = 47631264
transcript.end = 48051999

exons = [(47631264, 47631416),
(47704590, 47704669),
(47762671, 47762742),
(47893062, 47893093),
(47895572, 47895655),
(48051942, 48051999)]

transcript.strand = "+"
transcript.add_exons(exons)
transcript.id = "ENST00000560636"
transcript.parent = "ENSG00000137872"
transcript2 = transcript.copy()
transcript2.id = "ENST00000560637"

chrom_one = Chrom("1", 10**8)
chrom_fifteen = Chrom("15", 5 * 10 ** 8)
session.add_all([chrom_one, chrom_fifteen])
session.commit()
# Junction(junction_start, junction_end, name, strand, score, chrom_id)
# This junction is on a different chrom
junction_chrom_one = Junction(47704669 + 1, 47762671 - 1, "chrom_one", "+", 10, chrom_one.chrom_id)
# This junction is too far away
outside_chrom_15 = Junction(47704669 - 10 ** 6 + 1, 47762671 - 10 ** 6 - 1, "chrom_15_outside", "+", 10,
chrom_fifteen.chrom_id)
# This junction is in the right place but wrong strand
wrong_strand_chrom_15 = Junction(47704669 + 1, 47762671 - 1, "chrom_15_wrong_strand", "-", 10,
chrom_fifteen.chrom_id)
# This one is correct
chrom_15_junction = Junction(47704669 + 1, 47762671 - 1, "chrom_15", "+", 10, chrom_fifteen.chrom_id)
session.add_all([junction_chrom_one, outside_chrom_15, wrong_strand_chrom_15, chrom_15_junction])
session.commit()

self.assertEqual(junction_chrom_one.chrom, "1")
for junc in [outside_chrom_15, wrong_strand_chrom_15, chrom_15_junction]:
self.assertEqual(junc.chrom, "15")

for strand, stranded in itertools.product(("+", "-", None), (True, False)):
transcript.unfinalize()
transcript.strand = strand
transcript.finalize()
sup = Superlocus(transcript, stranded=stranded)
self.assertTrue((chrom_15_junction.junction_start, chrom_15_junction.end) in
sup.introns, (chrom_15_junction, sup.introns))
sup.session = session
asyncio.run(sup._load_introns())
if stranded is True and strand is not None:
self.assertEqual(sup.locus_verified_introns, {(chrom_15_junction.junction_start,
chrom_15_junction.junction_end,
strand)},
(stranded, strand))
elif stranded is False:
self.assertEqual(sup.locus_verified_introns, {(chrom_15_junction.junction_start,
chrom_15_junction.junction_end,
chrom_15_junction.strand),
(wrong_strand_chrom_15.junction_start,
wrong_strand_chrom_15.junction_end,
wrong_strand_chrom_15.strand)},
(stranded, strand))
elif stranded is True and strand is None:
self.assertEqual(sup.locus_verified_introns, set())


class AsyncOrfLoading(unittest.TestCase):

def test_load_orfs(self):
""""""
