Skip to content

Commit

Permalink
Increasing coverage of unit tests (EI-CoreBioinformatics#183)
Browse files Browse the repository at this point in the history
  • Loading branch information
lucventurini committed Aug 12, 2019
1 parent 87938d1 commit fa3b4a9
Show file tree
Hide file tree
Showing 5 changed files with 65 additions and 55 deletions.
14 changes: 7 additions & 7 deletions Mikado/configuration/configurator.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ def extend_with_default(validator_class, resolver=None, simple=False):
so that they also set the default values provided inside the schema
itself. Source:
https://python-jsonschema.readthedocs.org/en/latest/faq/?highlight=default
:param validator_class: the validator class to extend (e.g. Draft4Validator)
:param validator_class: the validator class to extend (e.g. Draft7Validator)
:param simple: boolean flag. If set to True, only required properties will be extended.
:type simple: bool
Expand Down Expand Up @@ -158,9 +158,9 @@ def check_scoring(json_conf):
if parameter in parameters_found:
double_parameters.append(parameter)

if not jsonschema.Draft4Validator(scoring_schema).is_valid(
if not jsonschema.Draft7Validator(scoring_schema).is_valid(
jdict[parameter]):
errors = [str(_) for _ in list(jsonschema.Draft4Validator(scoring_schema).iter_errors(
errors = [str(_) for _ in list(jsonschema.Draft7Validator(scoring_schema).iter_errors(
jdict[parameter]))]
raise InvalidJson("Invalid scoring for {}:\n{}".format(
parameter, "\n".join(errors)))
Expand Down Expand Up @@ -318,9 +318,9 @@ def check_requirements(json_conf, require_schema, index):
else:
parameters_not_found.append(key_name)
continue
if not jsonschema.Draft4Validator(require_schema["definitions"]["parameter"]).is_valid(
if not jsonschema.Draft7Validator(require_schema["definitions"]["parameter"]).is_valid(
json_conf[index]["parameters"][key]):
errors = list(jsonschema.Draft4Validator(require_schema).iter_errors(
errors = list(jsonschema.Draft7Validator(require_schema).iter_errors(
json_conf[index]["parameters"][key]
))
raise InvalidJson("Invalid parameter for {0} in {1}: \n{2}".format(
Expand All @@ -347,7 +347,7 @@ def check_requirements(json_conf, require_schema, index):
newexpr = json_conf[index]["expression"][:]
json_conf[index]["__expression"] = json_conf[index]["expression"][:]
else:
if not jsonschema.Draft4Validator(
if not jsonschema.Draft7Validator(
require_schema["definitions"]["expression"]).is_valid(
json_conf[index]["expression"]):
raise InvalidJson("Invalid expression field")
Expand Down Expand Up @@ -434,7 +434,7 @@ def create_validator(simple=False):
:type simple: bool
:return validator
:rtype: jsonschema.Draft4Validator
:rtype: jsonschema.Draft7Validator
"""

validator = extend_with_default(jsonschema.Draft7Validator,
Expand Down
56 changes: 15 additions & 41 deletions Mikado/loci/abstractlocus.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
import networkx
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
import numpy
from ..transcripts.clique_methods import find_cliques, find_communities, define_graph
from ..transcripts.clique_methods import find_communities, define_graph
from ..transcripts.transcript import Transcript
from ..configuration.configurator import to_json, check_json
from ..exceptions import NotInLocusError
Expand Down Expand Up @@ -110,9 +110,7 @@ def __init__(self,

self.scores_calculated = False
self.scores = dict()
self.__cds_introntree = IntervalTree()
self.__segmenttree = IntervalTree()
self.__cds_segmenttree = IntervalTree()
self.__regressor = None
self.session = None
self.metrics_calculated = False
Expand Down Expand Up @@ -387,20 +385,20 @@ def find_communities(self, graph: networkx.Graph) -> list:

return find_communities(graph, self.logger)

def find_cliques(self, graph: networkx.Graph) -> (networkx.Graph, list):
    """
    Compute the cliques of a graph through the ``find_cliques`` helper
    imported from the ``clique_methods`` module.

    This method is only a thin wrapper: it forwards ``graph`` and the logger
    of this instance to the helper and returns its result untouched.
    According to the original documentation, the helper implements the
    Bron-Kerbosch algorithm for maximal cliques, which is no longer meant to
    be called directly from outside this class.

    :param graph: the graph whose cliques have to be computed.
    :type graph: networkx.Graph

    :return: the value returned by the ``find_cliques`` helper.
        NOTE(review): the annotation promises a (graph, list) pair - confirm
        against the helper, which is not visible here.
    """

    # Inside the method body the bare name resolves to the imported helper,
    # not to this method, so there is no recursion.
    return find_cliques(graph, self.logger)

# def find_cliques(self, graph: networkx.Graph) -> (networkx.Graph, list):
# """
#
# :param graph: graph to which it is necessary to call the cliques for.
#
# Wrapper for the BronKerbosch algorithm, which returns the maximal cliques in the graph.
# It is the new interface for the BronKerbosch function, which is not called directly
# from outside this class any longer.
# The "inters" keyword provides the function used to determine
# whether two vertices are connected or not in the graph.
# """
#
# return find_cliques(graph, self.logger)
#
def choose_best(self, transcripts: dict) -> str:
"""
:param transcripts: the dictionary of transcripts of the instance
Expand Down Expand Up @@ -1561,37 +1559,13 @@ def segmenttree(self):

return self.__segmenttree

@property
def cds_segmenttree(self):
    """
    Return the cached segment tree built from the combined CDS exons and
    combined CDS introns of this instance.

    The cache is considered stale whenever the number of intervals it holds
    differs from the total number of combined CDS exons and introns; in that
    case it is rebuilt through ``_calculate_segment_tree`` before being
    returned.
    """

    cached_size = len(self.__cds_segmenttree)
    expected_size = len(self.combined_cds_exons) + len(self.combined_cds_introns)
    if cached_size == expected_size:
        # The cache already holds one entry per segment: reuse it.
        return self.__cds_segmenttree

    rebuilt = self._calculate_segment_tree(self.combined_cds_exons, self.combined_cds_introns)
    self.__cds_segmenttree = rebuilt
    return self.__cds_segmenttree

@staticmethod
def _calculate_segment_tree(exons, introns):
    """
    Build an interval tree out of exon and intron coordinates.

    Every item of ``exons`` is unpacked into an ``Interval`` tagged with the
    value "exon", every item of ``introns`` into an ``Interval`` tagged with
    the value "intron"; exons come first in the list handed to
    ``IntervalTree.from_intervals``.

    :param exons: iterable of coordinate tuples for the exons.
    :param introns: iterable of coordinate tuples for the introns.
    :return: the tree created by ``IntervalTree.from_intervals``.
    """

    exon_intervals = [Interval(*coords, value="exon") for coords in exons]
    intron_intervals = [Interval(*coords, value="intron") for coords in introns]
    return IntervalTree.from_intervals(exon_intervals + intron_intervals)

@property
def _cds_introntree(self):
    """
    Return the cached interval tree of the combined CDS introns.

    The tree is rebuilt whenever its size differs from the number of
    combined CDS introns. Each intron is stored with its end coordinate
    increased by one.

    :rtype: IntervalTree
    """

    if len(self.__cds_introntree) == len(self.combined_cds_introns):
        # Cache is in sync with the introns: nothing to rebuild.
        return self.__cds_introntree

    shifted = [(intron[0], intron[1] + 1) for intron in self.combined_cds_introns]
    self.__cds_introntree = IntervalTree.from_tuples(shifted)
    return self.__cds_introntree

@property
def longest_transcript(self):
    """
    Return the length of the longest transcript stored in ``self.transcripts``.

    Despite the name, the value returned is a length (the maximum of
    ``len(transcript)`` over the values of the dictionary), not a transcript
    object.

    :return: the maximum transcript length, or 0 when the instance currently
        holds no transcripts (e.g. after all of them have been removed).
    :rtype: int
    """

    # ``default=0`` keeps the property usable on an empty locus, where a bare
    # ``max`` would raise ValueError; the generator avoids a throwaway list.
    return max((len(transcript) for transcript in self.transcripts.values()), default=0)

@property
def regressor(self):
    """Return the object currently stored in the private ``__regressor`` attribute."""
    return self.__regressor
Expand Down
10 changes: 8 additions & 2 deletions Mikado/loci/locus.py
Original file line number Diff line number Diff line change
Expand Up @@ -183,7 +183,7 @@ def finalize_alternative_splicing(self):
else:
self.logger.debug("No transcripts with retained introns found.")

if self.perform_padding is True:
if self.perform_padding is True and len(self.transcripts) > 1:
self.logger.debug("Starting padding procedure for %s", self.id)
failed = self.__launch_padding()
if failed:
Expand Down Expand Up @@ -779,10 +779,13 @@ def pad_transcripts(self) -> set:
five_graph = self.define_graph(objects=self.transcripts, inters=self._share_extreme, three_prime=False)
three_graph = self.define_graph(objects=self.transcripts, inters=self._share_extreme, three_prime=True)

self.logger.warning("5' graph: %s", five_graph.edges)
self.logger.warning("3' graph: %s", three_graph.edges)
# TODO: Tie breaks!

__to_modify = self._find_communities_boundaries(five_graph, three_graph)

self.logger.warning("To modify: %s", __to_modify)
templates = set()

# Now we can do the proper modification
Expand Down Expand Up @@ -964,7 +967,7 @@ def _share_three_prime(self, first: Transcript, second: Transcript):
matched = second.segmenttree.find(first.exons[-1][0], first.exons[-1][1])
if matched[-1].value == "intron" or first.exons[-1][1] > matched[-1].end:
decision = False
reason = "{second.id} last exon ends within an intron of {first.id}".format(**locals())
reason = "{first.id} last exon ends within an intron of {second.id}".format(**locals())
else:
downstream = [_ for _ in second.find_downstream(first.exons[-1][0], first.exons[-1][1])
if _.value == "exon" and _ not in matched]
Expand Down Expand Up @@ -1304,6 +1307,9 @@ def _enlarge_start(transcript: Transcript,
upstream += sum(_[1] - _[0] + 1 for _ in upstream_exons)
up_exons.extend([(_[0], _[1]) for _ in upstream_exons])
elif intersecting_upstream[0].value == "intron":
# Check whether the first exon of the model *ends* within an *intron* of the template
# If that is the case, we have to keep the first exon in place and
# just expand it until the end
# Now we have to expand until the first exon in the upstream_exons
if intersecting_upstream[0][1] == transcript.exons[0][0] - 1:
assert upstream_exons
Expand Down
34 changes: 31 additions & 3 deletions Mikado/tests/locus_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,34 @@ def test_basic(self):
del gene.logger
self.assertIs(gene.logger, None)

new_gene = pickle.loads(pickle.dumps(gene))
self.assertEqual(gene, new_gene)
self.assertEqual(gene.transcripts, new_gene.transcripts)
with self.assertRaises(ValueError):
g = gene.format("foo")

def test_less_than(self):
    """
    Check the ordering operators between Gene objects.

    Three genes are built from the ``tothers``, ``tout`` and ``t1``
    attributes of the test case (presumably transcripts prepared in the
    set-up code, which is not visible here) and their relative order is
    asserted pairwise.
    """

    g1 = Gene(self.tothers)
    g2 = Gene(self.tout)
    g3 = Gene(self.t1)
    # Expected order: g1 < g2 < g3.
    self.assertLess(g1, g2)
    self.assertGreater(g3, g2)
    self.assertGreater(g3, g1)

def test_deletion(self):
    """
    Check that removing transcripts from a Gene updates its coordinates.

    A gene is created from ``t1``, ``t2`` is added to it, and then both are
    removed one at a time: after removing ``t2`` the gene must span exactly
    ``t1``; after removing ``t1`` as well, start and end must both be None.
    """
    gene = Gene(self.t1)
    # A freshly created gene takes its identity and location from its first transcript.
    self.assertEqual(gene.id, self.t1.parent[0])
    for attr in ["chrom", "source", "start", "end", "strand"]:
        self.assertEqual(getattr(gene, attr), getattr(self.t1, attr))
    gene.add(self.t2)
    self.assertIn(self.t2.id, gene)
    gene.finalize()
    # Dropping the second transcript must bring the boundaries back to those of t1.
    gene.remove(self.t2.id)
    self.assertEqual((gene.start, gene.end), (self.t1.start, self.t1.end))
    # An empty gene has no coordinates at all.
    gene.remove(self.t1.id)
    self.assertEqual((gene.start, gene.end), (None, None))

def test_different_strand(self):
gene = Gene(self.t1)
with self.assertRaises(AssertionError):
Expand Down Expand Up @@ -480,7 +508,7 @@ def test_empty_locus(self):
sl = Superlocus(t1)
sl.check_configuration()
sl.remove_transcript_from_locus(t1.id)
_ = sl.cds_segmenttree
_ = sl.segmenttree

def test_verified_introns(self):

Expand Down Expand Up @@ -3223,7 +3251,7 @@ def test_expand_both_sides(self):

transcript = Transcript()
transcript.chrom, transcript.strand, transcript.id = "Chr5", "+", "test"
transcript.add_exons([(100053, 100220), (100657, 101832)])
transcript.add_exons([(100053, 100220), (100640, 101832)])
transcript.finalize()

template = Transcript()
Expand All @@ -3237,7 +3265,7 @@ def test_expand_both_sides(self):
expand_transcript(transcript, template, template, self.fai, logger=logger)
self.assertEqual(
transcript.exons,
[(99726, 100220), (100657, 102000)]
[(99726, 100220), (100640, 102000)]

)

Expand Down
6 changes: 4 additions & 2 deletions Mikado/tests/test_modifications.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,8 +178,10 @@ def test_basic_padding(self):

def test_locus_padding_equal_or_n(self):

for num, exons_to_add in enumerate([((26574970, 26575410), (26578519, 26578725)),
((26574970, 26575410), (26574650, 26574820), (26578519, 26578725), (26579325, 26579700))]):
for num, exons_to_add in enumerate([
((26574970, 26575410), (26578519, 26578725)),
((26574970, 26575410), (26574650, 26574820),
(26578519, 26578725), (26579325, 26579700))]):

for num2, pad_transcripts in enumerate((False, True)):
with self.subTest(exons_to_add=exons_to_add, pad_transcripts=pad_transcripts):
Expand Down

0 comments on commit fa3b4a9

Please sign in to comment.