Increasing coverage of unit tests (EI-CoreBioinformatics#183)

lucventurini · Aug 12, 2019 · fa3b4a9
1 parent 87938d1
commit fa3b4a9
Show file tree

Hide file tree

Showing 5 changed files with 65 additions and 55 deletions.
diff --git a/Mikado/configuration/configurator.py b/Mikado/configuration/configurator.py
@@ -36,7 +36,7 @@ def extend_with_default(validator_class, resolver=None, simple=False):
     so that they also set the default values provided inside the schema
     itself. Source:
     https://python-jsonschema.readthedocs.org/en/latest/faq/?highlight=default
-    :param validator_class: the validator class to extend (e.g. Draft4Validator)
+    :param validator_class: the validator class to extend (e.g. Draft7Validator)
 
     :param simple: boolean flag. If set to True, only required properties will be extended.
     :type simple: bool
@@ -158,9 +158,9 @@ def check_scoring(json_conf):
         if parameter in parameters_found:
             double_parameters.append(parameter)
 
-        if not jsonschema.Draft4Validator(scoring_schema).is_valid(
+        if not jsonschema.Draft7Validator(scoring_schema).is_valid(
                 jdict[parameter]):
-            errors = [str(_) for _ in list(jsonschema.Draft4Validator(scoring_schema).iter_errors(
+            errors = [str(_) for _ in list(jsonschema.Draft7Validator(scoring_schema).iter_errors(
                 jdict[parameter]))]
             raise InvalidJson("Invalid scoring for {}:\n{}".format(
                 parameter, "\n".join(errors)))
@@ -318,9 +318,9 @@ def check_requirements(json_conf, require_schema, index):
             else:
                 parameters_not_found.append(key_name)
                 continue
-        if not jsonschema.Draft4Validator(require_schema["definitions"]["parameter"]).is_valid(
+        if not jsonschema.Draft7Validator(require_schema["definitions"]["parameter"]).is_valid(
                 json_conf[index]["parameters"][key]):
-            errors = list(jsonschema.Draft4Validator(require_schema).iter_errors(
+            errors = list(jsonschema.Draft7Validator(require_schema).iter_errors(
                 json_conf[index]["parameters"][key]
             ))
             raise InvalidJson("Invalid parameter for {0} in {1}: \n{2}".format(
@@ -347,7 +347,7 @@ def check_requirements(json_conf, require_schema, index):
         newexpr = json_conf[index]["expression"][:]
         json_conf[index]["__expression"] = json_conf[index]["expression"][:]
     else:
-        if not jsonschema.Draft4Validator(
+        if not jsonschema.Draft7Validator(
                 require_schema["definitions"]["expression"]).is_valid(
                     json_conf[index]["expression"]):
             raise InvalidJson("Invalid expression field")
@@ -434,7 +434,7 @@ def create_validator(simple=False):
     :type simple: bool
 
     :return validator
-    :rtype: jsonschema.Draft4Validator
+    :rtype: jsonschema.Draft7Validator
     """
 
     validator = extend_with_default(jsonschema.Draft7Validator,

diff --git a/Mikado/loci/abstractlocus.py b/Mikado/loci/abstractlocus.py
@@ -12,7 +12,7 @@
 import networkx
 from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
 import numpy
-from ..transcripts.clique_methods import find_cliques, find_communities, define_graph
+from ..transcripts.clique_methods import find_communities, define_graph
 from ..transcripts.transcript import Transcript
 from ..configuration.configurator import to_json, check_json
 from ..exceptions import NotInLocusError
@@ -110,9 +110,7 @@ def __init__(self,
 
         self.scores_calculated = False
         self.scores = dict()
-        self.__cds_introntree = IntervalTree()
         self.__segmenttree = IntervalTree()
-        self.__cds_segmenttree = IntervalTree()
         self.__regressor = None
         self.session = None
         self.metrics_calculated = False
@@ -387,20 +385,20 @@ def find_communities(self, graph: networkx.Graph) -> list:
 
         return find_communities(graph, self.logger)
 
-    def find_cliques(self, graph: networkx.Graph) -> (networkx.Graph, list):
-        """
-
-        :param graph: graph to which it is necessary to call the cliques for.
-
-        Wrapper for the BronKerbosch algorithm, which returns the maximal cliques in the graph.
-        It is the new interface for the BronKerbosch function, which is not called directly
-        from outside this class any longer.
-        The "inters" keyword provides the function used to determine
-        whether two vertices are connected or not in the graph.
-        """
-
-        return find_cliques(graph, self.logger)
-
+    # def find_cliques(self, graph: networkx.Graph) -> (networkx.Graph, list):
+    #     """
+    #
+    #     :param graph: graph to which it is necessary to call the cliques for.
+    #
+    #     Wrapper for the BronKerbosch algorithm, which returns the maximal cliques in the graph.
+    #     It is the new interface for the BronKerbosch function, which is not called directly
+    #     from outside this class any longer.
+    #     The "inters" keyword provides the function used to determine
+    #     whether two vertices are connected or not in the graph.
+    #     """
+    #
+    #     return find_cliques(graph, self.logger)
+    #
     def choose_best(self, transcripts: dict) -> str:
         """
         :param transcripts: the dictionary of transcripts of the instance
@@ -1561,37 +1559,13 @@ def segmenttree(self):
 
         return self.__segmenttree
 
-    @property
-    def cds_segmenttree(self):
-
-        if len(self.__cds_segmenttree) != len(self.combined_cds_exons) + len(self.combined_cds_introns):
-            self.__cds_segmenttree = self._calculate_segment_tree(self.combined_cds_exons, self.combined_cds_introns)
-
-        return self.__cds_segmenttree
-
     @staticmethod
     def _calculate_segment_tree(exons, introns):
 
         return IntervalTree.from_intervals(
                 [Interval(*_, value="exon") for _ in exons] + [Interval(*_, value="intron") for _ in introns]
             )
 
-    @property
-    def _cds_introntree(self):
-
-        """
-        :rtype: IntervalTree
-        """
-
-        if len(self.__cds_introntree) != len(self.combined_cds_introns):
-            self.__cds_introntree = IntervalTree.from_tuples(
-                [(_[0], _[1] + 1) for _ in self.combined_cds_introns])
-        return self.__cds_introntree
-
-    @property
-    def longest_transcript(self):
-        return max([len(_) for _ in self.transcripts.values()])
-
     @property
     def regressor(self):
         return self.__regressor

diff --git a/Mikado/loci/locus.py b/Mikado/loci/locus.py
@@ -183,7 +183,7 @@ def finalize_alternative_splicing(self):
             else:
                 self.logger.debug("No transcripts with retained introns found.")
 
-            if self.perform_padding is True:
+            if self.perform_padding is True and len(self.transcripts) > 1:
                 self.logger.debug("Starting padding procedure for %s", self.id)
                 failed = self.__launch_padding()
                 if failed:
@@ -779,10 +779,13 @@ def pad_transcripts(self) -> set:
         five_graph = self.define_graph(objects=self.transcripts, inters=self._share_extreme, three_prime=False)
         three_graph = self.define_graph(objects=self.transcripts, inters=self._share_extreme, three_prime=True)
 
+        self.logger.warning("5' graph: %s", five_graph.edges)
+        self.logger.warning("3' graph: %s", three_graph.edges)
         # TODO: Tie breaks!
 
         __to_modify = self._find_communities_boundaries(five_graph, three_graph)
 
+        self.logger.warning("To modify: %s", __to_modify)
         templates = set()
 
         # Now we can do the proper modification
@@ -964,7 +967,7 @@ def _share_three_prime(self, first: Transcript, second: Transcript):
         matched = second.segmenttree.find(first.exons[-1][0], first.exons[-1][1])
         if matched[-1].value == "intron" or first.exons[-1][1] > matched[-1].end:
             decision = False
-            reason = "{second.id} last exon ends within an intron of {first.id}".format(**locals())
+            reason = "{first.id} last exon ends within an intron of {second.id}".format(**locals())
         else:
             downstream = [_ for _ in second.find_downstream(first.exons[-1][0], first.exons[-1][1])
                           if _.value == "exon" and _ not in matched]
@@ -1304,6 +1307,9 @@ def _enlarge_start(transcript: Transcript,
             upstream += sum(_[1] - _[0] + 1 for _ in upstream_exons)
             up_exons.extend([(_[0], _[1]) for _ in upstream_exons])
         elif intersecting_upstream[0].value == "intron":
+            # Check whether the first exon of the model *ends* within an *intron* of the template
+            # If that is the case, we have to keep the first exon in place and
+            # just expand it until the end
             # Now we have to expand until the first exon in the upstream_exons
             if intersecting_upstream[0][1] == transcript.exons[0][0] - 1:
                 assert upstream_exons

diff --git a/Mikado/tests/locus_test.py b/Mikado/tests/locus_test.py
@@ -124,6 +124,34 @@ def test_basic(self):
         del gene.logger
         self.assertIs(gene.logger, None)
 
+        new_gene = pickle.loads(pickle.dumps(gene))
+        self.assertEqual(gene, new_gene)
+        self.assertEqual(gene.transcripts, new_gene.transcripts)
+        with self.assertRaises(ValueError):
+            g = gene.format("foo")
+
+    def test_less_than(self):
+
+        g1 = Gene(self.tothers)
+        g2 = Gene(self.tout)
+        g3 = Gene(self.t1)
+        self.assertLess(g1, g2)
+        self.assertGreater(g3, g2)
+        self.assertGreater(g3, g1)
+
+    def test_deletion(self):
+        gene = Gene(self.t1)
+        self.assertEqual(gene.id, self.t1.parent[0])
+        for attr in ["chrom", "source", "start", "end", "strand"]:
+            self.assertEqual(getattr(gene, attr), getattr(self.t1, attr))
+        gene.add(self.t2)
+        self.assertIn(self.t2.id, gene)
+        gene.finalize()
+        gene.remove(self.t2.id)
+        self.assertEqual((gene.start, gene.end), (self.t1.start, self.t1.end))
+        gene.remove(self.t1.id)
+        self.assertEqual((gene.start, gene.end), (None, None))
+
     def test_different_strand(self):
         gene = Gene(self.t1)
         with self.assertRaises(AssertionError):
@@ -480,7 +508,7 @@ def test_empty_locus(self):
         sl = Superlocus(t1)
         sl.check_configuration()
         sl.remove_transcript_from_locus(t1.id)
-        _ = sl.cds_segmenttree
+        _ = sl.segmenttree
 
     def test_verified_introns(self):
 
@@ -3223,7 +3251,7 @@ def test_expand_both_sides(self):
 
         transcript = Transcript()
         transcript.chrom, transcript.strand, transcript.id = "Chr5", "+", "test"
-        transcript.add_exons([(100053, 100220), (100657, 101832)])
+        transcript.add_exons([(100053, 100220), (100640, 101832)])
         transcript.finalize()
 
         template = Transcript()
@@ -3237,7 +3265,7 @@ def test_expand_both_sides(self):
             expand_transcript(transcript, template, template, self.fai, logger=logger)
             self.assertEqual(
                 transcript.exons,
-                [(99726, 100220), (100657, 102000)]
+                [(99726, 100220), (100640, 102000)]
 
             )
 

diff --git a/Mikado/tests/test_modifications.py b/Mikado/tests/test_modifications.py
@@ -178,8 +178,10 @@ def test_basic_padding(self):
 
     def test_locus_padding_equal_or_n(self):
 
-        for num, exons_to_add in enumerate([((26574970, 26575410), (26578519, 26578725)),
-                             ((26574970, 26575410), (26574650, 26574820), (26578519, 26578725), (26579325, 26579700))]):
+        for num, exons_to_add in enumerate([
+            ((26574970, 26575410), (26578519, 26578725)),
+            ((26574970, 26575410), (26574650, 26574820),
+             (26578519, 26578725), (26579325, 26579700))]):
 
             for num2, pad_transcripts in enumerate((False, True)):
                 with self.subTest(exons_to_add=exons_to_add, pad_transcripts=pad_transcripts):