Merge pull request #21 from lucventurini/development

Bump to version 0.8.8
EI-CoreBioinformatics · Oct 1, 2015 · b588a36 · b588a36
2 parents dc501b9 + a340898
commit b588a36
Show file tree

Hide file tree

Showing 31 changed files with 5,070 additions and 2,193 deletions.
diff --git a/mikado_lib/json_utils.py b/mikado_lib/json_utils.py
@@ -230,6 +230,16 @@ def check_chimera_split(json_conf):
             else:
                 assert json_conf["chimera_split"]["blast_params"]["leniency"] in \
                        ("STRINGENT", "PERMISSIVE", "LENIENT")
+            if "min_overlap_duplication" not in json_conf["chimera_split"]["blast_params"]:
+                json_conf["chimera_split"]["blast_params"]["min_overlap_duplication"] = 0.9
+            else:
+                assert isinstance(json_conf[
+                                      "chimera_split"]["blast_params"]["min_overlap_duplication"],
+                                  float, int)
+                if (json_conf["chimera_split"]["blast_params"]["min_overlap_duplication"] <= 0 or
+                    json_conf["chimera_split"]["blast_params"]["min_overlap_duplication"] > 1):
+                    raise InvalidJson("""The minimum overlap duplication value should be
+                    a percentage i.e. a float between 0 and 1.""")
 
     return json_conf
 

diff --git a/mikado_lib/loci_objects/Creator.py b/mikado_lib/loci_objects/Creator.py
diff --git a/mikado_lib/loci_objects/abstractlocus.py b/mikado_lib/loci_objects/abstractlocus.py
@@ -210,24 +210,6 @@ def evaluate(param: str, conf: dict) -> bool:
 
     # #### Class methods ########
 
-    @classmethod
-    def create_default_logger(cls):
-        """Static method to create a default logging instance for the loci.
-        The default is a null handler (no log)
-        """
-
-        formatter = logging.Formatter(
-            "{asctime} - {levelname} - {lineno} - {funcName} - {processName} - {message}",
-            style="{"
-            )
-
-        logger = logging.getLogger("{0}_logger".format(cls.__name__))
-        handler = logging.NullHandler()
-        handler.setFormatter(formatter)
-        logger.setLevel(logging.WARN)
-        logger.addHandler(handler)
-        return logger
-
     @classmethod
     def in_locus(cls, locus_instance, transcript, flank=0) -> bool:
         """
@@ -427,6 +409,8 @@ def add_transcript_to_locus(self, transcript, check_in_locus=True):
 
         self.combined_cds_introns = set.union(
             self.combined_cds_introns, transcript.combined_cds_introns)
+        assert len(transcript.combined_cds_introns) <= len(self.combined_cds_introns)
+
         self.selected_cds_introns.update(transcript.selected_cds_introns)
 
         self.exons.update(set(transcript.exons))

diff --git a/mikado_lib/loci_objects/locus.py b/mikado_lib/loci_objects/locus.py
@@ -39,6 +39,9 @@ def __init__(self, transcript: Transcript, logger=None):
     def __str__(self, print_cds=True) -> str:
 
         self.feature = self.__name__
+        # Hacky fix: ensure the primary transcript always carries the
+        # "primary" attribute set to True before building the string output.
+        self.primary_transcript.attributes["primary"] = True
 
         return super().__str__(print_cds=print_cds)
 
@@ -65,10 +68,14 @@ def add_transcript_to_locus(self, transcript: Transcript, **kwargs):
             self.logger.debug("%s not added because the Locus has already too many transcripts.",
                               transcript.id)
             to_be_added = False
-        if to_be_added and not self.is_alternative_splicing(transcript):
-            self.logger.debug("%s not added because it is not a valid splicing isoform.",
-                              transcript.id)
-            to_be_added = False
+        if to_be_added:
+            is_alternative, ccode = self.is_alternative_splicing(transcript)
+            if is_alternative is False:
+                self.logger.debug("%s not added because it is not a valid splicing isoform.",
+                                  transcript.id)
+                to_be_added = False
+            else:
+                transcript.attributes["ccode"] = ccode
         if to_be_added and transcript.combined_utr_length > max_utr_lenghts["total"]:
             self.logger.debug("%s not added because it has too much UTR (%d).",
                               transcript.id,
@@ -80,13 +87,12 @@ def add_transcript_to_locus(self, transcript: Transcript, **kwargs):
                               transcript.five_utr_length)
             to_be_added = False
         if to_be_added and transcript.three_utr_length > max_utr_lenghts["three"]:
-            self.logger.debug("%s not added because it has too much 5'UTR (%d).",
+            self.logger.debug("%s not added because it has too much 3'UTR (%d).",
                               transcript.id,
                               transcript.three_utr_length)
             to_be_added = False
 
         if to_be_added and self.json_conf["alternative_splicing"]["keep_retained_introns"] is False:
-            self.find_retained_introns(transcript)
             if transcript.retained_intron_num > 0:
                 self.logger.debug("%s not added because it has %d retained introns.",
                                   transcript.id,
@@ -191,45 +197,45 @@ def is_alternative_splicing(self, other):
         """
 
         is_valid = True
+        main_ccode = None
 
-        if other.id == self.primary_transcript_id:
+        self.find_retained_introns(other)
+        if other.id == self.primary_transcript_id or other.strand != other.strand:
             is_valid = False
-        elif other.strand != other.strand:
-            is_valid = False
-
         elif self.overlap((other.start, other.end), (self.start, self.end)) < 0:
             is_valid = False
-
         elif other.retained_intron_num > 0:
             is_valid = False
 
         valid_ccodes = self.json_conf["alternative_splicing"]["valid_ccodes"]
-        if is_valid is True:
-            for tid in self.transcripts:
+
+        if is_valid is False:
+            return is_valid, main_ccode
+        else:
+            ccodes = []
+            main_result, _ = Assigner.compare(other, self.primary_transcript)
+            main_ccode = main_result.ccode[0]
+            ccodes.append(main_ccode)
+            results = [main_result]
+            for tid in iter(tid for tid in self.transcripts if
+                            tid != self.primary_transcript_id):
                 result, _ = Assigner.compare(other, self.transcripts[tid])
-                self.logger.debug("%s vs. %s: %s",
-                                  tid,
+                results.append(result)
+                ccodes.append(result.ccode[0])
+                self.logger.debug("Comparing secondary transcripts %s vs %s. Ccode: %s",
+                                  tid, other.id, result.ccode[0])
+
+            if main_ccode not in valid_ccodes:
+                self.logger.debug("%s is not a valid splicing isoform. Ccode: %s",
                                   other.id,
-                                  result.ccode[0])
-                if result.ccode[0] not in valid_ccodes:
-                    self.logger.debug(
-                        "%s is not a valid splicing isoform. Ccode: %s",
-                        other.id,
-                        result.ccode[0])
-                    is_valid = False
-                    break
-                if result.n_f1 == 0 or \
-                        ((self.transcripts[tid].monoexonic is False and result.j_f1 == 0) or
-                         self.transcripts[tid].monoexonic is True):
-                    self.logger.debug(
-                        "%s is not a valid splicing isoform. N_f1: %f; J_f1: %f",
-                        other.id,
-                        result.n_f1,
-                        result.j_f1)
-                    is_valid = False
-                    break
-
-        return is_valid
+                                  main_result.ccode[0])
+                is_valid = False
+            elif "_" in ccodes or "=" in ccodes:
+                self.logger.debug("%s is a redundant valid splicing isoform. Ccode: %s",
+                                  other.id,
+                                  main_result.ccode[0])
+                is_valid = False
+            return is_valid, main_ccode
 
     @property
     def __name__(self):