EI-CoreBioinformatics · lucventurini · Oct 28, 2019 · Oct 20, 2019 · Oct 24, 2019 · Oct 24, 2019
diff --git a/Mikado/configuration/daijin_schema.json b/Mikado/configuration/daijin_schema.json
@@ -276,7 +276,7 @@
       "- identity: minimum identity for any alignment. Default: 95%",
       "- coverage: minimum coverage for any alignment. Default: 70%"],
       "properties": {
-        "max_mem": {"type": "integer", "default": 6000, "minimum": 1000, "required": true},
+        "max_mem": {"type": "integer", "default": 6000, "minimum": 1000},
         "npaths": {"type": "integer", "default": 0},
         "identity": {"type": "number", "default": 0.95, "minimum": 0, "maximum": 1},
         "coverage": {"type": "number", "default": 0.70, "minimum": 0, "maximum": 1}

diff --git a/Mikado/loci/abstractlocus.py b/Mikado/loci/abstractlocus.py
@@ -570,6 +570,11 @@ def remove_transcript_from_locus(self, tid: str):
             self.initialized = False
 
         self.logger.debug("Deleted %s from %s", tid, self.id)
+        if tid in self._metrics:
+            del self._metrics[tid]
+        if tid in self.scores:
+            del self.scores[tid]
+
         self.metrics_calculated = False
         self.scores_calculated = False
 
@@ -987,6 +992,7 @@ def get_metrics(self):
 
         if self.metrics_calculated is True:
             return
+        self._metrics = dict()
         cds_bases = sum(_[1] - _[0] + 1 for _ in merge_ranges(
             itertools.chain(*[
                 self.transcripts[_].combined_cds for _ in self.transcripts
@@ -1135,20 +1141,20 @@ def _check_not_passing(self, previous_not_passing=set()):
                 assert self.transcripts[tid].json_conf["prepare"]["files"][\
                            "reference"] == self.json_conf["prepare"]["files"]["reference"]
 
-            if self.json_conf["pick"]["run_options"]["check_references"] is False:
-                if self.transcripts[tid].is_reference is True:
-                    # Reference transcripts should be kept in, no matter what.
-                    self.logger.debug("Skipping %s from the requirement check as it is a reference transcript")
-                    continue
-                elif self.transcripts[tid].original_source in self.json_conf["prepare"]["files"]["reference"]:
-                    self.transcripts[tid].is_reference = True  # Bug
-                    self.logger.debug("Skipping %s from the requirement check as it is a reference transcript", tid)
-                    continue
-            else:
+            is_reference = ((self.transcripts[tid].is_reference is True) or
+                            (self.transcripts[tid].original_source in self.json_conf["prepare"]["files"]["reference"]))
+
+            if is_reference is False:
                 self.logger.debug("Transcript %s (source %s) is not a reference transcript (references: %s; in it: %s)",
                                   tid, self.transcripts[tid].original_source,
                                   self.json_conf["prepare"]["files"]["reference"],
-                                  self.transcripts[tid].original_source in self.json_conf["prepare"]["files"]["reference"])
+                                  self.transcripts[tid].original_source in self.json_conf["prepare"]["files"][
+                                      "reference"])
+            elif is_reference is True and self.json_conf["pick"]["run_options"]["check_references"] is False:
+                self.logger.debug("Skipping %s from the requirement check as it is a reference transcript", tid)
+                continue
+            elif is_reference is True and self.json_conf["pick"]["run_options"]["check_references"] is True:
+                self.logger.debug("Performing the requirement check for %s even if it is a reference transcript", tid)
 
             evaluated = dict()
             for key in self.json_conf["requirements"]["parameters"]:

diff --git a/Mikado/loci/locus.py b/Mikado/loci/locus.py
@@ -349,6 +349,10 @@ def __remove_redundant_after_padding(self):
             continue
         return
 
+    def as_dict(self):  # extends the parent's as_dict with a scoring step
+        self.calculate_scores()  # ensure calculate_scores() has run before delegating
+        return super().as_dict()  # the return value comes unchanged from the parent implementation
+
     def remove_transcript_from_locus(self, tid: str):
 
         """Overloading of the AbstractLocus class, in order to ensure that the primary transcript will *not*
@@ -1066,7 +1070,7 @@ def __set_id(self, string):
             return
         primary_id = "{0}.1".format(string)
         old_primary = self.primary_transcript.id
-        self.primary_transcript.attributes["Alias"] = self.primary_transcript.id
+        self.primary_transcript.attributes["alias"] = self.primary_transcript.id
         self.primary_transcript.id = primary_id
         self.transcripts[primary_id] = self.primary_transcript
         self.primary_transcript_id = primary_id
@@ -1079,7 +1083,7 @@ def __set_id(self, string):
 
         for counter, tid in enumerate(order):
             counter += 2
-            self.transcripts[tid].attributes["Alias"] = tid
+            self.transcripts[tid].attributes["alias"] = tid
             new_id = "{0}.{1}".format(string, counter)
             self.transcripts[tid].id = new_id
             self.transcripts[new_id] = self.transcripts.pop(tid)
@@ -1088,6 +1092,7 @@ def __set_id(self, string):
         if self.scores_calculated is True:
             for tid in mapper:
                 self.scores[mapper[tid]] = self.scores.pop(tid)
+                self._metrics[mapper[tid]] = self._metrics.pop(tid)
         if self.metrics_calculated is True:
             for index in range(len(self.metric_lines_store)):
                 self.metric_lines_store[index]["tid"] = mapper[self.metric_lines_store[index]["tid"]]
@@ -1154,7 +1159,7 @@ def ts_max_splices(self):
 
     @property
     def has_reference_transcript(self):
-        return any(self.transcripts[transcript].is_reference is True for transcript in self)
+        return any(self.transcripts[transcript].is_reference for transcript in self)  # truthiness test: any truthy is_reference counts, not only the literal True
 
     def _get_alternative_splicing_codes(self):
         """Method to retrieve the currently valid alternative splicing event codes"""

diff --git a/Mikado/subprograms/serialise.py b/Mikado/subprograms/serialise.py
@@ -158,7 +158,7 @@ def load_external(args, logger):
 
     """Function to load external data from."""
 
-    if args.json_conf["serialise"]["files"]["external_scores"] is None:
+    if args.json_conf["serialise"]["files"]["external_scores"] in (None, ""):
         logger.debug("No external scores to load, returning")
         return
     else:

diff --git a/sample_data/Snakefile b/sample_data/Snakefile
@@ -38,8 +38,8 @@ configfile: "configuration.yaml"
 
 rule complete:
     input: "compare.stats", "compare_subloci.stats", "compare_input.stats", "check.ok",
-         "check_metrics.ok", "daijin_test/mikado.yaml", "g11.ok", "refmap_check.ok", "refmap_check_pc.ok"
-         "external.ok"
+         "check_metrics.ok", "daijin_test/mikado.yaml", "g11.ok", "refmap_check.ok", "refmap_check_pc.ok",
+         "external.ok", "alias_check.ok"
     output: touch("finished.ok")
 
 
@@ -282,6 +282,21 @@ rule check_external_pick:
         assert "external.tpm" in scores.columns
         assert scores["external.tpm"].max() > 0
 
+rule check_pick_confusing_alias:
+    input:
+       db=rules.daijin.output.db,
+       prepare="mikado_prepared.conf_alias.gtf"
+    output: touch("alias_check.ok")
+    log: os.path.join("Daijin", "5-mikado", "pick", "confusing_alias", "pick.log")
+    params:
+       outdir=os.path.join("Daijin", "5-mikado", "pick", "confusing_alias")
+    threads: 2
+    shell: "mikado pick --only-reference-update  --source Mikado_permissive --mode=permissive \
+    --procs={threads} --start-method=spawn \
+    --json-conf=configuration.yaml -od {params.outdir} \
+    -l {log} --loci-out mikado-permissive.loci.gff3 -lv INFO \
+    -db {input.db} {input.prepare}"
+
 rule test_g11_prodigal:
     input:
       transcripts=rules.daijin.output.prep_fasta
@@ -362,6 +377,8 @@ rule clean:
                                     ["configuration.yaml"]):
             if os.path.exists(filename) and filename not in (".", ".."):
                 shutil.rmtree(filename) if os.path.isdir(filename) else os.remove(filename)
+        if os.path.exists("Daijin"):  # run once after the per-file loop instead of on every iteration
+            shutil.rmtree("Daijin")
 
 rule clean_crumbs:
     run: