
Modified error messages for Mikado prepare, serialise and pick. Now the default level "INFO" should give appropriate results. Also correcting the codon table issue for EI-CoreBioinformatics#34
Luca Venturini authored and Luca Venturini committed Oct 12, 2018
1 parent a5aeb31 commit 6175eb2
Showing 10 changed files with 81 additions and 54 deletions.
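In short: routine progress messages in `pick` drop from INFO to DEBUG, a few genuinely useful ones in `prepare` rise to INFO, and the default `log_level` becomes "INFO", so a default run reports milestones without the former flood of detail. A minimal, generic sketch of that interplay using only the standard library (logger name and messages are illustrative, not Mikado's own setup):

```python
import logging

# With the level at INFO, messages demoted to DEBUG in this commit vanish from the
# default output, while those kept at (or promoted to) INFO remain visible.
logging.basicConfig(level=logging.INFO, format="%(levelname)s - %(message)s")
logger = logging.getLogger("mikado_demo")  # hypothetical logger name

logger.debug("Copying the DB into memory")         # demoted message: hidden at INFO
logger.info("Discarding %s as redundant", "tr_B")   # promoted message: shown at INFO
```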
7 changes: 4 additions & 3 deletions Mikado/configuration/configuration_blueprint.json
@@ -31,7 +31,7 @@
"Settings related to the logs. Keys:",
"- sql_level: verbosity for SQL calls. Default: WARNING.",
" In decreasing order: 'DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'",
"- log_level: verbosity. Default: WARNING.",
"- log_level: verbosity. Default: INFO.",
" In decreasing order: 'DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'"
],
"type": "object",
@@ -45,7 +45,7 @@
"ERROR",
"CRITICAL"
],
"default": "WARNING"
"default": "INFO"
},
"sql_level": {
"type": "string",
@@ -202,7 +202,8 @@
"Scenedesmus obliquus Mitochondrial", "Thraustochytrium Mitochondrial",
"Pterobranchia Mitochondrial", "Candidate Division SR1", "Gracilibacteria",
"Pachysolen tannophilus Nuclear", "Karyorelict Nuclear", "Condylostoma Nuclear",
"Mesodinium Nuclear", "Peritrich Nuclear", "Blastocrithidia Nuclear"]
"Mesodinium Nuclear", "Peritrich Nuclear", "Blastocrithidia Nuclear"],
"default": 0
}
}
},
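For orientation, a hedged sketch (plain dictionaries, not Mikado's actual configuration loader, and the nesting of the keys is assumed) of how blueprint defaults such as `"log_level": "INFO"` and the new `"codon_table": 0` only take effect when the user leaves those keys out:

```python
# Defaults declared in configuration_blueprint.json; the key paths here are
# illustrative, not verified against the full schema.
blueprint_defaults = {
    ("log_settings", "log_level"): "INFO",
    ("serialise", "codon_table"): 0,
}

user_config = {"log_settings": {"sql_level": "WARNING"}}  # user did not set log_level

for (section, key), default in blueprint_defaults.items():
    user_config.setdefault(section, {}).setdefault(key, default)

print(user_config)
# {'log_settings': {'sql_level': 'WARNING', 'log_level': 'INFO'}, 'serialise': {'codon_table': 0}}
```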
10 changes: 10 additions & 0 deletions Mikado/loci/locus.py
@@ -938,5 +938,15 @@ def expand_transcript(transcript, new_start, new_end, fai, logger):
logger.info("Padding %s would lead to an in-frame stop codon. Aborting.",
transcript.id)
return backup
+else:
+message = "{transcript.id} has now start {transcript.start}, end {transcript.end}"
+if ((backup.combined_cds_end != transcript.combined_cds_end) or
+(backup.combined_cds_start != transcript.combined_cds_start)):
+transcript.attributes["cds_padded"] = True
+message += "; CDS moved to {transcript.combined_cds_start}, end {transcript.combined_cds_end}"
+else:
+transcript.attributes["cds_padded"] = False
+message += "."
+logger.info(message.format(**locals()))

return transcript
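The padding message above is built as a template that names attributes of objects in the local scope and is then filled with `str.format(**locals())`. A self-contained toy version of the pattern (the `Transcript` stand-in is hypothetical; only the formatting idiom is taken from the diff):

```python
class Transcript:
    # Hypothetical stand-in holding just the fields the message template references.
    def __init__(self, tid, start, end, cds_start, cds_end):
        self.id, self.start, self.end = tid, start, end
        self.combined_cds_start, self.combined_cds_end = cds_start, cds_end

backup = Transcript("tr1", 100, 1900, 150, 1700)      # state before padding
transcript = Transcript("tr1", 100, 2000, 150, 1800)  # state after padding

message = "{transcript.id} has now start {transcript.start}, end {transcript.end}"
if (backup.combined_cds_end != transcript.combined_cds_end or
        backup.combined_cds_start != transcript.combined_cds_start):
    message += "; CDS moved to {transcript.combined_cds_start}, end {transcript.combined_cds_end}"
else:
    message += "."

# format(**locals()) resolves the attribute lookups against the local objects.
print(message.format(**locals()))
# tr1 has now start 100, end 2000; CDS moved to 150, end 1800
```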
3 changes: 3 additions & 0 deletions Mikado/loci/superlocus.py
@@ -444,6 +444,9 @@ def load_transcript_data(self, tid, data_dict):
if len(new_tr) > 1:
to_add.update(new_tr)
to_remove = True
self.logger.info("%s has been split into %d different transcripts.",
tid, len(new_tr))

del data_dict
return to_remove, to_add
# @profile
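The new log call uses lazy %-style arguments, as elsewhere in Mikado: the message is only interpolated if the record is actually emitted at the current level. A small generic illustration with toy values:

```python
import logging

logging.basicConfig(level=logging.INFO, format="%(levelname)s - %(message)s")
logger = logging.getLogger("superlocus_demo")  # hypothetical name

tid = "transcript_1"
new_tr = {"transcript_1.split1": None, "transcript_1.split2": None}
# Arguments are passed separately instead of pre-formatting the string, so no work
# is wasted when the level filters the message out.
logger.info("%s has been split into %d different transcripts.", tid, len(new_tr))
```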
30 changes: 15 additions & 15 deletions Mikado/picking/picker.py
@@ -96,7 +96,7 @@ def __init__(self, json_conf, commandline=""):
force=True)

# self.setup_logger()
self.logger.info("Multiprocessing method: %s",
self.logger.debug("Multiprocessing method: %s",
self.json_conf["multiprocessing_method"])

# pylint: enable=no-member
@@ -232,11 +232,11 @@ def setup_shm_db(self):
This method will copy the SQLite input DB into memory.
"""

self.main_logger.info("Copy into a SHM db: %s",
self.main_logger.debug("Copy into a SHM db: %s",
self.json_conf["pick"]["run_options"]["shm"])
if self.json_conf["pick"]["run_options"]["shm"] is True:
self.json_conf["pick"]["run_options"]["shm_shared"] = False
self.main_logger.info("Copying the DB into memory")
self.main_logger.debug("Copying the DB into memory")
assert self.json_conf["db_settings"]["dbtype"] == "sqlite"
self.json_conf["pick"]["run_options"]["preload"] = False
if self.json_conf["pick"]["run_options"]["shm_db"] is not None:
@@ -253,7 +253,7 @@ def setup_shm_db(self):
self.json_conf["pick"]["run_options"]["shm_db"] = temp
if self.json_conf["pick"]["run_options"]["shm"]:
if not os.path.exists(self.json_conf["pick"]["run_options"]["shm_db"]):
self.main_logger.info("Copying {0} into {1}".format(
self.main_logger.debug("Copying {0} into {1}".format(
self.json_conf["db_settings"]["db"],
self.json_conf["pick"]["run_options"]["shm_db"]))
try:
@@ -265,7 +265,7 @@ def setup_shm_db(self):
Back to using the DB on disk.""")
self.json_conf["pick"]["run_options"]["shm"] = False
else:
self.main_logger.info("%s exists already. Doing nothing.",
self.main_logger.debug("%s exists already. Doing nothing.",
self.json_conf["pick"]["run_options"]["shm_db"])
self.main_logger.info("DB copied into memory")

@@ -548,7 +548,7 @@ def __preload_blast(self, engine, queries):
hsps[hsp.query_id] = collections.defaultdict(list)
hsps[hsp.query_id][hsp.target_id].append(hsp)

self.main_logger.info("{0} HSPs prepared".format(len(hsps)))
self.main_logger.debug("{0} HSPs prepared".format(len(hsps)))

targets = dict((x.target_id, x) for x in engine.execute("select * from target"))

@@ -598,7 +598,7 @@ def __preload_blast(self, engine, queries):

del hsps
assert len(hits_dict) <= len(queries)
self.main_logger.info("%d BLAST hits loaded for %d queries",
self.main_logger.debug("%d BLAST hits loaded for %d queries",
hit_counter,
len(hits_dict))
self.main_logger.debug("%s",
@@ -634,7 +634,7 @@ def preload(self):

# data_dict["junctions"] = self.manager.dict(data_dict["junctions"], lock=False)

self.main_logger.info("%d junctions loaded",
self.main_logger.debug("%d junctions loaded",
len(data_dict["junctions"]))
self.main_logger.debug("Example junctions:\n{0}".format(
"\n".join(str(junc) for junc in list(
@@ -658,7 +658,7 @@ def preload(self):

# data_dict['orf'] = self.manager.dict(orfs, lock=False)

self.main_logger.info("%d ORFs loaded",
self.main_logger.debug("%d ORFs loaded",
len(data_dict["orfs"]))
self.main_logger.debug(",".join(
list(data_dict["orfs"].keys())[:10]
@@ -800,7 +800,7 @@ def __submit_multi_threading(self, data_dict):
"""

intron_range = self.json_conf["pick"]["run_options"]["intron_range"]
self.logger.info("Intron range: %s", intron_range)
self.logger.debug("Intron range: %s", intron_range)

current_locus = None
current_transcript = None
@@ -826,7 +826,7 @@ def __submit_multi_threading(self, data_dict):
# tempdir = os.path.join(self.json_conf["pick"]["files"]["output_dir"], "mikado_pick_tmp")
# os.makedirs(tempdir, exist_ok=True)

self.logger.info("Creating the worker processes")
self.logger.debug("Creating the worker processes")
conn, cursor = self._create_temporary_store(tempdir)
working_processes = [LociProcesser(self.json_conf,
data_dict,
@@ -838,7 +838,7 @@ def __submit_multi_threading(self, data_dict):
for _ in range(1, self.procs+1)]
# Start all processes
[_.start() for _ in working_processes]
self.logger.info("Started all %d workers", self.procs)
self.logger.debug("Started all %d workers", self.procs)
# No sense in keeping this data available on the main thread now
del data_dict

@@ -927,7 +927,7 @@ def __submit_multi_threading(self, data_dict):
current_locus.id, counter,
", ".join(list(current_locus.transcripts.keys())))
locus_queue.put(("EXIT", ))
self.logger.info("Joining children processes")
self.logger.debug("Joining children processes")
[_.join() for _ in working_processes]
conn.close()
self.logger.info("Joined children processes; starting to merge partial files")
@@ -988,7 +988,7 @@ def __submit_single_threaded(self, data_dict):
logger.debug("Begun single-threaded run")

intron_range = self.json_conf["pick"]["run_options"]["intron_range"]
logger.info("Intron range: %s", intron_range)
logger.debug("Intron range: %s", intron_range)

handles = self.__get_output_files()

@@ -1170,7 +1170,7 @@ def __call__(self):
# Clean up the DB copied to SHM
if (self.json_conf["pick"]["run_options"]["shm"] is True and
self.json_conf["pick"]["run_options"]["shm_shared"] is False):
self.main_logger.info("Removing shared memory DB %s",
self.main_logger.debug("Removing shared memory DB %s",
self.json_conf["pick"]["run_options"]["shm_db"])
os.remove(self.json_conf["pick"]["run_options"]["shm_db"])

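Several of the demoted messages above track the optional copy of the SQLite database into shared memory. A simplified sketch of that idea (paths and names are illustrative, not Mikado's exact logic):

```python
import os
import shutil
import tempfile

db_on_disk = "mikado.db"  # hypothetical path to the serialised database
if os.path.isdir("/dev/shm") and os.path.exists(db_on_disk):
    # Copy the database under /dev/shm so worker processes read it from RAM;
    # the real code skips the copy if the target already exists.
    handle, shm_db = tempfile.mkstemp(suffix=".db", dir="/dev/shm")
    os.close(handle)
    shutil.copy2(db_on_disk, shm_db)
else:
    shm_db = db_on_disk  # fall back to the on-disk copy
```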
2 changes: 1 addition & 1 deletion Mikado/preparation/annotation_parser.py
@@ -248,7 +248,7 @@ def load_into_storage(shelf_name, exon_lines, min_length, logger, strip_cds=True

# Discard transcript under a certain size
if tlength < min_length:
logger.debug("Discarding %s because its size (%d) is under the minimum of %d",
logger.info("Discarding %s because its size (%d) is under the minimum of %d",
tid, tlength, min_length)
continue

8 changes: 6 additions & 2 deletions Mikado/preparation/prepare.py
@@ -121,8 +121,12 @@ def store_transcripts(shelf_stacks, logger, keep_redundant=False):
logger.debug("The following transcripts are redundant: %s",
",".join([_[0] for _ in cds_list]))
to_keep = random.choice(cds_list)
logger.debug("Keeping only %s out of the list",
to_keep)
for tid in cds_list:
if tid != to_keep:
logger.info("Discarding %s as redundant", tid[0])
else:
logger.info("Keeping %s amongst redundant transcripts", tid[0])

tids.append(to_keep)
else:
tids.extend(tid_list)
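The new messages make the redundancy resolution visible at the default level: within a group of transcripts flagged as redundant, one is kept at random and the rest are reported as discarded. A toy sketch of that logic with made-up identifiers:

```python
import random

# Transcripts considered mutually redundant (identifiers and coordinates are invented).
cds_list = [("tr_A", ("chr1", 100, 900)), ("tr_B", ("chr1", 100, 900)), ("tr_C", ("chr1", 100, 900))]
to_keep = random.choice(cds_list)
tids = []
for tid in cds_list:
    if tid != to_keep:
        print("Discarding %s as redundant" % tid[0])
    else:
        print("Keeping %s amongst redundant transcripts" % tid[0])
tids.append(to_keep)  # only the chosen representative survives
```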