From 6175eb2f6b8f777918651756c28433886ccd76e5 Mon Sep 17 00:00:00 2001
From: Luca Venturini
Date: Fri, 12 Oct 2018 14:46:42 +0100
Subject: [PATCH] Modified error messages for Mikado prepare, serialise and
 pick. Now the default level "INFO" should give appropriate results. Also
 correcting the codon table issue for #34

---
 .../configuration_blueprint.json              |  7 +--
 Mikado/loci/locus.py                          | 10 ++++
 Mikado/loci/superlocus.py                     |  3 ++
 Mikado/picking/picker.py                      | 30 ++++++------
 Mikado/preparation/annotation_parser.py       |  2 +-
 Mikado/preparation/prepare.py                 |  8 +++-
 .../blast_serializer/xml_serialiser.py        | 46 +++++++++----------
 Mikado/serializers/orf.py                     | 12 ++---
 Mikado/subprograms/serialise.py               | 16 +++++--
 .../transcript_methods/splitting.py           |  1 +
 10 files changed, 81 insertions(+), 54 deletions(-)

diff --git a/Mikado/configuration/configuration_blueprint.json b/Mikado/configuration/configuration_blueprint.json
index a92896aaf..99ae75e6e 100644
--- a/Mikado/configuration/configuration_blueprint.json
+++ b/Mikado/configuration/configuration_blueprint.json
@@ -31,7 +31,7 @@
       "Settings related to the logs. Keys:",
       "- sql_level: verbosity for SQL calls. Default: WARNING.",
       "  In decreasing order: 'DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'",
-      "- log_level: verbosity. Default: WARNING.",
+      "- log_level: verbosity. Default: INFO.",
       "  In decreasing order: 'DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'"
       ],
       "type": "object",
@@ -45,7 +45,7 @@
             "ERROR",
             "CRITICAL"
           ],
-          "default": "WARNING"
+          "default": "INFO"
         },
         "sql_level": {
           "type": "string",
@@ -202,7 +202,8 @@
           "Scenedesmus obliquus Mitochondrial", "Thraustochytrium Mitochondrial",
           "Pterobranchia Mitochondrial", "Candidate Division SR1", "Gracilibacteria",
           "Pachysolen tannophilus Nuclear", "Karyorelict Nuclear", "Condylostoma Nuclear",
-          "Mesodinium Nuclear", "Peritrich Nuclear", "Blastocrithidia Nuclear"]
+          "Mesodinium Nuclear", "Peritrich Nuclear", "Blastocrithidia Nuclear"],
+          "default": 0
         }
       }
     },
diff --git a/Mikado/loci/locus.py b/Mikado/loci/locus.py
index 906ed0802..f850198ca 100644
--- a/Mikado/loci/locus.py
+++ b/Mikado/loci/locus.py
@@ -938,5 +938,15 @@ def expand_transcript(transcript, new_start, new_end, fai, logger):
         logger.info("Padding %s would lead to an in-frame stop codon. Aborting.",
                     transcript.id)
         return backup
+    else:
+        message = "{transcript.id} now has start {transcript.start} and end {transcript.end}"
+        if ((backup.combined_cds_end != transcript.combined_cds_end) or
+                (backup.combined_cds_start != transcript.combined_cds_start)):
+            transcript.attributes["cds_padded"] = True
+            message += "; the CDS now starts at {transcript.combined_cds_start} and ends at {transcript.combined_cds_end}"
+        else:
+            transcript.attributes["cds_padded"] = False
+            message += "."
+        logger.info(message.format(**locals()))
 
     return transcript
diff --git a/Mikado/loci/superlocus.py b/Mikado/loci/superlocus.py
index dd996ee10..6db8fe271 100644
--- a/Mikado/loci/superlocus.py
+++ b/Mikado/loci/superlocus.py
@@ -444,6 +444,9 @@ def load_transcript_data(self, tid, data_dict):
         if len(new_tr) > 1:
             to_add.update(new_tr)
             to_remove = True
+            self.logger.info("%s has been split into %d different transcripts.",
+                             tid, len(new_tr))
+        del data_dict
         return to_remove, to_add
 
     # @profile
diff --git a/Mikado/picking/picker.py b/Mikado/picking/picker.py
index ab9d50b86..67b597698 100644
--- a/Mikado/picking/picker.py
+++ b/Mikado/picking/picker.py
@@ -96,7 +96,7 @@ def __init__(self, json_conf, commandline=""):
                           force=True)
         # self.setup_logger()
 
-        self.logger.info("Multiprocessing method: %s",
+        self.logger.debug("Multiprocessing method: %s",
                          self.json_conf["multiprocessing_method"])
 
     # pylint: enable=no-member
@@ -232,11 +232,11 @@ def setup_shm_db(self):
         This method will copy the SQLite input DB into memory.
         """
 
-        self.main_logger.info("Copy into a SHM db: %s",
+        self.main_logger.debug("Copy into a SHM db: %s",
                               self.json_conf["pick"]["run_options"]["shm"])
         if self.json_conf["pick"]["run_options"]["shm"] is True:
             self.json_conf["pick"]["run_options"]["shm_shared"] = False
-            self.main_logger.info("Copying the DB into memory")
+            self.main_logger.debug("Copying the DB into memory")
             assert self.json_conf["db_settings"]["dbtype"] == "sqlite"
             self.json_conf["pick"]["run_options"]["preload"] = False
             if self.json_conf["pick"]["run_options"]["shm_db"] is not None:
@@ -253,7 +253,7 @@ def setup_shm_db(self):
                 self.json_conf["pick"]["run_options"]["shm_db"] = temp
         if self.json_conf["pick"]["run_options"]["shm"]:
             if not os.path.exists(self.json_conf["pick"]["run_options"]["shm_db"]):
-                self.main_logger.info("Copying {0} into {1}".format(
+                self.main_logger.debug("Copying {0} into {1}".format(
                     self.json_conf["db_settings"]["db"],
                     self.json_conf["pick"]["run_options"]["shm_db"]))
                 try:
@@ -265,7 +265,7 @@ def setup_shm_db(self):
                         Back to using the DB on disk.""")
                     self.json_conf["pick"]["run_options"]["shm"] = False
             else:
-                self.main_logger.info("%s exists already. Doing nothing.",
+                self.main_logger.debug("%s exists already. Doing nothing.",
                                       self.json_conf["pick"]["run_options"]["shm_db"])
 
         self.main_logger.info("DB copied into memory")
@@ -548,7 +548,7 @@ def __preload_blast(self, engine, queries):
                     hsps[hsp.query_id] = collections.defaultdict(list)
                 hsps[hsp.query_id][hsp.target_id].append(hsp)
 
-        self.main_logger.info("{0} HSPs prepared".format(len(hsps)))
+        self.main_logger.debug("{0} HSPs prepared".format(len(hsps)))
 
         targets = dict((x.target_id, x) for x in
                        engine.execute("select * from target"))
@@ -598,7 +598,7 @@ def __preload_blast(self, engine, queries):
         del hsps
         assert len(hits_dict) <= len(queries)
-        self.main_logger.info("%d BLAST hits loaded for %d queries",
+        self.main_logger.debug("%d BLAST hits loaded for %d queries",
                               hit_counter,
                               len(hits_dict))
 
         self.main_logger.debug("%s",
@@ -634,7 +634,7 @@ def preload(self):
 
             # data_dict["junctions"] = self.manager.dict(data_dict["junctions"], lock=False)
 
-            self.main_logger.info("%d junctions loaded",
+            self.main_logger.debug("%d junctions loaded",
                                   len(data_dict["junctions"]))
             self.main_logger.debug("Example junctions:\n{0}".format(
                 "\n".join(str(junc) for junc in list(
@@ -658,7 +658,7 @@ def preload(self):
 
             #     data_dict['orf'] = self.manager.dict(orfs, lock=False)
 
-            self.main_logger.info("%d ORFs loaded",
+            self.main_logger.debug("%d ORFs loaded",
                                   len(data_dict["orfs"]))
             self.main_logger.debug(",".join(
                 list(data_dict["orfs"].keys())[:10]
@@ -800,7 +800,7 @@ def __submit_multi_threading(self, data_dict):
         """
 
         intron_range = self.json_conf["pick"]["run_options"]["intron_range"]
-        self.logger.info("Intron range: %s", intron_range)
+        self.logger.debug("Intron range: %s", intron_range)
 
         current_locus = None
         current_transcript = None
@@ -826,7 +826,7 @@ def __submit_multi_threading(self, data_dict):
         # tempdir = os.path.join(self.json_conf["pick"]["files"]["output_dir"], "mikado_pick_tmp")
         # os.makedirs(tempdir, exist_ok=True)
 
-        self.logger.info("Creating the worker processes")
+        self.logger.debug("Creating the worker processes")
         conn, cursor = self._create_temporary_store(tempdir)
         working_processes = [LociProcesser(self.json_conf,
                                            data_dict,
@@ -838,7 +838,7 @@ def __submit_multi_threading(self, data_dict):
                              for _ in range(1, self.procs+1)]
         # Start all processes
         [_.start() for _ in working_processes]
-        self.logger.info("Started all %d workers", self.procs)
+        self.logger.debug("Started all %d workers", self.procs)
         # No sense in keeping this data available on the main thread now
         del data_dict
 
@@ -927,7 +927,7 @@ def __submit_multi_threading(self, data_dict):
                           current_locus.id, counter,
                           ", ".join(list(current_locus.transcripts.keys())))
         locus_queue.put(("EXIT", ))
-        self.logger.info("Joining children processes")
+        self.logger.debug("Joining children processes")
         [_.join() for _ in working_processes]
         conn.close()
         self.logger.info("Joined children processes; starting to merge partial files")
@@ -988,7 +988,7 @@ def __submit_single_threaded(self, data_dict):
         logger.debug("Begun single-threaded run")
 
         intron_range = self.json_conf["pick"]["run_options"]["intron_range"]
-        logger.info("Intron range: %s", intron_range)
+        logger.debug("Intron range: %s", intron_range)
 
         handles = self.__get_output_files()
 
@@ -1170,7 +1170,7 @@ def __call__(self):
 
         # Clean up the DB copied to SHM
         if (self.json_conf["pick"]["run_options"]["shm"] is True and
                 self.json_conf["pick"]["run_options"]["shm_shared"] is False):
-            self.main_logger.info("Removing shared memory DB %s",
+            self.main_logger.debug("Removing shared memory DB %s",
                                   self.json_conf["pick"]["run_options"]["shm_db"])
             os.remove(self.json_conf["pick"]["run_options"]["shm_db"])
diff --git a/Mikado/preparation/annotation_parser.py b/Mikado/preparation/annotation_parser.py
index 718c4e3eb..b38480b9d 100644
--- a/Mikado/preparation/annotation_parser.py
+++ b/Mikado/preparation/annotation_parser.py
@@ -248,7 +248,7 @@ def load_into_storage(shelf_name, exon_lines, min_length, logger, strip_cds=True
 
         # Discard transcript under a certain size
         if tlength < min_length:
-            logger.debug("Discarding %s because its size (%d) is under the minimum of %d",
+            logger.info("Discarding %s because its size (%d) is under the minimum of %d",
                         tid, tlength, min_length)
             continue
 
diff --git a/Mikado/preparation/prepare.py b/Mikado/preparation/prepare.py
index 0d59bfbb4..1beba5203 100644
--- a/Mikado/preparation/prepare.py
+++ b/Mikado/preparation/prepare.py
@@ -121,8 +121,12 @@ def store_transcripts(shelf_stacks, logger, keep_redundant=False):
                     logger.debug("The following transcripts are redundant: %s",
                                  ",".join([_[0] for _ in cds_list]))
                     to_keep = random.choice(cds_list)
-                    logger.debug("Keeping only %s out of the list",
-                                 to_keep)
+                    for tid in cds_list:
+                        if tid != to_keep:
+                            logger.info("Discarding %s as redundant", tid[0])
+                        else:
+                            logger.info("Keeping %s amongst redundant transcripts", tid[0])
+
                     tids.append(to_keep)
                 else:
                     tids.extend(tid_list)
diff --git a/Mikado/serializers/blast_serializer/xml_serialiser.py b/Mikado/serializers/blast_serializer/xml_serialiser.py
index 8817488c0..3ce4e79bb 100644
--- a/Mikado/serializers/blast_serializer/xml_serialiser.py
+++ b/Mikado/serializers/blast_serializer/xml_serialiser.py
@@ -91,7 +91,7 @@ def _pickler(self, filename):
                 self.logger.warning("Invalid BLAST file: %s", filename)
                 return []
 
-        self.logger.info("Starting to pickle %s", filename)
+        self.logger.debug("Starting to pickle %s", filename)
         hits, hsps = [], []
         pickle_count = 0
 
@@ -131,7 +131,7 @@ def _pickler(self, filename):
                 yield pickle_temp[1]
                 pickle_count += 1
                 # pfiles.append()
-        self.logger.info("Finished pickling %s in %s subsection", filename, pickle_count)
+        self.logger.debug("Finished pickling %s in %s subsection", filename, pickle_count)
         # del records
 
     def run(self):
@@ -154,7 +154,7 @@ def run(self):
                 continue
 
             if filename == "EXIT":
-                self.logger.info("Process %s received EXIT signal, terminating",
+                self.logger.debug("Process %s received EXIT signal, terminating",
                                  self._name)
                 self.filequeue.put((number, filename))
                 return 0
@@ -205,7 +205,7 @@ def __init__(self, xml_name,
             self.logger = check_logger(logger)
         else:
             raise ValueError("No logger provided!")
-        self.logger.info("Started to serialise %s, log level: %s",
+        self.logger.debug("Started to serialise %s, log level: %s",
                          xml_name, self.logger.level)
 
         # Runtime arguments
@@ -242,7 +242,7 @@ def __init__(self, xml_name,
         DBBASE.metadata.create_all(self.engine)  # @UndefinedVariable
         session = Session(bind=self.engine, autocommit=True, autoflush=True)
         self.session = session  # session()
-        self.logger.info("Created the session")
+        self.logger.debug("Created the session")
         # Load sequences if necessary
         self.__determine_sequences(query_seqs, target_seqs)
         self.xml = xml_name
@@ -257,7 +257,7 @@ def __init__(self, xml_name,
 
         self.queries, self.targets = dict(), dict()
 
-        self.logger.info("Finished __init__")
+        self.logger.debug("Finished __init__")
 
     def __getstate__(self):
 
@@ -321,7 +321,7 @@ def __serialize_queries(self, queries):
                 })
 
             if len(objects) >= self.maxobjects:
-                self.logger.info("Loading %d objects into the \"query\" table (total %d)",
+                self.logger.debug("Loading %d objects into the \"query\" table (total %d)",
                                  self.maxobjects, counter)
 
                 # pylint: disable=no-member
@@ -335,7 +335,7 @@ def __serialize_queries(self, queries):
                 objects = []
 
         if len(objects) > 0:
-            self.logger.info("Loading %d objects into the \"query\" table (total %d)",
+            self.logger.debug("Loading %d objects into the \"query\" table (total %d)",
                              len(objects), counter+len(objects))
             # pylint: disable=no-member
             counter += len(objects)
@@ -345,7 +345,7 @@ def __serialize_queries(self, queries):
             # pylint: enable=no-member
             self.session.commit()
         # pylint: enable=no-member
-        self.logger.info("Loaded %d objects into the \"query\" table", counter)
+        self.logger.debug("Loaded %d objects into the \"query\" table", counter)
         for query in self.session.query(Query):
             queries[query.query_name] = (query.query_id, query.query_length)
         self.logger.info("%d in queries", len(queries))
@@ -381,7 +381,7 @@ def __serialize_targets(self, targets):
             #     objects.append(Target(record, len(self.target_seqs[record])))
             if len(objects) >= self.maxobjects:
                 # counter += len(objects)
-                self.logger.info("Loading %d objects into the \"target\" table",
+                self.logger.debug("Loading %d objects into the \"target\" table",
                                  counter)
                 # self.session.bulk_insert_mappings(Target, objects)
                 self.session.begin(subtransactions=True)
@@ -393,7 +393,7 @@ def __serialize_targets(self, targets):
         #     self.logger.info("Loaded %d objects into the \"target\" table",
         #                      len(objects))
         # objects = []
-        self.logger.info("Loading %d objects into the \"target\" table, (total %d)",
+        self.logger.debug("Loading %d objects into the \"target\" table, (total %d)",
                          len(objects), counter)
         # pylint: disable=no-member
         self.session.begin(subtransactions=True)
@@ -404,7 +404,7 @@ def __serialize_targets(self, targets):
         self.logger.info("Loaded %d objects into the \"target\" table", counter)
         for target in self.session.query(Target):
             targets[target.target_name] = (target.target_id, target.target_length is not None)
-        self.logger.info("%d in targets", len(targets))
+        self.logger.debug("%d in targets", len(targets))
         return targets
 
     def __serialise_sequences(self):
@@ -415,15 +415,15 @@ def __serialise_sequences(self):
 
         targets = dict()
        queries = dict()
-        self.logger.info("Loading previous IDs")
+        self.logger.debug("Loading previous IDs")
         for query in self.session.query(Query):
             queries[query.query_name] = (query.query_id, (query.query_length is not None))
         for target in self.session.query(Target):
             targets[target.target_name] = (target.target_id, (target.target_length is not None))
-        self.logger.info("Loaded previous IDs; %d for queries, %d for targets",
+        self.logger.debug("Loaded previous IDs; %d for queries, %d for targets",
                          len(queries), len(targets))
 
-        self.logger.info("Started the sequence serialisation")
+        self.logger.debug("Started the sequence serialisation")
         if self.target_seqs:
             targets = self.__serialize_targets(targets)
             assert len(targets) > 0
@@ -456,7 +456,7 @@ def __load_into_db(self, hits, hsps, force=False):
 
         if tot_objects >= self.maxobjects or force:
             # Bulk load
-            self.logger.info("Loading %d BLAST objects into database", tot_objects)
+            self.logger.debug("Loading %d BLAST objects into database", tot_objects)
 
             try:
                 # pylint: disable=no-member
@@ -469,7 +469,7 @@ def __load_into_db(self, hits, hsps, force=False):
                 self.logger.critical("Failed to serialise BLAST!")
                 self.logger.exception(err)
                 raise err
-            self.logger.info("Loaded %d BLAST objects into database", tot_objects)
+            self.logger.debug("Loaded %d BLAST objects into database", tot_objects)
             hits, hsps = [], []
         return hits, hsps
@@ -525,7 +525,7 @@ def serialize(self):
             _, _ = self.__load_into_db(hits, hsps, force=True)
 
         else:
-            self.logger.info("Creating a pool with %d processes",
+            self.logger.debug("Creating a pool with %d processes",
                              min(self.procs, len(self.xml)))
 
             filequeue = multiprocessing.Queue(-1)
@@ -547,12 +547,12 @@ def serialize(self):
                 for _ in range(min([self.procs, len(self.xml)]))
             ]
 
-            self.logger.info("Starting to pickle and serialise %d files", len(self.xml))
+            self.logger.debug("Starting to pickle and serialise %d files", len(self.xml))
             [_.start() for _ in procs]  # Start processes
             for number, xml_name in enumerate(self.xml):
                 filequeue.put((number, xml_name))
 
-            self.logger.info("Finished sending off the data for serialisation")
+            self.logger.debug("Finished sending off the data for serialisation")
             filequeue.put((None, "EXIT"))
 
             returned = []
@@ -575,7 +575,7 @@ def serialize(self):
                     hsps.extend(__hsps)
                     hits, hsps = load_into_db(self, hits, hsps, force=False)
                     if record_counter > 0 and record_counter % 10000 == 0:
-                        self.logger.info("Parsed %d queries", record_counter)
+                        self.logger.debug("Parsed %d queries", record_counter)
                 os.remove(pickle_file)
             [_.join() for _ in procs]  # Wait for processes to join
             self.logger.info("All %d children finished", len(procs))
@@ -720,7 +720,7 @@ def load_into_db(self, hits, hsps, force=False):
 
     if tot_objects >= self.maxobjects or force:
         # Bulk load
-        self.logger.info("Loading %d BLAST objects into database", tot_objects)
+        self.logger.debug("Loading %d BLAST objects into database", tot_objects)
 
         try:
             # pylint: disable=no-member
@@ -733,7 +733,7 @@ def load_into_db(self, hits, hsps, force=False):
             self.logger.critical("Failed to serialise BLAST!")
             self.logger.exception(err)
             raise err
-        self.logger.info("Loaded %d BLAST objects into database", tot_objects)
+        self.logger.debug("Loaded %d BLAST objects into database", tot_objects)
         hits, hsps = [], []
     return hits, hsps
 
diff --git a/Mikado/serializers/orf.py b/Mikado/serializers/orf.py
index 14c51c9d0..302961084 100644
--- a/Mikado/serializers/orf.py
+++ b/Mikado/serializers/orf.py
@@ -225,7 +225,7 @@ def load_fasta(self, cache):
 
         if self.fasta_index is not None:
             done = 0
-            self.logger.info("%d entries already present in db, %d in the index",
+            self.logger.debug("%d entries already present in db, %d in the index",
                              len([fasta_key for fasta_key in self.fasta_index if
                                   fasta_key not in cache]),
                              len(self.fasta_index.keys()))
@@ -241,7 +241,7 @@ def load_fasta(self, cache):
                     self.session.begin(subtransactions=True)
                     self.session.bulk_save_objects(objects)
                     self.session.commit()
-                    self.logger.info(
+                    self.logger.debug(
                         "Loaded %d transcripts into query table", done)
                     objects = []
 
@@ -249,7 +249,7 @@ def load_fasta(self, cache):
             self.session.begin(subtransactions=True)
             self.session.bulk_save_objects(objects)
             self.session.commit()
-            self.logger.info(
+            self.logger.debug(
                 "Finished loading %d transcripts into query table", done)
         return cache
 
@@ -269,10 +269,10 @@ def serialize(self):
         done = 0
         cache = self.load_fasta(cache)
 
-        self.logger.info("Loading IDs into the cache")
+        self.logger.debug("Loading IDs into the cache")
         for record in self.session.query(Query):
             cache[record.query_name] = record.query_id
-        self.logger.info("Finished loading IDs into the cache")
+        self.logger.debug("Finished loading IDs into the cache")
 
         for row in self.bed12_parser:
             if row.header is True:
@@ -298,7 +298,7 @@ def serialize(self):
                     self.session.begin(subtransactions=True)
                     self.session.bulk_save_objects(objects)
                     self.session.commit()
-                    self.logger.info("Loaded %d ORFs into the database", done)
+                    self.logger.debug("Loaded %d ORFs into the database", done)
                     objects = []
                 done += len(objects)
 
diff --git a/Mikado/subprograms/serialise.py b/Mikado/subprograms/serialise.py
index 1ca2720f6..83a77f63c 100644
--- a/Mikado/subprograms/serialise.py
+++ b/Mikado/subprograms/serialise.py
@@ -80,7 +80,7 @@ def load_junctions(args, logger):
     for junction_file in iter(
             j_file for j_file in args.json_conf["serialise"]["files"]["junctions"]
             if j_file != ''):
-        logger.info("Loading junctions: %s", junction_file)
+        logger.debug("Loading junctions: %s", junction_file)
         serializer = junction.JunctionSerializer(
             junction_file,
             json_conf=args.json_conf,
@@ -137,7 +137,7 @@ def load_orfs(args, logger):
     if len(args.json_conf["serialise"]["files"]["orfs"]) > 0:
         logger.info("Starting to load ORF data")
         for orf_file in args.json_conf["serialise"]["files"]["orfs"]:
-            logger.info("Starting to load ORFs from %s", orf_file)
+            logger.debug("Starting to load ORFs from %s", orf_file)
             serializer = orf.OrfSerializer(orf_file,
                                            json_conf=args.json_conf,
                                            logger=logger)
@@ -263,6 +263,11 @@ def setup(args):
     if args.max_regression is not None:
         args.json_conf["serialise"]["max_regression"] = args.max_regression
 
+    if args.codon_table is not None:
+        args.json_conf["serialise"]["codon_table"] = args.codon_table
+    else:
+        assert "codon_table" in args.json_conf["serialise"]
+
     logger.setLevel("INFO")
     logger.info("Command line: %s", " ".join(sys.argv))
 
@@ -300,15 +305,15 @@ def serialise(args):
     logger.info("Command line: %s", " ".join(sys.argv))
 
     if args.json_conf["serialise"]["force"] is True:
-        logger.warn("Removing old data because force option in place")
         if args.json_conf["db_settings"]["dbtype"] == "sqlite" and os.path.exists(args.json_conf["db_settings"]["db"]):
+            logger.warn("Removing old data because force option in place")
             os.remove(args.json_conf["db_settings"]["db"])
 
         engine = dbutils.connect(args.json_conf)
         meta = sqlalchemy.MetaData(bind=engine)
         meta.reflect(engine)
         for tab in reversed(meta.sorted_tables):
-            logger.warn("Dropping %s", tab)
+            logger.debug("Dropping %s", tab)
             tab.drop()
             if args.json_conf["db_settings"]["dbtype"] == "mysql":
                 engine.execute("OPTIMIZE TABLE {}".format(tab.name))
@@ -369,6 +374,9 @@ def serialise_parser():
                       type=float, default=None,
                       help=""""Amount of sequence in the ORF (in %%) to backtrack in order
                       to find a valid START codon, if one is absent. Default: %(default)s""")
+    orfs.add_argument("--codon-table", dest="codon_table", default=None,
+                      help="""Codon table to use. Default: 0 (ie Standard, NCBI #1, but only ATG
+                      is considered a valid start codon).""")
 
     blast = parser.add_argument_group()
     blast.add_argument("--max_target_seqs", type=int, default=None,
diff --git a/Mikado/transcripts/transcript_methods/splitting.py b/Mikado/transcripts/transcript_methods/splitting.py
index 5b57d625f..8cbc36242 100644
--- a/Mikado/transcripts/transcript_methods/splitting.py
+++ b/Mikado/transcripts/transcript_methods/splitting.py
@@ -825,6 +825,7 @@ def split_by_cds(transcript):
 
         new_transc.verified_introns = set.intersection(set(new_transc.introns),
                                                        transcript.verified_introns)
+        new_transc.attributes["split"] = True
         yield new_transc
 
     return