
Modified error messages for Mikado prepare, serialise and pick. Now the default level "INFO" should give appropriate results. Also correcting the codon table issue for EI-CoreBioinformatics#34
Luca Venturini authored and Luca Venturini committed Oct 12, 2018
1 parent a5aeb31 commit 6175eb2
Showing 10 changed files with 81 additions and 54 deletions.
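In short: routine progress messages in `pick` drop from INFO to DEBUG, a few genuinely useful ones in `prepare` rise to INFO, and the default `log_level` becomes "INFO", so a default run reports milestones without the former flood of detail. A minimal, generic sketch of that interplay using only the standard library (logger name and messages are illustrative, not Mikado's own setup):

```python
import logging

# With the level at INFO, messages demoted to DEBUG in this commit vanish from the
# default output, while those kept at (or promoted to) INFO remain visible.
logging.basicConfig(level=logging.INFO, format="%(levelname)s - %(message)s")
logger = logging.getLogger("mikado_demo")  # hypothetical logger name

logger.debug("Copying the DB into memory")         # demoted message: hidden at INFO
logger.info("Discarding %s as redundant", "tr_B")   # promoted message: shown at INFO
```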
7 changes: 4 additions & 3 deletions Mikado/configuration/configuration_blueprint.json
@@ -31,7 +31,7 @@
"Settings related to the logs. Keys:",
"- sql_level: verbosity for SQL calls. Default: WARNING.",
" In decreasing order: 'DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'",
"- log_level: verbosity. Default: WARNING.",
"- log_level: verbosity. Default: INFO.",
" In decreasing order: 'DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'"
],
"type": "object",
@@ -45,7 +45,7 @@
"ERROR",
"CRITICAL"
],
"default": "WARNING"
"default": "INFO"
},
"sql_level": {
"type": "string",
@@ -202,7 +202,8 @@
"Scenedesmus obliquus Mitochondrial", "Thraustochytrium Mitochondrial",
"Pterobranchia Mitochondrial", "Candidate Division SR1", "Gracilibacteria",
"Pachysolen tannophilus Nuclear", "Karyorelict Nuclear", "Condylostoma Nuclear",
"Mesodinium Nuclear", "Peritrich Nuclear", "Blastocrithidia Nuclear"]
"Mesodinium Nuclear", "Peritrich Nuclear", "Blastocrithidia Nuclear"],
"default": 0
}
}
},
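For orientation, a hedged sketch (plain dictionaries, not Mikado's actual configuration loader, and the nesting of the keys is assumed) of how blueprint defaults such as `"log_level": "INFO"` and the new `"codon_table": 0` only take effect when the user leaves those keys out:

```python
# Defaults declared in configuration_blueprint.json; the key paths here are
# illustrative, not verified against the full schema.
blueprint_defaults = {
    ("log_settings", "log_level"): "INFO",
    ("serialise", "codon_table"): 0,
}

user_config = {"log_settings": {"sql_level": "WARNING"}}  # user did not set log_level

for (section, key), default in blueprint_defaults.items():
    user_config.setdefault(section, {}).setdefault(key, default)

print(user_config)
# {'log_settings': {'sql_level': 'WARNING', 'log_level': 'INFO'}, 'serialise': {'codon_table': 0}}
```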
10 changes: 10 additions & 0 deletions Mikado/loci/locus.py
@@ -938,5 +938,15 @@ def expand_transcript(transcript, new_start, new_end, fai, logger):
logger.info("Padding %s would lead to an in-frame stop codon. Aborting.",
transcript.id)
return backup
+else:
+message = "{transcript.id} has now start {transcript.start}, end {transcript.end}"
+if ((backup.combined_cds_end != transcript.combined_cds_end) or
+(backup.combined_cds_start != transcript.combined_cds_start)):
+transcript.attributes["cds_padded"] = True
+message += "; CDS moved to {transcript.combined_cds_start}, end {transcript.combined_cds_end}"
+else:
+transcript.attributes["cds_padded"] = False
+message += "."
+logger.info(message.format(**locals()))

return transcript
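The padding message above is built as a template that names attributes of objects in the local scope and is then filled with `str.format(**locals())`. A self-contained toy version of the pattern (the `Transcript` stand-in is hypothetical; only the formatting idiom is taken from the diff):

```python
class Transcript:
    # Hypothetical stand-in holding just the fields the message template references.
    def __init__(self, tid, start, end, cds_start, cds_end):
        self.id, self.start, self.end = tid, start, end
        self.combined_cds_start, self.combined_cds_end = cds_start, cds_end

backup = Transcript("tr1", 100, 1900, 150, 1700)      # state before padding
transcript = Transcript("tr1", 100, 2000, 150, 1800)  # state after padding

message = "{transcript.id} has now start {transcript.start}, end {transcript.end}"
if (backup.combined_cds_end != transcript.combined_cds_end or
        backup.combined_cds_start != transcript.combined_cds_start):
    message += "; CDS moved to {transcript.combined_cds_start}, end {transcript.combined_cds_end}"
else:
    message += "."

# format(**locals()) resolves the attribute lookups against the local objects.
print(message.format(**locals()))
# tr1 has now start 100, end 2000; CDS moved to 150, end 1800
```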
3 changes: 3 additions & 0 deletions Mikado/loci/superlocus.py
@@ -444,6 +444,9 @@ def load_transcript_data(self, tid, data_dict):
if len(new_tr) > 1:
to_add.update(new_tr)
to_remove = True
self.logger.info("%s has been split into %d different transcripts.",
tid, len(new_tr))

del data_dict
return to_remove, to_add
# @profile
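The new log call uses lazy %-style arguments, as elsewhere in Mikado: the message is only interpolated if the record is actually emitted at the current level. A small generic illustration with toy values:

```python
import logging

logging.basicConfig(level=logging.INFO, format="%(levelname)s - %(message)s")
logger = logging.getLogger("superlocus_demo")  # hypothetical name

tid = "transcript_1"
new_tr = {"transcript_1.split1": None, "transcript_1.split2": None}
# Arguments are passed separately instead of pre-formatting the string, so no work
# is wasted when the level filters the message out.
logger.info("%s has been split into %d different transcripts.", tid, len(new_tr))
```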
30 changes: 15 additions & 15 deletions Mikado/picking/picker.py
@@ -96,7 +96,7 @@ def __init__(self, json_conf, commandline=""):
force=True)

# self.setup_logger()
self.logger.info("Multiprocessing method: %s",
self.logger.debug("Multiprocessing method: %s",
self.json_conf["multiprocessing_method"])

# pylint: enable=no-member
@@ -232,11 +232,11 @@ def setup_shm_db(self):
This method will copy the SQLite input DB into memory.
"""

self.main_logger.info("Copy into a SHM db: %s",
self.main_logger.debug("Copy into a SHM db: %s",
self.json_conf["pick"]["run_options"]["shm"])
if self.json_conf["pick"]["run_options"]["shm"] is True:
self.json_conf["pick"]["run_options"]["shm_shared"] = False
self.main_logger.info("Copying the DB into memory")
self.main_logger.debug("Copying the DB into memory")
assert self.json_conf["db_settings"]["dbtype"] == "sqlite"
self.json_conf["pick"]["run_options"]["preload"] = False
if self.json_conf["pick"]["run_options"]["shm_db"] is not None:
@@ -253,7 +253,7 @@ def setup_shm_db(self):
self.json_conf["pick"]["run_options"]["shm_db"] = temp
if self.json_conf["pick"]["run_options"]["shm"]:
if not os.path.exists(self.json_conf["pick"]["run_options"]["shm_db"]):
self.main_logger.info("Copying {0} into {1}".format(
self.main_logger.debug("Copying {0} into {1}".format(
self.json_conf["db_settings"]["db"],
self.json_conf["pick"]["run_options"]["shm_db"]))
try:
@@ -265,7 +265,7 @@ def setup_shm_db(self):
Back to using the DB on disk.""")
self.json_conf["pick"]["run_options"]["shm"] = False
else:
self.main_logger.info("%s exists already. Doing nothing.",
self.main_logger.debug("%s exists already. Doing nothing.",
self.json_conf["pick"]["run_options"]["shm_db"])
self.main_logger.info("DB copied into memory")

@@ -548,7 +548,7 @@ def __preload_blast(self, engine, queries):
hsps[hsp.query_id] = collections.defaultdict(list)
hsps[hsp.query_id][hsp.target_id].append(hsp)

self.main_logger.info("{0} HSPs prepared".format(len(hsps)))
self.main_logger.debug("{0} HSPs prepared".format(len(hsps)))

targets = dict((x.target_id, x) for x in engine.execute("select * from target"))

@@ -598,7 +598,7 @@ def __preload_blast(self, engine, queries):

del hsps
assert len(hits_dict) <= len(queries)
self.main_logger.info("%d BLAST hits loaded for %d queries",
self.main_logger.debug("%d BLAST hits loaded for %d queries",
hit_counter,
len(hits_dict))
self.main_logger.debug("%s",
@@ -634,7 +634,7 @@ def preload(self):

# data_dict["junctions"] = self.manager.dict(data_dict["junctions"], lock=False)

self.main_logger.info("%d junctions loaded",
self.main_logger.debug("%d junctions loaded",
len(data_dict["junctions"]))
self.main_logger.debug("Example junctions:\n{0}".format(
"\n".join(str(junc) for junc in list(
@@ -658,7 +658,7 @@ def preload(self):

# data_dict['orf'] = self.manager.dict(orfs, lock=False)

self.main_logger.info("%d ORFs loaded",
self.main_logger.debug("%d ORFs loaded",
len(data_dict["orfs"]))
self.main_logger.debug(",".join(
list(data_dict["orfs"].keys())[:10]
@@ -800,7 +800,7 @@ def __submit_multi_threading(self, data_dict):
"""

intron_range = self.json_conf["pick"]["run_options"]["intron_range"]
self.logger.info("Intron range: %s", intron_range)
self.logger.debug("Intron range: %s", intron_range)

current_locus = None
current_transcript = None
@@ -826,7 +826,7 @@ def __submit_multi_threading(self, data_dict):
# tempdir = os.path.join(self.json_conf["pick"]["files"]["output_dir"], "mikado_pick_tmp")
# os.makedirs(tempdir, exist_ok=True)

self.logger.info("Creating the worker processes")
self.logger.debug("Creating the worker processes")
conn, cursor = self._create_temporary_store(tempdir)
working_processes = [LociProcesser(self.json_conf,
data_dict,
@@ -838,7 +838,7 @@ def __submit_multi_threading(self, data_dict):
for _ in range(1, self.procs+1)]
# Start all processes
[_.start() for _ in working_processes]
self.logger.info("Started all %d workers", self.procs)
self.logger.debug("Started all %d workers", self.procs)
# No sense in keeping this data available on the main thread now
del data_dict

@@ -927,7 +927,7 @@ def __submit_multi_threading(self, data_dict):
current_locus.id, counter,
", ".join(list(current_locus.transcripts.keys())))
locus_queue.put(("EXIT", ))
self.logger.info("Joining children processes")
self.logger.debug("Joining children processes")
[_.join() for _ in working_processes]
conn.close()
self.logger.info("Joined children processes; starting to merge partial files")
@@ -988,7 +988,7 @@ def __submit_single_threaded(self, data_dict):
logger.debug("Begun single-threaded run")

intron_range = self.json_conf["pick"]["run_options"]["intron_range"]
logger.info("Intron range: %s", intron_range)
logger.debug("Intron range: %s", intron_range)

handles = self.__get_output_files()

@@ -1170,7 +1170,7 @@ def __call__(self):
# Clean up the DB copied to SHM
if (self.json_conf["pick"]["run_options"]["shm"] is True and
self.json_conf["pick"]["run_options"]["shm_shared"] is False):
self.main_logger.info("Removing shared memory DB %s",
self.main_logger.debug("Removing shared memory DB %s",
self.json_conf["pick"]["run_options"]["shm_db"])
os.remove(self.json_conf["pick"]["run_options"]["shm_db"])

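Several of the demoted messages above track the optional copy of the SQLite database into shared memory. A simplified sketch of that idea (paths and names are illustrative, not Mikado's exact logic):

```python
import os
import shutil
import tempfile

db_on_disk = "mikado.db"  # hypothetical path to the serialised database
if os.path.isdir("/dev/shm") and os.path.exists(db_on_disk):
    # Copy the database under /dev/shm so worker processes read it from RAM;
    # the real code skips the copy if the target already exists.
    handle, shm_db = tempfile.mkstemp(suffix=".db", dir="/dev/shm")
    os.close(handle)
    shutil.copy2(db_on_disk, shm_db)
else:
    shm_db = db_on_disk  # fall back to the on-disk copy
```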
2 changes: 1 addition & 1 deletion Mikado/preparation/annotation_parser.py
@@ -248,7 +248,7 @@ def load_into_storage(shelf_name, exon_lines, min_length, logger, strip_cds=True

# Discard transcript under a certain size
if tlength < min_length:
logger.debug("Discarding %s because its size (%d) is under the minimum of %d",
logger.info("Discarding %s because its size (%d) is under the minimum of %d",
tid, tlength, min_length)
continue

8 changes: 6 additions & 2 deletions Mikado/preparation/prepare.py
@@ -121,8 +121,12 @@ def store_transcripts(shelf_stacks, logger, keep_redundant=False):
logger.debug("The following transcripts are redundant: %s",
",".join([_[0] for _ in cds_list]))
to_keep = random.choice(cds_list)
logger.debug("Keeping only %s out of the list",
to_keep)
for tid in cds_list:
if tid != to_keep:
logger.info("Discarding %s as redundant", tid[0])
else:
logger.info("Keeping %s amongst redundant transcripts", tid[0])

tids.append(to_keep)
else:
tids.extend(tid_list)
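The new messages make the redundancy resolution visible at the default level: within a group of transcripts flagged as redundant, one is kept at random and the rest are reported as discarded. A toy sketch of that logic with made-up identifiers:

```python
import random

# Transcripts considered mutually redundant (identifiers and coordinates are invented).
cds_list = [("tr_A", ("chr1", 100, 900)), ("tr_B", ("chr1", 100, 900)), ("tr_C", ("chr1", 100, 900))]
to_keep = random.choice(cds_list)
tids = []
for tid in cds_list:
    if tid != to_keep:
        print("Discarding %s as redundant" % tid[0])
    else:
        print("Keeping %s amongst redundant transcripts" % tid[0])
tids.append(to_keep)  # only the chosen representative survives
```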