From 1f5dd51dd745e48940c3df0790602f41158a3d50 Mon Sep 17 00:00:00 2001 From: gbouras13 Date: Wed, 10 Jan 2024 14:28:44 +1030 Subject: [PATCH] fix #299 extra parameter options --- bin/input_commands.py | 12 ++++++++++++ bin/pharokka.py | 4 ++-- bin/processes.py | 15 ++++++++++----- tests/test_overall.py | 13 +++++++++++-- 4 files changed, 35 insertions(+), 9 deletions(-) diff --git a/bin/input_commands.py b/bin/input_commands.py index cd7cbe0..1250c3b 100644 --- a/bin/input_commands.py +++ b/bin/input_commands.py @@ -149,6 +149,18 @@ def get_input(): help="Skips running mash to find the closest match for each contig in INPHARED.", action="store_true", ) + parser.add_argument( + "--minced_args", + help="extra commands to pass to MINced (please omit the leading hyphen for the first argument). You will need to use quotation marks e.g. --minced_args \"minNR 2 -minRL 21\"", + default="", + type=str + ) + parser.add_argument( + "--mash_distance", + help="mash distance for the search against INPHARED. 
Defaults to 0.2.", + default=0.2, + type=float + ) parser.add_argument( "-V", "--version", diff --git a/bin/pharokka.py b/bin/pharokka.py index bce6a6c..935b21c 100755 --- a/bin/pharokka.py +++ b/bin/pharokka.py @@ -321,7 +321,7 @@ def main(): logger.info("Starting tRNA-scanSE.") run_trna_scan(input_fasta, args.threads, out_dir, logdir) # run minced and aragorn - run_minced(input_fasta, out_dir, prefix, logdir) + run_minced(input_fasta, out_dir, prefix, args.minced_args, logdir) run_aragorn(input_fasta, out_dir, prefix, logdir) # running mmseqs2 on the 3 databases @@ -460,7 +460,7 @@ def main(): logger.info("Finding the closest match for each contig in INPHARED using mash.") # in process.py run_mash_sketch(input_fasta, out_dir, logdir) - run_mash_dist(out_dir, db_dir, logdir) + run_mash_dist(out_dir, db_dir, args.mash_distance, logdir) # part of the class pharok.inphared_top_hits() else: diff --git a/bin/processes.py b/bin/processes.py index 045104a..96322f5 100644 --- a/bin/processes.py +++ b/bin/processes.py @@ -776,12 +776,13 @@ def convert_gff_to_gbk(filepath_in, input_dir, out_dir, prefix, prot_seq_df): SeqIO.write(record, gbk_handler, "genbank") -def run_minced(filepath_in, out_dir, prefix, logdir): +def run_minced(filepath_in, out_dir, prefix, minced_args, logdir): """ Runs MinCED :param filepath_in: input fasta file :param out_dir: output directory :param logger: logger + :param minced_args: str with extra arguments to pass to MINced :params prefix: prefix :return: """ @@ -791,11 +792,14 @@ def run_minced(filepath_in, out_dir, prefix, logdir): output_spacers = os.path.join(out_dir, prefix + "_minced_spacers.txt") output_gff = os.path.join(out_dir, prefix + "_minced.gff") + if minced_args != "": + minced_args = f"-{minced_args}" + minced_fast = ExternalTool( tool="minced", input=f"", - output=f"{output_spacers} {output_gff}", - params=f" {filepath_in}", # need strange order for minced params go first + output=f" {output_spacers} {output_gff}", + params=f" 
{minced_args} {filepath_in}", # need strange order for minced params go first logdir=logdir, outfile="", ) @@ -910,11 +914,12 @@ def run_mash_sketch(filepath_in, out_dir, logdir): logger.error("Error with mash sketch\n") -def run_mash_dist(out_dir, db_dir, logdir): +def run_mash_dist(out_dir, db_dir, mash_distance, logdir): """ Runs mash :param filepath_in: input filepath :param out_dir: output directory + :param mash_distance: mash distance - float :param logger logger :return: """ @@ -927,7 +932,7 @@ def run_mash_dist(out_dir, db_dir, logdir): tool="mash", input="", output="", - params=f" dist {mash_sketch} {phrog_sketch} -d 0.2 -i ", + params=f" dist {mash_sketch} {phrog_sketch} -d {mash_distance} -i ", logdir=logdir, outfile=mash_tsv, ) diff --git a/tests/test_overall.py b/tests/test_overall.py index 360eb25..3203f4f 100755 --- a/tests/test_overall.py +++ b/tests/test_overall.py @@ -85,6 +85,11 @@ def test_overall(tmp_dir): cmd = f"pharokka.py -i {input_fasta} -d {database_dir} -o {tmp_dir} -t {threads} -f" exec_command(cmd) +def test_overall_mash_distance(tmp_dir): + """test pharokka overall with stricter mash distance""" + input_fasta: Path = f"{standard_data}/SAOMS1.fasta" + cmd = f"pharokka.py -i {input_fasta} -d {database_dir} -o {tmp_dir} -t {threads} -f --mash_distance 0.05" + exec_command(cmd) def test_overall_crispr(tmp_dir): """test pharokka overall crispr""" @@ -92,6 +97,12 @@ def test_overall_crispr(tmp_dir): cmd = f"pharokka.py -i {input_fasta} -d {database_dir} -o {tmp_dir} -t {threads} -f" exec_command(cmd) +def test_overall_crispr_minced_args(tmp_dir): + """test pharokka crispr with minced args""" + input_fasta: Path = f"{CRISPR_data}/Biggiephage_A_fullcontig_CasΦ1.fasta" + cmd = f"pharokka.py -i {input_fasta} -d {database_dir} -o {tmp_dir} -t {threads} -f -g prodigal --minced_args \"minNR 2 -minRL 21\" " + exec_command(cmd) + def test_overall_vfdb(tmp_dir): """test pharokka overall on a phage with vfdb hits. 
Also include --skip_extra_annotations""" @@ -137,14 +148,12 @@ def test_meta_prodigal_gv(tmp_dir): cmd = f"pharokka.py -i {input_fasta} -d {database_dir} -o {tmp_dir} -t {threads} -f -m -g prodigal-gv" exec_command(cmd) - def test_meta_dnaapler_all_bug(tmp_dir): """test pharokka meta dnaapler bug and split""" input_fasta: Path = f"{meta_data}/combined_meta.fasta" cmd = f"pharokka.py -i {input_fasta} -d {database_dir} -o {tmp_dir} -t {threads} -f -m -s --dnaapler --meta_hmm" exec_command(cmd) - def test_overall_locus(tmp_dir): """test pharokka overall locus tag prefix""" input_fasta: Path = f"{standard_data}/SAOMS1.fasta"