From 8a307cd7b30d1066426f27ec0e5297c29c6dd4fd Mon Sep 17 00:00:00 2001 From: chrishah Date: Tue, 7 Mar 2017 12:15:24 +0100 Subject: [PATCH] more cosmetics --- MITObim.pl | 38 +++++++++++++++++++------------------- README.md | 34 +++++++++++++++++----------------- docker/scripts/MITObim.pl | 38 +++++++++++++++++++------------------- 3 files changed, 55 insertions(+), 55 deletions(-) diff --git a/MITObim.pl b/MITObim.pl index 6146286..3d7bc4e 100755 --- a/MITObim.pl +++ b/MITObim.pl @@ -38,25 +38,25 @@ a baiting and iterative mapping approach. Nucl. Acids Res. 41(13):e129. doi: 10.1093/nar/gkt371\n\n"; my $USAGE = "\nusage: ./MITObim.pl \nparameters: - -start iteration to start with, default=1 - -end iteration to end with, default=1 - -sample sampleID (please don't use '.' in the sampleID). If resuming, the sampleID needs to be identical to that of the previous run. - -ref referenceID as used in initial MIRA assembly - -readpool readpool in fastq format (*.gz is also allowed) - -maf maf file from previous MIRA assembly + -start iteration to start with (default=0, when using '-quick' reference) + -end iteration to end with (default=startiteration, i.e. if not specified otherwise stop after 1 iteration) + -sample sampleID (please don't use '.' in the sampleID). If resuming, the sampleID needs to be identical to that of the previous iteration / MIRA assembly. + -ref referenceID. If resuming, use the same as in previous iteration/initial MIRA assembly. + -readpool readpool in fastq format (*.gz is also allowed). read pairs need to be interleaved for full functionality of the '-pair' option below. + -quick reference sequence to be used as bait in fasta format + -maf extracts reference from maf file created by previous MITObim iteration/MIRA assembly (resume) \noptional: - --quick starts process with initial baiting using provided fasta reference --kbait set kmer for baiting stringency (default: 31) --platform specify sequencing platform (default: 'solexa'; other options: 'iontor', '454', 'pacbio') - --denovo runs MIRA in denovo mode (default: mapping) - --pair finds pairs after baiting (relies on /1 and /2 header convention for read pairs) (default: no) + --denovo runs MIRA in denovo mode + --pair extend readpool to contain full read pairs, even if only one member was baited (relies on /1 and /2 header convention for read pairs) (default: no). --verbose show detailed output of MIRA modules (default: no) --split split reference at positions with more than 5N (default: no) --help shows this helpful information --clean retain only the last 2 iteration directories (default: no) --trimreads trim data (default: no; we recommend to trim beforehand and feed MITObim with pre trimmed data) - --trimoverhang trim overhang up- and downstream of reference (default: no) - --missmatch number of allowed missmatches in mapping - only for illumina data (default: 15% of avg. read length) + --trimoverhang trim overhang up- and downstream of reference, i.e. don't extend the bait, just re-assemble (default: no) + --mismatch number of allowed mismatches in mapping - only for illumina data (default: 15% of avg. read length) --min_cov minimum average coverage of contigs to be retained (default: 0 - off) --min_len minimum length of contig to be retained as backbone (default: 0 - off) --mirapath full path to MIRA binaries (only needed if MIRA is not in PATH) @@ -65,7 +65,7 @@ --version display MITObim version \nexamples: ./MITObim.pl -start 1 -end 5 -sample StrainX -ref reference-mt -readpool illumina_readpool.fastq -maf initial_assembly.maf - ./MITObim.pl -end 10 --quick reference.fasta -sample StrainY -ref reference-mt -readpool illumina_readpool.fastq\n\n"; + ./MITObim.pl -end 10 -quick reference.fasta -sample StrainY -ref reference-mt -readpool illumina_readpool.fastq\n\n"; # --proofread applies proofreading (atm only to be used if starting the process from a single short seed reference) # --readlength read length of illumina library, default=150, relevant only for proofreading # --insert insert size of illumina library, default=300, relevant only for proofreading @@ -96,7 +96,7 @@ # "proofreading!" => \$proofreading, "trimreads!" => \$trim, "trimoverhang!" => \$trimoverhang, - "missmatch=i" => \$MM, + "mismatch=i" => \$MM, "platform=s" => \$platform, # "readlength=i" => \$readlength, # "insertsize=i" => \$insertsize, @@ -221,14 +221,14 @@ print "readlength: $readlength\n"; print "insertsize: $insertsize\n"; $MM = 0; - print "number of allowed missmatches in proofreading assembly: $MM\n"; + print "number of allowed mismatches in proofreading assembly: $MM\n"; $shme = "-AL:shme=$MM"; }elsif ((!$proofreading) && (!$mode) && ($platform eq "solexa")){ if ($MM == -1){ - print "number of missmatches in mapping assembly: default (15% of average read length loaded)\n"; + print "number of mismatches in mapping assembly: default (15% of average read length loaded)\n"; $shme = ""; }else { - print "number of missmatches in mapping assembly: $MM\n"; + print "number of mismatches in mapping assembly: $MM\n"; $shme = "-AL:shme=$MM"; } print "proofreading: off\n"; @@ -1018,14 +1018,14 @@ sub finalize_sequence{ } sub create_manifest { - my ($iter, $sampleID, $refID, $mmode, $trim, $platform, $solexa_missmatches, $pair, $overhang, $reads, $ref, $redirect, $NFS_warn); + my ($iter, $sampleID, $refID, $mmode, $trim, $platform, $solexa_mismatches, $pair, $overhang, $reads, $ref, $redirect, $NFS_warn); $iter = $_[0]; $sampleID = $_[1]; $refID = $_[2]; $mmode = $_[3]; $trim = $_[4]; $platform = $_[5]; - $solexa_missmatches = $_[6]; + $solexa_mismatches = $_[6]; $pair = $_[7]; $overhang = $_[8]; $reads = $_[9]; @@ -1040,7 +1040,7 @@ sub create_manifest { open (MANIFEST,">manifest.conf") or die $!; print MANIFEST "#manifest file for iteration $iter created by MITObim\n\nproject = $sampleID-$refID \njob = genome,$mmode,accurate - \nparameters = -NW:mrnl=0:cac=warn$NFS_warn -AS:nop=1 $redirect $overhang $platform $trim -CO:msr=no $solexa_missmatches\n"; + \nparameters = -NW:mrnl=0:cac=warn$NFS_warn -AS:nop=1 $redirect $overhang $platform $trim -CO:msr=no $solexa_mismatches\n"; my @technology = split("_",$platform); #-notraceinfo - if ($mmode eq "mapping"){ diff --git a/README.md b/README.md index e17e003..ee89e1e 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,7 @@ MITObim - mitochondrial baiting and iterative mapping VERSIONS -------- -1.8 (stable - relies on MIRA 4) +1.9 (stable - relies on MIRA 4.0.2) 1.6 (stable - relies on MIRA 3.4.1.1) @@ -40,7 +40,7 @@ PREREQUISITES - GNU utilities - Perl - A running version of MIRA - - MIRA 4 (for the use with MITObim 1.8 (and newer) - download [here](http://sourceforge.net/projects/mira-assembler/files/MIRA/stable/)). + - MIRA 4.0.2 (for the use with MITObim 1.8 (and newer) - download [here](http://sourceforge.net/projects/mira-assembler/files/MIRA/stable/)). - MIRA 3.4.1.1 (for the use with MITObim 1.6 - download [here](http://sourceforge.net/projects/mira-assembler/files/MIRA/Older%20releases/V3.4.0/)). - **Precompiled** binaries for MIRA are available for Linux and OSX. An excellent guide to MIRA is available [here](http://mira-assembler.sourceforge.net/docs/DefinitiveGuideToMIRA.html "The definitive Guide to MIRA"). @@ -95,33 +95,33 @@ which should display the usage (NOTE: From MITObim 1.7 onwards the `-strain` fla ``` MITObim - mitochondrial baiting and iterative mapping -version 1.8 -author: Christoph Hahn, (c) 2012-2016 +version 1.9 usage: ./MITObim.pl parameters: - -start iteration to start with, default=1 - -end iteration to end with, default=1 - -sample sampleID as used in initial MIRA assembly - -ref referenceID as used in initial MIRA assembly - -readpool readpool in fastq format (*.gz is also allowed) - -maf maf file from previous MIRA assembly + -start iteration to start with (default=0, when using '-quick' reference) + -end iteration to end with (default=startiteration, i.e. if not specified otherwise stop after 1 iteration) + -sample sampleID (please don't use '.' in the sampleID). If resuming, the sampleID needs to be identical to that of the previous iteration / MIRA assembly. + -ref referenceID. If resuming, use the same as in previous iteration/initial MIRA assembly. + -readpool readpool in fastq format (*.gz is also allowed). read pairs need to be interleaved for full functionality of the '-pair' option below. + -quick reference sequence to be used as bait in fasta format + -maf extracts reference from maf file created by previous MITObim iteration/MIRA assembly (resume) optional: - --quick starts process with initial baiting using provided fasta reference --kbait set kmer for baiting stringency (default: 31) --platform specify sequencing platform (default: 'solexa'; other options: 'iontor', '454', 'pacbio') - --denovo runs MIRA in denovo mode (default: mapping) - --pair finds pairs after baiting (relies on /1 and /2 header convention for read pairs) (default: no) + --denovo runs MIRA in denovo mode + --pair extend readpool to contain full read pairs, even if only one member was baited (relies on /1 and /2 header convention for read pairs) (default: no). --verbose show detailed output of MIRA modules (default: no) --split split reference at positions with more than 5N (default: no) --help shows this helpful information --clean retain only the last 2 iteration directories (default: no) --trimreads trim data (default: no; we recommend to trim beforehand and feed MITObim with pre trimmed data) - --trimoverhang trim overhang up- and downstream of reference (default: no) - --missmatch number of allowed missmatches in mapping - only for illumina data (default: 15% of avg. read length) - --min_cov minimum average coverage of contigs to be retained (default: off) + --trimoverhang trim overhang up- and downstream of reference, i.e. don't extend the bait, just re-assemble (default: no) + --mismatch number of allowed mismatches in mapping - only for illumina data (default: 15% of avg. read length) + --min_cov minimum average coverage of contigs to be retained (default: 0 - off) + --min_len minimum length of contig to be retained as backbone (default: 0 - off) --mirapath full path to MIRA binaries (only needed if MIRA is not in PATH) --redirect_tmp redirect temporary output to this location (useful in case you are running MITObim on an NFS mount) --NFS_warn_only allow MIRA to run on NFS mount without aborting - warn only (expert option - see MIRA documentation 'check_nfs') @@ -129,7 +129,7 @@ optional: examples: ./MITObim.pl -start 1 -end 5 -sample StrainX -ref reference-mt -readpool illumina_readpool.fastq -maf initial_assembly.maf - ./MITObim.pl -end 10 --quick reference.fasta -sample StrainY -ref reference-mt -readpool illumina_readpool.fastq + ./MITObim.pl -end 10 -quick reference.fasta -sample StrainY -ref reference-mt -readpool illumina_readpool.fastq ``` diff --git a/docker/scripts/MITObim.pl b/docker/scripts/MITObim.pl index 6146286..3d7bc4e 100755 --- a/docker/scripts/MITObim.pl +++ b/docker/scripts/MITObim.pl @@ -38,25 +38,25 @@ a baiting and iterative mapping approach. Nucl. Acids Res. 41(13):e129. doi: 10.1093/nar/gkt371\n\n"; my $USAGE = "\nusage: ./MITObim.pl \nparameters: - -start iteration to start with, default=1 - -end iteration to end with, default=1 - -sample sampleID (please don't use '.' in the sampleID). If resuming, the sampleID needs to be identical to that of the previous run. - -ref referenceID as used in initial MIRA assembly - -readpool readpool in fastq format (*.gz is also allowed) - -maf maf file from previous MIRA assembly + -start iteration to start with (default=0, when using '-quick' reference) + -end iteration to end with (default=startiteration, i.e. if not specified otherwise stop after 1 iteration) + -sample sampleID (please don't use '.' in the sampleID). If resuming, the sampleID needs to be identical to that of the previous iteration / MIRA assembly. + -ref referenceID. If resuming, use the same as in previous iteration/initial MIRA assembly. + -readpool readpool in fastq format (*.gz is also allowed). read pairs need to be interleaved for full functionality of the '-pair' option below. + -quick reference sequence to be used as bait in fasta format + -maf extracts reference from maf file created by previous MITObim iteration/MIRA assembly (resume) \noptional: - --quick starts process with initial baiting using provided fasta reference --kbait set kmer for baiting stringency (default: 31) --platform specify sequencing platform (default: 'solexa'; other options: 'iontor', '454', 'pacbio') - --denovo runs MIRA in denovo mode (default: mapping) - --pair finds pairs after baiting (relies on /1 and /2 header convention for read pairs) (default: no) + --denovo runs MIRA in denovo mode + --pair extend readpool to contain full read pairs, even if only one member was baited (relies on /1 and /2 header convention for read pairs) (default: no). --verbose show detailed output of MIRA modules (default: no) --split split reference at positions with more than 5N (default: no) --help shows this helpful information --clean retain only the last 2 iteration directories (default: no) --trimreads trim data (default: no; we recommend to trim beforehand and feed MITObim with pre trimmed data) - --trimoverhang trim overhang up- and downstream of reference (default: no) - --missmatch number of allowed missmatches in mapping - only for illumina data (default: 15% of avg. read length) + --trimoverhang trim overhang up- and downstream of reference, i.e. don't extend the bait, just re-assemble (default: no) + --mismatch number of allowed mismatches in mapping - only for illumina data (default: 15% of avg. read length) --min_cov minimum average coverage of contigs to be retained (default: 0 - off) --min_len minimum length of contig to be retained as backbone (default: 0 - off) --mirapath full path to MIRA binaries (only needed if MIRA is not in PATH) @@ -65,7 +65,7 @@ --version display MITObim version \nexamples: ./MITObim.pl -start 1 -end 5 -sample StrainX -ref reference-mt -readpool illumina_readpool.fastq -maf initial_assembly.maf - ./MITObim.pl -end 10 --quick reference.fasta -sample StrainY -ref reference-mt -readpool illumina_readpool.fastq\n\n"; + ./MITObim.pl -end 10 -quick reference.fasta -sample StrainY -ref reference-mt -readpool illumina_readpool.fastq\n\n"; # --proofread applies proofreading (atm only to be used if starting the process from a single short seed reference) # --readlength read length of illumina library, default=150, relevant only for proofreading # --insert insert size of illumina library, default=300, relevant only for proofreading @@ -96,7 +96,7 @@ # "proofreading!" => \$proofreading, "trimreads!" => \$trim, "trimoverhang!" => \$trimoverhang, - "missmatch=i" => \$MM, + "mismatch=i" => \$MM, "platform=s" => \$platform, # "readlength=i" => \$readlength, # "insertsize=i" => \$insertsize, @@ -221,14 +221,14 @@ print "readlength: $readlength\n"; print "insertsize: $insertsize\n"; $MM = 0; - print "number of allowed missmatches in proofreading assembly: $MM\n"; + print "number of allowed mismatches in proofreading assembly: $MM\n"; $shme = "-AL:shme=$MM"; }elsif ((!$proofreading) && (!$mode) && ($platform eq "solexa")){ if ($MM == -1){ - print "number of missmatches in mapping assembly: default (15% of average read length loaded)\n"; + print "number of mismatches in mapping assembly: default (15% of average read length loaded)\n"; $shme = ""; }else { - print "number of missmatches in mapping assembly: $MM\n"; + print "number of mismatches in mapping assembly: $MM\n"; $shme = "-AL:shme=$MM"; } print "proofreading: off\n"; @@ -1018,14 +1018,14 @@ sub finalize_sequence{ } sub create_manifest { - my ($iter, $sampleID, $refID, $mmode, $trim, $platform, $solexa_missmatches, $pair, $overhang, $reads, $ref, $redirect, $NFS_warn); + my ($iter, $sampleID, $refID, $mmode, $trim, $platform, $solexa_mismatches, $pair, $overhang, $reads, $ref, $redirect, $NFS_warn); $iter = $_[0]; $sampleID = $_[1]; $refID = $_[2]; $mmode = $_[3]; $trim = $_[4]; $platform = $_[5]; - $solexa_missmatches = $_[6]; + $solexa_mismatches = $_[6]; $pair = $_[7]; $overhang = $_[8]; $reads = $_[9]; @@ -1040,7 +1040,7 @@ sub create_manifest { open (MANIFEST,">manifest.conf") or die $!; print MANIFEST "#manifest file for iteration $iter created by MITObim\n\nproject = $sampleID-$refID \njob = genome,$mmode,accurate - \nparameters = -NW:mrnl=0:cac=warn$NFS_warn -AS:nop=1 $redirect $overhang $platform $trim -CO:msr=no $solexa_missmatches\n"; + \nparameters = -NW:mrnl=0:cac=warn$NFS_warn -AS:nop=1 $redirect $overhang $platform $trim -CO:msr=no $solexa_mismatches\n"; my @technology = split("_",$platform); #-notraceinfo - if ($mmode eq "mapping"){