diff --git a/illumina.py b/illumina.py index 6a64cb92..35b7c049 100755 --- a/illumina.py +++ b/illumina.py @@ -453,7 +453,7 @@ def parser_common_barcodes(parser=argparse.ArgumentParser()): parser.add_argument('--JVMmemory', help='JVM virtual memory size (default: %(default)s)', default=tools.picard.ExtractIlluminaBarcodesTool.jvmMemDefault) - util.cmd.common_args(parser, (('loglevel', None), ('version', None), ('tmp_dir', None))) + util.cmd.common_args(parser, (('threads',None), ('loglevel', None), ('version', None), ('tmp_dir', None))) util.cmd.attach_main(parser, main_common_barcodes) return parser @@ -506,7 +506,7 @@ def main_common_barcodes(args): except IndexError: barcode2_len = 0 - count_and_sort_barcodes(barcodes_tmpdir, args.outSummary, barcode1_len, barcode2_len, args.truncateToLength, args.includeNoise, args.omitHeader) + count_and_sort_barcodes(barcodes_tmpdir, args.outSummary, barcode1_len, barcode2_len, args.truncateToLength, args.includeNoise, args.omitHeader, args.threads) # clean up os.unlink(barcode_file) diff --git a/read_utils.py b/read_utils.py index 44e357d9..7fc3cd86 100755 --- a/read_utils.py +++ b/read_utils.py @@ -919,7 +919,7 @@ def _merge_fastqs_and_mvicuna(lb, files): return readList -def rmdup_mvicuna_bam(inBam, outBam, JVMmemory=None): +def rmdup_mvicuna_bam(inBam, outBam, JVMmemory=None, threads=None): ''' Remove duplicate reads from BAM file using M-Vicuna. The primary advantage to this approach over Picard's MarkDuplicates tool is that Picard requires that input reads are aligned to a reference, @@ -943,7 +943,7 @@ def rmdup_mvicuna_bam(inBam, outBam, JVMmemory=None): # For each library, merge FASTQs and run rmdup for entire library readListAll = mkstempfname('.keep_reads_all.txt') per_lb_read_lists = [] - with concurrent.futures.ProcessPoolExecutor(max_workers=util.misc.available_cpu_count()) as executor: + with concurrent.futures.ProcessPoolExecutor(max_workers=threads or util.misc.available_cpu_count()) as executor: futures = [executor.submit(_merge_fastqs_and_mvicuna, lb, files) for lb, files in lb_to_files.items()] for future in concurrent.futures.as_completed(futures): log.info("mvicuna finished processing library") @@ -972,7 +972,7 @@ def parser_rmdup_mvicuna_bam(parser=argparse.ArgumentParser()): default=tools.picard.FilterSamReadsTool.jvmMemDefault, help='JVM virtual memory size (default: %(default)s)' ) - util.cmd.common_args(parser, (('loglevel', None), ('version', None), ('tmp_dir', None))) + util.cmd.common_args(parser, (('threads',None), ('loglevel', None), ('version', None), ('tmp_dir', None))) util.cmd.attach_main(parser, rmdup_mvicuna_bam, split_args=True) return parser diff --git a/util/cmd.py b/util/cmd.py index 35eb803e..5ab52ec9 100644 --- a/util/cmd.py +++ b/util/cmd.py @@ -17,6 +17,7 @@ import util.version import util.file +import util.misc __author__ = "dpark@broadinstitute.org" __version__ = util.version.get_version() @@ -76,15 +77,13 @@ def common_args(parser, arglist=(('tmp_dir', None), ('loglevel', None))): the end, even if there's a failure.""", default=False) elif k == 'threads': - if v is None: - text_default = "all available cores" - else: - text_default = v + # if v is None, sanitize_thread_count() sets count to all available cores + thread_count = util.misc.sanitize_thread_count(v) parser.add_argument('--threads', dest="threads", type=int, - help="Number of threads (default: {})".format(text_default), - default=v) + help="Number of threads; by default all cores are used", + default=thread_count) elif k == 'version': if not v: v = __version__