Skip to content

Commit

Permalink
Threads per fastp/minimap2 processes parameterized
Browse files Browse the repository at this point in the history
  • Loading branch information
charles-cowart committed Dec 6, 2023
1 parent 5724bf5 commit f894a23
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 6 deletions.
14 changes: 11 additions & 3 deletions sequence_processing_pipeline/NuQCJob.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,8 @@ def __init__(self, fastq_root_dir, output_path, sample_sheet_path,
minimap_database_paths, queue_name, node_count,
wall_time_limit, jmem, fastp_path, minimap2_path,
samtools_path, modules_to_load, qiita_job_id, pool_size,
max_array_length, known_adapters_path, bucket_size=8):
max_array_length, known_adapters_path, bucket_size=8,
length_limit=100, worker_threads=7):
"""
Submit a slurm job where the contents of fastq_root_dir are processed
using fastp, minimap2, and samtools. Human-genome sequences will be
Expand All @@ -43,7 +44,8 @@ def __init__(self, fastq_root_dir, output_path, sample_sheet_path,
:param pool_size: The number of jobs to process concurrently.
:param known_adapters_path: The path to an .fna file of known adapters.
:param bucket_size: the size in GB of each bucket to process
:param length_limit: Minimum read length passed to fastp (-l); reads shorter than this will be discarded.
:param worker_threads: Number of threads per fastp/minimap2 process.
"""
super().__init__(fastq_root_dir,
output_path,
Expand Down Expand Up @@ -79,6 +81,8 @@ def __init__(self, fastq_root_dir, output_path, sample_sheet_path,
self.counts = {}
self.known_adapters_path = known_adapters_path
self.max_file_list_size_in_gb = bucket_size
self.length_limit = length_limit
self.worker_threads = worker_threads
self.temp_dir = join(self.output_path, 'tmp')
makedirs(self.temp_dir, exist_ok=True)

Expand Down Expand Up @@ -347,13 +351,17 @@ def _generate_job_script(self):
# should be 4 * 24 * 60 minutes = 5760 minutes = 4 days
wall_time_limit=self.wall_time_limit,
mem_in_gb=self.jmem,
# number of nodes requested (-N)
node_count=1,
# cores-per-task (-c)
cores_per_task=4,
knwn_adpt_path=self.known_adapters_path,
output_path=self.output_path,
html_path=html_path,
json_path=json_path,
demux_path=demux_path,
temp_dir=self.temp_dir))
temp_dir=self.temp_dir,
length_limit=self.length_limit,
worker_threads=self.worker_threads))

return job_script_path
6 changes: 3 additions & 3 deletions sequence_processing_pipeline/templates/nuqc_job.sh
Original file line number Diff line number Diff line change
Expand Up @@ -95,10 +95,10 @@ do
echo "${i} ${r1_name} ${r2_name} ${base}" >> ${TMPDIR}/id_map

fastp \
-l 100 \
-l {{length_limit}} \
-i ${r1} \
-I ${r2} \
-w 7 \
-w {{worker_threads}} \
--adapter_fasta {{knwn_adpt_path}} \
--html {{html_path}}/${html_name} \
--json {{json_path}}/${json_name} \
Expand All @@ -110,7 +110,7 @@ function minimap2_runner () {
mmi=$1

echo "$(date) :: $(basename ${mmi})"
minimap2 -2 -ax sr -t 7 ${mmi} ${TMPDIR}/seqs.fastq | \
minimap2 -2 -ax sr -t {{worker_threads}} ${mmi} ${TMPDIR}/seqs.fastq | \
samtools fastq -@ 1 -f 12 -F 256 > ${TMPDIR}/seqs_new.fastq
mv ${TMPDIR}/seqs_new.fastq ${TMPDIR}/seqs.fastq
}
Expand Down

0 comments on commit f894a23

Please sign in to comment.