diff --git a/CHANGELOG.md b/CHANGELOG.md index 12e1fedd0..fbf7590f2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -30,6 +30,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. * Fixed Travis-Ci.org to Travis-Ci.com migration issues * [#266](https://github.com/nf-core/eager/issues/266) - Added sanity checks for input filetypes (i.e. only BAM files can be supplied if `--bam`) * [#237](https://github.com/nf-core/eager/issues/237) - Fixed and Updated script scrape_software_versions +* [#322](https://github.com/nf-core/eager/pull/322) - Moved FASTQ output compression of `extract_map_reads.py` to pigz ### `Dependencies` diff --git a/bin/extract_map_reads.py b/bin/extract_map_reads.py index 3a4e1c563..48358fe23 100755 --- a/bin/extract_map_reads.py +++ b/bin/extract_map_reads.py @@ -179,54 +179,25 @@ def write_fq(fq_dict, fname, mode): - fname(string) Path to output fastq file - mode(string) strip (remove read) or replace (replace read sequence) by Ns """ - - if fname.endswith('.gz'): - with gzip.open(fname, 'wb') as f: - for k in list(fq_dict.keys()): - if mode == 'strip': - # if unmapped, write all the read lines - if fq_dict[k][0] == 'u': - f.write(f"@{k}\n".encode()) - for i in fq_dict[k][1:]: - f.write(f"{i}\n".encode()) - # if mapped, do not write the read lines - elif fq_dict[k][0] == 'm': - continue - - elif mode == 'replace': - # if unmapped, write all the read lines - if fq_dict[k][0] == 'u': - f.write(f"@{k}\n".encode()) - for i in fq_dict[k][1:]: - f.write(f"{i}\n".encode()) - # if mapped, write all the read lines, but replace sequence - # by N*(len(sequence)) - elif fq_dict[k][0] == 'm': - f.write(f"@{k}\n".encode()) - f.write(f"{'N'*len(fq_dict[k][1])}\n".encode()) - for i in fq_dict[k][2:]: - f.write(f"{i}\n".encode()) - - else: - with open(fname, 'w') as f: - for k in list(fq_dict.keys()): - if mode == 'strip': - if fq_dict[k][0] == 'u': - f.write(f"@{k}\n") - for i in fq_dict[k][1:]: - f.write(f"{i}\n") - elif fq_dict[k][0] == 'm': - 
continue - elif mode == 'replace': - if fq_dict[k][0] == 'u': - f.write(f"@{k}\n") - for i in fq_dict[k][1:]: - f.write(f"{i}\n") - elif fq_dict[k][0] == 'm': - f.write(f"@{k}\n") - f.write(f"{'N'*len(fq_dict[k][1])}\n") - for i in fq_dict[k][2:]: - f.write(f"{i}\n") + with open(fname, 'w') as f: + for k in list(fq_dict.keys()): + if mode == 'strip': + if fq_dict[k][0] == 'u': + f.write(f"@{k}\n") + for i in fq_dict[k][1:]: + f.write(f"{i}\n") + elif fq_dict[k][0] == 'm': + continue + elif mode == 'replace': + if fq_dict[k][0] == 'u': + f.write(f"@{k}\n") + for i in fq_dict[k][1:]: + f.write(f"{i}\n") + elif fq_dict[k][0] == 'm': + f.write(f"@{k}\n") + f.write(f"{'N'*len(fq_dict[k][1])}\n") + for i in fq_dict[k][2:]: + f.write(f"{i}\n") def check_strip_mode(mode): @@ -238,7 +209,7 @@ def check_strip_mode(mode): BAM, IN_FWD, IN_REV, OUT_FWD, OUT_REV, MODE, PROC = _get_args() if OUT_FWD == None: - out_fwd = f"{IN_FWD.split('/')[-1].split('.')[0]}.r1.fq.gz" + out_fwd = f"{IN_FWD.split('/')[-1].split('.')[0]}.r1.fq" else: out_fwd = OUT_FWD @@ -248,7 +219,7 @@ def check_strip_mode(mode): write_fq(fwd_reads, out_fwd, MODE) if IN_REV: if OUT_REV == None: - out_rev = f"{IN_REV.split('/')[-1].split('.')[0]}.r2.fq.gz" + out_rev = f"{IN_REV.split('/')[-1].split('.')[0]}.r2.fq" else: out_rev = OUT_REV rev_dict = parse_fq(IN_REV) diff --git a/main.nf b/main.nf index a1b8324b7..e9b05187d 100644 --- a/main.nf +++ b/main.nf @@ -1292,17 +1292,20 @@ process strip_input_fastq { script: if (params.singleEnd) { - out_fwd = bam.baseName+'.stripped.fq.gz' + out_fwd = bam.baseName+'.stripped.fq' """ samtools index $bam extract_map_reads.py $bam ${fq[0]} -m ${params.strip_mode} -of $out_fwd -p ${task.cpus} + pigz -p ${task.cpus} $out_fwd """ } else { - out_fwd = bam.baseName+'.stripped.fwd.fq.gz' - out_rev = bam.baseName+'.stripped.rev.fq.gz' + out_fwd = bam.baseName+'.stripped.fwd.fq' + out_rev = bam.baseName+'.stripped.rev.fq' """ samtools index $bam extract_map_reads.py $bam ${fq[0]} 
-rev ${fq[1]} -m ${params.strip_mode} -of $out_fwd -or $out_rev -p ${task.cpus} + pigz -p ${task.cpus} $out_fwd + pigz -p ${task.cpus} $out_rev """ }