Skip to content

Commit

Permalink
Merge pull request #322 from maxibor/strip_fastq
Browse files Browse the repository at this point in the history
Move extract map reads fastq compression to pigz
  • Loading branch information
jfy133 authored Dec 19, 2019
2 parents d8ccb82 + f332e4d commit 66196c5
Show file tree
Hide file tree
Showing 3 changed files with 28 additions and 53 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
* Fixed Travis-Ci.org to Travis-Ci.com migration issues
* [#266](https://github.com/nf-core/eager/issues/266) - Added sanity checks for input filetypes (i.e. only BAM files can be supplied if `--bam`)
* [#237](https://github.com/nf-core/eager/issues/237) - Fixed and Updated script scrape_software_versions
* [#322](https://github.com/nf-core/eager/pull/322) - Move extract map reads fastq compression to pigz

### `Dependencies`

Expand Down
71 changes: 21 additions & 50 deletions bin/extract_map_reads.py
Original file line number Diff line number Diff line change
Expand Up @@ -179,54 +179,25 @@ def write_fq(fq_dict, fname, mode):
- fname(string) Path to output fastq file
- mode(string) strip (remove mapped reads) or replace (replace mapped read sequences by Ns)
"""

if fname.endswith('.gz'):
with gzip.open(fname, 'wb') as f:
for k in list(fq_dict.keys()):
if mode == 'strip':
# if unmapped, write all the read lines
if fq_dict[k][0] == 'u':
f.write(f"@{k}\n".encode())
for i in fq_dict[k][1:]:
f.write(f"{i}\n".encode())
# if mapped, do not write the read lines
elif fq_dict[k][0] == 'm':
continue

elif mode == 'replace':
# if unmapped, write all the read lines
if fq_dict[k][0] == 'u':
f.write(f"@{k}\n".encode())
for i in fq_dict[k][1:]:
f.write(f"{i}\n".encode())
# if mapped, write all the read lines, but replace sequence
# by N*(len(sequence))
elif fq_dict[k][0] == 'm':
f.write(f"@{k}\n".encode())
f.write(f"{'N'*len(fq_dict[k][1])}\n".encode())
for i in fq_dict[k][2:]:
f.write(f"{i}\n".encode())

else:
with open(fname, 'w') as f:
for k in list(fq_dict.keys()):
if mode == 'strip':
if fq_dict[k][0] == 'u':
f.write(f"@{k}\n")
for i in fq_dict[k][1:]:
f.write(f"{i}\n")
elif fq_dict[k][0] == 'm':
continue
elif mode == 'replace':
if fq_dict[k][0] == 'u':
f.write(f"@{k}\n")
for i in fq_dict[k][1:]:
f.write(f"{i}\n")
elif fq_dict[k][0] == 'm':
f.write(f"@{k}\n")
f.write(f"{'N'*len(fq_dict[k][1])}\n")
for i in fq_dict[k][2:]:
f.write(f"{i}\n")
with open(fname, 'w') as f:
for k in list(fq_dict.keys()):
if mode == 'strip':
if fq_dict[k][0] == 'u':
f.write(f"@{k}\n")
for i in fq_dict[k][1:]:
f.write(f"{i}\n")
elif fq_dict[k][0] == 'm':
continue
elif mode == 'replace':
if fq_dict[k][0] == 'u':
f.write(f"@{k}\n")
for i in fq_dict[k][1:]:
f.write(f"{i}\n")
elif fq_dict[k][0] == 'm':
f.write(f"@{k}\n")
f.write(f"{'N'*len(fq_dict[k][1])}\n")
for i in fq_dict[k][2:]:
f.write(f"{i}\n")


def check_strip_mode(mode):
Expand All @@ -238,7 +209,7 @@ def check_strip_mode(mode):
BAM, IN_FWD, IN_REV, OUT_FWD, OUT_REV, MODE, PROC = _get_args()

if OUT_FWD == None:
out_fwd = f"{IN_FWD.split('/')[-1].split('.')[0]}.r1.fq.gz"
out_fwd = f"{IN_FWD.split('/')[-1].split('.')[0]}.r1.fq"
else:
out_fwd = OUT_FWD

Expand All @@ -248,7 +219,7 @@ def check_strip_mode(mode):
write_fq(fwd_reads, out_fwd, MODE)
if IN_REV:
if OUT_REV == None:
out_rev = f"{IN_REV.split('/')[-1].split('.')[0]}.r2.fq.gz"
out_rev = f"{IN_REV.split('/')[-1].split('.')[0]}.r2.fq"
else:
out_rev = OUT_REV
rev_dict = parse_fq(IN_REV)
Expand Down
9 changes: 6 additions & 3 deletions main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -1313,17 +1313,20 @@ process strip_input_fastq {

script:
if (params.singleEnd) {
out_fwd = bam.baseName+'.stripped.fq.gz'
out_fwd = bam.baseName+'.stripped.fq'
"""
samtools index $bam
extract_map_reads.py $bam ${fq[0]} -m ${params.strip_mode} -of $out_fwd -p ${task.cpus}
pigz -p ${task.cpus} $out_fwd
"""
} else {
out_fwd = bam.baseName+'.stripped.fwd.fq.gz'
out_rev = bam.baseName+'.stripped.rev.fq.gz'
out_fwd = bam.baseName+'.stripped.fwd.fq'
out_rev = bam.baseName+'.stripped.rev.fq'
"""
samtools index $bam
extract_map_reads.py $bam ${fq[0]} -rev ${fq[1]} -m ${params.strip_mode} -of $out_fwd -or $out_rev -p ${task.cpus}
pigz -p ${task.cpus} $out_fwd
pigz -p ${task.cpus} $out_rev
"""
}

Expand Down

0 comments on commit 66196c5

Please sign in to comment.