diff --git a/README.md b/README.md
index 79ec186f..a8c9da60 100644
--- a/README.md
+++ b/README.md
@@ -73,7 +73,6 @@ Prepare an `assemblysheet.csv` file with following columns representing target a
- `fasta:` FASTA file
- `gff3 [Optional]:` GFF3 annotation file if available
- `monoploid_ids [Optional]:` A txt file listing the IDs used to calculate LAI in monoploid mode if necessary
-- `hic_reads [Optional]:` A SRA id such as 'SRR8238190' or path to paired reads such as 'PG_PETUNIA_HiC_CGYCF_CACTCA_L001_R{1,2}.fastq.gz'
- `synteny_labels [Optional]:` A two column tsv file listing fasta sequence ids (first column) and labels for the synteny plots (second column) when performing synteny analysis
Now, you can run the pipeline using:
diff --git a/assets/assemblysheet.csv b/assets/assemblysheet.csv
index d8801b5f..19333351 100644
--- a/assets/assemblysheet.csv
+++ b/assets/assemblysheet.csv
@@ -1,2 +1,2 @@
-tag,fasta,gff3,monoploid_ids,hic_reads,synteny_labels
-FI1,https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/003/814/445/GCA_003814445.1_ASM381444v1/GCA_003814445.1_ASM381444v1_genomic.fna.gz,https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/003/814/445/GCA_003814445.1_ASM381444v1/GCA_003814445.1_ASM381444v1_genomic.gff.gz,,,
+tag,fasta,gff3,monoploid_ids,synteny_labels
+FI1,https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/003/814/445/GCA_003814445.1_ASM381444v1/GCA_003814445.1_ASM381444v1_genomic.fna.gz,https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/003/814/445/GCA_003814445.1_ASM381444v1/GCA_003814445.1_ASM381444v1_genomic.gff.gz,,
diff --git a/assets/schema_input.json b/assets/schema_input.json
index 88f9a89b..981c7786 100644
--- a/assets/schema_input.json
+++ b/assets/schema_input.json
@@ -35,23 +35,6 @@
}
]
},
- "hic_reads": {
- "errorMessage": "HiC reads should either be provided as a SRA ID or as a path to paired reads with pattern '*R{1,2}.(fastq|fq).gz'",
- "anyOf": [
- {
- "type": "string",
- "pattern": "^SR\\w+$"
- },
- {
- "type": "string",
- "pattern": "^\\S+R\\{1,2\\}\\.f(ast)?q\\.gz$"
- },
- {
- "type": "string",
- "maxLength": 0
- }
- ]
- },
"synteny_labels": {
"errorMessage": "Synteny labels tsv path cannot contain spaces and must have extension '.tsv'",
"anyOf": [
diff --git a/modules.json b/modules.json
index d9161f6a..d68ea0ee 100644
--- a/modules.json
+++ b/modules.json
@@ -90,6 +90,16 @@
"git_sha": "89ff95427f695086369d7927a3c17cea2a37a382",
"installed_by": ["modules"]
},
+ "fastp": {
+ "branch": "master",
+ "git_sha": "003920c7f9a8ae19b69a97171922880220bedf56",
+ "installed_by": ["fastq_trim_fastp_fastqc"]
+ },
+ "fastqc": {
+ "branch": "master",
+ "git_sha": "f4ae1d942bd50c5c0b9bd2de1393ce38315ba57c",
+ "installed_by": ["fastq_trim_fastp_fastqc"]
+ },
"gunzip": {
"branch": "master",
"git_sha": "3a5fef109d113b4997c9822198664ca5f2716208",
@@ -133,6 +143,11 @@
"branch": "master",
"git_sha": "2b21fbeb20ad9f17612f4a3dd7b12971513f08d5",
"installed_by": ["subworkflows"]
+ },
+ "fastq_trim_fastp_fastqc": {
+ "branch": "master",
+ "git_sha": "cfd937a668919d948f6fcbf4218e79de50c2f36f",
+ "installed_by": ["subworkflows"]
}
}
}
diff --git a/modules/nf-core/fastp/environment.yml b/modules/nf-core/fastp/environment.yml
new file mode 100644
index 00000000..70389e66
--- /dev/null
+++ b/modules/nf-core/fastp/environment.yml
@@ -0,0 +1,7 @@
+name: fastp
+channels:
+ - conda-forge
+ - bioconda
+ - defaults
+dependencies:
+ - bioconda::fastp=0.23.4
diff --git a/modules/nf-core/fastp/main.nf b/modules/nf-core/fastp/main.nf
new file mode 100644
index 00000000..2a3b679e
--- /dev/null
+++ b/modules/nf-core/fastp/main.nf
@@ -0,0 +1,120 @@
+process FASTP {
+ tag "$meta.id"
+ label 'process_medium'
+
+ conda "${moduleDir}/environment.yml"
+ container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+ 'https://depot.galaxyproject.org/singularity/fastp:0.23.4--h5f740d0_0' :
+ 'biocontainers/fastp:0.23.4--h5f740d0_0' }"
+
+ input:
+ tuple val(meta), path(reads)
+ path adapter_fasta
+ val save_trimmed_fail
+ val save_merged
+
+ output:
+ tuple val(meta), path('*.fastp.fastq.gz') , optional:true, emit: reads
+ tuple val(meta), path('*.json') , emit: json
+ tuple val(meta), path('*.html') , emit: html
+ tuple val(meta), path('*.log') , emit: log
+ path "versions.yml" , emit: versions
+ tuple val(meta), path('*.fail.fastq.gz') , optional:true, emit: reads_fail
+ tuple val(meta), path('*.merged.fastq.gz'), optional:true, emit: reads_merged
+
+ when:
+ task.ext.when == null || task.ext.when
+
+ script:
+ def args = task.ext.args ?: ''
+ def prefix = task.ext.prefix ?: "${meta.id}"
+ def adapter_list = adapter_fasta ? "--adapter_fasta ${adapter_fasta}" : ""
+ def fail_fastq = save_trimmed_fail && meta.single_end ? "--failed_out ${prefix}.fail.fastq.gz" : save_trimmed_fail && !meta.single_end ? "--unpaired1 ${prefix}_1.fail.fastq.gz --unpaired2 ${prefix}_2.fail.fastq.gz" : ''
+ // Added soft-links to original fastqs for consistent naming in MultiQC
+ // Use single ended for interleaved. Add --interleaved_in in config.
+ if ( task.ext.args?.contains('--interleaved_in') ) {
+ """
+ [ ! -f ${prefix}.fastq.gz ] && ln -sf $reads ${prefix}.fastq.gz
+
+ fastp \\
+ --stdout \\
+ --in1 ${prefix}.fastq.gz \\
+ --thread $task.cpus \\
+ --json ${prefix}.fastp.json \\
+ --html ${prefix}.fastp.html \\
+ $adapter_list \\
+ $fail_fastq \\
+ $args \\
+ 2> >(tee ${prefix}.fastp.log >&2) \\
+ | gzip -c > ${prefix}.fastp.fastq.gz
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ fastp: \$(fastp --version 2>&1 | sed -e "s/fastp //g")
+ END_VERSIONS
+ """
+ } else if (meta.single_end) {
+ """
+ [ ! -f ${prefix}.fastq.gz ] && ln -sf $reads ${prefix}.fastq.gz
+
+ fastp \\
+ --in1 ${prefix}.fastq.gz \\
+ --out1 ${prefix}.fastp.fastq.gz \\
+ --thread $task.cpus \\
+ --json ${prefix}.fastp.json \\
+ --html ${prefix}.fastp.html \\
+ $adapter_list \\
+ $fail_fastq \\
+ $args \\
+ 2> >(tee ${prefix}.fastp.log >&2)
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ fastp: \$(fastp --version 2>&1 | sed -e "s/fastp //g")
+ END_VERSIONS
+ """
+ } else {
+ def merge_fastq = save_merged ? "-m --merged_out ${prefix}.merged.fastq.gz" : ''
+ """
+ [ ! -f ${prefix}_1.fastq.gz ] && ln -sf ${reads[0]} ${prefix}_1.fastq.gz
+ [ ! -f ${prefix}_2.fastq.gz ] && ln -sf ${reads[1]} ${prefix}_2.fastq.gz
+ fastp \\
+ --in1 ${prefix}_1.fastq.gz \\
+ --in2 ${prefix}_2.fastq.gz \\
+ --out1 ${prefix}_1.fastp.fastq.gz \\
+ --out2 ${prefix}_2.fastp.fastq.gz \\
+ --json ${prefix}.fastp.json \\
+ --html ${prefix}.fastp.html \\
+ $adapter_list \\
+ $fail_fastq \\
+ $merge_fastq \\
+ --thread $task.cpus \\
+ --detect_adapter_for_pe \\
+ $args \\
+ 2> >(tee ${prefix}.fastp.log >&2)
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ fastp: \$(fastp --version 2>&1 | sed -e "s/fastp //g")
+ END_VERSIONS
+ """
+ }
+
+ stub:
+ def prefix = task.ext.prefix ?: "${meta.id}"
+ def is_single_output = task.ext.args?.contains('--interleaved_in') || meta.single_end
+ def touch_reads = is_single_output ? "${prefix}.fastp.fastq.gz" : "${prefix}_1.fastp.fastq.gz ${prefix}_2.fastp.fastq.gz"
+ def touch_merged = (!is_single_output && save_merged) ? "touch ${prefix}.merged.fastq.gz" : ""
+ """
+ touch $touch_reads
+ touch "${prefix}.fastp.json"
+ touch "${prefix}.fastp.html"
+ touch "${prefix}.fastp.log"
+ $touch_merged
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ fastp: \$(fastp --version 2>&1 | sed -e "s/fastp //g")
+ END_VERSIONS
+ """
+}
diff --git a/modules/nf-core/fastp/meta.yml b/modules/nf-core/fastp/meta.yml
new file mode 100644
index 00000000..c22a16ab
--- /dev/null
+++ b/modules/nf-core/fastp/meta.yml
@@ -0,0 +1,75 @@
+name: fastp
+description: Perform adapter/quality trimming on sequencing reads
+keywords:
+ - trimming
+ - quality control
+ - fastq
+tools:
+ - fastp:
+ description: |
+ A tool designed to provide fast all-in-one preprocessing for FastQ files. This tool is developed in C++ with multithreading supported to afford high performance.
+ documentation: https://github.com/OpenGene/fastp
+ doi: 10.1093/bioinformatics/bty560
+ licence: ["MIT"]
+input:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information. Use 'single_end: true' to specify single ended or interleaved FASTQs. Use 'single_end: false' for paired-end reads.
+ e.g. [ id:'test', single_end:false ]
+ - reads:
+ type: file
+ description: |
+ List of input FastQ files of size 1 and 2 for single-end and paired-end data,
+ respectively. If you wish to run interleaved paired-end data, supply as single-end data
+ but with `--interleaved_in` in your `modules.conf`'s `ext.args` for the module.
+ - adapter_fasta:
+ type: file
+ description: File in FASTA format containing possible adapters to remove.
+ pattern: "*.{fasta,fna,fas,fa}"
+ - save_trimmed_fail:
+ type: boolean
+ description: Specify true to save files that failed to pass trimming thresholds ending in `*.fail.fastq.gz`
+ - save_merged:
+ type: boolean
+ description: Specify true to save all merged reads to the a file ending in `*.merged.fastq.gz`
+output:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - reads:
+ type: file
+ description: The trimmed/modified/unmerged fastq reads
+ pattern: "*fastp.fastq.gz"
+ - json:
+ type: file
+ description: Results in JSON format
+ pattern: "*.json"
+ - html:
+ type: file
+ description: Results in HTML format
+ pattern: "*.html"
+ - log:
+ type: file
+ description: fastq log file
+ pattern: "*.log"
+ - versions:
+ type: file
+ description: File containing software versions
+ pattern: "versions.yml"
+ - reads_fail:
+ type: file
+ description: Reads the failed the preprocessing
+ pattern: "*fail.fastq.gz"
+ - reads_merged:
+ type: file
+ description: Reads that were successfully merged
+ pattern: "*.{merged.fastq.gz}"
+authors:
+ - "@drpatelh"
+ - "@kevinmenden"
+maintainers:
+ - "@drpatelh"
+ - "@kevinmenden"
diff --git a/modules/nf-core/fastp/tests/main.nf.test b/modules/nf-core/fastp/tests/main.nf.test
new file mode 100644
index 00000000..9b3f9a38
--- /dev/null
+++ b/modules/nf-core/fastp/tests/main.nf.test
@@ -0,0 +1,723 @@
+nextflow_process {
+
+ name "Test Process FASTP"
+ script "../main.nf"
+ process "FASTP"
+ tag "modules"
+ tag "modules_nfcore"
+ tag "fastp"
+
+ test("test_fastp_single_end") {
+
+ when {
+ params {
+ outdir = "$outputDir"
+ }
+ process {
+ """
+ adapter_fasta = []
+ save_trimmed_fail = false
+ save_merged = false
+
+ input[0] = Channel.of([
+ [ id:'test', single_end:true ],
+ [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ]
+ ])
+ input[1] = adapter_fasta
+ input[2] = save_trimmed_fail
+ input[3] = save_merged
+ """
+ }
+ }
+
+ then {
+ def html_text = [ "Q20 bases:
12.922000 K (92.984097%)",
+ "single end (151 cycles)" ]
+ def log_text = [ "Q20 bases: 12922(92.9841%)",
+ "reads passed filter: 99" ]
+ def read_lines = ["@ERR5069949.2151832 NS500628:121:HK3MMAFX2:2:21208:10793:15304/1",
+ "TCATAAACCAAAGCACTCACAGTGTCAACAATTTCAGCAGGACAACGCCGACAAGTTCCGAGGAACATGTCTGGACCTATAGTTTTCATAAGTCTACACACTGAATTGAAATATTCTGGTTCTAGTGTGCCCTTAGTTAGCAATGTGCGT",
+ "AAAAAAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEE
+ { assert path(process.out.reads.get(0).get(1)).linesGzip.contains(read_line) }
+ }
+ },
+ { html_text.each { html_part ->
+ { assert path(process.out.html.get(0).get(1)).getText().contains(html_part) }
+ }
+ },
+ { assert snapshot(process.out.json).match("test_fastp_single_end_json") },
+ { log_text.each { log_part ->
+ { assert path(process.out.log.get(0).get(1)).getText().contains(log_part) }
+ }
+ },
+ {
+ assert snapshot(
+ (
+ [process.out.reads[0][0].toString()] + // meta
+ process.out.reads.collect { file(it[1]).getName() } +
+ process.out.json.collect { file(it[1]).getName() } +
+ process.out.html.collect { file(it[1]).getName() } +
+ process.out.log.collect { file(it[1]).getName() } +
+ process.out.reads_fail.collect { file(it[1]).getName() } +
+ process.out.reads_merged.collect { file(it[1]).getName() }
+ ).sort()
+ ).match("test_fastp_single_end-_match")
+ },
+ { assert snapshot(process.out.versions).match("versions_single_end") }
+ )
+ }
+ }
+
+ test("test_fastp_single_end-stub") {
+
+ options '-stub'
+
+ when {
+ params {
+ outdir = "$outputDir"
+ }
+ process {
+ """
+ adapter_fasta = []
+ save_trimmed_fail = false
+ save_merged = false
+
+ input[0] = Channel.of([
+ [ id:'test', single_end:true ],
+ [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ]
+ ])
+ input[1] = adapter_fasta
+ input[2] = save_trimmed_fail
+ input[3] = save_merged
+ """
+ }
+ }
+
+ then {
+
+ assertAll(
+ { assert process.success },
+ {
+ assert snapshot(
+ (
+ [process.out.reads[0][0].toString()] + // meta
+ process.out.reads.collect { file(it[1]).getName() } +
+ process.out.json.collect { file(it[1]).getName() } +
+ process.out.html.collect { file(it[1]).getName() } +
+ process.out.log.collect { file(it[1]).getName() } +
+ process.out.reads_fail.collect { file(it[1]).getName() } +
+ process.out.reads_merged.collect { file(it[1]).getName() }
+ ).sort()
+ ).match("test_fastp_single_end-for_stub_match")
+ },
+ { assert snapshot(process.out.versions).match("versions_single_end_stub") }
+ )
+ }
+ }
+
+ test("test_fastp_paired_end") {
+
+ when {
+ params {
+ outdir = "$outputDir"
+ }
+ process {
+ """
+ adapter_fasta = []
+ save_trimmed_fail = false
+ save_merged = false
+
+ input[0] = Channel.of([
+ [ id:'test', single_end:false ], // meta map
+ [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true),
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ]
+ ])
+ input[1] = adapter_fasta
+ input[2] = save_trimmed_fail
+ input[3] = save_merged
+ """
+ }
+ }
+
+ then {
+ def html_text = [ "Q20 bases: | 25.719000 K (93.033098%)",
+ "The input has little adapter percentage (~0.000000%), probably it's trimmed before."]
+ def log_text = [ "No adapter detected for read1",
+ "Q30 bases: 12281(88.3716%)"]
+ def json_text = ['"passed_filter_reads": 198']
+ def read1_lines = ["@ERR5069949.2151832 NS500628:121:HK3MMAFX2:2:21208:10793:15304/1",
+ "TCATAAACCAAAGCACTCACAGTGTCAACAATTTCAGCAGGACAACGCCGACAAGTTCCGAGGAACATGTCTGGACCTATAGTTTTCATAAGTCTACACACTGAATTGAAATATTCTGGTTCTAGTGTGCCCTTAGTTAGCAATGTGCGT",
+ "AAAAAAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEE
+ { assert path(process.out.reads.get(0).get(1).get(0)).linesGzip.contains(read1_line) }
+ }
+ },
+ { read2_lines.each { read2_line ->
+ { assert path(process.out.reads.get(0).get(1).get(1)).linesGzip.contains(read2_line) }
+ }
+ },
+ { html_text.each { html_part ->
+ { assert path(process.out.html.get(0).get(1)).getText().contains(html_part) }
+ }
+ },
+ { json_text.each { json_part ->
+ { assert path(process.out.json.get(0).get(1)).getText().contains(json_part) }
+ }
+ },
+ { log_text.each { log_part ->
+ { assert path(process.out.log.get(0).get(1)).getText().contains(log_part) }
+ }
+ },
+ {
+ assert snapshot(
+ (
+ [process.out.reads[0][0].toString()] + // meta
+ process.out.reads.collect { it[1].collect { item -> file(item).getName() } } +
+ process.out.json.collect { file(it[1]).getName() } +
+ process.out.html.collect { file(it[1]).getName() } +
+ process.out.log.collect { file(it[1]).getName() } +
+ process.out.reads_fail.collect { file(it[1]).getName() } +
+ process.out.reads_merged.collect { file(it[1]).getName() }
+ ).sort()
+ ).match("test_fastp_paired_end_match")
+ },
+ { assert snapshot(process.out.versions).match("versions_paired_end") }
+ )
+ }
+ }
+
+ test("test_fastp_paired_end-stub") {
+
+ options '-stub'
+
+ when {
+ params {
+ outdir = "$outputDir"
+ }
+ process {
+ """
+ adapter_fasta = []
+ save_trimmed_fail = false
+ save_merged = false
+
+ input[0] = Channel.of([
+ [ id:'test', single_end:false ], // meta map
+ [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true),
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ]
+ ])
+ input[1] = adapter_fasta
+ input[2] = save_trimmed_fail
+ input[3] = save_merged
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ {
+ assert snapshot(
+ (
+ [process.out.reads[0][0].toString()] + // meta
+ process.out.reads.collect { it[1].collect { item -> file(item).getName() } } +
+ process.out.json.collect { file(it[1]).getName() } +
+ process.out.html.collect { file(it[1]).getName() } +
+ process.out.log.collect { file(it[1]).getName() } +
+ process.out.reads_fail.collect { file(it[1]).getName() } +
+ process.out.reads_merged.collect { file(it[1]).getName() }
+ ).sort()
+ ).match("test_fastp_paired_end-for_stub_match")
+ },
+ { assert snapshot(process.out.versions).match("versions_paired_end-stub") }
+ )
+ }
+ }
+
+ test("fastp test_fastp_interleaved") {
+ config './nextflow.config'
+ when {
+ params {
+ outdir = "$outputDir"
+ }
+ process {
+ """
+ adapter_fasta = []
+ save_trimmed_fail = false
+ save_merged = false
+
+ input[0] = Channel.of([
+ [ id:'test', single_end:true ], // meta map
+ [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_interleaved.fastq.gz', checkIfExists: true) ]
+ ])
+ input[1] = adapter_fasta
+ input[2] = save_trimmed_fail
+ input[3] = save_merged
+ """
+ }
+ }
+
+ then {
+ def html_text = [ "Q20 bases: | 25.719000 K (93.033098%)",
+ "paired end (151 cycles + 151 cycles)"]
+ def log_text = [ "Q20 bases: 12922(92.9841%)",
+ "reads passed filter: 198"]
+ def read_lines = [ "@ERR5069949.2151832 NS500628:121:HK3MMAFX2:2:21208:10793:15304/1",
+ "TCATAAACCAAAGCACTCACAGTGTCAACAATTTCAGCAGGACAACGCCGACAAGTTCCGAGGAACATGTCTGGACCTATAGTTTTCATAAGTCTACACACTGAATTGAAATATTCTGGTTCTAGTGTGCCCTTAGTTAGCAATGTGCGT",
+ "AAAAAAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEE
+ { assert path(process.out.reads.get(0).get(1)).linesGzip.contains(read_line) }
+ }
+ },
+ { html_text.each { html_part ->
+ { assert path(process.out.html.get(0).get(1)).getText().contains(html_part) }
+ }
+ },
+ { assert snapshot(process.out.json).match("fastp test_fastp_interleaved_json") },
+ { log_text.each { log_part ->
+ { assert path(process.out.log.get(0).get(1)).getText().contains(log_part) }
+ }
+ },
+ {
+ assert snapshot(
+ (
+ [process.out.reads[0][0].toString()] + // meta
+ process.out.reads.collect { file(it[1]).getName() } +
+ process.out.json.collect { file(it[1]).getName() } +
+ process.out.html.collect { file(it[1]).getName() } +
+ process.out.log.collect { file(it[1]).getName() } +
+ process.out.reads_fail.collect { file(it[1]).getName() } +
+ process.out.reads_merged.collect { file(it[1]).getName() }
+ ).sort()
+ ).match("test_fastp_interleaved-_match")
+ },
+ { assert snapshot(process.out.versions).match("versions_interleaved") }
+ )
+ }
+ }
+
+ test("fastp test_fastp_interleaved-stub") {
+
+ options '-stub'
+
+ config './nextflow.config'
+ when {
+ params {
+ outdir = "$outputDir"
+ }
+ process {
+ """
+ adapter_fasta = []
+ save_trimmed_fail = false
+ save_merged = false
+
+ input[0] = Channel.of([
+ [ id:'test', single_end:true ], // meta map
+ [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_interleaved.fastq.gz', checkIfExists: true) ]
+ ])
+ input[1] = adapter_fasta
+ input[2] = save_trimmed_fail
+ input[3] = save_merged
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ {
+ assert snapshot(
+ (
+ [process.out.reads[0][0].toString()] + // meta
+ process.out.reads.collect { file(it[1]).getName() } +
+ process.out.json.collect { file(it[1]).getName() } +
+ process.out.html.collect { file(it[1]).getName() } +
+ process.out.log.collect { file(it[1]).getName() } +
+ process.out.reads_fail.collect { file(it[1]).getName() } +
+ process.out.reads_merged.collect { file(it[1]).getName() }
+ ).sort()
+ ).match("test_fastp_interleaved-for_stub_match")
+ },
+ { assert snapshot(process.out.versions).match("versions_interleaved-stub") }
+ )
+ }
+ }
+
+ test("test_fastp_single_end_trim_fail") {
+
+ when {
+ params {
+ outdir = "$outputDir"
+ }
+ process {
+ """
+ adapter_fasta = []
+ save_trimmed_fail = true
+ save_merged = false
+
+ input[0] = Channel.of([
+ [ id:'test', single_end:true ], // meta map
+ [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ]
+ ])
+ input[1] = adapter_fasta
+ input[2] = save_trimmed_fail
+ input[3] = save_merged
+ """
+ }
+ }
+
+ then {
+ def html_text = [ "Q20 bases: | 12.922000 K (92.984097%)",
+ "single end (151 cycles)"]
+ def log_text = [ "Q20 bases: 12922(92.9841%)",
+ "reads passed filter: 99" ]
+ def read_lines = [ "@ERR5069949.2151832 NS500628:121:HK3MMAFX2:2:21208:10793:15304/1",
+ "TCATAAACCAAAGCACTCACAGTGTCAACAATTTCAGCAGGACAACGCCGACAAGTTCCGAGGAACATGTCTGGACCTATAGTTTTCATAAGTCTACACACTGAATTGAAATATTCTGGTTCTAGTGTGCCCTTAGTTAGCAATGTGCGT",
+ "AAAAAAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEE
+ { assert path(process.out.reads.get(0).get(1)).linesGzip.contains(read_line) }
+ }
+ },
+ { failed_read_lines.each { failed_read_line ->
+ { assert path(process.out.reads_fail.get(0).get(1)).linesGzip.contains(failed_read_line) }
+ }
+ },
+ { html_text.each { html_part ->
+ { assert path(process.out.html.get(0).get(1)).getText().contains(html_part) }
+ }
+ },
+ { assert snapshot(process.out.json).match("test_fastp_single_end_trim_fail_json") },
+ { log_text.each { log_part ->
+ { assert path(process.out.log.get(0).get(1)).getText().contains(log_part) }
+ }
+ },
+ { assert snapshot(process.out.versions).match("versions_single_end_trim_fail") }
+ )
+ }
+ }
+
+ test("test_fastp_paired_end_trim_fail") {
+
+ when {
+ params {
+ outdir = "$outputDir"
+ }
+ process {
+ """
+ adapter_fasta = []
+ save_trimmed_fail = true
+ save_merged = false
+
+ input[0] = Channel.of([
+ [ id:'test', single_end:false ], // meta map
+ [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true),
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true)]
+ ])
+ input[1] = adapter_fasta
+ input[2] = save_trimmed_fail
+ input[3] = save_merged
+ """
+ }
+ }
+
+ then {
+ def html_text = [ "Q20 bases: | 25.719000 K (93.033098%)",
+ "The input has little adapter percentage (~0.000000%), probably it's trimmed before."]
+ def log_text = [ "No adapter detected for read1",
+ "Q30 bases: 12281(88.3716%)"]
+ def json_text = ['"passed_filter_reads": 198']
+ def read1_lines = ["@ERR5069949.2151832 NS500628:121:HK3MMAFX2:2:21208:10793:15304/1",
+ "TCATAAACCAAAGCACTCACAGTGTCAACAATTTCAGCAGGACAACGCCGACAAGTTCCGAGGAACATGTCTGGACCTATAGTTTTCATAAGTCTACACACTGAATTGAAATATTCTGGTTCTAGTGTGCCCTTAGTTAGCAATGTGCGT",
+ "AAAAAAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEE
+ { assert path(process.out.reads.get(0).get(1).get(0)).linesGzip.contains(read1_line) }
+ }
+ },
+ { read2_lines.each { read2_line ->
+ { assert path(process.out.reads.get(0).get(1).get(1)).linesGzip.contains(read2_line) }
+ }
+ },
+ { failed_read2_lines.each { failed_read2_line ->
+ { assert path(process.out.reads_fail.get(0).get(1).get(1)).linesGzip.contains(failed_read2_line) }
+ }
+ },
+ { html_text.each { html_part ->
+ { assert path(process.out.html.get(0).get(1)).getText().contains(html_part) }
+ }
+ },
+ { json_text.each { json_part ->
+ { assert path(process.out.json.get(0).get(1)).getText().contains(json_part) }
+ }
+ },
+ { log_text.each { log_part ->
+ { assert path(process.out.log.get(0).get(1)).getText().contains(log_part) }
+ }
+ },
+ { assert snapshot(process.out.versions).match("versions_paired_end_trim_fail") }
+ )
+ }
+ }
+
+ test("test_fastp_paired_end_merged") {
+
+ when {
+ params {
+ outdir = "$outputDir"
+ }
+ process {
+ """
+ adapter_fasta = []
+ save_trimmed_fail = false
+ save_merged = true
+ input[0] = Channel.of([
+ [ id:'test', single_end:false ], // meta map
+ [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true),
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ]
+ ])
+ input[1] = adapter_fasta
+ input[2] = save_trimmed_fail
+ input[3] = save_merged
+ """
+ }
+ }
+
+ then {
+ def html_text = [ ""]
+ def log_text = [ "Merged and filtered:",
+ "total reads: 75",
+ "total bases: 13683"]
+ def json_text = ['"merged_and_filtered": {', '"total_reads": 75', '"total_bases": 13683']
+ def read1_lines = [ "@ERR5069949.1066259 NS500628:121:HK3MMAFX2:1:11312:18369:8333/1",
+ "CCTTATGACAGCAAGAACTGTGTATGATGATGGTGCTAGGAGAGTGTGGACACTTATGAATGTCTTGACACTCGTTTATAAAGTTTATTATGGTAATGCTTTAGATCAAGCCATTTCCATGTGGGCTCTTATAATCTCTGTTACTTC",
+ "AAAAAEAEEAEEEEEEEEEEEEEEEEAEEEEAEEEEEEEEAEEEEEEEEEEEEEEEEE/EAEEEEEE/6EEEEEEEEEEAEEAEEE/EE/AEEAEEEEEAEEEA/EEAAEAE
+ { assert path(process.out.reads.get(0).get(1).get(0)).linesGzip.contains(read1_line) }
+ }
+ },
+ { read2_lines.each { read2_line ->
+ { assert path(process.out.reads.get(0).get(1).get(1)).linesGzip.contains(read2_line) }
+ }
+ },
+ { read_merged_lines.each { read_merged_line ->
+ { assert path(process.out.reads_merged.get(0).get(1)).linesGzip.contains(read_merged_line) }
+ }
+ },
+ { html_text.each { html_part ->
+ { assert path(process.out.html.get(0).get(1)).getText().contains(html_part) }
+ }
+ },
+ { json_text.each { json_part ->
+ { assert path(process.out.json.get(0).get(1)).getText().contains(json_part) }
+ }
+ },
+ { log_text.each { log_part ->
+ { assert path(process.out.log.get(0).get(1)).getText().contains(log_part) }
+ }
+ },
+ {
+ assert snapshot(
+ (
+ [process.out.reads[0][0].toString()] + // meta
+ process.out.reads.collect { it[1].collect { item -> file(item).getName() } } +
+ process.out.json.collect { file(it[1]).getName() } +
+ process.out.html.collect { file(it[1]).getName() } +
+ process.out.log.collect { file(it[1]).getName() } +
+ process.out.reads_fail.collect { file(it[1]).getName() } +
+ process.out.reads_merged.collect { file(it[1]).getName() }
+ ).sort()
+ ).match("test_fastp_paired_end_merged_match")
+ },
+ { assert snapshot(process.out.versions).match("versions_paired_end_merged") }
+ )
+ }
+ }
+
+ test("test_fastp_paired_end_merged-stub") {
+
+ options '-stub'
+
+ when {
+ params {
+ outdir = "$outputDir"
+ }
+ process {
+ """
+ adapter_fasta = []
+ save_trimmed_fail = false
+ save_merged = true
+
+ input[0] = Channel.of([
+ [ id:'test', single_end:false ], // meta map
+ [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true),
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ]
+ ])
+ input[1] = adapter_fasta
+ input[2] = save_trimmed_fail
+ input[3] = save_merged
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ {
+ assert snapshot(
+ (
+ [process.out.reads[0][0].toString()] + // meta
+ process.out.reads.collect { it[1].collect { item -> file(item).getName() } } +
+ process.out.json.collect { file(it[1]).getName() } +
+ process.out.html.collect { file(it[1]).getName() } +
+ process.out.log.collect { file(it[1]).getName() } +
+ process.out.reads_fail.collect { file(it[1]).getName() } +
+ process.out.reads_merged.collect { file(it[1]).getName() }
+ ).sort()
+ ).match("test_fastp_paired_end_merged-for_stub_match")
+ },
+ { assert snapshot(process.out.versions).match("versions_paired_end_merged_stub") }
+ )
+ }
+ }
+
+ test("test_fastp_paired_end_merged_adapterlist") {
+
+ when {
+ params {
+ outdir = "$outputDir"
+ }
+ process {
+ """
+ adapter_fasta = Channel.of([ file(params.modules_testdata_base_path + 'delete_me/fastp/adapters.fasta', checkIfExists: true) ])
+ save_trimmed_fail = false
+ save_merged = true
+
+ input[0] = Channel.of([
+ [ id:'test', single_end:false ], // meta map
+ [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true),
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ]
+ ])
+ input[1] = adapter_fasta
+ input[2] = save_trimmed_fail
+ input[3] = save_merged
+ """
+ }
+ }
+
+ then {
+ def html_text = [ ""]
+ def log_text = [ "Merged and filtered:",
+ "total reads: 75",
+ "total bases: 13683"]
+ def json_text = ['"merged_and_filtered": {', '"total_reads": 75', '"total_bases": 13683',"--adapter_fasta"]
+ def read1_lines = ["@ERR5069949.1066259 NS500628:121:HK3MMAFX2:1:11312:18369:8333/1",
+ "CCTTATGACAGCAAGAACTGTGTATGATGATGGTGCTAGGAGAGTGTGGACACTTATGAATGTCTTGACACTCGTTTATAAAGTTTATTATGGTAATGCTTTAGATCAAGCCATTTCCATGTGGGCTCTTATAATCTCTGTTACTTC",
+ "AAAAAEAEEAEEEEEEEEEEEEEEEEAEEEEAEEEEEEEEAEEEEEEEEEEEEEEEEE/EAEEEEEE/6EEEEEEEEEEAEEAEEE/EE/AEEAEEEEEAEEEA/EEAAEAE
+ { assert path(process.out.reads.get(0).get(1).get(0)).linesGzip.contains(read1_line) }
+ }
+ },
+ { read2_lines.each { read2_line ->
+ { assert path(process.out.reads.get(0).get(1).get(1)).linesGzip.contains(read2_line) }
+ }
+ },
+ { read_merged_lines.each { read_merged_line ->
+ { assert path(process.out.reads_merged.get(0).get(1)).linesGzip.contains(read_merged_line) }
+ }
+ },
+ { html_text.each { html_part ->
+ { assert path(process.out.html.get(0).get(1)).getText().contains(html_part) }
+ }
+ },
+ { json_text.each { json_part ->
+ { assert path(process.out.json.get(0).get(1)).getText().contains(json_part) }
+ }
+ },
+ { log_text.each { log_part ->
+ { assert path(process.out.log.get(0).get(1)).getText().contains(log_part) }
+ }
+ },
+ { assert snapshot(process.out.versions).match("versions_paired_end_merged_adapterlist") }
+ )
+ }
+ }
+}
diff --git a/modules/nf-core/fastp/tests/main.nf.test.snap b/modules/nf-core/fastp/tests/main.nf.test.snap
new file mode 100644
index 00000000..b4c0e1dd
--- /dev/null
+++ b/modules/nf-core/fastp/tests/main.nf.test.snap
@@ -0,0 +1,330 @@
+{
+ "fastp test_fastp_interleaved_json": {
+ "content": [
+ [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test.fastp.json:md5,168f516f7bd4b7b6c32da7cba87299a4"
+ ]
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-01-17T18:08:06.123035"
+ },
+ "test_fastp_paired_end_merged-for_stub_match": {
+ "content": [
+ [
+ [
+ "test_1.fastp.fastq.gz",
+ "test_2.fastp.fastq.gz"
+ ],
+ "test.fastp.html",
+ "test.fastp.json",
+ "test.fastp.log",
+ "test.merged.fastq.gz",
+ "{id=test, single_end=false}"
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-01-17T18:10:13.467574"
+ },
+ "versions_interleaved": {
+ "content": [
+ [
+ "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02"
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-02-01T11:56:24.615634793"
+ },
+ "test_fastp_single_end_json": {
+ "content": [
+ [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test.fastp.json:md5,c852d7a6dba5819e4ac8d9673bedcacc"
+ ]
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-01-17T18:06:00.223817"
+ },
+ "versions_paired_end": {
+ "content": [
+ [
+ "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02"
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-02-01T11:55:42.333545689"
+ },
+ "test_fastp_paired_end_match": {
+ "content": [
+ [
+ [
+ "test_1.fastp.fastq.gz",
+ "test_2.fastp.fastq.gz"
+ ],
+ "test.fastp.html",
+ "test.fastp.json",
+ "test.fastp.log",
+ "{id=test, single_end=false}"
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-02-01T12:03:06.431833729"
+ },
+ "test_fastp_interleaved-_match": {
+ "content": [
+ [
+ "test.fastp.fastq.gz",
+ "test.fastp.html",
+ "test.fastp.json",
+ "test.fastp.log",
+ "{id=test, single_end=true}"
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-02-01T12:03:37.827323085"
+ },
+ "test_fastp_paired_end_merged_match": {
+ "content": [
+ [
+ [
+ "test_1.fastp.fastq.gz",
+ "test_2.fastp.fastq.gz"
+ ],
+ "test.fastp.html",
+ "test.fastp.json",
+ "test.fastp.log",
+ "test.merged.fastq.gz",
+ "{id=test, single_end=false}"
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-02-01T12:08:44.496251446"
+ },
+ "versions_single_end_stub": {
+ "content": [
+ [
+ "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02"
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-02-01T11:55:27.354051299"
+ },
+ "versions_interleaved-stub": {
+ "content": [
+ [
+ "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02"
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-02-01T11:56:46.535528418"
+ },
+ "versions_single_end_trim_fail": {
+ "content": [
+ [
+ "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02"
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-02-01T11:59:03.724591407"
+ },
+ "test_fastp_paired_end-for_stub_match": {
+ "content": [
+ [
+ [
+ "test_1.fastp.fastq.gz",
+ "test_2.fastp.fastq.gz"
+ ],
+ "test.fastp.html",
+ "test.fastp.json",
+ "test.fastp.log",
+ "{id=test, single_end=false}"
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-01-17T18:07:15.398827"
+ },
+ "versions_paired_end-stub": {
+ "content": [
+ [
+ "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02"
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-02-01T11:56:06.50017282"
+ },
+ "versions_single_end": {
+ "content": [
+ [
+ "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02"
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-02-01T11:55:07.67921647"
+ },
+ "versions_paired_end_merged_stub": {
+ "content": [
+ [
+ "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02"
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-02-01T11:59:47.350653154"
+ },
+ "test_fastp_interleaved-for_stub_match": {
+ "content": [
+ [
+ "test.fastp.fastq.gz",
+ "test.fastp.html",
+ "test.fastp.json",
+ "test.fastp.log",
+ "{id=test, single_end=true}"
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-01-17T18:08:06.127974"
+ },
+ "versions_paired_end_trim_fail": {
+ "content": [
+ [
+ "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02"
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-02-01T11:59:18.140484878"
+ },
+ "test_fastp_single_end-for_stub_match": {
+ "content": [
+ [
+ "test.fastp.fastq.gz",
+ "test.fastp.html",
+ "test.fastp.json",
+ "test.fastp.log",
+ "{id=test, single_end=true}"
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-01-17T18:06:00.244202"
+ },
+ "test_fastp_single_end-_match": {
+ "content": [
+ [
+ "test.fastp.fastq.gz",
+ "test.fastp.html",
+ "test.fastp.json",
+ "test.fastp.log",
+ "{id=test, single_end=true}"
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-02-01T11:57:30.791982648"
+ },
+ "versions_paired_end_merged_adapterlist": {
+ "content": [
+ [
+ "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02"
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-02-01T12:05:37.845370554"
+ },
+ "versions_paired_end_merged": {
+ "content": [
+ [
+ "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02"
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-02-01T11:59:32.860543858"
+ },
+ "test_fastp_single_end_trim_fail_json": {
+ "content": [
+ [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test.fastp.json:md5,9a7ee180f000e8d00c7fb67f06293eb5"
+ ]
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-01-17T18:08:41.942317"
+ }
+}
\ No newline at end of file
diff --git a/modules/nf-core/fastp/tests/nextflow.config b/modules/nf-core/fastp/tests/nextflow.config
new file mode 100644
index 00000000..0f7849ad
--- /dev/null
+++ b/modules/nf-core/fastp/tests/nextflow.config
@@ -0,0 +1,6 @@
+process {
+
+ withName: FASTP {
+ ext.args = "--interleaved_in"
+ }
+}
diff --git a/modules/nf-core/fastp/tests/tags.yml b/modules/nf-core/fastp/tests/tags.yml
new file mode 100644
index 00000000..c1afcce7
--- /dev/null
+++ b/modules/nf-core/fastp/tests/tags.yml
@@ -0,0 +1,2 @@
+fastp:
+ - modules/nf-core/fastp/**
diff --git a/modules/nf-core/fastqc/environment.yml b/modules/nf-core/fastqc/environment.yml
new file mode 100644
index 00000000..1787b38a
--- /dev/null
+++ b/modules/nf-core/fastqc/environment.yml
@@ -0,0 +1,7 @@
+name: fastqc
+channels:
+ - conda-forge
+ - bioconda
+ - defaults
+dependencies:
+ - bioconda::fastqc=0.12.1
diff --git a/modules/nf-core/fastqc/main.nf b/modules/nf-core/fastqc/main.nf
new file mode 100644
index 00000000..9e19a74c
--- /dev/null
+++ b/modules/nf-core/fastqc/main.nf
@@ -0,0 +1,55 @@
+process FASTQC {
+ tag "$meta.id"
+ label 'process_medium'
+
+ conda "${moduleDir}/environment.yml"
+ container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+ 'https://depot.galaxyproject.org/singularity/fastqc:0.12.1--hdfd78af_0' :
+ 'biocontainers/fastqc:0.12.1--hdfd78af_0' }"
+
+ input:
+ tuple val(meta), path(reads)
+
+ output:
+ tuple val(meta), path("*.html"), emit: html
+ tuple val(meta), path("*.zip") , emit: zip
+ path "versions.yml" , emit: versions
+
+ when:
+ task.ext.when == null || task.ext.when
+
+ script:
+ def args = task.ext.args ?: ''
+ def prefix = task.ext.prefix ?: "${meta.id}"
+ // Make list of old name and new name pairs to use for renaming in the bash while loop
+ def old_new_pairs = reads instanceof Path || reads.size() == 1 ? [[ reads, "${prefix}.${reads.extension}" ]] : reads.withIndex().collect { entry, index -> [ entry, "${prefix}_${index + 1}.${entry.extension}" ] }
+ def rename_to = old_new_pairs*.join(' ').join(' ')
+ def renamed_files = old_new_pairs.collect{ old_name, new_name -> new_name }.join(' ')
+ """
+ printf "%s %s\\n" $rename_to | while read old_name new_name; do
+ [ -f "\${new_name}" ] || ln -s \$old_name \$new_name
+ done
+
+ fastqc \\
+ $args \\
+ --threads $task.cpus \\
+ $renamed_files
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ fastqc: \$( fastqc --version | sed '/FastQC v/!d; s/.*v//' )
+ END_VERSIONS
+ """
+
+ stub:
+ def prefix = task.ext.prefix ?: "${meta.id}"
+ """
+ touch ${prefix}.html
+ touch ${prefix}.zip
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ fastqc: \$( fastqc --version | sed '/FastQC v/!d; s/.*v//' )
+ END_VERSIONS
+ """
+}
diff --git a/modules/nf-core/fastqc/meta.yml b/modules/nf-core/fastqc/meta.yml
new file mode 100644
index 00000000..ee5507e0
--- /dev/null
+++ b/modules/nf-core/fastqc/meta.yml
@@ -0,0 +1,57 @@
+name: fastqc
+description: Run FastQC on sequenced reads
+keywords:
+ - quality control
+ - qc
+ - adapters
+ - fastq
+tools:
+ - fastqc:
+ description: |
+ FastQC gives general quality metrics about your reads.
+ It provides information about the quality score distribution
+ across your reads, the per base sequence content (%A/C/G/T).
+ You get information about adapter contamination and other
+ overrepresented sequences.
+ homepage: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/
+ documentation: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/Help/
+ licence: ["GPL-2.0-only"]
+input:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - reads:
+ type: file
+ description: |
+ List of input FastQ files of size 1 and 2 for single-end and paired-end data,
+ respectively.
+output:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - html:
+ type: file
+ description: FastQC report
+ pattern: "*_{fastqc.html}"
+ - zip:
+ type: file
+ description: FastQC report archive
+ pattern: "*_{fastqc.zip}"
+ - versions:
+ type: file
+ description: File containing software versions
+ pattern: "versions.yml"
+authors:
+ - "@drpatelh"
+ - "@grst"
+ - "@ewels"
+ - "@FelixKrueger"
+maintainers:
+ - "@drpatelh"
+ - "@grst"
+ - "@ewels"
+ - "@FelixKrueger"
diff --git a/modules/nf-core/fastqc/tests/main.nf.test b/modules/nf-core/fastqc/tests/main.nf.test
new file mode 100644
index 00000000..70edae4d
--- /dev/null
+++ b/modules/nf-core/fastqc/tests/main.nf.test
@@ -0,0 +1,212 @@
+nextflow_process {
+
+ name "Test Process FASTQC"
+ script "../main.nf"
+ process "FASTQC"
+
+ tag "modules"
+ tag "modules_nfcore"
+ tag "fastqc"
+
+ test("sarscov2 single-end [fastq]") {
+
+ when {
+ process {
+ """
+ input[0] = Channel.of([
+ [ id: 'test', single_end:true ],
+ [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ]
+ ])
+ """
+ }
+ }
+
+ then {
+ assertAll (
+ { assert process.success },
+
+ // NOTE The report contains the date inside it, which means that the md5sum is stable per day, but not longer than that. So you can't md5sum it.
+ // looks like this:
+ // https://github.com/nf-core/modules/pull/3903#issuecomment-1743620039
+
+ { assert process.out.html[0][1] ==~ ".*/test_fastqc.html" },
+ { assert process.out.zip[0][1] ==~ ".*/test_fastqc.zip" },
+ { assert path(process.out.html[0][1]).text.contains("File type | Conventional base calls | ") },
+
+ { assert snapshot(process.out.versions).match("fastqc_versions_single") }
+ )
+ }
+ }
+
+ test("sarscov2 paired-end [fastq]") {
+
+ when {
+ process {
+ """
+ input[0] = Channel.of([
+ [id: 'test', single_end: false], // meta map
+ [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true),
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ]
+ ])
+ """
+ }
+ }
+
+ then {
+ assertAll (
+ { assert process.success },
+
+ { assert process.out.html[0][1][0] ==~ ".*/test_1_fastqc.html" },
+ { assert process.out.html[0][1][1] ==~ ".*/test_2_fastqc.html" },
+ { assert process.out.zip[0][1][0] ==~ ".*/test_1_fastqc.zip" },
+ { assert process.out.zip[0][1][1] ==~ ".*/test_2_fastqc.zip" },
+ { assert path(process.out.html[0][1][0]).text.contains("File type | Conventional base calls | ") },
+ { assert path(process.out.html[0][1][1]).text.contains("File type | Conventional base calls | ") },
+
+ { assert snapshot(process.out.versions).match("fastqc_versions_paired") }
+ )
+ }
+ }
+
+ test("sarscov2 interleaved [fastq]") {
+
+ when {
+ process {
+ """
+ input[0] = Channel.of([
+ [id: 'test', single_end: false], // meta map
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_interleaved.fastq.gz', checkIfExists: true)
+ ])
+ """
+ }
+ }
+
+ then {
+ assertAll (
+ { assert process.success },
+
+ { assert process.out.html[0][1] ==~ ".*/test_fastqc.html" },
+ { assert process.out.zip[0][1] ==~ ".*/test_fastqc.zip" },
+ { assert path(process.out.html[0][1]).text.contains("File type | Conventional base calls | ") },
+
+ { assert snapshot(process.out.versions).match("fastqc_versions_interleaved") }
+ )
+ }
+ }
+
+ test("sarscov2 paired-end [bam]") {
+
+ when {
+ process {
+ """
+ input[0] = Channel.of([
+ [id: 'test', single_end: false], // meta map
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true)
+ ])
+ """
+ }
+ }
+
+ then {
+ assertAll (
+ { assert process.success },
+
+ { assert process.out.html[0][1] ==~ ".*/test_fastqc.html" },
+ { assert process.out.zip[0][1] ==~ ".*/test_fastqc.zip" },
+ { assert path(process.out.html[0][1]).text.contains("File type | Conventional base calls | ") },
+
+ { assert snapshot(process.out.versions).match("fastqc_versions_bam") }
+ )
+ }
+ }
+
+ test("sarscov2 multiple [fastq]") {
+
+ when {
+ process {
+ """
+ input[0] = Channel.of([
+ [id: 'test', single_end: false], // meta map
+ [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true),
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true),
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test2_1.fastq.gz', checkIfExists: true),
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test2_2.fastq.gz', checkIfExists: true) ]
+ ])
+ """
+ }
+ }
+
+ then {
+ assertAll (
+ { assert process.success },
+
+ { assert process.out.html[0][1][0] ==~ ".*/test_1_fastqc.html" },
+ { assert process.out.html[0][1][1] ==~ ".*/test_2_fastqc.html" },
+ { assert process.out.html[0][1][2] ==~ ".*/test_3_fastqc.html" },
+ { assert process.out.html[0][1][3] ==~ ".*/test_4_fastqc.html" },
+ { assert process.out.zip[0][1][0] ==~ ".*/test_1_fastqc.zip" },
+ { assert process.out.zip[0][1][1] ==~ ".*/test_2_fastqc.zip" },
+ { assert process.out.zip[0][1][2] ==~ ".*/test_3_fastqc.zip" },
+ { assert process.out.zip[0][1][3] ==~ ".*/test_4_fastqc.zip" },
+ { assert path(process.out.html[0][1][0]).text.contains("File type | Conventional base calls | ") },
+ { assert path(process.out.html[0][1][1]).text.contains("File type | Conventional base calls | ") },
+ { assert path(process.out.html[0][1][2]).text.contains("File type | Conventional base calls | ") },
+ { assert path(process.out.html[0][1][3]).text.contains("File type | Conventional base calls | ") },
+
+ { assert snapshot(process.out.versions).match("fastqc_versions_multiple") }
+ )
+ }
+ }
+
+ test("sarscov2 custom_prefix") {
+
+ when {
+ process {
+ """
+ input[0] = Channel.of([
+ [ id:'mysample', single_end:true ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true)
+ ])
+ """
+ }
+ }
+
+ then {
+ assertAll (
+ { assert process.success },
+
+ { assert process.out.html[0][1] ==~ ".*/mysample_fastqc.html" },
+ { assert process.out.zip[0][1] ==~ ".*/mysample_fastqc.zip" },
+ { assert path(process.out.html[0][1]).text.contains("File type | Conventional base calls | ") },
+
+ { assert snapshot(process.out.versions).match("fastqc_versions_custom_prefix") }
+ )
+ }
+ }
+
+ test("sarscov2 single-end [fastq] - stub") {
+
+ options "-stub"
+
+ when {
+ process {
+ """
+ input[0] = Channel.of([
+ [ id: 'test', single_end:true ],
+ [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ]
+ ])
+ """
+ }
+ }
+
+ then {
+ assertAll (
+ { assert process.success },
+ { assert snapshot(process.out.html.collect { file(it[1]).getName() } +
+ process.out.zip.collect { file(it[1]).getName() } +
+ process.out.versions ).match("fastqc_stub") }
+ )
+ }
+ }
+
+}
diff --git a/modules/nf-core/fastqc/tests/main.nf.test.snap b/modules/nf-core/fastqc/tests/main.nf.test.snap
new file mode 100644
index 00000000..86f7c311
--- /dev/null
+++ b/modules/nf-core/fastqc/tests/main.nf.test.snap
@@ -0,0 +1,88 @@
+{
+ "fastqc_versions_interleaved": {
+ "content": [
+ [
+ "versions.yml:md5,e1cc25ca8af856014824abd842e93978"
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-01-31T17:40:07.293713"
+ },
+ "fastqc_stub": {
+ "content": [
+ [
+ "test.html",
+ "test.zip",
+ "versions.yml:md5,e1cc25ca8af856014824abd842e93978"
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-01-31T17:31:01.425198"
+ },
+ "fastqc_versions_multiple": {
+ "content": [
+ [
+ "versions.yml:md5,e1cc25ca8af856014824abd842e93978"
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-01-31T17:40:55.797907"
+ },
+ "fastqc_versions_bam": {
+ "content": [
+ [
+ "versions.yml:md5,e1cc25ca8af856014824abd842e93978"
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-01-31T17:40:26.795862"
+ },
+ "fastqc_versions_single": {
+ "content": [
+ [
+ "versions.yml:md5,e1cc25ca8af856014824abd842e93978"
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-01-31T17:39:27.043675"
+ },
+ "fastqc_versions_paired": {
+ "content": [
+ [
+ "versions.yml:md5,e1cc25ca8af856014824abd842e93978"
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-01-31T17:39:47.584191"
+ },
+ "fastqc_versions_custom_prefix": {
+ "content": [
+ [
+ "versions.yml:md5,e1cc25ca8af856014824abd842e93978"
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-01-31T17:41:14.576531"
+ }
+}
\ No newline at end of file
diff --git a/modules/nf-core/fastqc/tests/tags.yml b/modules/nf-core/fastqc/tests/tags.yml
new file mode 100644
index 00000000..7834294b
--- /dev/null
+++ b/modules/nf-core/fastqc/tests/tags.yml
@@ -0,0 +1,2 @@
+fastqc:
+ - modules/nf-core/fastqc/**
diff --git a/nextflow.config b/nextflow.config
index ec565e3c..cf50c3f7 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -43,7 +43,7 @@ params {
kraken2_db_path = null
// HiC options
- hic_skip = true
+ hic = null
hic_skip_fastp = false
hic_skip_fastqc = false
hic_fastp_ext_args = '--qualified_quality_phred 20 --length_required 50'
diff --git a/nextflow_schema.json b/nextflow_schema.json
index add593cc..fcbfcbd1 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -183,10 +183,11 @@
"description": "",
"default": "",
"properties": {
- "hic_skip": {
- "type": "boolean",
- "default": true,
- "description": "Skip HiC contact map construction"
+ "hic": {
+ "type": "string",
+ "description": "HiC reads",
+ "help_text": "Path to reads provided as a SRA ID or as a path to paired reads with pattern '*R{1,2}.(fastq|fq).gz'",
+ "pattern": "^SR\\w+$|^\\S+R\\{1,2\\}\\.f(ast)?q\\.gz$"
},
"hic_skip_fastp": {
"type": "boolean",
diff --git a/subworkflows/nf-core/fastq_trim_fastp_fastqc/main.nf b/subworkflows/nf-core/fastq_trim_fastp_fastqc/main.nf
new file mode 100644
index 00000000..39a086ad
--- /dev/null
+++ b/subworkflows/nf-core/fastq_trim_fastp_fastqc/main.nf
@@ -0,0 +1,106 @@
+//
+// Read QC and trimming
+//
+
+include { FASTQC as FASTQC_RAW } from '../../../modules/nf-core/fastqc/main'
+include { FASTQC as FASTQC_TRIM } from '../../../modules/nf-core/fastqc/main'
+include { FASTP } from '../../../modules/nf-core/fastp/main'
+
+//
+// Function that parses fastp json output file to get total number of reads after trimming
+//
+import groovy.json.JsonSlurper
+
+def getFastpReadsAfterFiltering(json_file) {
+ def Map json = (Map) new JsonSlurper().parseText(json_file.text).get('summary')
+ return json['after_filtering']['total_reads'].toLong()
+}
+
+workflow FASTQ_TRIM_FASTP_FASTQC {
+ take:
+ ch_reads // channel: [ val(meta), path(reads) ]
+ ch_adapter_fasta // channel: [ path(fasta) ]
+ val_save_trimmed_fail // value: boolean
+ val_save_merged // value: boolean
+ val_skip_fastp // value: boolean
+ val_skip_fastqc // value: boolean
+
+ main:
+
+ ch_versions = Channel.empty()
+
+ ch_fastqc_raw_html = Channel.empty()
+ ch_fastqc_raw_zip = Channel.empty()
+ if (!val_skip_fastqc) {
+ FASTQC_RAW (
+ ch_reads
+ )
+ ch_fastqc_raw_html = FASTQC_RAW.out.html
+ ch_fastqc_raw_zip = FASTQC_RAW.out.zip
+ ch_versions = ch_versions.mix(FASTQC_RAW.out.versions.first())
+ }
+
+ ch_trim_reads = ch_reads
+ ch_trim_json = Channel.empty()
+ ch_trim_html = Channel.empty()
+ ch_trim_log = Channel.empty()
+ ch_trim_reads_fail = Channel.empty()
+ ch_trim_reads_merged = Channel.empty()
+ ch_fastqc_trim_html = Channel.empty()
+ ch_fastqc_trim_zip = Channel.empty()
+ if (!val_skip_fastp) {
+ FASTP (
+ ch_reads,
+ ch_adapter_fasta,
+ val_save_trimmed_fail,
+ val_save_merged
+ )
+ ch_trim_reads = FASTP.out.reads
+ ch_trim_json = FASTP.out.json
+ ch_trim_html = FASTP.out.html
+ ch_trim_log = FASTP.out.log
+ ch_trim_reads_fail = FASTP.out.reads_fail
+ ch_trim_reads_merged = FASTP.out.reads_merged
+ ch_versions = ch_versions.mix(FASTP.out.versions.first())
+
+ //
+ // Filter empty FastQ files after adapter trimming so FastQC doesn't fail
+ //
+ ch_trim_reads
+ .join(ch_trim_json)
+ .map { meta, reads, json ->
+ if (json.text.readLines().size < 1) {
+ return [ meta, reads ]
+ }
+
+ if (getFastpReadsAfterFiltering(json) > 0) {
+ [ meta, reads ]
+ }
+ }
+ .set { ch_trim_reads }
+
+ if (!val_skip_fastqc) {
+ FASTQC_TRIM (
+ ch_trim_reads
+ )
+ ch_fastqc_trim_html = FASTQC_TRIM.out.html
+ ch_fastqc_trim_zip = FASTQC_TRIM.out.zip
+ ch_versions = ch_versions.mix(FASTQC_TRIM.out.versions.first())
+ }
+ }
+
+ emit:
+ reads = ch_trim_reads // channel: [ val(meta), path(reads) ]
+ trim_json = ch_trim_json // channel: [ val(meta), path(json) ]
+ trim_html = ch_trim_html // channel: [ val(meta), path(html) ]
+ trim_log = ch_trim_log // channel: [ val(meta), path(log) ]
+ trim_reads_fail = ch_trim_reads_fail // channel: [ val(meta), path(fastq.gz) ]
+ trim_reads_merged = ch_trim_reads_merged // channel: [ val(meta), path(fastq.gz) ]
+
+ fastqc_raw_html = ch_fastqc_raw_html // channel: [ val(meta), path(html) ]
+ fastqc_raw_zip = ch_fastqc_raw_zip // channel: [ val(meta), path(zip) ]
+ fastqc_trim_html = ch_fastqc_trim_html // channel: [ val(meta), path(html) ]
+ fastqc_trim_zip = ch_fastqc_trim_zip // channel: [ val(meta), path(zip) ]
+
+ versions = ch_versions.ifEmpty(null) // channel: [ path(versions.yml) ]
+}
diff --git a/subworkflows/nf-core/fastq_trim_fastp_fastqc/meta.yml b/subworkflows/nf-core/fastq_trim_fastp_fastqc/meta.yml
new file mode 100644
index 00000000..9f4e12e0
--- /dev/null
+++ b/subworkflows/nf-core/fastq_trim_fastp_fastqc/meta.yml
@@ -0,0 +1,108 @@
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json
+name: "fastq_trim_fastp_fastqc"
+description: Read QC, fastp trimming and read qc
+keywords:
+ - qc
+ - quality_control
+ - adapters
+ - trimming
+ - fastq
+components:
+ - fastqc
+ - fastp
+input:
+ - ch_reads:
+ type: file
+ description: |
+ Structure: [ val(meta), path (reads) ]
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ], List of input FastQ files of size 1 and 2 for single-end and paired-end data,
+ respectively. If you wish to run interleaved paired-end data, supply as single-end data
+ but with `--interleaved_in` in your `modules.conf`'s `ext.args` for the module.
+ - ch_adapter_fasta:
+ type: file
+ description: |
+ Structure: path(adapter_fasta)
+ File in FASTA format containing possible adapters to remove.
+ - val_save_trimmed_fail:
+ type: boolean
+ description: |
+ Structure: val(save_trimmed_fail)
+ Specify true to save files that failed to pass trimming thresholds ending in `*.fail.fastq.gz`
+ - val_save_merged:
+ type: boolean
+ description: |
+ Structure: val(save_merged)
+ Specify true to save all merged reads to the a file ending in `*.merged.fastq.gz`
+ - val_skip_fastqc:
+ type: boolean
+ description: |
+ Structure: val(skip_fastqc)
+ skip the fastqc process if true
+ - val_skip_fastp:
+ type: boolean
+ description: |
+ Structure: val(skip_fastp)
+ skip the fastp process if true
+output:
+ - meta:
+ type: value
+ description: Groovy Map containing sample information e.g. [ id:'test', single_end:false ]
+ - reads:
+ type: file
+ description: |
+ Structure: [ val(meta), path(reads) ]
+ The trimmed/modified/unmerged fastq reads
+ - trim_json:
+ type: file
+ description: |
+ Structure: [ val(meta), path(trim_json) ]
+ Results in JSON format
+ - trim_html:
+ type: file
+ description: |
+ Structure: [ val(meta), path(trim_html) ]
+ Results in HTML format
+ - trim_log:
+ type: file
+ description: |
+ Structure: [ val(meta), path(trim_log) ]
+ fastq log file
+ - trim_reads_fail:
+ type: file
+ description: |
+ Structure: [ val(meta), path(trim_reads_fail) ]
+ Reads the failed the preprocessing
+ - trim_reads_merged:
+ type: file
+ description: |
+ Structure: [ val(meta), path(trim_reads_merged) ]
+ Reads that were successfully merged
+ - fastqc_raw_html:
+ type: file
+ description: |
+ Structure: [ val(meta), path(fastqc_raw_html) ]
+ Raw fastQC report
+ - fastqc_raw_zip:
+ type: file
+ description: |
+ Structure: [ val(meta), path(fastqc_raw_zip) ]
+ Raw fastQC report archive
+ - fastqc_trim_html:
+ type: file
+ description: |
+ Structure: [ val(meta), path(fastqc_trim_html) ]
+ Trimmed fastQC report
+ - fastqc_trim_zip:
+ type: file
+ description: |
+ Structure: [ val(meta), path(fastqc_trim_zip) ]
+ Trimmed fastQC report archive
+ - versions:
+ type: file
+ description: File containing software versions
+ pattern: "versions.yml"
+authors:
+ - "@Joon-Klaps"
+maintainers:
+ - "@Joon-Klaps"
diff --git a/tests/stub/assemblysheet.csv b/tests/stub/assemblysheet.csv
index dd67ad3b..69b0eb4f 100644
--- a/tests/stub/assemblysheet.csv
+++ b/tests/stub/assemblysheet.csv
@@ -1,2 +1,2 @@
-tag,fasta,gff3,monoploid_ids,hic_reads,synteny_labels
-FI1,https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/003/814/445/GCA_003814445.1_ASM381444v1/GCA_003814445.1_ASM381444v1_genomic.fna.gz,https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/003/814/445/GCA_003814445.1_ASM381444v1/GCA_003814445.1_ASM381444v1_genomic.gff.gz,https://raw.githubusercontent.com/plant-food-research-open/assemblyqc/dev/tests/stub/FI1.monoploid.seqs.txt,"https://raw.githubusercontent.com/plant-food-research-open/assemblyqc/dev/docs/test_files/hic/stub_hic.R{1,2}.fq.gz",https://raw.githubusercontent.com/plant-food-research-open/assemblyqc/dev/FI1.seq.labels.tsv
+tag,fasta,gff3,monoploid_ids,synteny_labels
+FI1,https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/003/814/445/GCA_003814445.1_ASM381444v1/GCA_003814445.1_ASM381444v1_genomic.fna.gz,https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/003/814/445/GCA_003814445.1_ASM381444v1/GCA_003814445.1_ASM381444v1_genomic.gff.gz,https://raw.githubusercontent.com/plant-food-research-open/assemblyqc/dev/tests/stub/FI1.monoploid.seqs.txt,https://raw.githubusercontent.com/plant-food-research-open/assemblyqc/dev/FI1.seq.labels.tsv
diff --git a/tests/stub/stub.config b/tests/stub/stub.config
index 4f8a1ce5..5664ccc0 100644
--- a/tests/stub/stub.config
+++ b/tests/stub/stub.config
@@ -20,6 +20,8 @@ params {
kraken2_skip = false
kraken2_db_path = 'tests/stub/kraken2/k2_minusb_20231009.tar.gz'
+ hic = 'tests/stub/hic/Dummy_hic.R{1,2}.fq.gz'
+
// Limit resources so that this can run on GitHub Actions
max_cpus = 2
max_memory = '6.GB'
diff --git a/workflows/assemblyqc.nf b/workflows/assemblyqc.nf
index f82c3a34..48e3c7d2 100644
--- a/workflows/assemblyqc.nf
+++ b/workflows/assemblyqc.nf
@@ -50,6 +50,7 @@ include { GUNZIP as GUNZIP_FASTA } from '../modules/nf-core/gunzip/ma
include { GUNZIP as GUNZIP_GFF3 } from '../modules/nf-core/gunzip/main'
include { FASTAVALIDATOR } from '../modules/nf-core/fastavalidator/main'
include { FASTA_EXPLORE_SEARCH_PLOT_TIDK } from '../subworkflows/nf-core/fasta_explore_search_plot_tidk/main'
+include { FASTQ_TRIM_FASTP_FASTQC } from '../subworkflows/nf-core/fastq_trim_fastp_fastqc/main'
include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoftwareversions/main'
@@ -70,7 +71,7 @@ workflow ASSEMBLYQC {
ch_input = Channel.fromSamplesheet('input')
ch_target_assemby_branch = ch_input
- | map { tag, fasta, gff, mono_ids, reads, labels ->
+ | map { tag, fasta, gff, mono_ids, labels ->
[ [ id: tag ], file(fasta, checkIfExists: true) ]
}
| branch { meta, fasta ->
@@ -79,7 +80,7 @@ workflow ASSEMBLYQC {
}
ch_assemby_gff3_branch = ch_input
- | map { tag, fasta, gff, mono_ids, reads, labels ->
+ | map { tag, fasta, gff, mono_ids, labels ->
gff
? [ [ id: tag ], file(gff, checkIfExists: true) ]
: null
@@ -90,12 +91,23 @@ workflow ASSEMBLYQC {
}
ch_mono_ids = ch_input
- | map { tag, fasta, gff, mono_ids, reads, labels ->
+ | map { tag, fasta, gff, mono_ids, labels ->
mono_ids
? [ [ id: tag ], file(mono_ids, checkIfExists: true) ]
: null
}
+ ch_hic_reads = !params.hic
+ ? Channel.empty()
+ : (
+ "$params.hic".find(/.*[\/].*\.(fastq|fq)\.gz/)
+ ? Channel.fromFilePairs(params.hic, checkIfExists: true)
+ : Channel.fromSRA(params.hic)
+ )
+ | map{ sample, fq ->
+ [ [ id: sample, single_end: false ], fq ]
+ }
+
// MODULE: GUNZIP as GUNZIP_FASTA
GUNZIP_FASTA ( ch_target_assemby_branch.gz )
@@ -352,6 +364,20 @@ workflow ASSEMBLYQC {
ch_kraken2_plot = FASTA_KRAKEN2.out.plot
ch_versions = ch_versions.mix(FASTA_KRAKEN2.out.versions)
+ // SUBWORKFLOW: FASTQ_TRIM_FASTP_FASTQC
+
+ FASTQ_TRIM_FASTP_FASTQC(
+ ch_hic_reads,
+ [],
+ true, // val_save_trimmed_fail
+ false, // val_save_merged
+ params.hic_skip_fastp,
+ params.hic_skip_fastqc
+ )
+
+ ch_cleaned_paired_reads = FASTQ_TRIM_FASTP_FASTQC.out.reads
+ ch_versions = ch_versions.mix(FASTQ_TRIM_FASTP_FASTQC.out.versions)
+
// MODULE: CUSTOM_DUMPSOFTWAREVERSIONS
CUSTOM_DUMPSOFTWAREVERSIONS (
ch_versions.unique().collectFile(name: 'collated_versions.yml')
|