From 036daf0b3f1dd7b9cc7443d0d51ae8370b469d40 Mon Sep 17 00:00:00 2001 From: ctuni Date: Mon, 28 Oct 2024 11:12:03 +0100 Subject: [PATCH 01/46] first commit with kraken2 module --- modules.json | 50 +++++- .../nf-core/kraken2/kraken2/environment.yml | 7 + modules/nf-core/kraken2/kraken2/main.nf | 85 ++++++++++ modules/nf-core/kraken2/kraken2/meta.yml | 99 +++++++++++ .../kraken2/kraken2/tests/main.nf.test | 143 ++++++++++++++++ .../kraken2/kraken2/tests/main.nf.test.snap | 74 ++++++++ .../nf-core/kraken2/kraken2/tests/tags.yml | 3 + .../krona/ktimporttaxonomy/environment.yml | 5 + .../nf-core/krona/ktimporttaxonomy/main.nf | 54 ++++++ .../nf-core/krona/ktimporttaxonomy/meta.yml | 57 +++++++ .../krona/ktimporttaxonomy/tests/main.nf.test | 61 +++++++ .../ktimporttaxonomy/tests/main.nf.test.snap | 47 ++++++ .../krona/ktimporttaxonomy/tests/tags.yml | 2 + .../krona/ktupdatetaxonomy/environment.yml | 5 + .../nf-core/krona/ktupdatetaxonomy/main.nf | 44 +++++ .../nf-core/krona/ktupdatetaxonomy/meta.yml | 32 ++++ .../krona/ktupdatetaxonomy/tests/main.nf.test | 54 ++++++ .../ktupdatetaxonomy/tests/main.nf.test.snap | 38 +++++ modules/nf-core/untar/environment.yml | 7 + modules/nf-core/untar/main.nf | 84 ++++++++++ modules/nf-core/untar/meta.yml | 49 ++++++ modules/nf-core/untar/tests/main.nf.test | 85 ++++++++++ modules/nf-core/untar/tests/main.nf.test.snap | 158 ++++++++++++++++++ modules/nf-core/untar/tests/tags.yml | 2 + subworkflows/local/phylogenetic_qc.nf | 41 +++++ workflows/seqinspector.nf | 10 ++ 26 files changed, 1290 insertions(+), 6 deletions(-) create mode 100644 modules/nf-core/kraken2/kraken2/environment.yml create mode 100644 modules/nf-core/kraken2/kraken2/main.nf create mode 100644 modules/nf-core/kraken2/kraken2/meta.yml create mode 100644 modules/nf-core/kraken2/kraken2/tests/main.nf.test create mode 100644 modules/nf-core/kraken2/kraken2/tests/main.nf.test.snap create mode 100644 modules/nf-core/kraken2/kraken2/tests/tags.yml create mode 100644 
modules/nf-core/krona/ktimporttaxonomy/environment.yml create mode 100644 modules/nf-core/krona/ktimporttaxonomy/main.nf create mode 100644 modules/nf-core/krona/ktimporttaxonomy/meta.yml create mode 100644 modules/nf-core/krona/ktimporttaxonomy/tests/main.nf.test create mode 100644 modules/nf-core/krona/ktimporttaxonomy/tests/main.nf.test.snap create mode 100644 modules/nf-core/krona/ktimporttaxonomy/tests/tags.yml create mode 100644 modules/nf-core/krona/ktupdatetaxonomy/environment.yml create mode 100644 modules/nf-core/krona/ktupdatetaxonomy/main.nf create mode 100644 modules/nf-core/krona/ktupdatetaxonomy/meta.yml create mode 100644 modules/nf-core/krona/ktupdatetaxonomy/tests/main.nf.test create mode 100644 modules/nf-core/krona/ktupdatetaxonomy/tests/main.nf.test.snap create mode 100644 modules/nf-core/untar/environment.yml create mode 100644 modules/nf-core/untar/main.nf create mode 100644 modules/nf-core/untar/meta.yml create mode 100644 modules/nf-core/untar/tests/main.nf.test create mode 100644 modules/nf-core/untar/tests/main.nf.test.snap create mode 100644 modules/nf-core/untar/tests/tags.yml create mode 100644 subworkflows/local/phylogenetic_qc.nf diff --git a/modules.json b/modules.json index 70f3486..98b68fd 100644 --- a/modules.json +++ b/modules.json @@ -8,12 +8,44 @@ "fastqc": { "branch": "master", "git_sha": "285a50500f9e02578d90b3ce6382ea3c30216acd", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] + }, + "kraken2/kraken2": { + "branch": "master", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": [ + "modules" + ] + }, + "krona/ktimporttaxonomy": { + "branch": "master", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": [ + "modules" + ] + }, + "krona/ktupdatetaxonomy": { + "branch": "master", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": [ + "modules" + ] }, "multiqc": { "branch": "master", "git_sha": "19ca321db5d8bd48923262c2eca6422359633491", - 
"installed_by": ["modules"] + "installed_by": [ + "modules" + ] + }, + "untar": { + "branch": "master", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": [ + "modules" + ] } } }, @@ -22,20 +54,26 @@ "utils_nextflow_pipeline": { "branch": "master", "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa", - "installed_by": ["subworkflows"] + "installed_by": [ + "subworkflows" + ] }, "utils_nfcore_pipeline": { "branch": "master", "git_sha": "92de218a329bfc9a9033116eb5f65fd270e72ba3", - "installed_by": ["subworkflows"] + "installed_by": [ + "subworkflows" + ] }, "utils_nfvalidation_plugin": { "branch": "master", "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa", - "installed_by": ["subworkflows"] + "installed_by": [ + "subworkflows" + ] } } } } } -} +} \ No newline at end of file diff --git a/modules/nf-core/kraken2/kraken2/environment.yml b/modules/nf-core/kraken2/kraken2/environment.yml new file mode 100644 index 0000000..ba776d3 --- /dev/null +++ b/modules/nf-core/kraken2/kraken2/environment.yml @@ -0,0 +1,7 @@ +channels: + - conda-forge + - bioconda +dependencies: + - "bioconda::kraken2=2.1.3" + - "coreutils=9.4" + - "pigz=2.8" diff --git a/modules/nf-core/kraken2/kraken2/main.nf b/modules/nf-core/kraken2/kraken2/main.nf new file mode 100644 index 0000000..364a6fe --- /dev/null +++ b/modules/nf-core/kraken2/kraken2/main.nf @@ -0,0 +1,85 @@ +process KRAKEN2_KRAKEN2 { + tag "$meta.id" + label 'process_high' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/mulled-v2-8706a1dd73c6cc426e12dd4dd33a5e917b3989ae:c8cbdc8ff4101e6745f8ede6eb5261ef98bdaff4-0' : + 'biocontainers/mulled-v2-8706a1dd73c6cc426e12dd4dd33a5e917b3989ae:c8cbdc8ff4101e6745f8ede6eb5261ef98bdaff4-0' }" + + input: + tuple val(meta), path(reads) + path db + val save_output_fastqs + val save_reads_assignment + + output: + tuple val(meta), path('*.classified{.,_}*') , optional:true, emit: classified_reads_fastq + tuple val(meta), path('*.unclassified{.,_}*') , optional:true, emit: unclassified_reads_fastq + tuple val(meta), path('*classifiedreads.txt') , optional:true, emit: classified_reads_assignment + tuple val(meta), path('*report.txt') , emit: report + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def paired = meta.single_end ? "" : "--paired" + def classified = meta.single_end ? "${prefix}.classified.fastq" : "${prefix}.classified#.fastq" + def unclassified = meta.single_end ? "${prefix}.unclassified.fastq" : "${prefix}.unclassified#.fastq" + def classified_option = save_output_fastqs ? "--classified-out ${classified}" : "" + def unclassified_option = save_output_fastqs ? "--unclassified-out ${unclassified}" : "" + def readclassification_option = save_reads_assignment ? "--output ${prefix}.kraken2.classifiedreads.txt" : "--output /dev/null" + def compress_reads_command = save_output_fastqs ? 
"pigz -p $task.cpus *.fastq" : "" + + """ + kraken2 \\ + --db $db \\ + --threads $task.cpus \\ + --report ${prefix}.kraken2.report.txt \\ + --gzip-compressed \\ + $unclassified_option \\ + $classified_option \\ + $readclassification_option \\ + $paired \\ + $args \\ + $reads + + $compress_reads_command + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + kraken2: \$(echo \$(kraken2 --version 2>&1) | sed 's/^.*Kraken version //; s/ .*\$//') + pigz: \$( pigz --version 2>&1 | sed 's/pigz //g' ) + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def paired = meta.single_end ? "" : "--paired" + def classified = meta.single_end ? "${prefix}.classified.fastq.gz" : "${prefix}.classified_1.fastq.gz ${prefix}.classified_2.fastq.gz" + def unclassified = meta.single_end ? "${prefix}.unclassified.fastq.gz" : "${prefix}.unclassified_1.fastq.gz ${prefix}.unclassified_2.fastq.gz" + def readclassification_option = save_reads_assignment ? "--output ${prefix}.kraken2.classifiedreads.txt" : "--output /dev/null" + def compress_reads_command = save_output_fastqs ? 
"pigz -p $task.cpus *.fastq" : "" + + """ + touch ${prefix}.kraken2.report.txt + if [ "$save_output_fastqs" == "true" ]; then + touch $classified + touch $unclassified + fi + if [ "$save_reads_assignment" == "true" ]; then + touch ${prefix}.kraken2.classifiedreads.txt + fi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + kraken2: \$(echo \$(kraken2 --version 2>&1) | sed 's/^.*Kraken version //; s/ .*\$//') + pigz: \$( pigz --version 2>&1 | sed 's/pigz //g' ) + END_VERSIONS + """ + +} diff --git a/modules/nf-core/kraken2/kraken2/meta.yml b/modules/nf-core/kraken2/kraken2/meta.yml new file mode 100644 index 0000000..8693764 --- /dev/null +++ b/modules/nf-core/kraken2/kraken2/meta.yml @@ -0,0 +1,99 @@ +name: kraken2_kraken2 +description: Classifies metagenomic sequence data +keywords: + - classify + - metagenomics + - fastq + - db +tools: + - kraken2: + description: | + Kraken2 is a taxonomic sequence classifier that assigns taxonomic labels to sequence reads + homepage: https://ccb.jhu.edu/software/kraken2/ + documentation: https://github.com/DerrickWood/kraken2/wiki/Manual + doi: 10.1186/s13059-019-1891-0 + licence: ["MIT"] + identifier: biotools:kraken2 +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: | + List of input FastQ files of size 1 and 2 for single-end and paired-end data, + respectively. + - - db: + type: directory + description: Kraken2 database + - - save_output_fastqs: + type: string + description: | + If true, optional commands are added to save classified and unclassified reads + as fastq files + - - save_reads_assignment: + type: string + description: | + If true, an optional command is added to save a file reporting the taxonomic + classification of each input read +output: + - classified_reads_fastq: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
+ [ id:'test', single_end:false ] + - "*.classified{.,_}*": + type: file + description: | + Reads classified as belonging to any of the taxa + on the Kraken2 database. + pattern: "*{fastq.gz}" + - unclassified_reads_fastq: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.unclassified{.,_}*": + type: file + description: | + Reads not classified to any of the taxa + on the Kraken2 database. + pattern: "*{fastq.gz}" + - classified_reads_assignment: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*classifiedreads.txt": + type: file + description: | + Kraken2 output file indicating the taxonomic assignment of + each input read + - report: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*report.txt": + type: file + description: | + Kraken2 report containing stats about classified + and not classified reads. 
+ pattern: "*.{report.txt}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@joseespinosa" + - "@drpatelh" +maintainers: + - "@joseespinosa" + - "@drpatelh" diff --git a/modules/nf-core/kraken2/kraken2/tests/main.nf.test b/modules/nf-core/kraken2/kraken2/tests/main.nf.test new file mode 100644 index 0000000..c0843df --- /dev/null +++ b/modules/nf-core/kraken2/kraken2/tests/main.nf.test @@ -0,0 +1,143 @@ +nextflow_process { + name "Test Process KRAKEN2_KRAKEN2" + script "../main.nf" + process "KRAKEN2_KRAKEN2" + tag "modules" + tag "modules_nfcore" + tag "untar" + tag "kraken2" + tag "kraken2/kraken2" + + setup { + run("UNTAR") { + script "modules/nf-core/untar/main.nf" + process { + """ + input[0] = Channel.of([ + [], + file( + params.modules_testdata_base_path + "genomics/sarscov2/genome/db/kraken2.tar.gz", + checkIfExists: true + ) + ]) + """ + } + } + } + + test("sarscov2 illumina single end [fastq]") { + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + [ file( + params.modules_testdata_base_path + "genomics/sarscov2/illumina/fastq/test_1.fastq.gz", + checkIfExists: true + )] + ] + input[1] = UNTAR.out.untar.map{ it[1] } + input[2] = true + input[3] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.report, + process.out.versions, + ).match() + }, + { assert process.out.classified_reads_fastq.get(0).get(1) ==~ ".*/test.classified.fastq.gz" }, + { assert process.out.unclassified_reads_fastq.get(0).get(1) ==~ ".*/test.unclassified.fastq.gz" }, + ) + } + } + + test("sarscov2 illumina paired end [fastq]") { + when { + params { + outdir = "$outputDir" + } + + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + [ + file( + params.modules_testdata_base_path + "genomics/sarscov2/illumina/fastq/test_1.fastq.gz", + checkIfExists: true + ), + file( + 
params.modules_testdata_base_path + "genomics/sarscov2/illumina/fastq/test_2.fastq.gz", + checkIfExists: true + ) + + ] + ] + input[1] = UNTAR.out.untar.map{ it[1] } + input[2] = true + input[3] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.report, + process.out.versions, + ).match() + }, + { assert process.out.classified_reads_fastq.get(0).get(1).get(0) + ==~ ".*/test.classified_1.fastq.gz" }, + { assert process.out.classified_reads_fastq.get(0).get(1).get(1) + ==~ ".*/test.classified_2.fastq.gz" }, + { assert process.out.unclassified_reads_fastq.get(0).get(1).get(0) + ==~ ".*/test.unclassified_1.fastq.gz" }, + { assert process.out.unclassified_reads_fastq.get(0).get(1).get(1) + ==~ ".*/test.unclassified_2.fastq.gz" }, + ) + } + } + + test("sarscov2 illumina single end [fastq] + save_reads_assignment") { + when { + params { + outdir = "$outputDir" + } + + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + [ file( + params.modules_testdata_base_path + "genomics/sarscov2/illumina/fastq/test_1.fastq.gz", + checkIfExists: true + )] + ] + input[1] = UNTAR.out.untar.map{ it[1] } + input[2] = false + input[3] = true + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.report, + process.out.classified_reads_assignment, + process.out.versions, + ).match() + }, + ) + } + } +} diff --git a/modules/nf-core/kraken2/kraken2/tests/main.nf.test.snap b/modules/nf-core/kraken2/kraken2/tests/main.nf.test.snap new file mode 100644 index 0000000..b432f87 --- /dev/null +++ b/modules/nf-core/kraken2/kraken2/tests/main.nf.test.snap @@ -0,0 +1,74 @@ +{ + "sarscov2 illumina single end [fastq]": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "test.kraken2.report.txt:md5,4227755fe40478b8d7dc8634b489761e" + ] + ], + [ + "versions.yml:md5,79adf2ca1cfc625cb77e391b27142c43" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": 
"23.10.1" + }, + "timestamp": "2024-04-04T18:47:03.745692" + }, + "sarscov2 illumina paired end [fastq]": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.kraken2.report.txt:md5,4227755fe40478b8d7dc8634b489761e" + ] + ], + [ + "versions.yml:md5,79adf2ca1cfc625cb77e391b27142c43" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-04T18:47:13.75649" + }, + "sarscov2 illumina single end [fastq] + save_reads_assignment": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "test.kraken2.report.txt:md5,4227755fe40478b8d7dc8634b489761e" + ] + ], + [ + [ + { + "id": "test", + "single_end": true + }, + "test.kraken2.classifiedreads.txt:md5,e7a90531f0d8d777316515c36fe4cae0" + ] + ], + [ + "versions.yml:md5,79adf2ca1cfc625cb77e391b27142c43" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-04T18:47:22.459465" + } +} \ No newline at end of file diff --git a/modules/nf-core/kraken2/kraken2/tests/tags.yml b/modules/nf-core/kraken2/kraken2/tests/tags.yml new file mode 100644 index 0000000..9ebfd7a --- /dev/null +++ b/modules/nf-core/kraken2/kraken2/tests/tags.yml @@ -0,0 +1,3 @@ +kraken2/kraken2: + - modules/nf-core/kraken2/kraken2/** + - modules/nf-core/untar/** diff --git a/modules/nf-core/krona/ktimporttaxonomy/environment.yml b/modules/nf-core/krona/ktimporttaxonomy/environment.yml new file mode 100644 index 0000000..342c589 --- /dev/null +++ b/modules/nf-core/krona/ktimporttaxonomy/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::krona=2.8.1 diff --git a/modules/nf-core/krona/ktimporttaxonomy/main.nf b/modules/nf-core/krona/ktimporttaxonomy/main.nf new file mode 100644 index 0000000..16f3cd9 --- /dev/null +++ b/modules/nf-core/krona/ktimporttaxonomy/main.nf @@ -0,0 +1,54 @@ +process KRONA_KTIMPORTTAXONOMY { + tag "${meta.id}" + label 'process_single' + + // WARN: Version information 
not provided by tool on CLI. Please update version string below when bumping container versions. + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/krona:2.8.1--pl5321hdfd78af_1': + 'biocontainers/krona:2.8.1--pl5321hdfd78af_1' }" + + input: + tuple val(meta), path(report) + path taxonomy, stageAs: 'taxonomy.tab' + + output: + tuple val(meta), path ('*.html'), emit: html + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def VERSION = '2.8.1' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. + """ + TAXONOMY=\$(find -L . -name '*.tab' -exec dirname {} \\;) + echo \$TAXONOMY + + ktImportTaxonomy \\ + $args \\ + -o ${prefix}.html \\ + -tax \$TAXONOMY/ \\ + $report + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + krona: $VERSION + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def VERSION = '2.8.1' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. + """ + touch ${prefix}.html + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + krona: $VERSION + END_VERSIONS + """ +} diff --git a/modules/nf-core/krona/ktimporttaxonomy/meta.yml b/modules/nf-core/krona/ktimporttaxonomy/meta.yml new file mode 100644 index 0000000..87f8478 --- /dev/null +++ b/modules/nf-core/krona/ktimporttaxonomy/meta.yml @@ -0,0 +1,57 @@ +name: krona_ktimporttaxonomy +description: KronaTools Import Taxonomy imports taxonomy classifications and produces + an interactive Krona plot. 
+keywords: + - plot + - taxonomy + - interactive + - html + - visualisation + - krona chart +tools: + - krona: + description: Krona Tools is a set of scripts to create Krona charts from several + Bioinformatics tools as well as from text and XML files. + homepage: https://github.com/marbl/Krona/wiki/KronaTools + documentation: http://manpages.ubuntu.com/manpages/impish/man1/ktImportTaxonomy.1.html + doi: 10.1186/1471-2105-12-385 + identifier: biotools:krona +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test'] + - report: + type: file + description: "A tab-delimited file with taxonomy IDs and (optionally) query + IDs, magnitudes, and scores. Query IDs are taken from column 1, taxonomy + IDs from column 2, and scores from column 3. Lines beginning with # will + be ignored." + pattern: "*.{tsv}" + - - taxonomy: + type: file + description: | + Path to a Krona taxonomy .tab file normally downloaded and generated by + krona/ktUpdateTaxonomy. Custom taxonomy files can have any name, but + must end in `.tab`. + pattern: "*tab" +output: + - html: + - meta: + type: file + description: A html file containing an interactive krona plot. + pattern: "*.{html}" + - "*.html": + type: file + description: A html file containing an interactive krona plot. 
+ pattern: "*.{html}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@mjakobs" +maintainers: + - "@mjakobs" diff --git a/modules/nf-core/krona/ktimporttaxonomy/tests/main.nf.test b/modules/nf-core/krona/ktimporttaxonomy/tests/main.nf.test new file mode 100644 index 0000000..1068f30 --- /dev/null +++ b/modules/nf-core/krona/ktimporttaxonomy/tests/main.nf.test @@ -0,0 +1,61 @@ +nextflow_process { + + name "Test Process KRONA_KTIMPORTTAXONOMY" + script "../main.nf" + process "KRONA_KTIMPORTTAXONOMY" + + tag "modules" + tag "modules_nfcore" + tag "krona" + tag "krona/ktimporttaxonomy" + + test ("sarscov2 - metagenome - kraken report") { + + when { + process { + """ + input[0] = Channel.of([ + [id: 'test'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/metagenome/test_1.kraken2.report.txt', checkIfExists: true) + ]) + input[1] = Channel.of([ + file(params.modules_testdata_base_path + 'genomics/sarscov2/metagenome/krona_taxonomy.tab', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot ( process.out.versions ).match() }, + { assert file(process.out.html.get(0).get(1)).exists() } + ) + } + } + + test ("sarscov2 - metagenome - kraken report - stub") { + + options '-stub' + when { + process { + """ + input[0] = Channel.of([ + [id: 'test'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/metagenome/test_1.kraken2.report.txt', checkIfExists: true) + ]) + input[1] = Channel.of([ + file(params.modules_testdata_base_path + 'genomics/sarscov2/metagenome/krona_taxonomy.tab', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() }, + ) + } + } +} diff --git a/modules/nf-core/krona/ktimporttaxonomy/tests/main.nf.test.snap b/modules/nf-core/krona/ktimporttaxonomy/tests/main.nf.test.snap new file mode 100644 index 
0000000..61fba86 --- /dev/null +++ b/modules/nf-core/krona/ktimporttaxonomy/tests/main.nf.test.snap @@ -0,0 +1,47 @@ +{ + "sarscov2 - metagenome - kraken report": { + "content": [ + [ + "versions.yml:md5,59fc89b6db8fad0aa9aa06f7437a18a7" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.1" + }, + "timestamp": "2024-08-08T10:34:23.760055" + }, + "sarscov2 - metagenome - kraken report - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "versions.yml:md5,59fc89b6db8fad0aa9aa06f7437a18a7" + ], + "html": [ + [ + { + "id": "test" + }, + "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,59fc89b6db8fad0aa9aa06f7437a18a7" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.1" + }, + "timestamp": "2024-08-08T10:34:29.695251" + } +} diff --git a/modules/nf-core/krona/ktimporttaxonomy/tests/tags.yml b/modules/nf-core/krona/ktimporttaxonomy/tests/tags.yml new file mode 100644 index 0000000..1112970 --- /dev/null +++ b/modules/nf-core/krona/ktimporttaxonomy/tests/tags.yml @@ -0,0 +1,2 @@ +krona/ktimporttaxonomy: + - "modules/nf-core/krona/ktimporttaxonomy/**" diff --git a/modules/nf-core/krona/ktupdatetaxonomy/environment.yml b/modules/nf-core/krona/ktupdatetaxonomy/environment.yml new file mode 100644 index 0000000..cb06934 --- /dev/null +++ b/modules/nf-core/krona/ktupdatetaxonomy/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::krona=2.7.1 diff --git a/modules/nf-core/krona/ktupdatetaxonomy/main.nf b/modules/nf-core/krona/ktupdatetaxonomy/main.nf new file mode 100644 index 0000000..2586f9c --- /dev/null +++ b/modules/nf-core/krona/ktupdatetaxonomy/main.nf @@ -0,0 +1,44 @@ +def VERSION='2.7.1' // Version information not provided by tool on CLI + +process KRONA_KTUPDATETAXONOMY { + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ 
workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/krona:2.7.1--pl526_5' : + 'biocontainers/krona:2.7.1--pl526_5' }" + + input: + + output: + path 'taxonomy/taxonomy.tab', emit: db + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + """ + ktUpdateTaxonomy.sh \\ + $args \\ + taxonomy/ + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + krona: $VERSION + END_VERSIONS + """ + + stub: + """ + mkdir taxonomy + + touch taxonomy/taxonomy.tab + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + krona: $VERSION + END_VERSIONS + """ +} diff --git a/modules/nf-core/krona/ktupdatetaxonomy/meta.yml b/modules/nf-core/krona/ktupdatetaxonomy/meta.yml new file mode 100644 index 0000000..d7b5e80 --- /dev/null +++ b/modules/nf-core/krona/ktupdatetaxonomy/meta.yml @@ -0,0 +1,32 @@ +name: krona_ktupdatetaxonomy +description: KronaTools Update Taxonomy downloads a taxonomy database +keywords: + - database + - taxonomy + - krona + - visualisation +tools: + - krona: + description: Krona Tools is a set of scripts to create Krona charts from several + Bioinformatics tools as well as from text and XML files. + homepage: https://github.com/marbl/Krona/wiki/KronaTools + documentation: https://github.com/marbl/Krona/wiki/Installing + doi: + 10.1186/1471-2105-12-385 + # There is no input. This module downloads a pre-built taxonomy database for use with Krona Tools. + identifier: biotools:krona +output: + - db: + - taxonomy/taxonomy.tab: + type: file + description: A TAB separated file that contains a taxonomy database. 
+ pattern: "*.{tab}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@mjakobs" +maintainers: + - "@mjakobs" diff --git a/modules/nf-core/krona/ktupdatetaxonomy/tests/main.nf.test b/modules/nf-core/krona/ktupdatetaxonomy/tests/main.nf.test new file mode 100644 index 0000000..672e82d --- /dev/null +++ b/modules/nf-core/krona/ktupdatetaxonomy/tests/main.nf.test @@ -0,0 +1,54 @@ + +nextflow_process { + + name "Test Process KRONA_KTUPDATETAXONOMY" + script "../main.nf" + process "KRONA_KTUPDATETAXONOMY" + + tag "modules" + tag "modules_nfcore" + tag "krona" + tag "krona/ktupdatetaxonomy" + + test("test-krona-ktupdatetaxonomy") { + + when { + process { + """ + + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + file(process.out.db[0]).name, //unstable + process.out.versions + ).match() + } + ) + } + } + + test("test-krona-ktupdatetaxonomy-stub") { + options '-stub' + + when { + process { + """ + + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + +} diff --git a/modules/nf-core/krona/ktupdatetaxonomy/tests/main.nf.test.snap b/modules/nf-core/krona/ktupdatetaxonomy/tests/main.nf.test.snap new file mode 100644 index 0000000..fba9392 --- /dev/null +++ b/modules/nf-core/krona/ktupdatetaxonomy/tests/main.nf.test.snap @@ -0,0 +1,38 @@ +{ + "test-krona-ktupdatetaxonomy": { + "content": [ + "taxonomy.tab", + [ + "versions.yml:md5,a0e095fdd3ba80fcc62188c4c1f38ff7" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-06T20:17:37.154632" + }, + "test-krona-ktupdatetaxonomy-stub": { + "content": [ + { + "0": [ + "taxonomy.tab:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + "1": [ + "versions.yml:md5,a0e095fdd3ba80fcc62188c4c1f38ff7" + ], + "db": [ + "taxonomy.tab:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + "versions": [ + 
"versions.yml:md5,a0e095fdd3ba80fcc62188c4c1f38ff7" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-05T20:15:46.959212" + } +} \ No newline at end of file diff --git a/modules/nf-core/untar/environment.yml b/modules/nf-core/untar/environment.yml new file mode 100644 index 0000000..c779485 --- /dev/null +++ b/modules/nf-core/untar/environment.yml @@ -0,0 +1,7 @@ +channels: + - conda-forge + - bioconda +dependencies: + - conda-forge::grep=3.11 + - conda-forge::sed=4.8 + - conda-forge::tar=1.34 diff --git a/modules/nf-core/untar/main.nf b/modules/nf-core/untar/main.nf new file mode 100644 index 0000000..9bd8f55 --- /dev/null +++ b/modules/nf-core/untar/main.nf @@ -0,0 +1,84 @@ +process UNTAR { + tag "$archive" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/ubuntu:22.04' : + 'nf-core/ubuntu:22.04' }" + + input: + tuple val(meta), path(archive) + + output: + tuple val(meta), path("$prefix"), emit: untar + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + prefix = task.ext.prefix ?: ( meta.id ? 
"${meta.id}" : archive.baseName.toString().replaceFirst(/\.tar$/, "")) + + """ + mkdir $prefix + + ## Ensures --strip-components only applied when top level of tar contents is a directory + ## If just files or multiple directories, place all in prefix + if [[ \$(tar -taf ${archive} | grep -o -P "^.*?\\/" | uniq | wc -l) -eq 1 ]]; then + tar \\ + -C $prefix --strip-components 1 \\ + -xavf \\ + $args \\ + $archive \\ + $args2 + else + tar \\ + -C $prefix \\ + -xavf \\ + $args \\ + $archive \\ + $args2 + fi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + untar: \$(echo \$(tar --version 2>&1) | sed 's/^.*(GNU tar) //; s/ Copyright.*\$//') + END_VERSIONS + """ + + stub: + prefix = task.ext.prefix ?: ( meta.id ? "${meta.id}" : archive.toString().replaceFirst(/\.[^\.]+(.gz)?$/, "")) + """ + mkdir ${prefix} + ## Dry-run untaring the archive to get the files and place all in prefix + if [[ \$(tar -taf ${archive} | grep -o -P "^.*?\\/" | uniq | wc -l) -eq 1 ]]; then + for i in `tar -tf ${archive}`; + do + if [[ \$(echo "\${i}" | grep -E "/\$") == "" ]]; + then + touch \${i} + else + mkdir -p \${i} + fi + done + else + for i in `tar -tf ${archive}`; + do + if [[ \$(echo "\${i}" | grep -E "/\$") == "" ]]; + then + touch ${prefix}/\${i} + else + mkdir -p ${prefix}/\${i} + fi + done + fi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + untar: \$(echo \$(tar --version 2>&1) | sed 's/^.*(GNU tar) //; s/ Copyright.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/untar/meta.yml b/modules/nf-core/untar/meta.yml new file mode 100644 index 0000000..290346b --- /dev/null +++ b/modules/nf-core/untar/meta.yml @@ -0,0 +1,49 @@ +name: untar +description: Extract files. +keywords: + - untar + - uncompress + - extract +tools: + - untar: + description: | + Extract tar.gz files. 
+ documentation: https://www.gnu.org/software/tar/manual/ + licence: ["GPL-3.0-or-later"] + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - archive: + type: file + description: File to be untarred + pattern: "*.{tar}.{gz}" +output: + - untar: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - $prefix: + type: directory + description: Directory containing contents of archive + pattern: "*/" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@joseespinosa" + - "@drpatelh" + - "@matthdsm" + - "@jfy133" +maintainers: + - "@joseespinosa" + - "@drpatelh" + - "@matthdsm" + - "@jfy133" diff --git a/modules/nf-core/untar/tests/main.nf.test b/modules/nf-core/untar/tests/main.nf.test new file mode 100644 index 0000000..c957517 --- /dev/null +++ b/modules/nf-core/untar/tests/main.nf.test @@ -0,0 +1,85 @@ +nextflow_process { + + name "Test Process UNTAR" + script "../main.nf" + process "UNTAR" + tag "modules" + tag "modules_nfcore" + tag "untar" + + test("test_untar") { + + when { + process { + """ + input[0] = [ [], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/db/kraken2.tar.gz', checkIfExists: true) ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() }, + ) + } + } + + test("test_untar_onlyfiles") { + + when { + process { + """ + input[0] = [ [], file(params.modules_testdata_base_path + 'generic/tar/hello.tar.gz', checkIfExists: true) ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() }, + ) + } + } + + test("test_untar - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ [], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/db/kraken2.tar.gz', 
checkIfExists: true) ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() }, + ) + } + } + + test("test_untar_onlyfiles - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ [], file(params.modules_testdata_base_path + 'generic/tar/hello.tar.gz', checkIfExists: true) ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() }, + ) + } + } +} diff --git a/modules/nf-core/untar/tests/main.nf.test.snap b/modules/nf-core/untar/tests/main.nf.test.snap new file mode 100644 index 0000000..ceb91b7 --- /dev/null +++ b/modules/nf-core/untar/tests/main.nf.test.snap @@ -0,0 +1,158 @@ +{ + "test_untar_onlyfiles": { + "content": [ + { + "0": [ + [ + [ + + ], + [ + "hello.txt:md5,e59ff97941044f85df5297e1c302d260" + ] + ] + ], + "1": [ + "versions.yml:md5,6063247258c56fd271d076bb04dd7536" + ], + "untar": [ + [ + [ + + ], + [ + "hello.txt:md5,e59ff97941044f85df5297e1c302d260" + ] + ] + ], + "versions": [ + "versions.yml:md5,6063247258c56fd271d076bb04dd7536" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-10T12:04:28.231047" + }, + "test_untar_onlyfiles - stub": { + "content": [ + { + "0": [ + [ + [ + + ], + [ + "hello.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "1": [ + "versions.yml:md5,6063247258c56fd271d076bb04dd7536" + ], + "untar": [ + [ + [ + + ], + [ + "hello.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "versions": [ + "versions.yml:md5,6063247258c56fd271d076bb04dd7536" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-10T12:04:45.773103" + }, + "test_untar - stub": { + "content": [ + { + "0": [ + [ + [ + + ], + [ + "hash.k2d:md5,d41d8cd98f00b204e9800998ecf8427e", + "opts.k2d:md5,d41d8cd98f00b204e9800998ecf8427e", + "taxo.k2d:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "1": [ + 
"versions.yml:md5,6063247258c56fd271d076bb04dd7536" + ], + "untar": [ + [ + [ + + ], + [ + "hash.k2d:md5,d41d8cd98f00b204e9800998ecf8427e", + "opts.k2d:md5,d41d8cd98f00b204e9800998ecf8427e", + "taxo.k2d:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "versions": [ + "versions.yml:md5,6063247258c56fd271d076bb04dd7536" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-10T12:04:36.777441" + }, + "test_untar": { + "content": [ + { + "0": [ + [ + [ + + ], + [ + "hash.k2d:md5,8b8598468f54a7087c203ad0190555d9", + "opts.k2d:md5,a033d00cf6759407010b21700938f543", + "taxo.k2d:md5,094d5891cdccf2f1468088855c214b2c" + ] + ] + ], + "1": [ + "versions.yml:md5,6063247258c56fd271d076bb04dd7536" + ], + "untar": [ + [ + [ + + ], + [ + "hash.k2d:md5,8b8598468f54a7087c203ad0190555d9", + "opts.k2d:md5,a033d00cf6759407010b21700938f543", + "taxo.k2d:md5,094d5891cdccf2f1468088855c214b2c" + ] + ] + ], + "versions": [ + "versions.yml:md5,6063247258c56fd271d076bb04dd7536" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-10T12:04:19.377674" + } +} \ No newline at end of file diff --git a/modules/nf-core/untar/tests/tags.yml b/modules/nf-core/untar/tests/tags.yml new file mode 100644 index 0000000..feb6f15 --- /dev/null +++ b/modules/nf-core/untar/tests/tags.yml @@ -0,0 +1,2 @@ +untar: + - modules/nf-core/untar/** diff --git a/subworkflows/local/phylogenetic_qc.nf b/subworkflows/local/phylogenetic_qc.nf new file mode 100644 index 0000000..73cf865 --- /dev/null +++ b/subworkflows/local/phylogenetic_qc.nf @@ -0,0 +1,41 @@ +// +// Seqinspector Phylogenetic classification of reads, to check for contamination and adjacent issues +// + +include { UNTAR as UNTAR_KRAKEN2_DB } from '../../modules/nf-core/untar/main.nf' +include { KRAKEN2 } from '../../modules/nf-core/kraken2/kraken2/main.nf' +include { KRONA_KTUPDATETAXONOMY } from '../../modules/nf-core/krona/ktupdatetaxonomy/main.nf' +include { 
KRONA_KTIMPORTTAXONOMY } from '../../modules/nf-core/krona/ktimporttaxonomy/main.nf' + +workflow PHYLOGENETIC_QC{ + take: + reads + + main: + ch_reads = reads + // + // MODULE: Untar kraken2_db + // + UNTAR_KRAKEN2_DB ( [ [:], params.kraken2_db ]) + ch_kraken2_db = UNTAR_KRAKEN2_DB.out.untar.map { it[1] } + + // + // MODULE: Perform kraken2 + // + KRAKEN2 ( + ch_reads, ch_kraken2_db + ) + KRAKEN2.out.report.map { meta, report -> [ report ] }.collect() + + // + // MODULE: krona plot the kraken2 reports + // + KRONA_KTUPDATETAXONOMY() + KRONA_KTIMPORTTAXONOMY ( + KRAKEN2.out.report.map { meta, report -> [ report ] }.collect(), + KRONA_KTUPDATETAXONOMY.out.db + ) + + emit: + kraken2_report = KRAKEN2.out.report.map { meta, report -> [ report ] }.collect() +} diff --git a/workflows/seqinspector.nf b/workflows/seqinspector.nf index 1ba00c6..41f084b 100644 --- a/workflows/seqinspector.nf +++ b/workflows/seqinspector.nf @@ -9,10 +9,13 @@ include { FASTQC } from '../modules/nf-core/fastqc/main' include { MULTIQC as MULTIQC_GLOBAL } from '../modules/nf-core/multiqc/main' include { MULTIQC as MULTIQC_PER_TAG } from '../modules/nf-core/multiqc/main' +include { PHYLOGENETIC_QC } from '../modules/local/phylogenetic_qc' + include { paramsSummaryMap } from 'plugin/nf-validation' include { paramsSummaryMultiqc } from '../subworkflows/nf-core/utils_nfcore_pipeline' include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline' include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_seqinspector_pipeline' +include { PHYLOGENETIC_QC } from '../subworkflows/local/phylogenetic_qc.nf' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -41,6 +44,13 @@ workflow SEQINSPECTOR { ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip) ch_versions = ch_versions.mix(FASTQC.out.versions.first()) + // + // SUBWORKFLOW: Run kraken2 and produce krona plots + // + PHYLOGENETIC_QC ( + ch_samplesheet + ) + // // 
Collate and save software versions // From e7c1e71d797b6823a3e3e6194f32633422eeae0b Mon Sep 17 00:00:00 2001 From: ctuni Date: Mon, 28 Oct 2024 11:29:14 +0100 Subject: [PATCH 02/46] added a database to the config --- modules.json | 38 ++++++++++---------------------------- nextflow.config | 3 +++ 2 files changed, 13 insertions(+), 28 deletions(-) diff --git a/modules.json b/modules.json index 98b68fd..0e4c137 100644 --- a/modules.json +++ b/modules.json @@ -8,44 +8,32 @@ "fastqc": { "branch": "master", "git_sha": "285a50500f9e02578d90b3ce6382ea3c30216acd", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "kraken2/kraken2": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "krona/ktimporttaxonomy": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "krona/ktupdatetaxonomy": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "multiqc": { "branch": "master", "git_sha": "19ca321db5d8bd48923262c2eca6422359633491", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "untar": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] } } }, @@ -54,26 +42,20 @@ "utils_nextflow_pipeline": { "branch": "master", "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa", - "installed_by": [ - "subworkflows" - ] + "installed_by": ["subworkflows"] }, "utils_nfcore_pipeline": { "branch": "master", "git_sha": "92de218a329bfc9a9033116eb5f65fd270e72ba3", - "installed_by": [ - "subworkflows" - ] + "installed_by": ["subworkflows"] }, "utils_nfvalidation_plugin": { "branch": "master", "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa", - "installed_by": [ - "subworkflows" - ] + 
"installed_by": ["subworkflows"] } } } } } -} \ No newline at end of file +} diff --git a/nextflow.config b/nextflow.config index 70e433a..28d0104 100644 --- a/nextflow.config +++ b/nextflow.config @@ -18,6 +18,9 @@ params { igenomes_base = 's3://ngi-igenomes/igenomes/' igenomes_ignore = false + // Kraken2 options + kraken2_db = 'https://github.com/nf-core/test-datasets/raw/taxprofiler/data/database/kraken2/testdb-kraken2.tar.gz' + // MultiQC options multiqc_config = null multiqc_title = null From 4549e1a4a45d81de29ca0b6cb4b73cd0fb754a71 Mon Sep 17 00:00:00 2001 From: ctuni Date: Mon, 28 Oct 2024 11:39:01 +0100 Subject: [PATCH 03/46] added kraken2 param to schema --- modules.json | 38 ++++++++++++++++++++++++++++---------- nextflow_schema.json | 9 ++++++++- workflows/seqinspector.nf | 2 +- 3 files changed, 37 insertions(+), 12 deletions(-) diff --git a/modules.json b/modules.json index 0e4c137..98b68fd 100644 --- a/modules.json +++ b/modules.json @@ -8,32 +8,44 @@ "fastqc": { "branch": "master", "git_sha": "285a50500f9e02578d90b3ce6382ea3c30216acd", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "kraken2/kraken2": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "krona/ktimporttaxonomy": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "krona/ktupdatetaxonomy": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "multiqc": { "branch": "master", "git_sha": "19ca321db5d8bd48923262c2eca6422359633491", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "untar": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] } } }, @@ -42,20 +54,26 @@ 
"utils_nextflow_pipeline": { "branch": "master", "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa", - "installed_by": ["subworkflows"] + "installed_by": [ + "subworkflows" + ] }, "utils_nfcore_pipeline": { "branch": "master", "git_sha": "92de218a329bfc9a9033116eb5f65fd270e72ba3", - "installed_by": ["subworkflows"] + "installed_by": [ + "subworkflows" + ] }, "utils_nfvalidation_plugin": { "branch": "master", "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa", - "installed_by": ["subworkflows"] + "installed_by": [ + "subworkflows" + ] } } } } } -} +} \ No newline at end of file diff --git a/nextflow_schema.json b/nextflow_schema.json index 36308a0..c9c1b04 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -1,5 +1,5 @@ { - "$schema": "http://json-schema.org/draft-07/schema", + "$schema": "https://json-schema.org/draft-07/schema", "$id": "https://raw.githubusercontent.com/nf-core/seqinspector/master/nextflow_schema.json", "title": "nf-core/seqinspector pipeline parameters", "description": "Pipeline to QC your sequences", @@ -71,6 +71,13 @@ "fa_icon": "fas fa-ban", "hidden": true, "help_text": "Do not load `igenomes.config` when running the pipeline. You may choose this option if you observe clashes between custom parameters and those supplied in `igenomes.config`." 
+ }, + "kraken2_db": { + "type": "string", + "exists": true, + "mimetype": "text/plain", + "help_text": "Path to Kraken2 database file, built with kraken2 build", + "fa_icon": "fas fa-database" } } }, diff --git a/workflows/seqinspector.nf b/workflows/seqinspector.nf index 41f084b..95e8030 100644 --- a/workflows/seqinspector.nf +++ b/workflows/seqinspector.nf @@ -9,7 +9,7 @@ include { FASTQC } from '../modules/nf-core/fastqc/main' include { MULTIQC as MULTIQC_GLOBAL } from '../modules/nf-core/multiqc/main' include { MULTIQC as MULTIQC_PER_TAG } from '../modules/nf-core/multiqc/main' -include { PHYLOGENETIC_QC } from '../modules/local/phylogenetic_qc' +include { PHYLOGENETIC_QC } from '../subworkflows/local/phylogenetic_qc' include { paramsSummaryMap } from 'plugin/nf-validation' include { paramsSummaryMultiqc } from '../subworkflows/nf-core/utils_nfcore_pipeline' From c1585102bf67d5c65e87d185aced9b3271cebe2f Mon Sep 17 00:00:00 2001 From: ctuni Date: Mon, 28 Oct 2024 11:40:50 +0100 Subject: [PATCH 04/46] readded modules --- modules.json | 38 ++++++++++---------------------------- 1 file changed, 10 insertions(+), 28 deletions(-) diff --git a/modules.json b/modules.json index 98b68fd..0e4c137 100644 --- a/modules.json +++ b/modules.json @@ -8,44 +8,32 @@ "fastqc": { "branch": "master", "git_sha": "285a50500f9e02578d90b3ce6382ea3c30216acd", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "kraken2/kraken2": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "krona/ktimporttaxonomy": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "krona/ktupdatetaxonomy": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "multiqc": { "branch": "master", "git_sha": 
"19ca321db5d8bd48923262c2eca6422359633491", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "untar": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] } } }, @@ -54,26 +42,20 @@ "utils_nextflow_pipeline": { "branch": "master", "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa", - "installed_by": [ - "subworkflows" - ] + "installed_by": ["subworkflows"] }, "utils_nfcore_pipeline": { "branch": "master", "git_sha": "92de218a329bfc9a9033116eb5f65fd270e72ba3", - "installed_by": [ - "subworkflows" - ] + "installed_by": ["subworkflows"] }, "utils_nfvalidation_plugin": { "branch": "master", "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa", - "installed_by": [ - "subworkflows" - ] + "installed_by": ["subworkflows"] } } } } } -} \ No newline at end of file +} From d058ccf2ce27423cc25c42395176aaf3c472a8f5 Mon Sep 17 00:00:00 2001 From: ctuni Date: Mon, 28 Oct 2024 11:47:05 +0100 Subject: [PATCH 05/46] fixed kraken2 name --- subworkflows/local/phylogenetic_qc.nf | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/subworkflows/local/phylogenetic_qc.nf b/subworkflows/local/phylogenetic_qc.nf index 73cf865..aae3cdb 100644 --- a/subworkflows/local/phylogenetic_qc.nf +++ b/subworkflows/local/phylogenetic_qc.nf @@ -3,7 +3,7 @@ // include { UNTAR as UNTAR_KRAKEN2_DB } from '../../modules/nf-core/untar/main.nf' -include { KRAKEN2 } from '../../modules/nf-core/kraken2/kraken2/main.nf' +include { KRAKEN2_KRAKEN2 } from '../../modules/nf-core/kraken2/kraken2/main.nf' include { KRONA_KTUPDATETAXONOMY } from '../../modules/nf-core/krona/ktupdatetaxonomy/main.nf' include { KRONA_KTIMPORTTAXONOMY } from '../../modules/nf-core/krona/ktimporttaxonomy/main.nf' @@ -22,20 +22,20 @@ workflow PHYLOGENETIC_QC{ // // MODULE: Perform kraken2 // - KRAKEN2 ( + KRAKEN2_KRAKEN2 ( ch_reads, ch_kraken2_db ) - KRAKEN2.out.report.map { meta, report -> [ report ] 
}.collect() + KRAKEN2_KRAKEN2.out.report.map { meta, report -> [ report ] }.collect() // // MODULE: krona plot the kraken2 reports // KRONA_KTUPDATETAXONOMY() KRONA_KTIMPORTTAXONOMY ( - KRAKEN2.out.report.map { meta, report -> [ report ] }.collect(), + KRAKEN2_KRAKEN2.out.report.map { meta, report -> [ report ] }.collect(), KRONA_KTUPDATETAXONOMY.out.db ) emit: - kraken2_report = KRAKEN2.out.report.map { meta, report -> [ report ] }.collect() + kraken2_report = KRAKEN2_KRAKEN2.out.report.map { meta, report -> [ report ] }.collect() } From e9426649b669a8ad79de0e17d6f246a92a626d4b Mon Sep 17 00:00:00 2001 From: ctuni Date: Mon, 28 Oct 2024 11:54:41 +0100 Subject: [PATCH 06/46] added missing kraken2 options --- nextflow.config | 2 ++ nextflow_schema.json | 12 ++++++++++++ subworkflows/local/phylogenetic_qc.nf | 5 ++++- 3 files changed, 18 insertions(+), 1 deletion(-) diff --git a/nextflow.config b/nextflow.config index 28d0104..4505198 100644 --- a/nextflow.config +++ b/nextflow.config @@ -20,6 +20,8 @@ params { // Kraken2 options kraken2_db = 'https://github.com/nf-core/test-datasets/raw/taxprofiler/data/database/kraken2/testdb-kraken2.tar.gz' + kraken2_save_reads = false + kraken2_save_readclassifications = false // MultiQC options multiqc_config = null diff --git a/nextflow_schema.json b/nextflow_schema.json index c9c1b04..9c0ad75 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -40,6 +40,18 @@ "type": "string", "description": "MultiQC report title. 
Printed as page header, used for filename if not otherwise specified.", "fa_icon": "fas fa-file-signature" + }, + "kraken2_save_reads": { + "type": "boolean", + "fa_icon": "fas fa-save", + "description": "Turn on saving of Kraken2-aligned reads", + "help_text": "Save reads that do and do not have a taxonomic classification in your output results directory in FASTQ format.\n\n> Modifies tool parameter(s):\n> - kraken2: `--classified-out` and `--unclassified-out`" + }, + "kraken2_save_readclassifications": { + "type": "boolean", + "fa_icon": "fas fa-save", + "description": "Turn on saving of Kraken2 per-read taxonomic assignment file", + "help_text": "Save a text file that contains a list of each read that had a taxonomic assignment, with information on the specific taxonomic assignment that each read received.\n\n> Modifies tool parameter(s):\n> - kraken2: `--output`" } } }, diff --git a/subworkflows/local/phylogenetic_qc.nf b/subworkflows/local/phylogenetic_qc.nf index aae3cdb..621eee8 100644 --- a/subworkflows/local/phylogenetic_qc.nf +++ b/subworkflows/local/phylogenetic_qc.nf @@ -23,7 +23,10 @@ workflow PHYLOGENETIC_QC{ // MODULE: Perform kraken2 // KRAKEN2_KRAKEN2 ( - ch_reads, ch_kraken2_db + ch_reads, + ch_kraken2_db, + params.kraken2_save_reads, + params.kraken2_save_readclassifications ) KRAKEN2_KRAKEN2.out.report.map { meta, report -> [ report ] }.collect() From 033df6e8ee3736e486e21e7048c9c06361678b61 Mon Sep 17 00:00:00 2001 From: ctuni Date: Mon, 28 Oct 2024 11:59:36 +0100 Subject: [PATCH 07/46] fixed something in the linting --- workflows/seqinspector.nf | 2 -- 1 file changed, 2 deletions(-) diff --git a/workflows/seqinspector.nf b/workflows/seqinspector.nf index 95e8030..fbefe8f 100644 --- a/workflows/seqinspector.nf +++ b/workflows/seqinspector.nf @@ -15,8 +15,6 @@ include { paramsSummaryMap } from 'plugin/nf-validation' include { paramsSummaryMultiqc } from '../subworkflows/nf-core/utils_nfcore_pipeline' include { softwareVersionsToYAML } from
'../subworkflows/nf-core/utils_nfcore_pipeline' include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_seqinspector_pipeline' -include { PHYLOGENETIC_QC } from '../subworkflows/local/phylogenetic_qc.nf' - /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RUN MAIN WORKFLOW From c5edd677fca8aebd7fae9c6d3494c84159cd6b86 Mon Sep 17 00:00:00 2001 From: ctuni Date: Mon, 28 Oct 2024 12:16:16 +0100 Subject: [PATCH 08/46] added output to multiqc --- subworkflows/local/phylogenetic_qc.nf | 4 ++-- workflows/seqinspector.nf | 6 ++++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/subworkflows/local/phylogenetic_qc.nf b/subworkflows/local/phylogenetic_qc.nf index 621eee8..33a86e7 100644 --- a/subworkflows/local/phylogenetic_qc.nf +++ b/subworkflows/local/phylogenetic_qc.nf @@ -28,14 +28,14 @@ workflow PHYLOGENETIC_QC{ params.kraken2_save_reads, params.kraken2_save_readclassifications ) - KRAKEN2_KRAKEN2.out.report.map { meta, report -> [ report ] }.collect() + //KRAKEN2_KRAKEN2.out.report.map { meta, report -> [ report ] }.collect() // // MODULE: krona plot the kraken2 reports // KRONA_KTUPDATETAXONOMY() KRONA_KTIMPORTTAXONOMY ( - KRAKEN2_KRAKEN2.out.report.map { meta, report -> [ report ] }.collect(), + KRAKEN2_KRAKEN2.out.report, KRONA_KTUPDATETAXONOMY.out.db ) diff --git a/workflows/seqinspector.nf b/workflows/seqinspector.nf index fbefe8f..b91c307 100644 --- a/workflows/seqinspector.nf +++ b/workflows/seqinspector.nf @@ -39,8 +39,6 @@ workflow SEQINSPECTOR { FASTQC ( ch_samplesheet ) - ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip) - ch_versions = ch_versions.mix(FASTQC.out.versions.first()) // // SUBWORKFLOW: Run kraken2 and produce krona plots @@ -49,6 +47,10 @@ workflow SEQINSPECTOR { ch_samplesheet ) + ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip).mix(PHYLOGENETIC_QC.out.kraken2_report) + ch_multiqc_files.view() + ch_versions = ch_versions.mix(FASTQC.out.versions.first()) + 
// // Collate and save software versions // From 6c55393dc31b4088ee7c1c35d2e3a72f0d05d831 Mon Sep 17 00:00:00 2001 From: ctuni Date: Mon, 28 Oct 2024 12:33:19 +0100 Subject: [PATCH 09/46] changed the kraken2 channels --- subworkflows/local/phylogenetic_qc.nf | 2 +- workflows/seqinspector.nf | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/subworkflows/local/phylogenetic_qc.nf b/subworkflows/local/phylogenetic_qc.nf index 33a86e7..6e96260 100644 --- a/subworkflows/local/phylogenetic_qc.nf +++ b/subworkflows/local/phylogenetic_qc.nf @@ -40,5 +40,5 @@ workflow PHYLOGENETIC_QC{ ) emit: - kraken2_report = KRAKEN2_KRAKEN2.out.report.map { meta, report -> [ report ] }.collect() + kraken2_report = KRAKEN2_KRAKEN2.out.report.collect() } diff --git a/workflows/seqinspector.nf b/workflows/seqinspector.nf index b91c307..b0836f2 100644 --- a/workflows/seqinspector.nf +++ b/workflows/seqinspector.nf @@ -47,7 +47,7 @@ workflow SEQINSPECTOR { ch_samplesheet ) - ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip).mix(PHYLOGENETIC_QC.out.kraken2_report) + ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip).join(PHYLOGENETIC_QC.out.kraken2_report) ch_multiqc_files.view() ch_versions = ch_versions.mix(FASTQC.out.versions.first()) From 59c41080d77b25785984ebe7a9999f3230c4a55a Mon Sep 17 00:00:00 2001 From: ctuni Date: Mon, 28 Oct 2024 12:45:16 +0100 Subject: [PATCH 10/46] removed kraken2 from mutiqc for now --- workflows/seqinspector.nf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/workflows/seqinspector.nf b/workflows/seqinspector.nf index b0836f2..1de5a04 100644 --- a/workflows/seqinspector.nf +++ b/workflows/seqinspector.nf @@ -47,8 +47,8 @@ workflow SEQINSPECTOR { ch_samplesheet ) - ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip).join(PHYLOGENETIC_QC.out.kraken2_report) - ch_multiqc_files.view() + //ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip).join(PHYLOGENETIC_QC.out.kraken2_report) + ch_multiqc_files = 
ch_multiqc_files.mix(FASTQC.out.zip) ch_versions = ch_versions.mix(FASTQC.out.versions.first()) // From bf8eaa2f9ce3a4f38f17c5ff87e385b0d9c74500 Mon Sep 17 00:00:00 2001 From: ctuni Date: Mon, 28 Oct 2024 14:16:42 +0100 Subject: [PATCH 11/46] added kraken2 reports to the multqc channel --- workflows/seqinspector.nf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/workflows/seqinspector.nf b/workflows/seqinspector.nf index 1de5a04..9dec85b 100644 --- a/workflows/seqinspector.nf +++ b/workflows/seqinspector.nf @@ -39,6 +39,7 @@ workflow SEQINSPECTOR { FASTQC ( ch_samplesheet ) + ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip) // // SUBWORKFLOW: Run kraken2 and produce krona plots @@ -47,8 +48,7 @@ workflow SEQINSPECTOR { ch_samplesheet ) - //ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip).join(PHYLOGENETIC_QC.out.kraken2_report) - ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip) + ch_multiqc_files = ch_multiqc_files.mix(PHYLOGENETIC_QC.out.kraken2_report) ch_versions = ch_versions.mix(FASTQC.out.versions.first()) // From a9aba3ee70e7ee38a34685009ec6abe3afb7a6cc Mon Sep 17 00:00:00 2001 From: ctuni Date: Mon, 28 Oct 2024 14:23:57 +0100 Subject: [PATCH 12/46] trying other methods to creae the multiqc files channel --- subworkflows/local/phylogenetic_qc.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/subworkflows/local/phylogenetic_qc.nf b/subworkflows/local/phylogenetic_qc.nf index 6e96260..33a86e7 100644 --- a/subworkflows/local/phylogenetic_qc.nf +++ b/subworkflows/local/phylogenetic_qc.nf @@ -40,5 +40,5 @@ workflow PHYLOGENETIC_QC{ ) emit: - kraken2_report = KRAKEN2_KRAKEN2.out.report.collect() + kraken2_report = KRAKEN2_KRAKEN2.out.report.map { meta, report -> [ report ] }.collect() } From 07bd918bbacf6c15b81cae36a19d003a868ceef2 Mon Sep 17 00:00:00 2001 From: ctuni Date: Mon, 28 Oct 2024 14:36:31 +0100 Subject: [PATCH 13/46] trying to pass kraken2 report to multiqc --- modules.json | 38 
++++++++++++++++++++------- subworkflows/local/phylogenetic_qc.nf | 7 ++++- workflows/seqinspector.nf | 6 +++-- 3 files changed, 38 insertions(+), 13 deletions(-) diff --git a/modules.json b/modules.json index 0e4c137..98b68fd 100644 --- a/modules.json +++ b/modules.json @@ -8,32 +8,44 @@ "fastqc": { "branch": "master", "git_sha": "285a50500f9e02578d90b3ce6382ea3c30216acd", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "kraken2/kraken2": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "krona/ktimporttaxonomy": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "krona/ktupdatetaxonomy": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "multiqc": { "branch": "master", "git_sha": "19ca321db5d8bd48923262c2eca6422359633491", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "untar": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] } } }, @@ -42,20 +54,26 @@ "utils_nextflow_pipeline": { "branch": "master", "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa", - "installed_by": ["subworkflows"] + "installed_by": [ + "subworkflows" + ] }, "utils_nfcore_pipeline": { "branch": "master", "git_sha": "92de218a329bfc9a9033116eb5f65fd270e72ba3", - "installed_by": ["subworkflows"] + "installed_by": [ + "subworkflows" + ] }, "utils_nfvalidation_plugin": { "branch": "master", "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa", - "installed_by": ["subworkflows"] + "installed_by": [ + "subworkflows" + ] } } } } } -} +} \ No newline at end of file diff --git a/subworkflows/local/phylogenetic_qc.nf b/subworkflows/local/phylogenetic_qc.nf index 33a86e7..f41fe22 
100644 --- a/subworkflows/local/phylogenetic_qc.nf +++ b/subworkflows/local/phylogenetic_qc.nf @@ -13,6 +13,8 @@ workflow PHYLOGENETIC_QC{ main: ch_reads = reads + ch_multiqc_files = Channel.empty() + ch_versions = Channel.empty() // // MODULE: Untar kraken2_db // @@ -28,6 +30,8 @@ workflow PHYLOGENETIC_QC{ params.kraken2_save_reads, params.kraken2_save_readclassifications ) + ch_multiqc_files = ch_multiqc_files.mix( KRAKEN2_KRAKEN2.out.report ) + ch_versions = ch_versions.mix( KRAKEN2_KRAKEN2.out.versions.first() ) //KRAKEN2_KRAKEN2.out.report.map { meta, report -> [ report ] }.collect() // @@ -40,5 +44,6 @@ workflow PHYLOGENETIC_QC{ ) emit: - kraken2_report = KRAKEN2_KRAKEN2.out.report.map { meta, report -> [ report ] }.collect() + versions = ch_versions + mqc = ch_multiqc_files } diff --git a/workflows/seqinspector.nf b/workflows/seqinspector.nf index 9dec85b..839a302 100644 --- a/workflows/seqinspector.nf +++ b/workflows/seqinspector.nf @@ -40,6 +40,7 @@ workflow SEQINSPECTOR { ch_samplesheet ) ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip) + ch_versions = ch_versions.mix(FASTQC.out.versions.first()) // // SUBWORKFLOW: Run kraken2 and produce krona plots @@ -48,8 +49,9 @@ workflow SEQINSPECTOR { ch_samplesheet ) - ch_multiqc_files = ch_multiqc_files.mix(PHYLOGENETIC_QC.out.kraken2_report) - ch_versions = ch_versions.mix(FASTQC.out.versions.first()) + ch_multiqc_files = ch_multiqc_files.mix(PHYLOGENETIC_QC.out.mqc.collect{it[1]}.ifEmpty([])) + ch_versions = ch_versions.mix(PHYLOGENETIC_QC.out.versions) + // // Collate and save software versions From f4dc19a22a88171bb351299f10be228027446a6b Mon Sep 17 00:00:00 2001 From: ctuni Date: Mon, 28 Oct 2024 14:41:02 +0100 Subject: [PATCH 14/46] why is multiqc not working? 
--- workflows/seqinspector.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflows/seqinspector.nf b/workflows/seqinspector.nf index 839a302..94c3893 100644 --- a/workflows/seqinspector.nf +++ b/workflows/seqinspector.nf @@ -96,7 +96,7 @@ workflow SEQINSPECTOR { MULTIQC_GLOBAL ( ch_multiqc_files - .map { meta, file -> file } + //.map { meta, file -> file } .mix(ch_multiqc_extra_files) .collect(), ch_multiqc_config.toList(), From d430a27da94e6e765663f8cd862af5bd4a48f5e7 Mon Sep 17 00:00:00 2001 From: ctuni Date: Mon, 28 Oct 2024 14:47:32 +0100 Subject: [PATCH 15/46] why is multiqc not working? --- workflows/seqinspector.nf | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/workflows/seqinspector.nf b/workflows/seqinspector.nf index 94c3893..e985c9f 100644 --- a/workflows/seqinspector.nf +++ b/workflows/seqinspector.nf @@ -39,7 +39,7 @@ workflow SEQINSPECTOR { FASTQC ( ch_samplesheet ) - ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip) + ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip.collect{it[1]}.ifEmpty([])) ch_versions = ch_versions.mix(FASTQC.out.versions.first()) // @@ -48,7 +48,6 @@ workflow SEQINSPECTOR { PHYLOGENETIC_QC ( ch_samplesheet ) - ch_multiqc_files = ch_multiqc_files.mix(PHYLOGENETIC_QC.out.mqc.collect{it[1]}.ifEmpty([])) ch_versions = ch_versions.mix(PHYLOGENETIC_QC.out.versions) From 3e0d357988ac43bbbe0b619c4b83c84f13c74fcd Mon Sep 17 00:00:00 2001 From: ctuni Date: Mon, 28 Oct 2024 14:54:40 +0100 Subject: [PATCH 16/46] ugh --- workflows/seqinspector.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflows/seqinspector.nf b/workflows/seqinspector.nf index e985c9f..facc0c9 100644 --- a/workflows/seqinspector.nf +++ b/workflows/seqinspector.nf @@ -95,7 +95,7 @@ workflow SEQINSPECTOR { MULTIQC_GLOBAL ( ch_multiqc_files - //.map { meta, file -> file } + .map { meta, file -> file } .mix(ch_multiqc_extra_files) .collect(), ch_multiqc_config.toList(), From 
b6c0d88a3584bd795678ecb416378543d2145d7a Mon Sep 17 00:00:00 2001 From: ctuni Date: Mon, 28 Oct 2024 15:55:39 +0100 Subject: [PATCH 17/46] removed kraken2 from multiqc files --- workflows/seqinspector.nf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/workflows/seqinspector.nf b/workflows/seqinspector.nf index facc0c9..7b90d8f 100644 --- a/workflows/seqinspector.nf +++ b/workflows/seqinspector.nf @@ -39,7 +39,7 @@ workflow SEQINSPECTOR { FASTQC ( ch_samplesheet ) - ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip.collect{it[1]}.ifEmpty([])) + ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip) ch_versions = ch_versions.mix(FASTQC.out.versions.first()) // @@ -48,7 +48,7 @@ workflow SEQINSPECTOR { PHYLOGENETIC_QC ( ch_samplesheet ) - ch_multiqc_files = ch_multiqc_files.mix(PHYLOGENETIC_QC.out.mqc.collect{it[1]}.ifEmpty([])) + //ch_multiqc_files = ch_multiqc_files.mix(PHYLOGENETIC_QC.out.mqc.collect{it[1]}.ifEmpty([])) ch_versions = ch_versions.mix(PHYLOGENETIC_QC.out.versions) From ff8efdd6d0479b1030abda141819e58b081113a5 Mon Sep 17 00:00:00 2001 From: ctuni Date: Mon, 28 Oct 2024 15:56:43 +0100 Subject: [PATCH 18/46] updated schema --- nextflow_schema.json | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index 9c0ad75..8d78de5 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -10,7 +10,10 @@ "type": "object", "fa_icon": "fas fa-terminal", "description": "Define where the pipeline should find input data and save output data.", - "required": ["input", "outdir"], + "required": [ + "input", + "outdir" + ], "properties": { "input": { "type": "string", @@ -89,7 +92,8 @@ "exists": true, "mimetype": "text/plain", "help_text": "Path to Kraken2 database file, built with kraken2 build", - "fa_icon": "fas fa-database" + "fa_icon": "fas fa-database", + "default": 
"https://github.com/nf-core/test-datasets/raw/taxprofiler/data/database/kraken2/testdb-kraken2.tar.gz" } } }, @@ -201,7 +205,14 @@ "description": "Method used to save pipeline results to output directory.", "help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.", "fa_icon": "fas fa-copy", - "enum": ["symlink", "rellink", "link", "copy", "copyNoFollow", "move"], + "enum": [ + "symlink", + "rellink", + "link", + "copy", + "copyNoFollow", + "move" + ], "hidden": true }, "email_on_fail": { @@ -312,4 +323,4 @@ "$ref": "#/definitions/generic_options" } ] -} +} \ No newline at end of file From 7b2f665db4f9962901232c0acbc55178ebebeb08 Mon Sep 17 00:00:00 2001 From: ctuni Date: Mon, 28 Oct 2024 16:07:16 +0100 Subject: [PATCH 19/46] fixing some things --- docs/output.md | 39 +++++++++++++++++++++++++++++++++++++++ nextflow_schema.json | 2 +- workflows/seqinspector.nf | 2 +- 3 files changed, 41 insertions(+), 2 deletions(-) diff --git a/docs/output.md b/docs/output.md index e14c3ad..61151b3 100644 --- a/docs/output.md +++ b/docs/output.md @@ -11,6 +11,8 @@ The directories listed below will be created in the results directory after the The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes data using the following steps: - [FastQC](#fastqc) - Raw read QC +- [Kraken2](#kraken2) - Phylogenetic assignment of reads using k-mers +- [Krona](#krona) - Interactive visualization of Kraken2 results - [MultiQC](#multiqc) - Aggregate report describing results and QC from the whole pipeline - [Pipeline information](#pipeline-information) - Report metrics generated during the workflow execution @@ -27,6 +29,43 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d 
[FastQC](http://www.bioinformatics.babraham.ac.uk/projects/fastqc/) gives general quality metrics about your sequenced reads. It provides information about the quality score distribution across your reads, per base sequence content (%A/T/G/C), adapter contamination and overrepresented sequences. For further reading and documentation see the [FastQC help pages](http://www.bioinformatics.babraham.ac.uk/projects/fastqc/Help/). +### Kraken2 + +[Kraken](https://ccb.jhu.edu/software/kraken2/) is a taxonomic sequence classifier that assigns taxonomic labels to DNA sequences. Kraken examines the k-mers within a query sequence and uses the information within those k-mers to query a database. That database maps k-mers to the lowest common ancestor (LCA) of all genomes known to contain a given k-mer. + +
+Output files + +- `kraken2/` + - `_combined_reports.txt`: A combined profile of all samples aligned to a given database (as generated by `krakentools`) + - `/` + - `_.classified.fastq.gz`: FASTQ file containing all reads that had a hit against a reference in the database for a given sample + - `_.unclassified.fastq.gz`: FASTQ file containing all reads that did not have a hit in the database for a given sample + - `_.classifiedreads.txt`: A list of read IDs and the hits each read had against each database for a given sample + +
+ +The main taxonomic classification file from Kraken2 is the `_combined_reports.txt` or `*report.txt` file. The former provides you the broadest over view of the taxonomic classification results across all samples against a single database, where you get two columns for each sample e.g. `2_all` and `2_lvl`, as well as a summarised column summing up across all samples `tot_all` and `tot_lvl`. The latter gives you the most information for a single sample. The report file is also used for the taxpasta step. + +You will only receive the `.fastq` and `*classifiedreads.txt` file if you supply `--kraken2_save_reads` and/or `--kraken2_save_readclassifications` parameters to the pipeline. + +### Krona + +[Krona](https://github.com/marbl/Krona) allows the exploration of (metagenomic) hierarchical data with interactive zooming, multi-layered pie charts. + +Krona charts will be generated by the pipeline for supported tools (Kraken2, Centrifuge, Kaiju, and MALT) + +
+Output files + +- `krona/` + - `_.html`: per-tool/per-database interactive HTML file containing hierarchical piecharts + +
+ +The resulting HTML files can be loaded into your web browser for exploration. Each file will have a dropdown to allow you to switch between each sample aligned against the given database of the tool. + + ### MultiQC nf-core/seqinspector will generate the following MultiQC reports: diff --git a/nextflow_schema.json b/nextflow_schema.json index 0128633..a7aeead 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -266,4 +266,4 @@ "$ref": "#/$defs/generic_options" } ] -} \ No newline at end of file +} diff --git a/workflows/seqinspector.nf b/workflows/seqinspector.nf index e49f73d..282a95c 100644 --- a/workflows/seqinspector.nf +++ b/workflows/seqinspector.nf @@ -10,7 +10,7 @@ include { MULTIQC as MULTIQC_PER_TAG } from '../modules/nf-core/multiqc/main' include { PHYLOGENETIC_QC } from '../subworkflows/local/phylogenetic_qc' -include { paramsSummaryMap } from 'plugin/nf-schema' +include { paramsSummaryMap } from 'plugin/nf-validation' include { paramsSummaryMultiqc } from '../subworkflows/nf-core/utils_nfcore_pipeline' include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline' include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_seqinspector_pipeline' From ca597f76e6bbeaa5b21c12b1f66eeef0062e48e1 Mon Sep 17 00:00:00 2001 From: ctuni Date: Mon, 28 Oct 2024 16:11:26 +0100 Subject: [PATCH 20/46] changed schema version --- nextflow_schema.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index a7aeead..75a123c 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -1,5 +1,5 @@ { - "$schema": "https://json-schema.org/draft-07/schema", + "$schema": "https://json-schema.org/draft/2020-12/schema", "$id": "https://raw.githubusercontent.com/nf-core/seqinspector/master/nextflow_schema.json", "title": "nf-core/seqinspector pipeline parameters", "description": "Pipeline to QC your sequences", From 400a76b702b3812fb7901a28a4bd0a663940d02c Mon 
Sep 17 00:00:00 2001 From: ctuni Date: Mon, 28 Oct 2024 16:25:35 +0100 Subject: [PATCH 21/46] trying to unbreak things --- workflows/seqinspector.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflows/seqinspector.nf b/workflows/seqinspector.nf index 282a95c..e49f73d 100644 --- a/workflows/seqinspector.nf +++ b/workflows/seqinspector.nf @@ -10,7 +10,7 @@ include { MULTIQC as MULTIQC_PER_TAG } from '../modules/nf-core/multiqc/main' include { PHYLOGENETIC_QC } from '../subworkflows/local/phylogenetic_qc' -include { paramsSummaryMap } from 'plugin/nf-validation' +include { paramsSummaryMap } from 'plugin/nf-schema' include { paramsSummaryMultiqc } from '../subworkflows/nf-core/utils_nfcore_pipeline' include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline' include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_seqinspector_pipeline' From 4906699ce767f01ec8dedaa77eba6ccea4728f43 Mon Sep 17 00:00:00 2001 From: ctuni Date: Mon, 28 Oct 2024 16:32:24 +0100 Subject: [PATCH 22/46] added prettier --- docs/output.md | 1 - modules.json | 38 ++++++++++---------------------------- nextflow_schema.json | 14 ++------------ 3 files changed, 12 insertions(+), 41 deletions(-) diff --git a/docs/output.md b/docs/output.md index 61151b3..8a4c961 100644 --- a/docs/output.md +++ b/docs/output.md @@ -65,7 +65,6 @@ Krona charts will be generated by the pipeline for supported tools (Kraken2, Cen The resulting HTML files can be loaded into your web browser for exploration. Each file will have a dropdown to allow you to switch between each sample aligned against the given database of the tool. 
- ### MultiQC nf-core/seqinspector will generate the following MultiQC reports: diff --git a/modules.json b/modules.json index b3c1d5c..4600fff 100644 --- a/modules.json +++ b/modules.json @@ -8,44 +8,32 @@ "fastqc": { "branch": "master", "git_sha": "285a50500f9e02578d90b3ce6382ea3c30216acd", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "kraken2/kraken2": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "krona/ktimporttaxonomy": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "krona/ktupdatetaxonomy": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "multiqc": { "branch": "master", "git_sha": "19ca321db5d8bd48923262c2eca6422359633491", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "untar": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] } } }, @@ -54,26 +42,20 @@ "utils_nextflow_pipeline": { "branch": "master", "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa", - "installed_by": [ - "subworkflows" - ] + "installed_by": ["subworkflows"] }, "utils_nfcore_pipeline": { "branch": "master", "git_sha": "92de218a329bfc9a9033116eb5f65fd270e72ba3", - "installed_by": [ - "subworkflows" - ] + "installed_by": ["subworkflows"] }, "utils_nfschema_plugin": { "branch": "master", "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa", - "installed_by": [ - "subworkflows" - ] + "installed_by": ["subworkflows"] } } } } } -} \ No newline at end of file +} diff --git a/nextflow_schema.json b/nextflow_schema.json index 75a123c..c8c6dd3 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -10,10 +10,7 @@ "type": "object", "fa_icon": "fas 
fa-terminal", "description": "Define where the pipeline should find input data and save output data.", - "required": [ - "input", - "outdir" - ], + "required": ["input", "outdir"], "properties": { "input": { "type": "string", @@ -172,14 +169,7 @@ "description": "Method used to save pipeline results to output directory.", "help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.", "fa_icon": "fas fa-copy", - "enum": [ - "symlink", - "rellink", - "link", - "copy", - "copyNoFollow", - "move" - ], + "enum": ["symlink", "rellink", "link", "copy", "copyNoFollow", "move"], "hidden": true }, "email_on_fail": { From 54ce9035abb036946ec06d3313d33f7baeae5109 Mon Sep 17 00:00:00 2001 From: ctuni Date: Mon, 28 Oct 2024 16:35:24 +0100 Subject: [PATCH 23/46] further unbreaking things --- modules.json | 48 +++++++++++++------ .../utils_nfschema_plugin/tests/main.nf.test | 4 +- 2 files changed, 35 insertions(+), 17 deletions(-) diff --git a/modules.json b/modules.json index 4600fff..0ec1449 100644 --- a/modules.json +++ b/modules.json @@ -7,33 +7,45 @@ "nf-core": { "fastqc": { "branch": "master", - "git_sha": "285a50500f9e02578d90b3ce6382ea3c30216acd", - "installed_by": ["modules"] + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": [ + "modules" + ] }, "kraken2/kraken2": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "krona/ktimporttaxonomy": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "krona/ktupdatetaxonomy": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"] + 
"installed_by": [ + "modules" + ] }, "multiqc": { "branch": "master", - "git_sha": "19ca321db5d8bd48923262c2eca6422359633491", - "installed_by": ["modules"] + "git_sha": "cf17ca47590cc578dfb47db1c2a44ef86f89976d", + "installed_by": [ + "modules" + ] }, "untar": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] } } }, @@ -41,21 +53,27 @@ "nf-core": { "utils_nextflow_pipeline": { "branch": "master", - "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa", - "installed_by": ["subworkflows"] + "git_sha": "3aa0aec1d52d492fe241919f0c6100ebf0074082", + "installed_by": [ + "subworkflows" + ] }, "utils_nfcore_pipeline": { "branch": "master", - "git_sha": "92de218a329bfc9a9033116eb5f65fd270e72ba3", - "installed_by": ["subworkflows"] + "git_sha": "1b6b9a3338d011367137808b49b923515080e3ba", + "installed_by": [ + "subworkflows" + ] }, "utils_nfschema_plugin": { "branch": "master", - "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa", - "installed_by": ["subworkflows"] + "git_sha": "2fd2cd6d0e7b273747f32e465fdc6bcc3ae0814e", + "installed_by": [ + "subworkflows" + ] } } } } } -} +} \ No newline at end of file diff --git a/subworkflows/nf-core/utils_nfschema_plugin/tests/main.nf.test b/subworkflows/nf-core/utils_nfschema_plugin/tests/main.nf.test index 842dc43..8fb3016 100644 --- a/subworkflows/nf-core/utils_nfschema_plugin/tests/main.nf.test +++ b/subworkflows/nf-core/utils_nfschema_plugin/tests/main.nf.test @@ -42,7 +42,7 @@ nextflow_workflow { params { test_data = '' - outdir = 1 + outdir = null } workflow { @@ -94,7 +94,7 @@ nextflow_workflow { params { test_data = '' - outdir = 1 + outdir = null } workflow { From a3ef27bed80a7df8ccea7cbccddc9b69bc23a829 Mon Sep 17 00:00:00 2001 From: ctuni Date: Mon, 28 Oct 2024 16:36:34 +0100 Subject: [PATCH 24/46] re-updated schema --- nextflow_schema.json | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git 
a/nextflow_schema.json b/nextflow_schema.json index c8c6dd3..5edd071 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -10,7 +10,10 @@ "type": "object", "fa_icon": "fas fa-terminal", "description": "Define where the pipeline should find input data and save output data.", - "required": ["input", "outdir"], + "required": [ + "input", + "outdir" + ], "properties": { "input": { "type": "string", @@ -169,7 +172,14 @@ "description": "Method used to save pipeline results to output directory.", "help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.", "fa_icon": "fas fa-copy", - "enum": ["symlink", "rellink", "link", "copy", "copyNoFollow", "move"], + "enum": [ + "symlink", + "rellink", + "link", + "copy", + "copyNoFollow", + "move" + ], "hidden": true }, "email_on_fail": { @@ -256,4 +266,4 @@ "$ref": "#/$defs/generic_options" } ] -} +} \ No newline at end of file From 4b7aaade1784b29f21009316acffc59870a29426 Mon Sep 17 00:00:00 2001 From: ctuni Date: Mon, 28 Oct 2024 16:38:12 +0100 Subject: [PATCH 25/46] prettier --- modules.json | 38 ++++++++++---------------------------- nextflow_schema.json | 16 +++------------- 2 files changed, 13 insertions(+), 41 deletions(-) diff --git a/modules.json b/modules.json index 0ec1449..fe41766 100644 --- a/modules.json +++ b/modules.json @@ -8,44 +8,32 @@ "fastqc": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "kraken2/kraken2": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "krona/ktimporttaxonomy": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": 
[ - "modules" - ] + "installed_by": ["modules"] }, "krona/ktupdatetaxonomy": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "multiqc": { "branch": "master", "git_sha": "cf17ca47590cc578dfb47db1c2a44ef86f89976d", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "untar": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] } } }, @@ -54,26 +42,20 @@ "utils_nextflow_pipeline": { "branch": "master", "git_sha": "3aa0aec1d52d492fe241919f0c6100ebf0074082", - "installed_by": [ - "subworkflows" - ] + "installed_by": ["subworkflows"] }, "utils_nfcore_pipeline": { "branch": "master", "git_sha": "1b6b9a3338d011367137808b49b923515080e3ba", - "installed_by": [ - "subworkflows" - ] + "installed_by": ["subworkflows"] }, "utils_nfschema_plugin": { "branch": "master", "git_sha": "2fd2cd6d0e7b273747f32e465fdc6bcc3ae0814e", - "installed_by": [ - "subworkflows" - ] + "installed_by": ["subworkflows"] } } } } } -} \ No newline at end of file +} diff --git a/nextflow_schema.json b/nextflow_schema.json index 5edd071..c8c6dd3 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -10,10 +10,7 @@ "type": "object", "fa_icon": "fas fa-terminal", "description": "Define where the pipeline should find input data and save output data.", - "required": [ - "input", - "outdir" - ], + "required": ["input", "outdir"], "properties": { "input": { "type": "string", @@ -172,14 +169,7 @@ "description": "Method used to save pipeline results to output directory.", "help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. 
See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.", "fa_icon": "fas fa-copy", - "enum": [ - "symlink", - "rellink", - "link", - "copy", - "copyNoFollow", - "move" - ], + "enum": ["symlink", "rellink", "link", "copy", "copyNoFollow", "move"], "hidden": true }, "email_on_fail": { @@ -266,4 +256,4 @@ "$ref": "#/$defs/generic_options" } ] -} \ No newline at end of file +} From 0192830409eb3cdf9a9d73565a42d20c40672ac2 Mon Sep 17 00:00:00 2001 From: ctuni Date: Mon, 28 Oct 2024 17:04:30 +0100 Subject: [PATCH 26/46] forgot to change changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index b0b12de..4911d1f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,7 @@ Initial release of nf-core/seqinspector, created with the [nf-core](https://nf-c - [#20](https://github.com/nf-core/seqinspector/pull/20) Use tags to generate group reports - [#13](https://github.com/nf-core/seqinspector/pull/13) Generate reports per run, per project and per lane. - [#49](https://github.com/nf-core/seqinspector/pull/49) Merge with template 3.0.2. 
+- [#47](https://github.com/nf-core/seqinspector/pull/47) Added kraken2 subworkflow ### `Fixed` From a6f5c6b5903443e3a2818516e5ba486c3ce3f754 Mon Sep 17 00:00:00 2001 From: ctuni Date: Mon, 28 Oct 2024 17:22:44 +0100 Subject: [PATCH 27/46] added krona plots to the emit block and added check for uncompressed kraken2 db --- subworkflows/local/phylogenetic_qc.nf | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/subworkflows/local/phylogenetic_qc.nf b/subworkflows/local/phylogenetic_qc.nf index f41fe22..021e7d6 100644 --- a/subworkflows/local/phylogenetic_qc.nf +++ b/subworkflows/local/phylogenetic_qc.nf @@ -16,10 +16,15 @@ workflow PHYLOGENETIC_QC{ ch_multiqc_files = Channel.empty() ch_versions = Channel.empty() // - // MODULE: Untar kraken2_db + // MODULE: Untar kraken2_db or read it as it is if not compressed // - UNTAR_KRAKEN2_DB ( [ [:], params.kraken2_db ]) - ch_kraken2_db = UNTAR_KRAKEN2_DB.out.untar.map { it[1] } + if (params.kraken2_db.endsWith('.gz')) { + UNTAR_KRAKEN2_DB ( [ [:], params.kraken2_db ]) + ch_kraken2_db = UNTAR_KRAKEN2_DB.out.untar.map { it[1] } + ch_versions = ch_versions.mix(UNTAR.out.versions) + } else { + ch_kraken2_db = Channel.value([[:], file(params.kraken2_db, checkIfExists: true)]) + } // // MODULE: Perform kraken2 @@ -46,4 +51,5 @@ workflow PHYLOGENETIC_QC{ emit: versions = ch_versions mqc = ch_multiqc_files + krona_plots = KRONA_KTIMPORTTAXONOMY.out.html.collect() } From d8c31ec1dc6844ca590f970e2310ceefb0ccb9d6 Mon Sep 17 00:00:00 2001 From: ctuni Date: Tue, 29 Oct 2024 10:40:21 +0100 Subject: [PATCH 28/46] fixed typo --- subworkflows/local/phylogenetic_qc.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/subworkflows/local/phylogenetic_qc.nf b/subworkflows/local/phylogenetic_qc.nf index 021e7d6..8c96c43 100644 --- a/subworkflows/local/phylogenetic_qc.nf +++ b/subworkflows/local/phylogenetic_qc.nf @@ -21,7 +21,7 @@ workflow PHYLOGENETIC_QC{ if (params.kraken2_db.endsWith('.gz')) { 
UNTAR_KRAKEN2_DB ( [ [:], params.kraken2_db ]) ch_kraken2_db = UNTAR_KRAKEN2_DB.out.untar.map { it[1] } - ch_versions = ch_versions.mix(UNTAR.out.versions) + ch_versions = ch_versions.mix(UNTAR_KRAKEN2_DB.out.versions) } else { ch_kraken2_db = Channel.value([[:], file(params.kraken2_db, checkIfExists: true)]) } From 7e41b19171ba94f24dadab66c1dd2cefe9943a65 Mon Sep 17 00:00:00 2001 From: ctuni Date: Tue, 29 Oct 2024 13:06:13 +0100 Subject: [PATCH 29/46] several improvements --- conf/modules.config | 9 +++++++ modules.json | 38 ++++++++++++++++++++------- nextflow.config | 1 + nextflow_schema.json | 7 ++++- subworkflows/local/phylogenetic_qc.nf | 16 +++++------ workflows/seqinspector.nf | 8 +++--- 6 files changed, 56 insertions(+), 23 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index c883822..27058dd 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -22,6 +22,15 @@ process { ext.args = '--quiet' } + withName: UNTAR { + publishDir = [ + path: { "${params.outdir}/kraken2_db" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + enabled: params.save_uncompressed_k2db + ] + } + withName: 'MULTIQC_GLOBAL' { ext.args = { params.multiqc_title ? 
"--title \"$params.multiqc_title\"" : '' } publishDir = [ diff --git a/modules.json b/modules.json index fe41766..0ec1449 100644 --- a/modules.json +++ b/modules.json @@ -8,32 +8,44 @@ "fastqc": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "kraken2/kraken2": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "krona/ktimporttaxonomy": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "krona/ktupdatetaxonomy": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "multiqc": { "branch": "master", "git_sha": "cf17ca47590cc578dfb47db1c2a44ef86f89976d", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "untar": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] } } }, @@ -42,20 +54,26 @@ "utils_nextflow_pipeline": { "branch": "master", "git_sha": "3aa0aec1d52d492fe241919f0c6100ebf0074082", - "installed_by": ["subworkflows"] + "installed_by": [ + "subworkflows" + ] }, "utils_nfcore_pipeline": { "branch": "master", "git_sha": "1b6b9a3338d011367137808b49b923515080e3ba", - "installed_by": ["subworkflows"] + "installed_by": [ + "subworkflows" + ] }, "utils_nfschema_plugin": { "branch": "master", "git_sha": "2fd2cd6d0e7b273747f32e465fdc6bcc3ae0814e", - "installed_by": ["subworkflows"] + "installed_by": [ + "subworkflows" + ] } } } } } -} +} \ No newline at end of file diff --git a/nextflow.config b/nextflow.config index a4fd165..85d0c07 100644 --- a/nextflow.config +++ b/nextflow.config @@ -23,6 +23,7 @@ params { kraken2_db = 
'https://github.com/nf-core/test-datasets/raw/taxprofiler/data/database/kraken2/testdb-kraken2.tar.gz' kraken2_save_reads = false kraken2_save_readclassifications = false + save_uncompressed_k2db = false // MultiQC options multiqc_config = null diff --git a/nextflow_schema.json b/nextflow_schema.json index c8c6dd3..9416535 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -255,5 +255,10 @@ { "$ref": "#/$defs/generic_options" } - ] + ], + "properties": { + "save_uncompressed_k2db": { + "type": "boolean" + } + } } diff --git a/subworkflows/local/phylogenetic_qc.nf b/subworkflows/local/phylogenetic_qc.nf index 8c96c43..2512fd8 100644 --- a/subworkflows/local/phylogenetic_qc.nf +++ b/subworkflows/local/phylogenetic_qc.nf @@ -12,18 +12,18 @@ workflow PHYLOGENETIC_QC{ reads main: - ch_reads = reads - ch_multiqc_files = Channel.empty() - ch_versions = Channel.empty() + ch_reads = reads + ch_versions = Channel.empty() // // MODULE: Untar kraken2_db or read it as it is if not compressed // if (params.kraken2_db.endsWith('.gz')) { UNTAR_KRAKEN2_DB ( [ [:], params.kraken2_db ]) ch_kraken2_db = UNTAR_KRAKEN2_DB.out.untar.map { it[1] } - ch_versions = ch_versions.mix(UNTAR_KRAKEN2_DB.out.versions) + ch_versions = ch_versions.mix(UNTAR_KRAKEN2_DB.out.versions.first()) } else { - ch_kraken2_db = Channel.value([[:], file(params.kraken2_db, checkIfExists: true)]) + ch_kraken2_db = Channel.fromPath(params.kraken2_db, checkIfExists: true) + ch_kraken2_db = ch_kraken2_db.collect() } // @@ -35,9 +35,7 @@ workflow PHYLOGENETIC_QC{ params.kraken2_save_reads, params.kraken2_save_readclassifications ) - ch_multiqc_files = ch_multiqc_files.mix( KRAKEN2_KRAKEN2.out.report ) - ch_versions = ch_versions.mix( KRAKEN2_KRAKEN2.out.versions.first() ) - //KRAKEN2_KRAKEN2.out.report.map { meta, report -> [ report ] }.collect() + ch_versions = ch_versions.mix( KRAKEN2_KRAKEN2.out.versions.first()) // // MODULE: krona plot the kraken2 reports @@ -50,6 +48,6 @@ workflow PHYLOGENETIC_QC{ 
emit: versions = ch_versions - mqc = ch_multiqc_files + mqc = KRAKEN2_KRAKEN2.out.report krona_plots = KRONA_KTIMPORTTAXONOMY.out.html.collect() } diff --git a/workflows/seqinspector.nf b/workflows/seqinspector.nf index e49f73d..05a5342 100644 --- a/workflows/seqinspector.nf +++ b/workflows/seqinspector.nf @@ -46,15 +46,16 @@ workflow SEQINSPECTOR { PHYLOGENETIC_QC ( ch_samplesheet ) - //ch_multiqc_files = ch_multiqc_files.mix(PHYLOGENETIC_QC.out.mqc.collect{it[1]}.ifEmpty([])) - ch_versions = ch_versions.mix(PHYLOGENETIC_QC.out.versions) + // TODO: Uncomment this line for add the kraken2 report to the MultiQC (channel creation might need some further tweaking) + //ch_multiqc_files = ch_multiqc_files.mix(PHYLOGENETIC_QC.out) + ch_versions = ch_versions.mix(PHYLOGENETIC_QC.out.versions.flatten()) // // Collate and save software versions // softwareVersionsToYAML(ch_versions) - .collectFile( + .collectFile( storeDir: "${params.outdir}/pipeline_info", name: 'nf_core_' + 'pipeline_software_' + 'mqc_' + 'versions.yml', sort: true, @@ -92,6 +93,7 @@ workflow SEQINSPECTOR { ) ) + // .map { meta, file -> file } MULTIQC_GLOBAL ( ch_multiqc_files .map { meta, file -> file } From 82c68e76eca3654ac018f7bd7976e518f3ebfaf1 Mon Sep 17 00:00:00 2001 From: ctuni Date: Tue, 29 Oct 2024 14:06:24 +0100 Subject: [PATCH 30/46] prettier --- modules.json | 38 ++++++++++---------------------------- 1 file changed, 10 insertions(+), 28 deletions(-) diff --git a/modules.json b/modules.json index 0ec1449..fe41766 100644 --- a/modules.json +++ b/modules.json @@ -8,44 +8,32 @@ "fastqc": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "kraken2/kraken2": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "krona/ktimporttaxonomy": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - 
"installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "krona/ktupdatetaxonomy": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "multiqc": { "branch": "master", "git_sha": "cf17ca47590cc578dfb47db1c2a44ef86f89976d", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "untar": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] } } }, @@ -54,26 +42,20 @@ "utils_nextflow_pipeline": { "branch": "master", "git_sha": "3aa0aec1d52d492fe241919f0c6100ebf0074082", - "installed_by": [ - "subworkflows" - ] + "installed_by": ["subworkflows"] }, "utils_nfcore_pipeline": { "branch": "master", "git_sha": "1b6b9a3338d011367137808b49b923515080e3ba", - "installed_by": [ - "subworkflows" - ] + "installed_by": ["subworkflows"] }, "utils_nfschema_plugin": { "branch": "master", "git_sha": "2fd2cd6d0e7b273747f32e465fdc6bcc3ae0814e", - "installed_by": [ - "subworkflows" - ] + "installed_by": ["subworkflows"] } } } } } -} \ No newline at end of file +} From 3e9b674d1051adc82685150e2574dd5b93ff0c0c Mon Sep 17 00:00:00 2001 From: ctuni Date: Tue, 29 Oct 2024 15:46:14 +0100 Subject: [PATCH 31/46] updated citations --- CITATIONS.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/CITATIONS.md b/CITATIONS.md index ecbfb16..3cf66be 100644 --- a/CITATIONS.md +++ b/CITATIONS.md @@ -14,6 +14,14 @@ > Andrews, S. (2010). FastQC: A Quality Control Tool for High Throughput Sequence Data [Online]. +- [Kraken2](https://doi.org/10.1186/s13059-019-1891-0) + + > Wood, D. E., Lu, J., & Langmead, B. (2019). Improved metagenomic analysis with Kraken 2. Genome Biology, 20(1), 257. https://doi.org/10.1186/s13059-019-1891-0 + +- [Krona](https://doi.org/10.1186/1471-2105-12-385) + + > Ondov, B. D., Bergman, N. H., & Phillippy, A. M. (2011). 
Interactive metagenomic visualization in a Web browser. BMC Bioinformatics, 12. https://doi.org/10.1186/1471-2105-12-385 + - [MultiQC](https://pubmed.ncbi.nlm.nih.gov/27312411/) > Ewels P, Magnusson M, Lundin S, Käller M. MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics. 2016 Oct 1;32(19):3047-8. doi: 10.1093/bioinformatics/btw354. Epub 2016 Jun 16. PubMed PMID: 27312411; PubMed Central PMCID: PMC5039924. From dbb92c16abf243b6c0907a14e888cf6a74614143 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cristina=20Tu=C3=B1=C3=AD=20i=20Dom=C3=ADnguez?= Date: Tue, 29 Oct 2024 15:47:56 +0100 Subject: [PATCH 32/46] Update docs/output.md Co-authored-by: Natalia Garcia Garcia <122800769+nggvs@users.noreply.github.com> --- docs/output.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/output.md b/docs/output.md index 8a4c961..2bf3d5c 100644 --- a/docs/output.md +++ b/docs/output.md @@ -45,7 +45,7 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d -The main taxonomic classification file from Kraken2 is the `_combined_reports.txt` or `*report.txt` file. The former provides you the broadest over view of the taxonomic classification results across all samples against a single database, where you get two columns for each sample e.g. `2_all` and `2_lvl`, as well as a summarised column summing up across all samples `tot_all` and `tot_lvl`. The latter gives you the most information for a single sample. The report file is also used for the taxpasta step. +The main taxonomic classification file from Kraken2 is the `_combined_reports.txt` or `*report.txt` file. The former provides you the broadest overview of the taxonomic classification results across all samples against a single database, where you get two columns for each sample e.g. `2_all` and `2_lvl`, as well as a summarised column summing up across all samples `tot_all` and `tot_lvl`. 
The latter gives you the most information for a single sample. The report file is also used for the taxpasta step. You will only receive the `.fastq` and `*classifiedreads.txt` file if you supply `--kraken2_save_reads` and/or `--kraken2_save_readclassifications` parameters to the pipeline. From 0324ead8e9df0f5b046c3ad516ccab45af2ec62b Mon Sep 17 00:00:00 2001 From: ctuni Date: Tue, 29 Oct 2024 15:54:20 +0100 Subject: [PATCH 33/46] updated output --- docs/output.md | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/docs/output.md b/docs/output.md index 8a4c961..b81f5a4 100644 --- a/docs/output.md +++ b/docs/output.md @@ -37,7 +37,7 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d Output files - `kraken2/` - - `_combined_reports.txt`: A combined profile of all samples aligned to a given database (as generated by `krakentools`) + - `.kraken2.report.txt`: A report containing information on the phylogenetic assignment of reads in a given sample. - `/` - `_.classified.fastq.gz`: FASTQ file containing all reads that had a hit against a reference in the database for a given sample - `_.unclassified.fastq.gz`: FASTQ file containing all reads that did not have a hit in the database for a given sample @@ -45,8 +45,7 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d -The main taxonomic classification file from Kraken2 is the `_combined_reports.txt` or `*report.txt` file. The former provides you the broadest over view of the taxonomic classification results across all samples against a single database, where you get two columns for each sample e.g. `2_all` and `2_lvl`, as well as a summarised column summing up across all samples `tot_all` and `tot_lvl`. The latter gives you the most information for a single sample. The report file is also used for the taxpasta step. - +The main taxonomic classification file from Kraken2 is the `*report.txt` file. 
It gives you the most information for a single sample. You will only receive the `.fastq` and `*classifiedreads.txt` file if you supply `--kraken2_save_reads` and/or `--kraken2_save_readclassifications` parameters to the pipeline. ### Krona From 828abd6b642c63f3a8561f930fce5ea706d285be Mon Sep 17 00:00:00 2001 From: ctuni Date: Wed, 30 Oct 2024 11:06:28 +0100 Subject: [PATCH 34/46] added kraken2 reports to multiqc --- conf/modules.config | 18 +++++++++++++++++- subworkflows/local/phylogenetic_qc.nf | 4 ++-- workflows/seqinspector.nf | 5 ++--- 3 files changed, 21 insertions(+), 6 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 27058dd..55ce845 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -22,7 +22,23 @@ process { ext.args = '--quiet' } - withName: UNTAR { + withName: 'KRAKEN2_KRAKEN2' { + publishDir = [ + path: { "${params.outdir}/kraken2_reports" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: 'KRONA_KTIMPORTTAXONOMY' { + publishDir = [ + path: { "${params.outdir}/kraken2_reports/krona_reports" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + ] + } + + withName: 'UNTAR' { publishDir = [ path: { "${params.outdir}/kraken2_db" }, mode: params.publish_dir_mode, diff --git a/subworkflows/local/phylogenetic_qc.nf b/subworkflows/local/phylogenetic_qc.nf index 2512fd8..67c8bb2 100644 --- a/subworkflows/local/phylogenetic_qc.nf +++ b/subworkflows/local/phylogenetic_qc.nf @@ -20,7 +20,7 @@ workflow PHYLOGENETIC_QC{ if (params.kraken2_db.endsWith('.gz')) { UNTAR_KRAKEN2_DB ( [ [:], params.kraken2_db ]) ch_kraken2_db = UNTAR_KRAKEN2_DB.out.untar.map { it[1] } - ch_versions = ch_versions.mix(UNTAR_KRAKEN2_DB.out.versions.first()) + ch_versions = ch_versions.mix(UNTAR_KRAKEN2_DB.out.versions) } else { ch_kraken2_db = Channel.fromPath(params.kraken2_db, checkIfExists: true) ch_kraken2_db = ch_kraken2_db.collect() @@ -35,7 +35,7 @@ workflow PHYLOGENETIC_QC{ params.kraken2_save_reads, params.kraken2_save_readclassifications ) - ch_versions = ch_versions.mix( KRAKEN2_KRAKEN2.out.versions.first()) + ch_versions = ch_versions.mix( KRAKEN2_KRAKEN2.out.versions) // // MODULE: krona plot the kraken2 reports diff --git a/workflows/seqinspector.nf b/workflows/seqinspector.nf index 05a5342..4acba4c 100644 --- a/workflows/seqinspector.nf +++ b/workflows/seqinspector.nf @@ -46,9 +46,8 @@ workflow SEQINSPECTOR { PHYLOGENETIC_QC ( ch_samplesheet ) - // TODO: Uncomment this line for add the kraken2 report to the MultiQC (channel creation might need some further tweaking) - //ch_multiqc_files = ch_multiqc_files.mix(PHYLOGENETIC_QC.out) - ch_versions = ch_versions.mix(PHYLOGENETIC_QC.out.versions.flatten()) + ch_multiqc_files = ch_multiqc_files.mix(PHYLOGENETIC_QC.out.mqc) + ch_versions = ch_versions.mix(PHYLOGENETIC_QC.out.versions.first()) // From 3a22a9e17f21d5fdea250f7e9d9202637ff22435 Mon Sep 17 00:00:00 2001 From: ctuni Date: Wed, 30 Oct 2024 11:09:03 +0100 Subject: [PATCH 35/46] schema --- nextflow_schema.json | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git 
a/nextflow_schema.json b/nextflow_schema.json index 9416535..91471e4 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -10,7 +10,10 @@ "type": "object", "fa_icon": "fas fa-terminal", "description": "Define where the pipeline should find input data and save output data.", - "required": ["input", "outdir"], + "required": [ + "input", + "outdir" + ], "properties": { "input": { "type": "string", @@ -169,7 +172,14 @@ "description": "Method used to save pipeline results to output directory.", "help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.", "fa_icon": "fas fa-copy", - "enum": ["symlink", "rellink", "link", "copy", "copyNoFollow", "move"], + "enum": [ + "symlink", + "rellink", + "link", + "copy", + "copyNoFollow", + "move" + ], "hidden": true }, "email_on_fail": { @@ -261,4 +271,4 @@ "type": "boolean" } } -} +} \ No newline at end of file From b698d4117e2eed45fb5f574c8eb965bb5b6b1b8a Mon Sep 17 00:00:00 2001 From: ctuni Date: Wed, 30 Oct 2024 11:28:18 +0100 Subject: [PATCH 36/46] prettier --- modules.json | 2 +- nextflow_schema.json | 16 +++------------- 2 files changed, 4 insertions(+), 14 deletions(-) diff --git a/modules.json b/modules.json index 9d8e596..cc0a254 100644 --- a/modules.json +++ b/modules.json @@ -30,7 +30,7 @@ "git_sha": "cf17ca47590cc578dfb47db1c2a44ef86f89976d", "installed_by": ["modules"] }, - "untar": { + "untar": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", "installed_by": ["modules"] diff --git a/nextflow_schema.json b/nextflow_schema.json index d1f6489..da27d88 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -10,10 +10,7 @@ "type": "object", "fa_icon": "fas fa-terminal", "description": "Define where the pipeline should find input data and 
save output data.", - "required": [ - "input", - "outdir" - ], + "required": ["input", "outdir"], "properties": { "input": { "type": "string", @@ -179,14 +176,7 @@ "description": "Method used to save pipeline results to output directory.", "help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.", "fa_icon": "fas fa-copy", - "enum": [ - "symlink", - "rellink", - "link", - "copy", - "copyNoFollow", - "move" - ], + "enum": ["symlink", "rellink", "link", "copy", "copyNoFollow", "move"], "hidden": true }, "email_on_fail": { @@ -278,4 +268,4 @@ "type": "boolean" } } -} \ No newline at end of file +} From 6dd485955c1a2b73b7fe4f6b6b1b46ece29ff768 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cristina=20Tu=C3=B1=C3=AD=20i=20Dom=C3=ADnguez?= Date: Wed, 30 Oct 2024 11:31:47 +0100 Subject: [PATCH 37/46] Update workflows/seqinspector.nf Removed debugging commented statement Co-authored-by: Natalia Garcia Garcia <122800769+nggvs@users.noreply.github.com> --- workflows/seqinspector.nf | 1 - 1 file changed, 1 deletion(-) diff --git a/workflows/seqinspector.nf b/workflows/seqinspector.nf index 0add142..b94d955 100644 --- a/workflows/seqinspector.nf +++ b/workflows/seqinspector.nf @@ -111,7 +111,6 @@ workflow SEQINSPECTOR { ) ) - // .map { meta, file -> file } MULTIQC_GLOBAL ( ch_multiqc_files .map { meta, file -> file } From 07dee47841125c4590c20f87b97fdf9d4a9aad70 Mon Sep 17 00:00:00 2001 From: ctuni Date: Wed, 30 Oct 2024 12:04:56 +0100 Subject: [PATCH 38/46] disabled the publish of the taxonomy file --- conf/modules.config | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/conf/modules.config b/conf/modules.config index efc3cdc..d980ccb 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -34,6 +34,15 @@ process { ] } + withName: 
'KRONA_KUPDATETAXONOMY' { + publishDir = [ + path: { "${params.outdir}/kraken2_reports/krona_reports" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + enabled: false + ] + } + withName: 'KRONA_KTIMPORTTAXONOMY' { publishDir = [ path: { "${params.outdir}/kraken2_reports/krona_reports" }, From a60d5738e5dc953c5a41574b18326640e628c60c Mon Sep 17 00:00:00 2001 From: ctuni Date: Wed, 30 Oct 2024 12:15:45 +0100 Subject: [PATCH 39/46] removed default database for a null one --- nextflow.config | 2 +- nextflow_schema.json | 5 ++--- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/nextflow.config b/nextflow.config index 7d9560c..20069a3 100644 --- a/nextflow.config +++ b/nextflow.config @@ -20,7 +20,7 @@ params { igenomes_ignore = false // Kraken2 options - kraken2_db = 'https://github.com/nf-core/test-datasets/raw/taxprofiler/data/database/kraken2/testdb-kraken2.tar.gz' + kraken2_db = null kraken2_save_reads = false kraken2_save_readclassifications = false save_uncompressed_k2db = false diff --git a/nextflow_schema.json b/nextflow_schema.json index da27d88..f5340c5 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -95,9 +95,8 @@ "type": "string", "exists": true, "mimetype": "text/plain", - "help_text": "Path to Kraken2 database file, built with kraken2 build", - "fa_icon": "fas fa-database", - "default": "https://github.com/nf-core/test-datasets/raw/taxprofiler/data/database/kraken2/testdb-kraken2.tar.gz" + "description": "Path to Kraken2 database file, either a gzipped file or the path to the uncompressed database.", + "fa_icon": "fas fa-database" }, "igenomes_base": { "type": "string", From 24888870a8ae2a6ba30b0a0425411b4ce179e7d2 Mon Sep 17 00:00:00 2001 From: ctuni Date: Wed, 30 Oct 2024 12:22:11 +0100 Subject: [PATCH 40/46] added test kraken2 database to the test configs --- conf/test.config | 4 ++++ conf/test_full.config | 4 ++++ 2 files changed, 8 insertions(+) diff --git 
a/conf/test.config b/conf/test.config index 76a7ad0..68e64f5 100644 --- a/conf/test.config +++ b/conf/test.config @@ -29,4 +29,8 @@ params { // Genome references genome = 'R64-1-1' + + // Kraken options + // Database information: https://github.com/nf-core/test-datasets/blob/taxprofiler/README.md#kraken2 + kraken2_db = 'https://github.com/nf-core/test-datasets/raw/taxprofiler/data/database/kraken2/testdb-kraken2.tar.gz' } diff --git a/conf/test_full.config b/conf/test_full.config index 53b2288..30f9851 100644 --- a/conf/test_full.config +++ b/conf/test_full.config @@ -21,4 +21,8 @@ params { // Genome references genome = 'R64-1-1' + + // Kraken + // Database information: https://github.com/nf-core/test-datasets/blob/taxprofiler/README.md#kraken2 + kraken2_db = 'https://github.com/nf-core/test-datasets/raw/taxprofiler/data/database/kraken2/testdb-kraken2.tar.gz' } From e28397a8f2c995e772da4a94150186b4f6c6921e Mon Sep 17 00:00:00 2001 From: ctuni Date: Wed, 30 Oct 2024 12:25:19 +0100 Subject: [PATCH 41/46] fixed typo --- conf/modules.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/modules.config b/conf/modules.config index d980ccb..982202d 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -34,7 +34,7 @@ process { ] } - withName: 'KRONA_KUPDATETAXONOMY' { + withName: 'KRONA_KTUPDATETAXONOMY' { publishDir = [ path: { "${params.outdir}/kraken2_reports/krona_reports" }, mode: params.publish_dir_mode, From 9d8f631ce5beb7637701be60e64197750b8fa10e Mon Sep 17 00:00:00 2001 From: ctuni Date: Wed, 30 Oct 2024 15:31:46 +0100 Subject: [PATCH 42/46] miseq test is failing --- tests/MiSeq.main.nf.test.snap | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/MiSeq.main.nf.test.snap b/tests/MiSeq.main.nf.test.snap index de0afa2..2e87e84 100644 --- a/tests/MiSeq.main.nf.test.snap +++ b/tests/MiSeq.main.nf.test.snap @@ -1,9 +1,9 @@ { "MiSeq data test": { "content": [ - 
"multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", + "multiqc_citations.txt:md5,6c7f11ec8908181ce5daed75ddb6dc32", "multiqc_fastqc.txt:md5,7b1b7fd457b60404768045b148d4c0a8", - "multiqc_general_stats.txt:md5,5b28a83b14cb2fe88d084d08900ebdbf" + "multiqc_general_stats.txt:md5,be9f8771c6f9b8f306fb33de1f29049f" ], "meta": { "nf-test": "0.9.1", @@ -11,4 +11,4 @@ }, "timestamp": "2024-10-30T09:08:29.692511055" } -} \ No newline at end of file +} From 5d1a15b0b3af22e2d0802dcff182c6af4f399d02 Mon Sep 17 00:00:00 2001 From: ctuni Date: Wed, 30 Oct 2024 15:45:46 +0100 Subject: [PATCH 43/46] miseq test is failing --- tests/MiSeq.main.nf.test.snap | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/MiSeq.main.nf.test.snap b/tests/MiSeq.main.nf.test.snap index 2e87e84..e08de7d 100644 --- a/tests/MiSeq.main.nf.test.snap +++ b/tests/MiSeq.main.nf.test.snap @@ -3,7 +3,7 @@ "content": [ "multiqc_citations.txt:md5,6c7f11ec8908181ce5daed75ddb6dc32", "multiqc_fastqc.txt:md5,7b1b7fd457b60404768045b148d4c0a8", - "multiqc_general_stats.txt:md5,be9f8771c6f9b8f306fb33de1f29049f" + "multiqc_general_stats.txt:md5,c1e396db353c3c9455da8f4003bd0c12" ], "meta": { "nf-test": "0.9.1", From 4729adafabe9deb82d2df6e76e3da4ab97ad41d1 Mon Sep 17 00:00:00 2001 From: ctuni Date: Wed, 30 Oct 2024 15:53:00 +0100 Subject: [PATCH 44/46] promethion test is failing --- tests/PromethION.main.nf.test.snap | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/PromethION.main.nf.test.snap b/tests/PromethION.main.nf.test.snap index dfa4eb6..32ae86c 100644 --- a/tests/PromethION.main.nf.test.snap +++ b/tests/PromethION.main.nf.test.snap @@ -1,9 +1,9 @@ { "PromethION data test": { "content": [ - "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", + "multiqc_citations.txt:md5,6c7f11ec8908181ce5daed75ddb6dc32", "multiqc_fastqc.txt:md5,1a4b472e13cadc770832b0e20d1de7b0", - "multiqc_general_stats.txt:md5,409cefc7f17f95d176ced6032bf8fb32" + 
"multiqc_general_stats.txt:md5,52a5384aa9840efebf98863027829393" ], "meta": { "nf-test": "0.9.1", @@ -11,4 +11,4 @@ }, "timestamp": "2024-10-30T09:12:03.048502046" } -} \ No newline at end of file +} From bb8e3b0cefbc915f1e4a6ff608885a9aeb3729ee Mon Sep 17 00:00:00 2001 From: ctuni Date: Wed, 30 Oct 2024 16:01:35 +0100 Subject: [PATCH 45/46] novaseq test is failing --- tests/NovaSeq6000.main.nf.test.snap | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/tests/NovaSeq6000.main.nf.test.snap b/tests/NovaSeq6000.main.nf.test.snap index 62ccd4a..14ed6f3 100644 --- a/tests/NovaSeq6000.main.nf.test.snap +++ b/tests/NovaSeq6000.main.nf.test.snap @@ -1,21 +1,21 @@ { "NovaSeq6000 data test": { "content": [ - "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", + "multiqc_citations.txt:md5,6c7f11ec8908181ce5daed75ddb6dc32", "multiqc_fastqc.txt:md5,3730f9046b20ac5c17a86db0a33f8d5d", - "multiqc_general_stats.txt:md5,25abe0f6a35eb4a3b056fc3cf5c13732", - "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", + "multiqc_general_stats.txt:md5,064e9eeafdf7b52d4d4e63515cca87b8", + "multiqc_citations.txt:md5,6c7f11ec8908181ce5daed75ddb6dc32", "multiqc_fastqc.txt:md5,8284e25ccc21041cf3b5a32eb6a51e78", - "multiqc_general_stats.txt:md5,90ee35137492b80aab36ef67f72d8921", - "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", + "multiqc_general_stats.txt:md5,6f85b0c7f383c2f212b2c4504fb3698c", + "multiqc_citations.txt:md5,6c7f11ec8908181ce5daed75ddb6dc32", "multiqc_fastqc.txt:md5,f38ffdc112c73af3a41ed15848a3761f", - "multiqc_general_stats.txt:md5,d62a2fc39e674d98783d408791803148", - "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", + "multiqc_general_stats.txt:md5,3ce48439ef89139f2173e7031fa87385", + "multiqc_citations.txt:md5,6c7f11ec8908181ce5daed75ddb6dc32", "multiqc_fastqc.txt:md5,7ff71ceb8ecdf086331047f8860c3347", - "multiqc_general_stats.txt:md5,2f09b8f199ac40cf67ba50843cebd29c", - 
"multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", + "multiqc_general_stats.txt:md5,061de9865cc76ce26e76a2114f483ac6", + "multiqc_citations.txt:md5,6c7f11ec8908181ce5daed75ddb6dc32", "multiqc_fastqc.txt:md5,519ff344a896ac369bba4d5c5b8be7b5", - "multiqc_general_stats.txt:md5,6a1c16f068d7ba3a9225a17eb570ed9a" + "multiqc_general_stats.txt:md5,85f10745c22f43194babb3b29f9d0793" ], "meta": { "nf-test": "0.9.1", @@ -23,4 +23,4 @@ }, "timestamp": "2024-10-30T09:09:57.158871165" } -} \ No newline at end of file +} From 758ce127a7a0b5e39139ade3c191dc9e66b8f768 Mon Sep 17 00:00:00 2001 From: ctuni Date: Wed, 30 Oct 2024 16:12:44 +0100 Subject: [PATCH 46/46] novaseq test is failing --- tests/NovaSeq6000.main.nf.test.snap | 10 ++++----- tests/NovaSeq6000.main_subsample.nf.test.snap | 22 +++++++++---------- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/tests/NovaSeq6000.main.nf.test.snap b/tests/NovaSeq6000.main.nf.test.snap index 14ed6f3..23437d7 100644 --- a/tests/NovaSeq6000.main.nf.test.snap +++ b/tests/NovaSeq6000.main.nf.test.snap @@ -3,19 +3,19 @@ "content": [ "multiqc_citations.txt:md5,6c7f11ec8908181ce5daed75ddb6dc32", "multiqc_fastqc.txt:md5,3730f9046b20ac5c17a86db0a33f8d5d", - "multiqc_general_stats.txt:md5,064e9eeafdf7b52d4d4e63515cca87b8", + "multiqc_general_stats.txt:md5,be9f8771c6f9b8f306fb33de1f29049f", "multiqc_citations.txt:md5,6c7f11ec8908181ce5daed75ddb6dc32", "multiqc_fastqc.txt:md5,8284e25ccc21041cf3b5a32eb6a51e78", - "multiqc_general_stats.txt:md5,6f85b0c7f383c2f212b2c4504fb3698c", + "multiqc_general_stats.txt:md5,472497f420b4cdcb9d25d83b2ae08cda", "multiqc_citations.txt:md5,6c7f11ec8908181ce5daed75ddb6dc32", "multiqc_fastqc.txt:md5,f38ffdc112c73af3a41ed15848a3761f", - "multiqc_general_stats.txt:md5,3ce48439ef89139f2173e7031fa87385", + "multiqc_general_stats.txt:md5,04fdf3e35bf7e47c4652b3d01884593c", "multiqc_citations.txt:md5,6c7f11ec8908181ce5daed75ddb6dc32", "multiqc_fastqc.txt:md5,7ff71ceb8ecdf086331047f8860c3347", - 
"multiqc_general_stats.txt:md5,061de9865cc76ce26e76a2114f483ac6", + "multiqc_general_stats.txt:md5,a6754bcb21f5a850680c9acb9f30d900", "multiqc_citations.txt:md5,6c7f11ec8908181ce5daed75ddb6dc32", "multiqc_fastqc.txt:md5,519ff344a896ac369bba4d5c5b8be7b5", - "multiqc_general_stats.txt:md5,85f10745c22f43194babb3b29f9d0793" + "multiqc_general_stats.txt:md5,e51d8764200effa1563b744a9c1450fc" ], "meta": { "nf-test": "0.9.1", diff --git a/tests/NovaSeq6000.main_subsample.nf.test.snap b/tests/NovaSeq6000.main_subsample.nf.test.snap index 651973b..1afdeed 100644 --- a/tests/NovaSeq6000.main_subsample.nf.test.snap +++ b/tests/NovaSeq6000.main_subsample.nf.test.snap @@ -1,21 +1,21 @@ { "NovaSeq6000 data test sample size": { "content": [ - "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", + "multiqc_citations.txt:md5,6c7f11ec8908181ce5daed75ddb6dc32", "multiqc_fastqc.txt:md5,aba942d1e6996b579f19798e5673f514", - "multiqc_general_stats.txt:md5,ad1ec9c64cbdb1131a26aeb6de51e31c", - "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", + "multiqc_general_stats.txt:md5,064e9eeafdf7b52d4d4e63515cca87b8", + "multiqc_citations.txt:md5,6c7f11ec8908181ce5daed75ddb6dc32", "multiqc_fastqc.txt:md5,aa1b8d6adae86005ea7a8b2e901099b8", - "multiqc_general_stats.txt:md5,c73c8d10568a56f6534d280fff701e60", - "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", + "multiqc_general_stats.txt:md5,6f85b0c7f383c2f212b2c4504fb3698c", + "multiqc_citations.txt:md5,6c7f11ec8908181ce5daed75ddb6dc32", "multiqc_fastqc.txt:md5,ff996e1d3dc4a46e0c9535e54d51ccab", - "multiqc_general_stats.txt:md5,834e1868b887171cfda72029bbbe2d3f", - "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", + "multiqc_general_stats.txt:md5,3ce48439ef89139f2173e7031fa87385", + "multiqc_citations.txt:md5,6c7f11ec8908181ce5daed75ddb6dc32", "multiqc_fastqc.txt:md5,3df36ecfe76b25b0c22dcda84bce2b3b", - "multiqc_general_stats.txt:md5,274a001b007521970f14d68bd176e5be", - 
"multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", + "multiqc_general_stats.txt:md5,061de9865cc76ce26e76a2114f483ac6", + "multiqc_citations.txt:md5,6c7f11ec8908181ce5daed75ddb6dc32", "multiqc_fastqc.txt:md5,ce61b4ce4b1d76ec3f20de3bf0c9ec7f", - "multiqc_general_stats.txt:md5,d476ad59458a035a329605d5284b6012" + "multiqc_general_stats.txt:md5,85f10745c22f43194babb3b29f9d0793" ], "meta": { "nf-test": "0.9.1", @@ -23,4 +23,4 @@ }, "timestamp": "2024-10-30T09:37:46.182191597" } -} \ No newline at end of file +}