From 40f27a5f7ac3e1cc92885b05851bdb44c9da67ef Mon Sep 17 00:00:00 2001 From: Zachary Foster Date: Wed, 30 Oct 2024 13:03:49 -0700 Subject: [PATCH] fixed bugs caused by merge --- .nf-core.yml | 2 +- assets/schema_input.json | 56 ++++-- conf/aps_workshop.config | 2 +- conf/base.config | 6 +- conf/chaos.config | 4 +- conf/complex.config | 4 +- conf/complex_minimal.config | 4 +- conf/complex_small.config | 4 +- conf/fungi_n81.config | 4 +- conf/high_complexity_kpneumoniae.config | 4 +- conf/mixed.config | 2 +- conf/mixed_bacteria.config | 4 +- conf/modules.config | 172 +++++++++--------- conf/mycobacteroides.config | 4 +- conf/mycobacteroides_small.config | 4 +- conf/ramorum_small.config | 4 +- conf/wagner_2023.config | 4 +- conf/wagner_2023_small.config | 4 +- conf/xanthomonas.config | 4 +- conf/xanthomonas_small.config | 4 +- initialclassification/families.txt | 0 initialclassification/genera.txt | 0 initialclassification/species.txt | 0 main.nf | 48 ++--- modules/nf-core/multiqc/main.nf | 6 +- nextflow.config | 87 ++++----- nextflow_schema.json | 37 +--- .../main.nf | 64 +++---- workflows/pathogensurveillance.nf | 153 ++++------------ workflows/plantpathsurveil.nf | 2 +- 30 files changed, 291 insertions(+), 402 deletions(-) delete mode 100644 initialclassification/families.txt delete mode 100644 initialclassification/genera.txt delete mode 100644 initialclassification/species.txt rename subworkflows/local/{utils_nfcore_plantpathsurveil_pipeline => utils_nfcore_pathogensurveillance_pipeline}/main.nf (86%) diff --git a/.nf-core.yml b/.nf-core.yml index 853b2bb3..e1f3d235 100644 --- a/.nf-core.yml +++ b/.nf-core.yml @@ -9,7 +9,7 @@ template: description: Surveillance of plant pathogens using high-throughput sequencing force: false is_nfcore: true - name: plantpathsurveil + name: pathogensurveillance org: nf-core outdir: . 
skip_features: null diff --git a/assets/schema_input.json b/assets/schema_input.json index ba1b72e2..f83bf8ef 100644 --- a/assets/schema_input.json +++ b/assets/schema_input.json @@ -1,33 +1,65 @@ { "$schema": "https://json-schema.org/draft/2020-12/schema", - "$id": "https://raw.githubusercontent.com/nf-core/plantpathsurveil/master/assets/schema_input.json", - "title": "nf-core/plantpathsurveil pipeline - params.input schema", - "description": "Schema for the file provided with params.input", + "$id": "https://raw.githubusercontent.com/nf-core/pathogensurveillance/master/assets/schema_input.json", + "title": "nf-core/pathogensurveillance pipeline - params.sample_data schema", + "description": "Schema for the file provided with params.sample_data", "type": "array", "items": { "type": "object", "properties": { - "sample": { - "type": "string", - "pattern": "^\\S+$", - "errorMessage": "Sample name must be provided and cannot contain spaces", - "meta": ["id"] + "sample_id": { + "type": "string" + }, + "name": { + "type": "string" }, - "fastq_1": { + "description": { + "type": "string" + }, + "path": { "type": "string", "format": "file-path", "exists": true, "pattern": "^\\S+\\.f(ast)?q\\.gz$", "errorMessage": "FastQ file for reads 1 must be provided, cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'" }, - "fastq_2": { + "path_2": { "type": "string", "format": "file-path", "exists": true, "pattern": "^\\S+\\.f(ast)?q\\.gz$", "errorMessage": "FastQ file for reads 2 cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'" - } + }, + "ncbi_accession": { + "type": "string", + "pattern": "^[A-Z]{3}[0-9]+$", + "errorMessage": "The `ncbi_accession` column contains at least one value that does not look like an NCBI accession (3 capital letters followed by numbers)." 
+ }, + "ncbi_query": { + "type": "string" + }, + "ncbi_query_max": { + "type": "string" + }, + "sequence_type": { + "type": "string" + }, + "report_group_ids": { + "type": "string" + }, + "color_by": { + "type": "string" + }, + "ploidy": { + "type": "number" + }, + "enabled": { + "type": "boolean" + }, + "ref_group_ids": { + "type": "string" + } }, - "required": ["sample", "fastq_1"] + "required": [] } } diff --git a/conf/aps_workshop.config b/conf/aps_workshop.config index e9323f31..bbaff440 100644 --- a/conf/aps_workshop.config +++ b/conf/aps_workshop.config @@ -16,7 +16,7 @@ params { // Input data sample_data = 'test/data/metadata/aps_workshop.csv' - out_dir = 'aps_workshop_output' + outdir = 'aps_workshop_output' download_bakta_db = true cache_type = 'lenient' } diff --git a/conf/base.config b/conf/base.config index 08290d07..442c5d54 100644 --- a/conf/base.config +++ b/conf/base.config @@ -62,19 +62,19 @@ process { report { enabled = true - file = "${params.out_dir}/pipeline_info/execution_report.html" + file = "${params.outdir}/pipeline_info/execution_report.html" overwrite = true } timeline { enabled = true - file = "${params.out_dir}/pipeline_info/timeline_report.html" + file = "${params.outdir}/pipeline_info/timeline_report.html" overwrite = true } trace { enabled = true - file = "${params.out_dir}/pipeline_info/trace_report.tsv" + file = "${params.outdir}/pipeline_info/trace_report.tsv" overwrite = true fields = "task_id,hash,native_id,process,tag,status,exit,module,container,cpus,time,disk,memory,attempt,submit,start,complete,duration,realtime,queue,%cpu,%mem,rss,vmem,peak_rss,peak_vmem,rchar,wchar,syscr,syscw,read_bytes,write_bytes,vol_ctxt,inv_ctxt,workdir,scratch,error_action" } diff --git a/conf/chaos.config b/conf/chaos.config index 6c97bfda..f2e85817 100644 --- a/conf/chaos.config +++ b/conf/chaos.config @@ -5,7 +5,7 @@ Defines input files and everything required to run a full size pipeline test. 
Use as follows: - nextflow run nf-core/pathogensurveillance -profile xanthomonas, --out_dir + nextflow run nf-core/pathogensurveillance -profile xanthomonas, --outdir ---------------------------------------------------------------------------------------- */ @@ -17,7 +17,7 @@ params { // Input data sample_data = 'test/data/metadata/chaos_samples.csv' reference_data = 'test/data/metadata/chaos_references.csv' - out_dir = 'test/output/chaos' + outdir = 'test/output/chaos' download_bakta_db = true } diff --git a/conf/complex.config b/conf/complex.config index 756ba284..3d05cd18 100644 --- a/conf/complex.config +++ b/conf/complex.config @@ -5,7 +5,7 @@ Defines input files and everything required to run a full size pipeline test. Use as follows: - nextflow run nf-core/pathogensurveillance -profile xanthomonas, --out_dir + nextflow run nf-core/pathogensurveillance -profile xanthomonas, --outdir ---------------------------------------------------------------------------------------- */ @@ -16,7 +16,7 @@ params { // Input data sample_data = 'test/data/metadata/complex.csv' - out_dir = 'test/output/complex' + outdir = 'test/output/complex' download_bakta_db = true } diff --git a/conf/complex_minimal.config b/conf/complex_minimal.config index d8907913..5bd1eb74 100644 --- a/conf/complex_minimal.config +++ b/conf/complex_minimal.config @@ -5,7 +5,7 @@ Defines input files and everything required to run a full size pipeline test. 
Use as follows: - nextflow run nf-core/pathogensurveillance -profile xanthomonas, --out_dir + nextflow run nf-core/pathogensurveillance -profile xanthomonas, --outdir ---------------------------------------------------------------------------------------- */ @@ -16,7 +16,7 @@ params { // Input data sample_data = 'test/data/metadata/complex_minimal.csv' - out_dir = 'test/output/complex_minimal' + outdir = 'test/output/complex_minimal' download_bakta_db = true } diff --git a/conf/complex_small.config b/conf/complex_small.config index c70b7968..b2ce7f7b 100644 --- a/conf/complex_small.config +++ b/conf/complex_small.config @@ -5,7 +5,7 @@ Defines input files and everything required to run a full size pipeline test. Use as follows: - nextflow run nf-core/pathogensurveillance -profile xanthomonas, --out_dir + nextflow run nf-core/pathogensurveillance -profile xanthomonas, --outdir ---------------------------------------------------------------------------------------- */ @@ -16,7 +16,7 @@ params { // Input data sample_data = 'test/data/metadata/complex_small.csv' - out_dir = 'test/output/complex_small' + outdir = 'test/output/complex_small' download_bakta_db = true } diff --git a/conf/fungi_n81.config b/conf/fungi_n81.config index ca83bdf3..5ee1334e 100644 --- a/conf/fungi_n81.config +++ b/conf/fungi_n81.config @@ -5,7 +5,7 @@ Defines input files and everything required to run a fast and simple pipeline test. 
Use as follows: - nextflow run nf-core/plantpathsurveil -profile fungi_n81, --out_dir + nextflow run nf-core/plantpathsurveil -profile fungi_n81, --outdir ---------------------------------------------------------------------------------------- */ @@ -15,6 +15,6 @@ params { // Input data sample_data = 'test/data/metadata/boxwood.csv' - out_dir = 'test/output/fungi_n81' + outdir = 'test/output/fungi_n81' download_bakta_db = false } diff --git a/conf/high_complexity_kpneumoniae.config b/conf/high_complexity_kpneumoniae.config index a4c0bd54..463bd509 100644 --- a/conf/high_complexity_kpneumoniae.config +++ b/conf/high_complexity_kpneumoniae.config @@ -5,7 +5,7 @@ Defines input files and everything required to run a fast and simple pipeline test. Use as follows: - nextflow run nf-core/plantpathsurveil -profile high_complexity_kpneumoniae, --out_dir + nextflow run nf-core/plantpathsurveil -profile high_complexity_kpneumoniae, --outdir ---------------------------------------------------------------------------------------- */ @@ -15,7 +15,7 @@ params { // Input data sample_data = 'test/data/metadata/high_complexity_kpneumoniae.csv' - out_dir = 'test/output/high_complexity_kpneumoniae' + outdir = 'test/output/high_complexity_kpneumoniae' download_bakta_db = true } diff --git a/conf/mixed.config b/conf/mixed.config index 2e87b3af..5a7273eb 100644 --- a/conf/mixed.config +++ b/conf/mixed.config @@ -5,6 +5,6 @@ params { // Input data sample_data = 'test/data/metadata/mixed.csv' reference_data = 'test/data/metadata/mixed_references.csv' - out_dir = 'test/output/mixed' + outdir = 'test/output/mixed' download_bakta_db = true } diff --git a/conf/mixed_bacteria.config b/conf/mixed_bacteria.config index 21717606..96f8c407 100644 --- a/conf/mixed_bacteria.config +++ b/conf/mixed_bacteria.config @@ -5,7 +5,7 @@ Defines input files and everything required to run a fast and simple pipeline test. 
Use as follows: - nextflow run nf-core/plantpathsurveil -profile mixed_bacteria, --out_dir + nextflow run nf-core/plantpathsurveil -profile mixed_bacteria, --outdir ---------------------------------------------------------------------------------------- */ @@ -15,6 +15,6 @@ params { // Input data sample_data = 'test/data/metadata/mixed_bacteria.csv' - out_dir = 'test/output/mixed_bacteria' + outdir = 'test/output/mixed_bacteria' download_bakta_db = true } diff --git a/conf/modules.config b/conf/modules.config index d573776e..7cc829f0 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -18,7 +18,7 @@ process { publishDir = [ - path: { "${params.out_dir}/${task.process.tokenize(':')[-1].toLowerCase()}" }, + path: { "${params.outdir}/${task.process.tokenize(':')[-1].toLowerCase()}" }, mode: check_prio(params.copymode,'low'), saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] @@ -27,59 +27,59 @@ process { withName: SAMPLESHEET_CHECK { publishDir = [ - path: { "${params.out_dir}/pipeline_info" }, + path: { "${params.outdir}/pipeline_info" }, mode: check_prio(params.copymode,'high'), saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] - cpus = { check_max( 1 , 'cpus' ) } - memory = { check_max( 1.GB * task.attempt, 'memory' ) } - time = { check_max( 1.h * task.attempt, 'time' ) } + cpus = { 1 } + memory = { 1.GB * task.attempt } + time = { 1.h * task.attempt } } withName: INITIAL_CLASSIFICATION { - cpus = { check_max( 1 , 'cpus' ) } - memory = { check_max( 1.GB * task.attempt, 'memory' ) } - time = { check_max( 1.h * task.attempt, 'time' ) } + cpus = { 1 } + memory = { 1.GB * task.attempt } + time = { 1.h * task.attempt } publishDir = [ - path: { "${params.out_dir}/${task.process.tokenize(':')[-1].toLowerCase()}" }, + path: { "${params.outdir}/${task.process.tokenize(':')[-1].toLowerCase()}" }, mode: check_prio(params.copymode,'low'), saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } ] } withName: PICK_ASSEMBLIES { - cpus = { check_max( 1 , 'cpus' ) } - memory = { check_max( 1.GB * task.attempt, 'memory' ) } - time = { check_max( 1.h * task.attempt, 'time' ) } + cpus = { 1 } + memory = { 1.GB * task.attempt } + time = { 1.h * task.attempt } publishDir = [ - path: { "${params.out_dir}/${task.process.tokenize(':')[-1].toLowerCase()}" }, + path: { "${params.outdir}/${task.process.tokenize(':')[-1].toLowerCase()}" }, mode: check_prio(params.copymode,'low'), saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] } withName: DOWNLOAD_ASSEMBLIES { - cpus = { check_max( 1 , 'cpus' ) } - memory = { check_max( 1.GB * task.attempt, 'memory' ) } - time = { check_max( 8.h * task.attempt, 'time' ) } + cpus = { 1 } + memory = { 1.GB * task.attempt } + time = { 8.h * task.attempt } storeDir = { params.data_dir == "false" ? null : "${params.data_dir}/assemblies" } errorStrategy = { sleep(Math.pow(4, task.attempt) * 60000 as long); return task.attempt > 2 ? 'ignore' : 'retry' } maxForks = 5 maxRetries = 3 maxErrors = 15 publishDir = [ - path: { "${params.out_dir}/${task.process.tokenize(':')[-1].toLowerCase()}" }, + path: { "${params.outdir}/${task.process.tokenize(':')[-1].toLowerCase()}" }, mode: check_prio(params.copymode,'low'), saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] } withName: MAKE_GFF_WITH_FASTA { - cpus = { check_max( 1 , 'cpus' ) } - memory = { check_max( 1.GB * task.attempt, 'memory' ) } - time = { check_max( 1.h * task.attempt, 'time' ) } + cpus = { 1 } + memory = { 1.GB * task.attempt } + time = { 1.h * task.attempt } publishDir = [ - path: { "${params.out_dir}/${task.process.tokenize(':')[-1].toLowerCase()}" }, + path: { "${params.outdir}/${task.process.tokenize(':')[-1].toLowerCase()}" }, mode: check_prio(params.copymode,'low'), saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } ] @@ -88,18 +88,18 @@ process { withName: FASTQC { ext.args = '--quiet' publishDir = [ - path: { "${params.out_dir}/${task.process.tokenize(':')[-1].toLowerCase()}" }, + path: { "${params.outdir}/${task.process.tokenize(':')[-1].toLowerCase()}" }, mode: check_prio(params.copymode, 'medium'), saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] } withName: SUBSET_READS { - cpus = { check_max( 2 , 'cpus' ) } - memory = { check_max( 1.GB * task.attempt, 'memory' ) } - time = { check_max( 2.h * task.attempt, 'time' ) } + cpus = { 2 } + memory = { 1.GB * task.attempt } + time = { 2.h * task.attempt } publishDir = [ - path: { "${params.out_dir}/${task.process.tokenize(':')[-1].toLowerCase()}" }, + path: { "${params.outdir}/${task.process.tokenize(':')[-1].toLowerCase()}" }, mode: check_prio(params.copymode,'low'), saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] @@ -107,7 +107,7 @@ process { withName: CUSTOM_DUMPSOFTWAREVERSIONS { publishDir = [ - path: { "${params.out_dir}/pipeline_info" }, + path: { "${params.outdir}/pipeline_info" }, mode: check_prio(params.copymode,'high'), pattern: '*_versions.yml' ] @@ -116,7 +116,7 @@ process { withName: SAMTOOLS_INDEX { ext.args = '-c' publishDir = [ - path: { "${params.out_dir}/${task.process.tokenize(':')[-1].toLowerCase()}" }, + path: { "${params.outdir}/${task.process.tokenize(':')[-1].toLowerCase()}" }, mode: check_prio(params.copymode,'low'), saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } ] @@ -124,22 +124,22 @@ process { withName: BBMAP_SENDSKETCH { ext.args = 'tossbrokenreads=t printall=t reads=10m samplerate=0.5 minkeycount=2' - cpus = { check_max( 1 , 'cpus' ) } - memory = { check_max( 1.GB * task.attempt, 'memory' ) } - time = { check_max( 1.h * task.attempt, 'time' ) } + cpus = { 1 } + memory = { 1.GB * task.attempt } + time = { 1.h * task.attempt } publishDir = [ - path: { "${params.out_dir}/${task.process.tokenize(':')[-1].toLowerCase()}" }, + path: { "${params.outdir}/${task.process.tokenize(':')[-1].toLowerCase()}" }, mode: check_prio(params.copymode,'low'), saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] } withName: BWA_INDEX { - cpus = { check_max( 1 , 'cpus' ) } - memory = { check_max( 8.GB * task.attempt, 'memory' ) } // It seems `bwa index` might start using disk instead of RAM when running out of RAW rahter than just failing, slowing it down. - time = { check_max( 24.h * task.attempt, 'time' ) } + cpus = { 1 } + memory = { 8.GB * task.attempt } // It seems `bwa index` might start using disk instead of RAM when running out of RAM rather than just failing, slowing it down. + time = { 24.h * task.attempt } publishDir = [ - path: { "${params.out_dir}/${task.process.tokenize(':')[-1].toLowerCase()}" }, + path: { "${params.outdir}/${task.process.tokenize(':')[-1].toLowerCase()}" }, mode: check_prio(params.copymode,'low'), saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } ] @@ -147,11 +147,11 @@ process { withName: BWA_MEM { ext.args = '-M' - cpus = { check_max( 8 , 'cpus' ) } - memory = { check_max( 16.GB * task.attempt, 'memory' ) } - time = { check_max( 48.h * task.attempt, 'time' ) } + cpus = { 8 } + memory = { 16.GB * task.attempt } + time = { 48.h * task.attempt } publishDir = [ - path: { "${params.out_dir}/${task.process.tokenize(':')[-1].toLowerCase()}" }, + path: { "${params.outdir}/${task.process.tokenize(':')[-1].toLowerCase()}" }, mode: check_prio(params.copymode,'low'), saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] @@ -161,7 +161,7 @@ process { ext.prefix = { "${fasta.getBaseName()}" } cpus = 1 publishDir = [ - path: { "${params.out_dir}/${task.process.tokenize(':')[-1].toLowerCase()}" }, + path: { "${params.outdir}/${task.process.tokenize(':')[-1].toLowerCase()}" }, mode: check_prio(params.copymode,'low'), saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] @@ -174,7 +174,7 @@ process { ext.prefix = { "${meta.id}.formatted" } cpus = 1 publishDir = [ - path: { "${params.out_dir}/${task.process.tokenize(':')[-1].toLowerCase()}" }, + path: { "${params.outdir}/${task.process.tokenize(':')[-1].toLowerCase()}" }, mode: check_prio(params.copymode,'low'), saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] @@ -184,7 +184,7 @@ process { ext.args = { params.temp_dir ? "--temp ${params.temp_dir}" : "" } storeDir = { params.data_dir == "false" ? null : "${params.data_dir}/reads" } publishDir = [ - path: { "${params.out_dir}/${task.process.tokenize(':')[-1].toLowerCase()}" }, + path: { "${params.outdir}/${task.process.tokenize(':')[-1].toLowerCase()}" }, mode: check_prio(params.copymode,'low'), saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } ] @@ -193,7 +193,7 @@ process { withName: TABIX_TABIX { ext.args = '-p vcf' publishDir = [ - path: { "${params.out_dir}/${task.process.tokenize(':')[-1].toLowerCase()}" }, + path: { "${params.outdir}/${task.process.tokenize(':')[-1].toLowerCase()}" }, mode: check_prio(params.copymode,'low'), saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] @@ -203,7 +203,7 @@ process { ext.args = '--genotype-filter-expression "isHet == 1" --genotype-filter-name "isHetFilter" --set-filtered-genotype-to-no-call' ext.prefix = { "${meta.id}.variantfiltration" } publishDir = [ - path: { "${params.out_dir}/${task.process.tokenize(':')[-1].toLowerCase()}" }, + path: { "${params.outdir}/${task.process.tokenize(':')[-1].toLowerCase()}" }, mode: check_prio(params.copymode,'low'), saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] @@ -213,33 +213,33 @@ process { ext.args = '-f "ABHet < 0.0 | ABHet > 0.33" -f "ABHom < 0.0 | ABHom > 0.97" -f "MaxAASR > 0.4" -f "MQ > 30"' ext.prefix = { "${meta.id}.vcffilter" } publishDir = [ - path: { "${params.out_dir}/${task.process.tokenize(':')[-1].toLowerCase()}" }, + path: { "${params.outdir}/${task.process.tokenize(':')[-1].toLowerCase()}" }, mode: check_prio(params.copymode,'low'), saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] } withName: SPADES { - cpus = { check_max( 4 * task.attempt , 'cpus' ) } - memory = { check_max( 16.GB * Math.pow(4, task.attempt - 1), 'memory' ) } - time = { check_max( 24.h * task.attempt , 'time' ) } + cpus = { 4 * task.attempt } + memory = { 16.GB * Math.pow(4, task.attempt - 1)} + time = { 24.h * task.attempt } errorStrategy = { return task.attempt > 2 ? 
'ignore' : 'retry' } maxRetries = 2 publishDir = [ - path: { "${params.out_dir}/${task.process.tokenize(':')[-1].toLowerCase()}" }, + path: { "${params.outdir}/${task.process.tokenize(':')[-1].toLowerCase()}" }, mode: check_prio(params.copymode,'low'), saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] } withName: 'FLYE.*' { - cpus = { check_max( 4 * task.attempt , 'cpus' ) } - memory = { check_max( 16.GB * Math.pow(4, task.attempt - 1), 'memory' ) } - time = { check_max( 24.h * task.attempt , 'time' ) } + cpus = { 4 * task.attempt } + memory = { 16.GB * Math.pow(4, task.attempt - 1)} + time = { 24.h * task.attempt } errorStrategy = { return task.attempt > 2 ? 'ignore' : 'retry' } maxRetries = 2 publishDir = [ - path: { "${params.out_dir}/${task.process.tokenize(':')[-1].toLowerCase()}" }, + path: { "${params.outdir}/${task.process.tokenize(':')[-1].toLowerCase()}" }, mode: check_prio(params.copymode,'low'), saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] @@ -248,7 +248,7 @@ process { withName: FILTER_ASSEMBLY { ext.args = '--cov_cutoff 1 --len_cutoff 100' publishDir = [ - path: { "${params.out_dir}/${task.process.tokenize(':')[-1].toLowerCase()}" }, + path: { "${params.outdir}/${task.process.tokenize(':')[-1].toLowerCase()}" }, mode: check_prio(params.copymode,'low'), saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] @@ -257,7 +257,7 @@ process { withName: FIND_ASSEMBLIES { storeDir = { params.data_dir == "false" ? null : "${params.data_dir}/assembly_metadata" } publishDir = [ - path: { "${params.out_dir}/${task.process.tokenize(':')[-1].toLowerCase()}" }, + path: { "${params.outdir}/${task.process.tokenize(':')[-1].toLowerCase()}" }, mode: check_prio(params.copymode,'low'), saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } ] @@ -266,7 +266,7 @@ process { withName: BAKTA_BAKTADBDOWNLOAD { ext.args = '--type light' publishDir = [ - path: { "${params.out_dir}/${task.process.tokenize(':')[-1].toLowerCase()}" }, + path: { "${params.outdir}/${task.process.tokenize(':')[-1].toLowerCase()}" }, mode: check_prio(params.copymode,'low'), saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] @@ -274,7 +274,7 @@ process { withName: BAKTA_BAKTADBDOWNLOAD { publishDir = [ - path: { "${params.out_dir}/${task.process.tokenize(':')[-1].toLowerCase()}" }, + path: { "${params.outdir}/${task.process.tokenize(':')[-1].toLowerCase()}" }, mode: check_prio(params.copymode,'low'), saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] @@ -285,7 +285,7 @@ process { withName: BUSCO_DOWNLOAD { storeDir = { params.data_dir == "false" ? null : "${params.data_dir}/busco_db" } publishDir = [ - path: { "${params.out_dir}/${task.process.tokenize(':')[-1].toLowerCase()}" }, + path: { "${params.outdir}/${task.process.tokenize(':')[-1].toLowerCase()}" }, mode: check_prio(params.copymode,'low'), saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] @@ -294,7 +294,7 @@ process { withName: BAKTA_BAKTA { ext.args = '--force --skip-plot --skip-crispr' publishDir = [ - path: { "${params.out_dir}/${task.process.tokenize(':')[-1].toLowerCase()}" }, + path: { "${params.outdir}/${task.process.tokenize(':')[-1].toLowerCase()}" }, mode: check_prio(params.copymode,'low'), saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] @@ -303,7 +303,7 @@ process { withName: MAFFT_SMALL { ext.prefix = { "${fasta.getSimpleName()}_aligned" } publishDir = [ - path: { "${params.out_dir}/${task.process.tokenize(':')[-1].toLowerCase()}" }, + path: { "${params.outdir}/${task.process.tokenize(':')[-1].toLowerCase()}" }, mode: check_prio(params.copymode,'low'), saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } ] @@ -312,7 +312,7 @@ process { withName: IQTREE2 { ext.args = '--seqtype DNA -m GTR' publishDir = [ - path: { "${params.out_dir}/${task.process.tokenize(':')[-1].toLowerCase()}" }, + path: { "${params.outdir}/${task.process.tokenize(':')[-1].toLowerCase()}" }, mode: check_prio(params.copymode,'low'), saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] @@ -321,7 +321,7 @@ process { withName: IQTREE2_SNP { ext.args = '--seqtype DNA -m GTR+ASC' publishDir = [ - path: { "${params.out_dir}/${task.process.tokenize(':')[-1].toLowerCase()}" }, + path: { "${params.outdir}/${task.process.tokenize(':')[-1].toLowerCase()}" }, mode: check_prio(params.copymode,'low'), saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] @@ -330,7 +330,7 @@ process { withName: NANOPLOT { ext.args = { "--prefix ${meta.id}_" } publishDir = [ - path: { "${params.out_dir}/${task.process.tokenize(':')[-1].toLowerCase()}" }, + path: { "${params.outdir}/${task.process.tokenize(':')[-1].toLowerCase()}" }, mode: check_prio(params.copymode, 'medium'), saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] @@ -340,7 +340,7 @@ process { ext.args = '-s 150 -W 150' ext.prefix = { "${meta.id}_chopped" } publishDir = [ - path: { "${params.out_dir}/${task.process.tokenize(':')[-1].toLowerCase()}" }, + path: { "${params.outdir}/${task.process.tokenize(':')[-1].toLowerCase()}" }, mode: check_prio(params.copymode,'low'), saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } ] @@ -348,11 +348,11 @@ process { withName: SOURMASH_SKETCH { ext.args = "dna --param-string 'scaled=1000,k=21,k=31,k=51'" - cpus = { check_max( 1 , 'cpus' ) } - memory = { check_max( 8.GB * task.attempt, 'memory' ) } - time = { check_max( 4.h * task.attempt, 'time' ) } + cpus = { 1 } + memory = { 8.GB * task.attempt } + time = { 4.h * task.attempt } publishDir = [ - path: { "${params.out_dir}/${task.process.tokenize(':')[-1].toLowerCase()}" }, + path: { "${params.outdir}/${task.process.tokenize(':')[-1].toLowerCase()}" }, mode: check_prio(params.copymode,'low'), saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] @@ -361,11 +361,11 @@ process { withName: TRIM_AND_SKETCH { ext.args = "-C 3 -Z 18 -V" ext.args2 = "dna --param-string 'scaled=1000,k=21,k=31,k=51'" - cpus = { check_max( 1 * task.attempt, 'cpus' ) } - memory = { check_max( 16.GB * task.attempt, 'memory' ) } - time = { check_max( 12.h * task.attempt, 'time' ) } + cpus = { 1 * task.attempt } + memory = { 16.GB * task.attempt } + time = { 12.h * task.attempt } publishDir = [ - path: { "${params.out_dir}/${task.process.tokenize(':')[-1].toLowerCase()}" }, + path: { "${params.outdir}/${task.process.tokenize(':')[-1].toLowerCase()}" }, mode: check_prio(params.copymode,'low'), saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] @@ -373,11 +373,11 @@ process { withName: SOURMASH_COMPARE { ext.args = "-k 31 --ani" - cpus = { check_max( 4 * task.attempt, 'cpus' ) } - memory = { check_max( 16.GB * task.attempt, 'memory' ) } - time = { check_max( 12.h * task.attempt, 'time' ) } + cpus = { 4 * task.attempt } + memory = { 16.GB * task.attempt } + time = { 12.h * task.attempt } publishDir = [ - path: { "${params.out_dir}/${task.process.tokenize(':')[-1].toLowerCase()}" }, + path: { "${params.outdir}/${task.process.tokenize(':')[-1].toLowerCase()}" }, mode: check_prio(params.copymode,'low'), saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } ] @@ -385,7 +385,7 @@ process { withName: 'MAIN_REPORT.*' { publishDir = [ - path: { "${params.out_dir}/reports" }, + path: { "${params.outdir}/reports" }, mode: check_prio(params.copymode, 'high'), overwrite: true, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } @@ -395,20 +395,20 @@ process { withName: 'GRAPHTYPER_VCFCONCATENATE' { cpus = 1 - memory = { check_max( 16.GB * task.attempt, 'memory' ) } - time = { check_max( 12.h * task.attempt, 'time' ) } + memory = { 16.GB * task.attempt } + time = { 12.h * task.attempt } publishDir = [ - path: { "${params.out_dir}/${task.process.tokenize(':')[-1].toLowerCase()}" }, + path: { "${params.outdir}/${task.process.tokenize(':')[-1].toLowerCase()}" }, mode: check_prio(params.copymode,'low'), saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] } withName: PIRATE { - time = { check_max( 240.h * task.attempt, 'time' ) } - cpus = { check_max( 8 * task.attempt, 'cpus' ) } + time = { 240.h * task.attempt } + cpus = { 8 * task.attempt } publishDir = [ - path: { "${params.out_dir}/${task.process.tokenize(':')[-1].toLowerCase()}" }, + path: { "${params.outdir}/${task.process.tokenize(':')[-1].toLowerCase()}" }, mode: check_prio(params.copymode,'low'), saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] @@ -417,7 +417,7 @@ process { withName: GRAPHTYPER_GENOTYPE { ext.args = '--no_filter_on_proper_pairs' publishDir = [ - path: { "${params.out_dir}/${task.process.tokenize(':')[-1].toLowerCase()}" }, + path: { "${params.outdir}/${task.process.tokenize(':')[-1].toLowerCase()}" }, mode: check_prio(params.copymode,'low'), saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } ] @@ -425,7 +425,7 @@ process { withName: PREPARE_REPORT_INPUT { publishDir = [ - path: { "${params.out_dir}/${task.process.tokenize(':')[-1].toLowerCase()}" }, + path: { "${params.outdir}/${task.process.tokenize(':')[-1].toLowerCase()}" }, mode: check_prio(params.copymode,'high'), saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] diff --git a/conf/mycobacteroides.config b/conf/mycobacteroides.config index 5eb3cd75..f6f95d03 100644 --- a/conf/mycobacteroides.config +++ b/conf/mycobacteroides.config @@ -5,7 +5,7 @@ Defines input files and everything required to run a fast and simple pipeline test. Use as follows: - nextflow run nf-core/plantpathsurveil -profile mycobacteroides, --out_dir + nextflow run nf-core/plantpathsurveil -profile mycobacteroides, --outdir ---------------------------------------------------------------------------------------- */ @@ -15,6 +15,6 @@ params { // Input data sample_data = 'test/data/metadata/mycobacteroides.csv' - out_dir = 'test/output/mycobacteroides' + outdir = 'test/output/mycobacteroides' download_bakta_db = true } diff --git a/conf/mycobacteroides_small.config b/conf/mycobacteroides_small.config index f9d1221a..9a7bea29 100644 --- a/conf/mycobacteroides_small.config +++ b/conf/mycobacteroides_small.config @@ -5,7 +5,7 @@ Defines input files and everything required to run a fast and simple pipeline test. 
Use as follows: - nextflow run nf-core/plantpathsurveil -profile mycobacteroides_small, --out_dir + nextflow run nf-core/plantpathsurveil -profile mycobacteroides_small, --outdir ---------------------------------------------------------------------------------------- */ @@ -15,7 +15,7 @@ params { // Input data sample_data = 'test/data/metadata/mycobacteroides_small.csv' - out_dir = 'test/output/mycobacteroides_small' + outdir = 'test/output/mycobacteroides_small' download_bakta_db = true } diff --git a/conf/ramorum_small.config b/conf/ramorum_small.config index 168663d1..79b4e9d4 100644 --- a/conf/ramorum_small.config +++ b/conf/ramorum_small.config @@ -5,7 +5,7 @@ Defines input files and everything required to run a fast and simple pipeline test. Use as follows: - nextflow run nf-core/plantpathsurveil -profile ramorum_small, --out_dir + nextflow run nf-core/plantpathsurveil -profile ramorum_small, --outdir ---------------------------------------------------------------------------------------- */ @@ -15,7 +15,7 @@ params { // Input data sample_data = 'test/data/metadata/ramorum_small.csv' - out_dir = 'test/output/ramorum_small' + outdir = 'test/output/ramorum_small' download_bakta_db = true } diff --git a/conf/wagner_2023.config b/conf/wagner_2023.config index bf1fa43d..62dc630f 100644 --- a/conf/wagner_2023.config +++ b/conf/wagner_2023.config @@ -5,7 +5,7 @@ Defines input files and everything required to run a full size pipeline test. 
Use as follows: - nextflow run nf-core/pathogensurveillance -profile xanthomonas, --out_dir + nextflow run nf-core/pathogensurveillance -profile wagner_2023, --outdir ---------------------------------------------------------------------------------------- */ @@ -16,6 +16,6 @@ params { // Input data sample_data = 'test/data/metadata/wagner_2023.csv' - out_dir = 'test/output/wagner_2023' + outdir = 'test/output/wagner_2023' download_bakta_db = true } diff --git a/conf/wagner_2023_small.config b/conf/wagner_2023_small.config index f79c4c4b..06bec846 100644 --- a/conf/wagner_2023_small.config +++ b/conf/wagner_2023_small.config @@ -5,7 +5,7 @@ Defines input files and everything required to run a full size pipeline test. Use as follows: - nextflow run nf-core/pathogensurveillance -profile xanthomonas, --out_dir + nextflow run nf-core/pathogensurveillance -profile wagner_2023_small, --outdir ---------------------------------------------------------------------------------------- */ @@ -16,6 +16,6 @@ params { // Input data sample_data = 'test/data/metadata/wagner_2023_small.csv' - out_dir = 'test/output/wagner_2023_small' + outdir = 'test/output/wagner_2023_small' download_bakta_db = true } diff --git a/conf/xanthomonas.config b/conf/xanthomonas.config index d3c4868a..eeda2338 100644 --- a/conf/xanthomonas.config +++ b/conf/xanthomonas.config @@ -5,7 +5,7 @@ Defines input files and everything required to run a full size pipeline test. 
Use as follows: - nextflow run nf-core/pathogensurveillance -profile xanthomonas, --out_dir + nextflow run nf-core/pathogensurveillance -profile xanthomonas, --outdir ---------------------------------------------------------------------------------------- */ @@ -16,6 +16,6 @@ params { // Input data sample_data = 'test/data/metadata/xanthomonas.csv' - out_dir = 'test/output/xanthomonas' + outdir = 'test/output/xanthomonas' download_bakta_db = true } diff --git a/conf/xanthomonas_small.config b/conf/xanthomonas_small.config index 66d771c2..b3041f30 100644 --- a/conf/xanthomonas_small.config +++ b/conf/xanthomonas_small.config @@ -5,7 +5,7 @@ Defines input files and everything required to run a full size pipeline test. Use as follows: - nextflow run nf-core/pathogensurveillance -profile xanthomonas_small, --out_dir + nextflow run nf-core/pathogensurveillance -profile xanthomonas_small, --outdir ---------------------------------------------------------------------------------------- */ @@ -16,7 +16,7 @@ params { // Input data sample_data = 'test/data/metadata/xanthomonas_small.csv' - out_dir = 'test/output/xanthomonas_small' + outdir = 'test/output/xanthomonas_small' download_bakta_db = true } diff --git a/initialclassification/families.txt b/initialclassification/families.txt deleted file mode 100644 index e69de29b..00000000 diff --git a/initialclassification/genera.txt b/initialclassification/genera.txt deleted file mode 100644 index e69de29b..00000000 diff --git a/initialclassification/species.txt b/initialclassification/species.txt deleted file mode 100644 index e69de29b..00000000 diff --git a/main.nf b/main.nf index c7d822ad..1bc97c9f 100644 --- a/main.nf +++ b/main.nf @@ -1,11 +1,11 @@ #!/usr/bin/env nextflow /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - nf-core/PATHOGENDX + nf-core/PATHOGENSURVEILLANCE ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Github : 
https://github.com/nf-core/plantpathsurveil - Website: https://nf-co.re/plantpathsurveil - Slack : https://nfcore.slack.com/channels/plantpathsurveil + Github : https://github.com/nf-core/pathogensurveillance + Website: https://nf-co.re/pathogensurveillance + Slack : https://nfcore.slack.com/channels/pathogensurveillance ---------------------------------------------------------------------------------------- */ @@ -16,35 +16,9 @@ */ include { PATHOGENSURVEILLANCE } from './workflows/pathogensurveillance' -include { PIPELINE_INITIALISATION } from './subworkflows/local/utils_nfcore_plantpathsurveil_pipeline' -include { PIPELINE_COMPLETION } from './subworkflows/local/utils_nfcore_plantpathsurveil_pipeline' -include { getGenomeAttribute } from './subworkflows/local/utils_nfcore_plantpathsurveil_pipeline' +include { PIPELINE_INITIALISATION } from './subworkflows/local/utils_nfcore_pathogensurveillance_pipeline' +include { PIPELINE_COMPLETION } from './subworkflows/local/utils_nfcore_pathogensurveillance_pipeline' -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - NAMED WORKFLOWS FOR PIPELINE -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - -// -// WORKFLOW: Run main analysis pipeline depending on type of input -// -workflow NFCORE_PLANTPATHSURVEIL { - - take: - samplesheet // channel: samplesheet read in from --input - - main: - - // - // WORKFLOW: Run pipeline - // - PLANTPATHSURVEIL ( - samplesheet - ) - emit: - multiqc_report = PLANTPATHSURVEIL.out.multiqc_report // channel: /path/to/multiqc_report.html -} /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -64,14 +38,16 @@ workflow { params.monochrome_logs, args, params.outdir, - params.input + params.sample_data, + params.reference_data ) // // WORKFLOW: Run main workflow // - NFCORE_PLANTPATHSURVEIL ( - PIPELINE_INITIALISATION.out.samplesheet + PATHOGENSURVEILLANCE ( + 
PIPELINE_INITIALISATION.out.sample_data_csv, + PIPELINE_INITIALISATION.out.reference_data_csv ) // @@ -84,7 +60,7 @@ workflow { params.outdir, params.monochrome_logs, params.hook_url, - NFCORE_PLANTPATHSURVEIL.out.multiqc_report + PATHOGENSURVEILLANCE.out.multiqc_report ) } diff --git a/modules/nf-core/multiqc/main.nf b/modules/nf-core/multiqc/main.nf index f2e6abc8..9595afb2 100644 --- a/modules/nf-core/multiqc/main.nf +++ b/modules/nf-core/multiqc/main.nf @@ -17,9 +17,9 @@ process MULTIQC { output: tuple val(meta), path("${prefix}_multiqc"), emit: outdir - //tuple val(meta), path("*multiqc_report.html"), emit: report - //tuple val(meta), path("*_data") , emit: data - //tuple val(meta), path("*_plots") , optional:true, emit: plots + tuple val(meta), path("*multiqc_report.html"), emit: report + tuple val(meta), path("*_data") , emit: data + tuple val(meta), path("*_plots") , optional:true, emit: plots path "versions.yml" , emit: versions when: diff --git a/nextflow.config b/nextflow.config index 2957205a..fe33a081 100644 --- a/nextflow.config +++ b/nextflow.config @@ -11,55 +11,49 @@ params { // Input options - sample_data = null - reference_data = null - bakta_db = null - temp_dir = null - data_dir = 'path_surveil_data' - download_bakta_db = true - max_depth = 100 - only_latin_binomial_refs = false - n_ref_strains = 30 - n_ref_species = 20 - n_ref_genera = 10 - n_ref_closest = 3 - n_ref_closest_named = 2 - n_ref_context = 7 - ref_min_ani = 0.85 - phylo_min_genes = 10 - phylo_max_genes = 200 - bakta_db_type = 'light' - cache_type = 'true' - - // References - genome = null - igenomes_base = 's3://ngi-igenomes/igenomes/' - igenomes_ignore = false + sample_data = null + reference_data = null + bakta_db = null + temp_dir = null + data_dir = 'path_surveil_data' + download_bakta_db = true + max_depth = 100 + only_latin_binomial_refs = false + n_ref_strains = 30 + n_ref_species = 20 + n_ref_genera = 10 + n_ref_closest = 3 + n_ref_closest_named = 2 + n_ref_context = 7 + 
ref_min_ani = 0.85 + phylo_min_genes = 10 + phylo_max_genes = 200 + bakta_db_type = 'light' + cache_type = 'true' // MultiQC options - multiqc_config = null - multiqc_title = null - multiqc_logo = null - max_multiqc_email_size = '25.MB' - multiqc_methods_description = null + multiqc_config = null + multiqc_title = null + multiqc_logo = null + max_multiqc_email_size = '25.MB' + multiqc_methods_description = null // Boilerplate options - out_dir = null - trace_dir = null - publish_dir_mode = 'copy' - copymode = 'medium' - email = null - email_on_fail = null - plaintext_email = false - monochrome_logs = false - hook_url = null - help = false + outdir = null + trace_dir = null + publish_dir_mode = 'copy' + copymode = 'medium' + email = null + email_on_fail = null + plaintext_email = false + monochrome_logs = false + hook_url = null + help = false help_full = false - validate_params = true - show_hidden_params = false - schema_ignore_params = 'genomes' - enable_conda = false - hpc_queue = null + validate_params = true + show_hidden_params = false + enable_conda = false + hpc_queue = null show_hidden = false version = false pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/' @@ -238,9 +232,6 @@ podman.registry = 'quay.io' singularity.registry = 'quay.io' charliecloud.registry = 'quay.io' -// Load igenomes.config if required -includeConfig !params.igenomes_ignore ? 'conf/igenomes.config' : 'conf/igenomes_ignored.config' - // Export these variables to prevent local Python/R libraries from conflicting with those in the container // The JULIA depot path has been adjusted to a fixed path `/usr/local/share/julia` that needs to be used for packages in the container. // See https://apeltzer.github.io/post/03-julia-lang-nextflow/ for details on that. Once we have a common agreement on where to keep Julia packages, this is adjustable. 
@@ -267,7 +258,7 @@ nextflow.enable.configProcessNamesValidation = false def trace_timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') if (! params.trace_dir) { - params.trace_dir = "${params.out_dir}/pipeline_info" + params.trace_dir = "${params.outdir}/pipeline_info" } timeline { enabled = true diff --git a/nextflow_schema.json b/nextflow_schema.json index 540fb734..1b5b6de0 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -10,7 +10,7 @@ "type": "object", "fa_icon": "fas fa-terminal", "description": "Define where the pipeline should find input data and save output data.", - "required": ["sample_data", "out_dir"], + "required": ["sample_data", "outdir"], "properties": { "sample_data": { "type": "string", @@ -19,7 +19,6 @@ "schema": "assets/schema_input.json", "mimetype": "text/csv", "pattern": "^\\S+\\.[ct]sv$", - "schema": "assets/schema_input.json", "description": "Path to comma-separated file containing information about samples.", "help_text": "This CSV has one row per samples and contains information such as the location of input files, sample ID, labels, etc. Use this parameter to specify its location. See [the documentaion](https://github.com/grunwaldlab/pathogensurveillance?tab=readme-ov-file#input-format) for details on formatting this file.", "fa_icon": "fas fa-file-csv" @@ -34,7 +33,7 @@ "help_text": "This CSV has one row per reference and contains information such as the location of input files, reference ID, labels, etc. Use this parameter to specify its location. See [the documentaion](https://github.com/grunwaldlab/pathogensurveillance?tab=readme-ov-file#input-format) for details on formatting this file.", "fa_icon": "fas fa-file-csv" }, - "out_dir": { + "outdir": { "type": "string", "format": "directory-path", "description": "The output directory where the results will be saved. 
You have to use absolute paths to storage if running on Cloud infrastructure.", @@ -102,7 +101,6 @@ "description": "Maximum depth of reads to be used for all analses. Samples with more reads are subsampled to this depth.", "fa_icon": "fas fa-align-center" }, -<<<<<<< HEAD "only_latin_binomial_refs": { "type": "boolean", "description": "When selecting references automatically, only consider references with names that appear to be standard latin bionomials (i.e. no numbers or symbols in the first two words).", @@ -162,32 +160,6 @@ "default": 0.85, "description": "The minimum ANI between a sample and potential reference for that reference to be used for mapping reads from that sample. To force all the samples in a report group to use the same reference, set this value very low.", "fa_icon": "fas fa-align-center" -======= - "fasta": { - "type": "string", - "format": "file-path", - "exists": true, - "mimetype": "text/plain", - "pattern": "^\\S+\\.fn?a(sta)?(\\.gz)?$", - "description": "Path to FASTA genome file.", - "help_text": "This parameter is *mandatory* if `--genome` is not specified. If you don't have a BWA index available this will be generated for you automatically. Combine with `--save_reference` to save BWA index for future runs.", - "fa_icon": "far fa-file-code" - }, - "igenomes_ignore": { - "type": "boolean", - "description": "Do not load the iGenomes reference config.", - "fa_icon": "fas fa-ban", - "hidden": true, - "help_text": "Do not load `igenomes.config` when running the pipeline. You may choose this option if you observe clashes between custom parameters and those supplied in `igenomes.config`." 
- }, - "igenomes_base": { - "type": "string", - "format": "directory-path", - "description": "The base path to the igenomes reference files", - "fa_icon": "fas fa-ban", - "hidden": true, - "default": "s3://ngi-igenomes/igenomes/" ->>>>>>> 240c46a01d4c364552f2ed2768e4f67f5a2be55c } } }, @@ -383,7 +355,7 @@ "trace_dir": { "type": "string", "description": "Directory to keep pipeline Nextflow logs and reports.", - "default": "${params.out_dir}/pipeline_info", + "default": "${params.outdir}/pipeline_info", "fa_icon": "fas fa-cogs", "hidden": true }, @@ -428,9 +400,6 @@ { "$ref": "#/$defs/input_output_options" }, - { - "$ref": "#/$defs/reference_genome_options" - }, { "$ref": "#/$defs/institutional_config_options" }, diff --git a/subworkflows/local/utils_nfcore_plantpathsurveil_pipeline/main.nf b/subworkflows/local/utils_nfcore_pathogensurveillance_pipeline/main.nf similarity index 86% rename from subworkflows/local/utils_nfcore_plantpathsurveil_pipeline/main.nf rename to subworkflows/local/utils_nfcore_pathogensurveillance_pipeline/main.nf index 40d60da8..28a1aa2b 100644 --- a/subworkflows/local/utils_nfcore_plantpathsurveil_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_pathogensurveillance_pipeline/main.nf @@ -26,16 +26,17 @@ include { UTILS_NEXTFLOW_PIPELINE } from '../../nf-core/utils_nextflow_pipelin workflow PIPELINE_INITIALISATION { take: - version // boolean: Display version and exit - validate_params // boolean: Boolean whether to validate parameters against the schema at runtime - monochrome_logs // boolean: Do not use coloured log outputs - nextflow_cli_args // array: List of positional nextflow CLI args - outdir // string: The output directory where the results will be saved - input // string: Path to input samplesheet + version // boolean: Display version and exit + validate_params // boolean: Boolean whether to validate parameters against the schema at runtime + monochrome_logs // boolean: Do not use coloured log outputs + nextflow_cli_args // 
array: List of positional nextflow CLI args + outdir // string: The output directory where the results will be saved + sample_data_csv // string: Path to input sample data table + reference_data_csv // string: Path to reference data table main: - ch_versions = Channel.empty() + versions = Channel.empty() // // Print version and exit if required and dump pipeline parameters to JSON file @@ -66,35 +67,34 @@ workflow PIPELINE_INITIALISATION { // // Custom validation for pipeline parameters // - validateInputParameters() - // - // Create channel from input file provided through params.input - // + // Check input path parameters to see if they exist + def checkPathParamList = [ + params.sample_data, + params.reference_data, + params.multiqc_config, + params.bakta_db + ] + for (param in checkPathParamList) { + if (param) { file(param, checkIfExists: true) } + } - Channel - .fromList(samplesheetToList(params.input, "${projectDir}/assets/schema_input.json")) - .map { - meta, fastq_1, fastq_2 -> - if (!fastq_2) { - return [ meta.id, meta + [ single_end:true ], [ fastq_1 ] ] - } else { - return [ meta.id, meta + [ single_end:false ], [ fastq_1, fastq_2 ] ] - } - } - .groupTuple() - .map { samplesheet -> - validateInputSamplesheet(samplesheet) - } - .map { - meta, fastqs -> - return [ meta, fastqs.flatten() ] - } - .set { ch_samplesheet } + // Check mandatory parameters + if (params.sample_data) { + sample_data_csv = file(params.sample_data) + } else { + exit 1, 'Sample metadata CSV not specified.' 
+ } + if (params.reference_data) { + reference_data_csv = file(params.reference_data) + } else { + reference_data_csv = [] + } emit: - samplesheet = ch_samplesheet - versions = ch_versions + sample_data_csv = sample_data_csv + reference_data_csv = reference_data_csv + versions = versions } /* diff --git a/workflows/pathogensurveillance.nf b/workflows/pathogensurveillance.nf index 6f2ee5d6..e7ede3a8 100644 --- a/workflows/pathogensurveillance.nf +++ b/workflows/pathogensurveillance.nf @@ -1,86 +1,24 @@ /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - VALIDATE INPUTS + IMPORT MODULES / SUBWORKFLOWS / FUNCTIONS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -def summary_params = NfcoreSchema.paramsSummaryMap(workflow, params) - -// Validate input parameters -WorkflowPathogensurveillance.initialise(params, log) - -// Check input path parameters to see if they exist -def checkPathParamList = [ - params.sample_data, - params.reference_data, - params.multiqc_config -] -for (param in checkPathParamList) { - if (param) { file(param, checkIfExists: true) } -} - -// Check mandatory parameters -if (params.sample_data) { - sample_data_csv = file(params.sample_data) -} else { - exit 1, 'Sample metadata CSV not specified.' -} -if (params.reference_data) { - reference_data_csv = file(params.reference_data) -} else { - reference_data_csv = [] -} -if (!params.bakta_db && !params.download_bakta_db ) { - exit 1, "No bakta database specified. Use either '--bakta_db' to point to a local bakta database or use '--download_bakta_db true' to download the Bakta database." 
-} - - -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - CONFIG FILES -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - -multiqc_config = Channel.fromPath("$projectDir/assets/multiqc_config.yml", checkIfExists: true) -multiqc_custom_config = params.multiqc_config ? Channel.fromPath( params.multiqc_config, checkIfExists: true ) : Channel.empty() -multiqc_logo = params.multiqc_logo ? Channel.fromPath( params.multiqc_logo, checkIfExists: true ) : Channel.empty() -multiqc_custom_methods_description = params.multiqc_methods_description ? file(params.multiqc_methods_description, checkIfExists: true) : file("$projectDir/assets/methods_description_template.yml", checkIfExists: true) - -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - IMPORT LOCAL MODULES/SUBWORKFLOWS -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - -// -// SUBWORKFLOW: Consisting of a mix of local and nf-core/modules -// -include { PREPARE_INPUT } from '../subworkflows/local/prepare_input' -include { COARSE_SAMPLE_TAXONOMY } from '../subworkflows/local/coarse_sample_taxonomy' -include { CORE_GENOME_PHYLOGENY } from '../subworkflows/local/core_genome_phylogeny' -include { VARIANT_ANALYSIS } from '../subworkflows/local/variant_analysis' -include { DOWNLOAD_REFERENCES } from '../subworkflows/local/download_references' -include { SKETCH_COMPARISON } from '../subworkflows/local/sketch_comparison' -include { GENOME_ASSEMBLY } from '../subworkflows/local/genome_assembly' -include { BUSCO_PHYLOGENY } from '../subworkflows/local/busco_phylogeny' -include { INITIAL_QC_CHECKS } from '../subworkflows/local/initial_qc_checks' - - -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - IMPORT NF-CORE MODULES/SUBWORKFLOWS 
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - -// -// MODULE: Installed directly from nf-core/modules -// include { MULTIQC } from '../modules/nf-core/multiqc/main' -include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoftwareversions/main' +include { PREPARE_INPUT } from '../subworkflows/local/prepare_input' +include { COARSE_SAMPLE_TAXONOMY } from '../subworkflows/local/coarse_sample_taxonomy' +include { CORE_GENOME_PHYLOGENY } from '../subworkflows/local/core_genome_phylogeny' +include { VARIANT_ANALYSIS } from '../subworkflows/local/variant_analysis' +include { DOWNLOAD_REFERENCES } from '../subworkflows/local/download_references' +include { SKETCH_COMPARISON } from '../subworkflows/local/sketch_comparison' +include { GENOME_ASSEMBLY } from '../subworkflows/local/genome_assembly' +include { BUSCO_PHYLOGENY } from '../subworkflows/local/busco_phylogeny' +include { INITIAL_QC_CHECKS } from '../subworkflows/local/initial_qc_checks' include { MAIN_REPORT } from '../modules/local/main_report' include { RECORD_MESSAGES } from '../modules/local/record_messages' include { DOWNLOAD_ASSEMBLIES } from '../modules/local/download_assemblies' include { PREPARE_REPORT_INPUT } from '../modules/local/prepare_report_input' +include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -88,11 +26,15 @@ include { PREPARE_REPORT_INPUT } from '../modules/local/prepare_report_in ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -// Info required for completion email and summary -def multiqc_report = [] - workflow PATHOGENSURVEILLANCE { + take: + sample_data_csv + reference_data_csv + + main: + + // Initalize channel to accumulate information about software versions used versions = Channel.empty() messages = Channel.empty() @@ -147,14 +89,20 @@ 
workflow PATHOGENSURVEILLANCE { versions = versions.mix(BUSCO_PHYLOGENY.out.versions) messages = messages.mix(BUSCO_PHYLOGENY.out.messages) - // Save version info - CUSTOM_DUMPSOFTWAREVERSIONS ( - versions - .unique() - .collectFile(name: 'collated_versions.yml') - ) + // Collate and save software versions + softwareVersionsToYAML(versions) + .collectFile( + storeDir: "${params.outdir}/pipeline_info", + name: 'nf_core_' + 'pipeline_software_' + 'mqc_' + 'versions.yml', + sort: true, + newLine: true + ).set { collated_versions } // MultiQC + multiqc_config = Channel.fromPath("$projectDir/assets/multiqc_config.yml", checkIfExists: true) + multiqc_custom_config = params.multiqc_config ? Channel.fromPath( params.multiqc_config, checkIfExists: true ) : Channel.empty() + multiqc_logo = params.multiqc_logo ? Channel.fromPath( params.multiqc_logo, checkIfExists: true ) : Channel.empty() + multiqc_custom_methods_description = params.multiqc_methods_description ? file(params.multiqc_methods_description, checkIfExists: true) : file("$projectDir/assets/methods_description_template.yml", checkIfExists: true) fastqc_results = PREPARE_INPUT.out.sample_data .map{ [[id: it.sample_id], [id: it.report_group_ids]] } .combine(INITIAL_QC_CHECKS.out.fastqc_zip, by: 0) @@ -176,7 +124,7 @@ workflow PATHOGENSURVEILLANCE { multiqc_files = fastqc_results .join(nanoplot_results, remainder: true) .join(quast_results, remainder: true) - .combine(CUSTOM_DUMPSOFTWAREVERSIONS.out.mqc_yml.collect(sort: true)) + .combine(collated_versions) .map { report_meta, fastqc, nanoplot, quast, versions -> files = fastqc ?: [] + nanoplot ?: [] + quast ?: [] + [versions] [report_meta, files.flatten()] @@ -185,7 +133,9 @@ workflow PATHOGENSURVEILLANCE { multiqc_files, multiqc_config.collect(sort: true).ifEmpty([]), multiqc_custom_config.collect(sort: true).ifEmpty([]), - multiqc_logo.collect(sort: true).ifEmpty([]) + multiqc_logo.collect(sort: true).ifEmpty([]), + [], + [] ) versions = 
versions.mix(MULTIQC.out.versions) @@ -288,7 +238,7 @@ workflow PATHOGENSURVEILLANCE { PREPARE_REPORT_INPUT ( report_inputs, - CUSTOM_DUMPSOFTWAREVERSIONS.out.yml.first() // .first converts it to a value channel so it can be reused for multiple reports. + collated_versions.first() // .first converts it to a value channel so it can be reused for multiple reports. ) MAIN_REPORT ( @@ -296,35 +246,6 @@ workflow PATHOGENSURVEILLANCE { Channel.fromPath("${projectDir}/assets/main_report", checkIfExists: true).first() // .first converts it to a value channel so it can be reused for multiple reports. ) + emit: + multiqc_report = MULTIQC.out.report } - - - - - - - - - - -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - COMPLETION EMAIL AND SUMMARY -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - -workflow.onComplete { - if (params.email || params.email_on_fail) { - NfcoreTemplate.email(workflow, params, summary_params, projectDir, log, multiqc_report) - } - NfcoreTemplate.summary(workflow, params, log) - if (params.hook_url) { - NfcoreTemplate.adaptivecard(workflow, params, summary_params, projectDir, log) - } -} - -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - THE END -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ diff --git a/workflows/plantpathsurveil.nf b/workflows/plantpathsurveil.nf index 84973af7..bb4d7165 100644 --- a/workflows/plantpathsurveil.nf +++ b/workflows/plantpathsurveil.nf @@ -8,7 +8,7 @@ include { MULTIQC } from '../modules/nf-core/multiqc/main' include { paramsSummaryMap } from 'plugin/nf-schema' include { paramsSummaryMultiqc } from '../subworkflows/nf-core/utils_nfcore_pipeline' include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline' -include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_plantpathsurveil_pipeline' 
+include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_pathogensurveillance_pipeline' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~