From 7d2233cb87f0cfa8c8f2590c7036b563d2d9e5f3 Mon Sep 17 00:00:00 2001 From: MaxUlysse Date: Tue, 21 Jun 2022 14:30:08 +0200 Subject: [PATCH 1/5] add checks for correct data type for params.step --- workflows/sarek.nf | 67 +++++++++++++++++++++++++++++++++++++++------- 1 file changed, 57 insertions(+), 10 deletions(-) diff --git a/workflows/sarek.nf b/workflows/sarek.nf index 388c7dee7e..fd1e044708 100644 --- a/workflows/sarek.nf +++ b/workflows/sarek.nf @@ -937,8 +937,6 @@ def extract_csv(csv_file) { } } - - Channel.from(csv_file).splitCsv(header: true) //Retrieves number of lanes by grouping together by patient and sample and counting how many entries there are for this combination .map{ row -> @@ -987,57 +985,106 @@ def extract_csv(csv_file) { meta.data_type = "fastq" meta.size = 1 // default number of splitted fastq - return [meta, [fastq_1, fastq_2]] + + if (params.step == 'mapping') return [meta, [fastq_1, fastq_2]] + else { + log.error "Samplesheet contains fastq files but step is `$params.step`. Please check your samplesheet or adjust the step parameter.\nhttps://nf-co.re/sarek/usage#input-samplesheet-configurations" + System.exit(1) + } + // start from BAM } else if (row.lane && row.bam) { meta.id = "${row.sample}-${row.lane}".toString() def bam = file(row.bam, checkIfExists: true) def CN = params.seq_center ? "CN:${params.seq_center}\\t" : '' def read_group = "\"@RG\\tID:${row_sample}_${row.lane}\\t${CN}PU:${row.lane}\\tSM:${row.sample}\\tLB:${row.sample}\\tPL:${params.seq_platform}\"" + meta.numLanes = numLanes.toInteger() meta.read_group = read_group.toString() meta.data_type = "bam" + meta.size = 1 // default number of splitted fastq - return [meta, bam] + + if (params.step == 'mapping') return [meta, bam] + else { + log.error "Samplesheet contains ubam files but step is `$params.step`. 
Please check your samplesheet or adjust the step parameter.\nhttps://nf-co.re/sarek/usage#input-samplesheet-configurations" + System.exit(1) + } + // recalibration } else if (row.table && row.cram) { meta.id = meta.sample def cram = file(row.cram, checkIfExists: true) def crai = file(row.crai, checkIfExists: true) def table = file(row.table, checkIfExists: true) + meta.data_type = "cram" - return [meta, cram, crai, table] + + if (!(params.step == 'mapping' || params.step == 'annotation')) return [meta, cram, crai, table] + else { + log.error "Samplesheet contains cram files but step is `$params.step`. Please check your samplesheet or adjust the step parameter.\nhttps://nf-co.re/sarek/usage#input-samplesheet-configurations" + System.exit(1) + } + // recalibration when skipping MarkDuplicates } else if (row.table && row.bam) { meta.id = meta.sample def bam = file(row.bam, checkIfExists: true) def bai = file(row.bai, checkIfExists: true) def table = file(row.table, checkIfExists: true) + meta.data_type = "bam" - return [meta, bam, bai, table] + + if (!(params.step == 'mapping' || params.step == 'annotation')) return [meta, bam, bai, table] + else { + log.error "Samplesheet contains bam files but step is `$params.step`. Please check your samplesheet or adjust the step parameter.\nhttps://nf-co.re/sarek/usage#input-samplesheet-configurations" + System.exit(1) + } + // prepare_recalibration or variant_calling } else if (row.cram) { meta.id = meta.sample def cram = file(row.cram, checkIfExists: true) def crai = file(row.crai, checkIfExists: true) + meta.data_type = "cram" - return [meta, cram, crai] + + if (!(params.step == 'mapping' || params.step == 'annotation')) return [meta, cram, crai] + else { + log.error "Samplesheet contains bam files but step is `$params.step`. 
Please check your samplesheet or adjust the step parameter.\nhttps://nf-co.re/sarek/usage#input-samplesheet-configurations" + System.exit(1) + } + // prepare_recalibration when skipping MarkDuplicates or `--step markduplicates` } else if (row.bam) { meta.id = meta.sample def bam = file(row.bam, checkIfExists: true) def bai = file(row.bai, checkIfExists: true) + meta.data_type = "bam" - return [meta, bam, bai] + + if (!(params.step == 'mapping' || params.step == 'annotation')) return [meta, bam, bai] + else { + log.error "Samplesheet contains bam files but step is `$params.step`. Please check your samplesheet or adjust the step parameter.\nhttps://nf-co.re/sarek/usage#input-samplesheet-configurations" + System.exit(1) + } + // annotation } else if (row.vcf) { meta.id = meta.sample def vcf = file(row.vcf, checkIfExists: true) + meta.data_type = "vcf" meta.variantcaller = row.variantcaller ?: "" - return [meta, vcf] + + if (params.step == 'annotation') return [meta, vcf] + else { + log.error "Samplesheet contains vcf but step is `$params.step`. Please check your samplesheet or adjust the step parameter.\nhttps://nf-co.re/sarek/usage#input-samplesheet-configurations" + System.exit(1) + } } else { - log.warn "Missing or unknown field in csv file header" + log.error "Missing or unknown field in csv file header. Please check your samplesheet" + System.exit(1) } } } From c8674393aacc80faa649160ca8c63d10b67bf8fc Mon Sep 17 00:00:00 2001 From: MaxUlysse Date: Tue, 21 Jun 2022 14:35:44 +0200 Subject: [PATCH 2/5] code polishing --- workflows/sarek.nf | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/workflows/sarek.nf b/workflows/sarek.nf index fd1e044708..d0f095973c 100644 --- a/workflows/sarek.nf +++ b/workflows/sarek.nf @@ -927,14 +927,12 @@ def extract_csv(csv_file) { // check that the sample sheet is not 1 line or less, because it'll skip all subsequent checks if so. 
new File(csv_file.toString()).withReader('UTF-8') { reader -> - def line, numberOfLinesInSampleSheet = 0; - while ((line = reader.readLine()) != null) { - numberOfLinesInSampleSheet++ - } - if( numberOfLinesInSampleSheet < 2){ - log.error "Sample sheet had less than two lines. The sample sheet must be a csv file with a header, so at least two lines." - System.exit(1) - } + def line, numberOfLinesInSampleSheet = 0; + while ((line = reader.readLine()) != null) {numberOfLinesInSampleSheet++} + if (numberOfLinesInSampleSheet < 2) { + log.error "Sample sheet had less than two lines. The sample sheet must be a csv file with a header, so at least two lines." + System.exit(1) + } } Channel.from(csv_file).splitCsv(header: true) From 4f53e001088c5ccd027fc3cbc7cc8e4b12f2c852 Mon Sep 17 00:00:00 2001 From: MaxUlysse Date: Tue, 21 Jun 2022 14:41:08 +0200 Subject: [PATCH 3/5] no empty --tools with --step variant_calling or --step annotation --- workflows/sarek.nf | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/workflows/sarek.nf b/workflows/sarek.nf index d0f095973c..36854f9985 100644 --- a/workflows/sarek.nf +++ b/workflows/sarek.nf @@ -80,6 +80,16 @@ if(!params.dbsnp && !params.known_indels){ } } +if (params.step == "variant_calling" && !params.tools) { + log.error "Please specify at least one tool when using `--step variant_calling`.\nhttps://nf-co.re/sarek/parameters#tools" + exit 1 +} + +if (params.step == "annotate" && !params.tools) { + log.error "Please specify at least one tool when using `--step annotate`.\nhttps://nf-co.re/sarek/parameters#tools" + exit 1 +} + // Save AWS IGenomes file containing annotation version def anno_readme = params.genomes[params.genome]?.readme if (anno_readme && file(anno_readme).exists()) { From 8d6ccef9d25803ef44b41d53b3763af09336bb21 Mon Sep 17 00:00:00 2001 From: MaxUlysse Date: Tue, 21 Jun 2022 14:44:49 +0200 Subject: [PATCH 4/5] update CHANGELOG --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git 
a/CHANGELOG.md b/CHANGELOG.md index 996256a3ad..54b328d039 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -96,6 +96,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - [#590](https://github.com/nf-core/sarek/pull/590) - Fix empty folders during scatter/gather - [#592](https://github.com/nf-core/sarek/pull/592) - Fix optional resources for Mutect2, GetPileupSummaries, and HaplotypeCaller: issue [#299](https://github.com/nf-core/sarek/issues/299), [#359](https://github.com/nf-core/sarek/issues/359), [#367](https://github.com/nf-core/sarek/issues/367) - [#598](https://github.com/nf-core/sarek/pull/598) - Remove WARNING message for config selector not matching +- [#599](https://github.com/nf-core/sarek/pull/599) - Add checks for correct data type for `params.step` +- [#599](https://github.com/nf-core/sarek/pull/599) - Add checks that `--tools` is not empty when using `--step variant_calling` or `--step annotate` ### Deprecated From ca7b07c2e1298f17eab49161c4ebe92b9adc5152 Mon Sep 17 00:00:00 2001 From: MaxUlysse Date: Tue, 21 Jun 2022 15:01:23 +0200 Subject: [PATCH 5/5] step is annotate, not annotation :facepalm: --- workflows/sarek.nf | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/workflows/sarek.nf b/workflows/sarek.nf index 36854f9985..7d0088e9e6 100644 --- a/workflows/sarek.nf +++ b/workflows/sarek.nf @@ -1028,7 +1028,7 @@ def extract_csv(csv_file) { meta.data_type = "cram" - if (!(params.step == 'mapping' || params.step == 'annotation')) return [meta, cram, crai, table] + if (!(params.step == 'mapping' || params.step == 'annotate')) return [meta, cram, crai, table] else { log.error "Samplesheet contains cram files but step is `$params.step`. 
Please check your samplesheet or adjust the step parameter.\nhttps://nf-co.re/sarek/usage#input-samplesheet-configurations" System.exit(1) @@ -1043,7 +1043,7 @@ def extract_csv(csv_file) { meta.data_type = "bam" - if (!(params.step == 'mapping' || params.step == 'annotation')) return [meta, bam, bai, table] + if (!(params.step == 'mapping' || params.step == 'annotate')) return [meta, bam, bai, table] else { log.error "Samplesheet contains bam files but step is `$params.step`. Please check your samplesheet or adjust the step parameter.\nhttps://nf-co.re/sarek/usage#input-samplesheet-configurations" System.exit(1) @@ -1057,7 +1057,7 @@ def extract_csv(csv_file) { meta.data_type = "cram" - if (!(params.step == 'mapping' || params.step == 'annotation')) return [meta, cram, crai] + if (!(params.step == 'mapping' || params.step == 'annotate')) return [meta, cram, crai] else { log.error "Samplesheet contains bam files but step is `$params.step`. Please check your samplesheet or adjust the step parameter.\nhttps://nf-co.re/sarek/usage#input-samplesheet-configurations" System.exit(1) @@ -1071,7 +1071,7 @@ def extract_csv(csv_file) { meta.data_type = "bam" - if (!(params.step == 'mapping' || params.step == 'annotation')) return [meta, bam, bai] + if (!(params.step == 'mapping' || params.step == 'annotate')) return [meta, bam, bai] else { log.error "Samplesheet contains bam files but step is `$params.step`. Please check your samplesheet or adjust the step parameter.\nhttps://nf-co.re/sarek/usage#input-samplesheet-configurations" System.exit(1) @@ -1085,9 +1085,9 @@ def extract_csv(csv_file) { meta.data_type = "vcf" meta.variantcaller = row.variantcaller ?: "" - if (params.step == 'annotation') return [meta, vcf] + if (params.step == 'annotate') return [meta, vcf] else { - log.error "Samplesheet contains vcf but step is `$params.step`. 
Please check your samplesheet or adjust the step parameter.\nhttps://nf-co.re/sarek/usage#input-samplesheet-configurations" + log.error "Samplesheet contains vcf files but step is `$params.step`. Please check your samplesheet or adjust the step parameter.\nhttps://nf-co.re/sarek/usage#input-samplesheet-configurations" System.exit(1) } } else {