From ee8050afdfc4bf3ebd4284645344e9f9f58804a5 Mon Sep 17 00:00:00 2001 From: asp8200 Date: Tue, 12 Jul 2022 15:16:05 +0200 Subject: [PATCH 1/5] Checks for consistency between requested tools and supplied samples --- workflows/sarek.nf | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/workflows/sarek.nf b/workflows/sarek.nf index d46fbe5ea9..ed09d3da5a 100644 --- a/workflows/sarek.nf +++ b/workflows/sarek.nf @@ -1034,9 +1034,14 @@ def extract_csv(csv_file) { } } + numberOfSamples = 0 + numberOfNormalSamples = 0 + numberOfTumorSamples = 0 + Channel.from(csv_file).splitCsv(header: true) //Retrieves number of lanes by grouping together by patient and sample and counting how many entries there are for this combination .map{ row -> + numberOfSamples++ if (!(row.patient && row.sample)){ log.error "Missing field in csv file header. The csv file must have fields named 'patient' and 'sample'." System.exit(1) @@ -1048,6 +1053,7 @@ def extract_csv(csv_file) { [rows, size] }.transpose() .map{ row, numLanes -> //from here do the usual thing for csv parsing + def meta = [:] // Meta data to identify samplesheet @@ -1066,6 +1072,34 @@ def extract_csv(csv_file) { if (row.status) meta.status = row.status.toInteger() else meta.status = 0 + if (meta.status == 0) numberOfNormalSamples++ + else numberOfTumorSamples++ + + // Two checks for ensuring that the pipeline stops with a meaningful error message if + // 1. the sample-sheet only contains normal-samples, but some of the requested tools require tumor-samples, and + // 2. the sample-sheet only contains tumor-samples, but some of the requested tools require normal/germline-samples. + if ((numberOfNormalSamples == numberOfSamples) && params.tools) { // In this case, the sample-sheet contains no tumor-samples + def tools_requiring_tumor_samples = ['ascat', 'controlfreec', 'mutect2', 'msisensorpro'] + def requested_tools_requiring_tumor_samples = [] + tools_requiring_tumor_samples.each{ tool_requiring_tumor_samples -> + if (params.tools.contains(tool_requiring_tumor_samples)) requested_tools_requiring_tumor_samples.add(tool_requiring_tumor_samples) + } + if (!requested_tools_requiring_tumor_samples.isEmpty()) { + log.error('The sample-sheet only contains normal-samples, but the following tools, which were requested by the option "tools", expect at least one tumor-sample : ' + requested_tools_requiring_tumor_samples.join(", ")) + System.exit(1) + } + } else if ((numberOfTumorSamples == numberOfSamples) && params.tools) { // In this case, the sample-sheet contains no normal/germline-samples + def tools_requiring_normal_samples = ['ascat', 'deepvariant', 'haplotypecaller'] + def requested_tools_requiring_normal_samples = [] + tools_requiring_normal_samples.each{ tool_requiring_normal_samples -> + if (params.tools.contains(tool_requiring_normal_samples)) requested_tools_requiring_normal_samples.add(tool_requiring_normal_samples) + } + if (!requested_tools_requiring_normal_samples.isEmpty()) { + log.error('The sample-sheet only contains tumor-samples, but the following tools, which were requested by the option "tools", expect at least one normal-sample : ' + requested_tools_requiring_normal_samples.join(", ")) + System.exit(1) + } + } + // mapping with fastq if (row.lane && row.fastq_2) { meta.id = "${row.sample}-${row.lane}".toString() From 9aff8776cd4fc0547f7d1a2b16a5fa6ef6ccf54e Mon Sep 17 00:00:00 2001 From: asp8200 Date: Tue, 12 Jul 2022 15:52:02 +0200 Subject: [PATCH 2/5] Renaming counter-variables --- workflows/sarek.nf | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/workflows/sarek.nf b/workflows/sarek.nf index ed09d3da5a..e3b3d1673d 100644 --- a/workflows/sarek.nf +++ b/workflows/sarek.nf @@ -1034,14 +1034,14 @@ def extract_csv(csv_file) { } } - numberOfSamples = 0 - numberOfNormalSamples = 0 - numberOfTumorSamples = 0 + sample_count_all = 0 + sample_count_normal = 0 + sample_count_tumor = 0 Channel.from(csv_file).splitCsv(header: true) //Retrieves number of lanes by grouping together by patient and sample and counting how many entries there are for this combination .map{ row -> - numberOfSamples++ + sample_count_all++ if (!(row.patient && row.sample)){ log.error "Missing field in csv file header. The csv file must have fields named 'patient' and 'sample'." System.exit(1) @@ -1072,13 +1072,13 @@ def extract_csv(csv_file) { if (row.status) meta.status = row.status.toInteger() else meta.status = 0 - if (meta.status == 0) numberOfNormalSamples++ - else numberOfTumorSamples++ + if (meta.status == 0) sample_count_normal++ + else sample_count_tumor++ // Two checks for ensuring that the pipeline stops with a meaningful error message if // 1. the sample-sheet only contains normal-samples, but some of the requested tools require tumor-samples, and - // 2. the sample-sheet only contains tumor-samples, but some of the requested tools require normal/germline-samples. - if ((numberOfNormalSamples == numberOfSamples) && params.tools) { // In this case, the sample-sheet contains no tumor-samples + // 2. the sample-sheet only contains tumor-samples, but some of the requested tools require normal-samples. + if ((sample_count_normal == sample_count_all) && params.tools) { // In this case, the sample-sheet contains no tumor-samples def tools_requiring_tumor_samples = ['ascat', 'controlfreec', 'mutect2', 'msisensorpro'] def requested_tools_requiring_tumor_samples = [] tools_requiring_tumor_samples.each{ tool_requiring_tumor_samples -> @@ -1088,7 +1088,7 @@ def extract_csv(csv_file) { log.error('The sample-sheet only contains normal-samples, but the following tools, which were requested by the option "tools", expect at least one tumor-sample : ' + requested_tools_requiring_tumor_samples.join(", ")) System.exit(1) } - } else if ((numberOfTumorSamples == numberOfSamples) && params.tools) { // In this case, the sample-sheet contains no normal/germline-samples + } else if ((sample_count_tumor == sample_count_all) && params.tools) { // In this case, the sample-sheet contains no normal/germline-samples def tools_requiring_normal_samples = ['ascat', 'deepvariant', 'haplotypecaller'] def requested_tools_requiring_normal_samples = [] tools_requiring_normal_samples.each{ tool_requiring_normal_samples -> From c0844d4cb5aba6695bf2e71b4d486b3affbd50fe Mon Sep 17 00:00:00 2001 From: asp8200 Date: Tue, 12 Jul 2022 16:27:15 +0200 Subject: [PATCH 3/5] Updating changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index a16778ad94..6fae288f74 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -39,6 +39,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - [#613](https://github.com/nf-core/sarek/pull/613) - Added params `--vep_version` to allow more configuration on the vep container definition - [#620](https://github.com/nf-core/sarek/pull/620) - Added checks for sex information when running a CNV tools - [#623](https://github.com/nf-core/sarek/pull/623) - Additional checks of data in the input sample sheet. +- [#629](https://github.com/nf-core/sarek/pull/629) - Added checks to catch inconsistency between supplied samples and requested tools. ### Changed From f0e361dd3e99f71d13aa6ea970d792a583d6ff35 Mon Sep 17 00:00:00 2001 From: Anders Sune Pedersen <37172585+asp8200@users.noreply.github.com> Date: Tue, 12 Jul 2022 17:27:55 +0200 Subject: [PATCH 4/5] Update workflows/sarek.nf shorter variable names Co-authored-by: Maxime U. Garcia --- workflows/sarek.nf | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/workflows/sarek.nf b/workflows/sarek.nf index e3b3d1673d..23c94c53f4 100644 --- a/workflows/sarek.nf +++ b/workflows/sarek.nf @@ -1079,13 +1079,13 @@ def extract_csv(csv_file) { // 1. the sample-sheet only contains normal-samples, but some of the requested tools require tumor-samples, and // 2. the sample-sheet only contains tumor-samples, but some of the requested tools require normal-samples. if ((sample_count_normal == sample_count_all) && params.tools) { // In this case, the sample-sheet contains no tumor-samples - def tools_requiring_tumor_samples = ['ascat', 'controlfreec', 'mutect2', 'msisensorpro'] - def requested_tools_requiring_tumor_samples = [] - tools_requiring_tumor_samples.each{ tool_requiring_tumor_samples -> - if (params.tools.contains(tool_requiring_tumor_samples)) requested_tools_requiring_tumor_samples.add(tool_requiring_tumor_samples) + def tools_tumor = ['ascat', 'controlfreec', 'mutect2', 'msisensorpro'] + def tools_tumor_asked = [] + tools_tumor.each{ tool -> + if (params.tools.contains(tool)) tools_tumor_asked.add(tool) } - if (!requested_tools_requiring_tumor_samples.isEmpty()) { - log.error('The sample-sheet only contains normal-samples, but the following tools, which were requested by the option "tools", expect at least one tumor-sample : ' + requested_tools_requiring_tumor_samples.join(", ")) + if (!tools_tumor_asked.isEmpty()) { + log.error('The sample-sheet only contains normal-samples, but the following tools, which were requested with "--tools", expect at least one tumor-sample : ' + tools_tumor_asked.join(", ")) System.exit(1) } } else if ((sample_count_tumor == sample_count_all) && params.tools) { // In this case, the sample-sheet contains no normal/germline-samples From a3b769ca12dc63088c45a8be207f112a4f282de7 Mon Sep 17 00:00:00 2001 From: asp8200 Date: Tue, 12 Jul 2022 20:04:49 +0200 Subject: [PATCH 5/5] Fixing some check of the sample-sheet --- workflows/sarek.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflows/sarek.nf b/workflows/sarek.nf index 23c94c53f4..1b38654e69 100644 --- a/workflows/sarek.nf +++ b/workflows/sarek.nf @@ -1028,7 +1028,7 @@ def extract_csv(csv_file) { } if (!sample2patient.containsKey(row.sample.toString())) { sample2patient[row.sample.toString()] = row.patient.toString() - } else if (sample2patient[row.sample.toString()] !== row.patient.toString()) { + } else if (sample2patient[row.sample.toString()] != row.patient.toString()) { log.error('The sample "' + row.sample.toString() + '" is registered for both patient "' + row.patient.toString() + '" and "' + sample2patient[row.sample.toString()] + '" in the sample sheet.') System.exit(1) }