diff --git a/CHANGELOG.md b/CHANGELOG.md index a16778ad94..6fae288f74 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -39,6 +39,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - [#613](https://github.com/nf-core/sarek/pull/613) - Added params `--vep_version` to allow more configuration on the vep container definition - [#620](https://github.com/nf-core/sarek/pull/620) - Added checks for sex information when running a CNV tools - [#623](https://github.com/nf-core/sarek/pull/623) - Additional checks of data in the input sample sheet. +- [#629](https://github.com/nf-core/sarek/pull/629) - Added checks to catch inconsistency between supplied samples and requested tools. ### Changed diff --git a/workflows/sarek.nf b/workflows/sarek.nf index d46fbe5ea9..1b38654e69 100644 --- a/workflows/sarek.nf +++ b/workflows/sarek.nf @@ -1028,15 +1028,20 @@ def extract_csv(csv_file) { } if (!sample2patient.containsKey(row.sample.toString())) { sample2patient[row.sample.toString()] = row.patient.toString() - } else if (sample2patient[row.sample.toString()] !== row.patient.toString()) { + } else if (sample2patient[row.sample.toString()] != row.patient.toString()) { log.error('The sample "' + row.sample.toString() + '" is registered for both patient "' + row.patient.toString() + '" and "' + sample2patient[row.sample.toString()] + '" in the sample sheet.') System.exit(1) } } + sample_count_all = 0 + sample_count_normal = 0 + sample_count_tumor = 0 + Channel.from(csv_file).splitCsv(header: true) //Retrieves number of lanes by grouping together by patient and sample and counting how many entries there are for this combination .map{ row -> + sample_count_all++ if (!(row.patient && row.sample)){ log.error "Missing field in csv file header. The csv file must have fields named 'patient' and 'sample'." System.exit(1) @@ -1048,6 +1053,7 @@ def extract_csv(csv_file) { [rows, size] }.transpose() .map{ row, numLanes -> //from here do the usual thing for csv parsing + def meta = [:] // Meta data to identify samplesheet @@ -1066,6 +1072,34 @@ def extract_csv(csv_file) { if (row.status) meta.status = row.status.toInteger() else meta.status = 0 + if (meta.status == 0) sample_count_normal++ + else sample_count_tumor++ + + // Two checks for ensuring that the pipeline stops with a meaningful error message if + // 1. the sample-sheet only contains normal-samples, but some of the requested tools require tumor-samples, and + // 2. the sample-sheet only contains tumor-samples, but some of the requested tools require normal-samples. + if ((sample_count_normal == sample_count_all) && params.tools) { // In this case, the sample-sheet contains no tumor-samples + def tools_tumor = ['ascat', 'controlfreec', 'mutect2', 'msisensorpro'] + def tools_tumor_asked = [] + tools_tumor.each{ tool -> + if (params.tools.contains(tool)) tools_tumor_asked.add(tool) + } + if (!tools_tumor_asked.isEmpty()) { + log.error('The sample-sheet only contains normal-samples, but the following tools, which were requested with "--tools", expect at least one tumor-sample : ' + tools_tumor_asked.join(", ")) + System.exit(1) + } + } else if ((sample_count_tumor == sample_count_all) && params.tools) { // In this case, the sample-sheet contains no normal/germline-samples + def tools_requiring_normal_samples = ['ascat', 'deepvariant', 'haplotypecaller'] + def requested_tools_requiring_normal_samples = [] + tools_requiring_normal_samples.each{ tool_requiring_normal_samples -> + if (params.tools.contains(tool_requiring_normal_samples)) requested_tools_requiring_normal_samples.add(tool_requiring_normal_samples) + } + if (!requested_tools_requiring_normal_samples.isEmpty()) { + log.error('The sample-sheet only contains tumor-samples, but the following tools, which were requested by the option "tools", expect at least one normal-sample : ' + requested_tools_requiring_normal_samples.join(", ")) + System.exit(1) + } + } + // mapping with fastq if (row.lane && row.fastq_2) { meta.id = "${row.sample}-${row.lane}".toString()