diff --git a/CHANGELOG.md b/CHANGELOG.md index ac6f346bcd..ac5c45accb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -99,6 +99,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - [#590](https://github.com/nf-core/sarek/pull/590) - Fix empty folders during scatter/gather - [#592](https://github.com/nf-core/sarek/pull/592) - Fix optional resources for Mutect2, GetPileupSummaries, and HaplotypeCaller: issue [#299](https://github.com/nf-core/sarek/issues/299), [#359](https://github.com/nf-core/sarek/issues/359), [#367](https://github.com/nf-core/sarek/issues/367) - [#598](https://github.com/nf-core/sarek/pull/598) - Remove WARNING message for config selector not matching +- [#599](https://github.com/nf-core/sarek/pull/599) - Add checks for correct data type for `params.step` +- [#599](https://github.com/nf-core/sarek/pull/599) - Add checks for no empty `--tools` with `--step variant_calling` or `--step annotate` - [#600](https://github.com/nf-core/sarek/pull/600) - Remove `nf-core lint` warnings ### Deprecated diff --git a/workflows/sarek.nf b/workflows/sarek.nf index 9e2c3b3777..cb24c2b797 100644 --- a/workflows/sarek.nf +++ b/workflows/sarek.nf @@ -80,6 +80,16 @@ if(!params.dbsnp && !params.known_indels){ } } +if (params.step == "variant_calling" && !params.tools) { + log.error "Please specify at least one tool when using `--step variant_calling`.\nhttps://nf-co.re/sarek/parameters#tools" + exit 1 +} + +if (params.step == "annotate" && !params.tools) { + log.error "Please specify at least one tool when using `--step annotate`.\nhttps://nf-co.re/sarek/parameters#tools" + exit 1 +} + // Save AWS IGenomes file containing annotation version def anno_readme = params.genomes[params.genome]?.readme if (anno_readme && file(anno_readme).exists()) { @@ -943,18 +953,14 @@ def extract_csv(csv_file) { // check that the sample sheet is not 1 line or less, because it'll skip all subsequent checks if so. 
new File(csv_file.toString()).withReader('UTF-8') { reader -> - def line, numberOfLinesInSampleSheet = 0; - while ((line = reader.readLine()) != null) { - numberOfLinesInSampleSheet++ - } - if( numberOfLinesInSampleSheet < 2){ - log.error "Sample sheet had less than two lines. The sample sheet must be a csv file with a header, so at least two lines." - System.exit(1) - } + def line, numberOfLinesInSampleSheet = 0; + while ((line = reader.readLine()) != null) {numberOfLinesInSampleSheet++} + if (numberOfLinesInSampleSheet < 2) { + log.error "Sample sheet had less than two lines. The sample sheet must be a csv file with a header, so at least two lines." + System.exit(1) + } } - - Channel.from(csv_file).splitCsv(header: true) //Retrieves number of lanes by grouping together by patient and sample and counting how many entries there are for this combination .map{ row -> @@ -1003,57 +1009,106 @@ def extract_csv(csv_file) { meta.data_type = "fastq" meta.size = 1 // default number of splitted fastq - return [meta, [fastq_1, fastq_2]] + + if (params.step == 'mapping') return [meta, [fastq_1, fastq_2]] + else { + log.error "Samplesheet contains fastq files but step is `$params.step`. Please check your samplesheet or adjust the step parameter.\nhttps://nf-co.re/sarek/usage#input-samplesheet-configurations" + System.exit(1) + } + // start from BAM } else if (row.lane && row.bam) { meta.id = "${row.sample}-${row.lane}".toString() def bam = file(row.bam, checkIfExists: true) def CN = params.seq_center ? 
"CN:${params.seq_center}\\t" : '' def read_group = "\"@RG\\tID:${row_sample}_${row.lane}\\t${CN}PU:${row.lane}\\tSM:${row.sample}\\tLB:${row.sample}\\tPL:${params.seq_platform}\"" + meta.numLanes = numLanes.toInteger() meta.read_group = read_group.toString() meta.data_type = "bam" + meta.size = 1 // default number of splitted fastq - return [meta, bam] + + if (params.step == 'mapping') return [meta, bam] + else { + log.error "Samplesheet contains ubam files but step is `$params.step`. Please check your samplesheet or adjust the step parameter.\nhttps://nf-co.re/sarek/usage#input-samplesheet-configurations" + System.exit(1) + } + // recalibration } else if (row.table && row.cram) { meta.id = meta.sample def cram = file(row.cram, checkIfExists: true) def crai = file(row.crai, checkIfExists: true) def table = file(row.table, checkIfExists: true) + meta.data_type = "cram" - return [meta, cram, crai, table] + + if (!(params.step == 'mapping' || params.step == 'annotate')) return [meta, cram, crai, table] + else { + log.error "Samplesheet contains cram files but step is `$params.step`. Please check your samplesheet or adjust the step parameter.\nhttps://nf-co.re/sarek/usage#input-samplesheet-configurations" + System.exit(1) + } + // recalibration when skipping MarkDuplicates } else if (row.table && row.bam) { meta.id = meta.sample def bam = file(row.bam, checkIfExists: true) def bai = file(row.bai, checkIfExists: true) def table = file(row.table, checkIfExists: true) + meta.data_type = "bam" - return [meta, bam, bai, table] + + if (!(params.step == 'mapping' || params.step == 'annotate')) return [meta, bam, bai, table] + else { + log.error "Samplesheet contains bam files but step is `$params.step`. 
Please check your samplesheet or adjust the step parameter.\nhttps://nf-co.re/sarek/usage#input-samplesheet-configurations" + System.exit(1) + } + // prepare_recalibration or variant_calling } else if (row.cram) { meta.id = meta.sample def cram = file(row.cram, checkIfExists: true) def crai = file(row.crai, checkIfExists: true) + meta.data_type = "cram" - return [meta, cram, crai] + + if (!(params.step == 'mapping' || params.step == 'annotate')) return [meta, cram, crai] + else { + log.error "Samplesheet contains cram files but step is `$params.step`. Please check your samplesheet or adjust the step parameter.\nhttps://nf-co.re/sarek/usage#input-samplesheet-configurations" + System.exit(1) + } + // prepare_recalibration when skipping MarkDuplicates or `--step markduplicates` } else if (row.bam) { meta.id = meta.sample def bam = file(row.bam, checkIfExists: true) def bai = file(row.bai, checkIfExists: true) + meta.data_type = "bam" - return [meta, bam, bai] + + if (!(params.step == 'mapping' || params.step == 'annotate')) return [meta, bam, bai] + else { + log.error "Samplesheet contains bam files but step is `$params.step`. Please check your samplesheet or adjust the step parameter.\nhttps://nf-co.re/sarek/usage#input-samplesheet-configurations" + System.exit(1) + } + // annotation } else if (row.vcf) { meta.id = meta.sample def vcf = file(row.vcf, checkIfExists: true) + meta.data_type = "vcf" meta.variantcaller = row.variantcaller ?: "" - return [meta, vcf] + + if (params.step == 'annotate') return [meta, vcf] + else { + log.error "Samplesheet contains vcf files but step is `$params.step`. Please check your samplesheet or adjust the step parameter.\nhttps://nf-co.re/sarek/usage#input-samplesheet-configurations" + System.exit(1) + } } else { - log.warn "Missing or unknown field in csv file header" + log.error "Missing or unknown field in csv file header. Please check your samplesheet" + System.exit(1) } } }