Skip to content

Commit

Permalink
Merge pull request #599 from maxulysse/dev_csv_check
Browse files Browse the repository at this point in the history
Add checks for correct data type for params.step
  • Loading branch information
maxulysse authored Jun 23, 2022
2 parents 67c2546 + ec56b45 commit a1e271c
Show file tree
Hide file tree
Showing 2 changed files with 75 additions and 18 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- [#590](https://github.com/nf-core/sarek/pull/590) - Fix empty folders during scatter/gather
- [#592](https://github.com/nf-core/sarek/pull/592) - Fix optional resources for Mutect2, GetPileupSummaries, and HaplotypeCaller: issue [#299](https://github.com/nf-core/sarek/issues/299), [#359](https://github.com/nf-core/sarek/issues/359), [#367](https://github.com/nf-core/sarek/issues/367)
- [#598](https://github.com/nf-core/sarek/pull/598) - Remove WARNING message for config selector not matching
- [#599](https://github.com/nf-core/sarek/pull/599) - Add checks for correct data type for `params.step`
- [#599](https://github.com/nf-core/sarek/pull/599) - Add checks that `--tools` is not empty when using `--step variant_calling` or `--step annotate`
- [#600](https://github.com/nf-core/sarek/pull/600) - Remove `nf-core lint` warnings

### Deprecated
Expand Down
91 changes: 73 additions & 18 deletions workflows/sarek.nf
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,16 @@ if(!params.dbsnp && !params.known_indels){
}
}

// Fail fast when the pipeline is started at variant calling or annotation
// without any tool selected via `--tools`.
// NOTE(review): the annotation step is spelled `annotate` in every other step
// check in this file (see the samplesheet checks in extract_csv), so the guard
// compares against `annotate`; comparing against `annotation` never matched.
if (params.step == "variant_calling" && !params.tools) {
    log.error "Please specify at least one tool when using `--step variant_calling`.\nhttps://nf-co.re/sarek/parameters#tools"
    exit 1
}

if (params.step == "annotate" && !params.tools) {
    log.error "Please specify at least one tool when using `--step annotate`.\nhttps://nf-co.re/sarek/parameters#tools"
    exit 1
}

// Save AWS IGenomes file containing annotation version
def anno_readme = params.genomes[params.genome]?.readme
if (anno_readme && file(anno_readme).exists()) {
Expand Down Expand Up @@ -943,18 +953,14 @@ def extract_csv(csv_file) {

// check that the sample sheet is not 1 line or less, because it'll skip all subsequent checks if so.
new File(csv_file.toString()).withReader('UTF-8') { reader ->
def line, numberOfLinesInSampleSheet = 0;
while ((line = reader.readLine()) != null) {
numberOfLinesInSampleSheet++
}
if( numberOfLinesInSampleSheet < 2){
log.error "Sample sheet had less than two lines. The sample sheet must be a csv file with a header, so at least two lines."
System.exit(1)
}
def line, numberOfLinesInSampleSheet = 0;
while ((line = reader.readLine()) != null) {numberOfLinesInSampleSheet++}
if (numberOfLinesInSampleSheet < 2) {
log.error "Sample sheet had less than two lines. The sample sheet must be a csv file with a header, so at least two lines."
System.exit(1)
}
}



Channel.from(csv_file).splitCsv(header: true)
//Retrieves number of lanes by grouping together by patient and sample and counting how many entries there are for this combination
.map{ row ->
Expand Down Expand Up @@ -1003,57 +1009,106 @@ def extract_csv(csv_file) {
meta.data_type = "fastq"

meta.size = 1 // default number of splitted fastq
return [meta, [fastq_1, fastq_2]]

if (params.step == 'mapping') return [meta, [fastq_1, fastq_2]]
else {
log.error "Samplesheet contains fastq files but step is `$params.step`. Please check your samplesheet or adjust the step parameter.\nhttps://nf-co.re/sarek/usage#input-samplesheet-configurations"
System.exit(1)
}

// start from BAM
} else if (row.lane && row.bam) {
meta.id = "${row.sample}-${row.lane}".toString()
def bam = file(row.bam, checkIfExists: true)
def CN = params.seq_center ? "CN:${params.seq_center}\\t" : ''
def read_group = "\"@RG\\tID:${row_sample}_${row.lane}\\t${CN}PU:${row.lane}\\tSM:${row.sample}\\tLB:${row.sample}\\tPL:${params.seq_platform}\""

meta.numLanes = numLanes.toInteger()
meta.read_group = read_group.toString()
meta.data_type = "bam"

meta.size = 1 // default number of splitted fastq
return [meta, bam]

if (params.step == 'mapping') return [meta, bam]
else {
log.error "Samplesheet contains ubam files but step is `$params.step`. Please check your samplesheet or adjust the step parameter.\nhttps://nf-co.re/sarek/usage#input-samplesheet-configurations"
System.exit(1)
}

// recalibration
} else if (row.table && row.cram) {
meta.id = meta.sample
def cram = file(row.cram, checkIfExists: true)
def crai = file(row.crai, checkIfExists: true)
def table = file(row.table, checkIfExists: true)

meta.data_type = "cram"
return [meta, cram, crai, table]

if (!(params.step == 'mapping' || params.step == 'annotate')) return [meta, cram, crai, table]
else {
log.error "Samplesheet contains cram files but step is `$params.step`. Please check your samplesheet or adjust the step parameter.\nhttps://nf-co.re/sarek/usage#input-samplesheet-configurations"
System.exit(1)
}

// recalibration when skipping MarkDuplicates
} else if (row.table && row.bam) {
meta.id = meta.sample
def bam = file(row.bam, checkIfExists: true)
def bai = file(row.bai, checkIfExists: true)
def table = file(row.table, checkIfExists: true)

meta.data_type = "bam"
return [meta, bam, bai, table]

if (!(params.step == 'mapping' || params.step == 'annotate')) return [meta, bam, bai, table]
else {
log.error "Samplesheet contains bam files but step is `$params.step`. Please check your samplesheet or adjust the step parameter.\nhttps://nf-co.re/sarek/usage#input-samplesheet-configurations"
System.exit(1)
}

// prepare_recalibration or variant_calling
} else if (row.cram) {
meta.id = meta.sample
def cram = file(row.cram, checkIfExists: true)
def crai = file(row.crai, checkIfExists: true)

meta.data_type = "cram"
return [meta, cram, crai]

if (!(params.step == 'mapping' || params.step == 'annotate')) return [meta, cram, crai]
else {
log.error "Samplesheet contains bam files but step is `$params.step`. Please check your samplesheet or adjust the step parameter.\nhttps://nf-co.re/sarek/usage#input-samplesheet-configurations"
System.exit(1)
}

// prepare_recalibration when skipping MarkDuplicates or `--step markduplicates`
} else if (row.bam) {
meta.id = meta.sample
def bam = file(row.bam, checkIfExists: true)
def bai = file(row.bai, checkIfExists: true)

meta.data_type = "bam"
return [meta, bam, bai]

if (!(params.step == 'mapping' || params.step == 'annotate')) return [meta, bam, bai]
else {
log.error "Samplesheet contains bam files but step is `$params.step`. Please check your samplesheet or adjust the step parameter.\nhttps://nf-co.re/sarek/usage#input-samplesheet-configurations"
System.exit(1)
}

// annotation
} else if (row.vcf) {
meta.id = meta.sample
def vcf = file(row.vcf, checkIfExists: true)

meta.data_type = "vcf"
meta.variantcaller = row.variantcaller ?: ""
return [meta, vcf]

if (params.step == 'annotate') return [meta, vcf]
else {
log.error "Samplesheet contains vcf files but step is `$params.step`. Please check your samplesheet or adjust the step parameter.\nhttps://nf-co.re/sarek/usage#input-samplesheet-configurations"
System.exit(1)
}
} else {
log.warn "Missing or unknown field in csv file header"
log.warn "Missing or unknown field in csv file header. Please check your samplesheet"
System.exit(1)
}
}
}
Expand Down

0 comments on commit a1e271c

Please sign in to comment.