Skip to content

Commit

Permalink
Merge pull request #1122 from maxulysse/annotation_cache
Browse files: browse the repository at this point in the history
FEAT: Use annotation-cache and update VEP to v110
  • Loading branch information
maxulysse authored Aug 17, 2023
2 parents 66af584 + 6ef8a46 commit 2934fcf
Show file tree
Hide file tree
Showing 13 changed files with 73 additions and 127 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- [#1158](https://github.com/nf-core/sarek/pull/1158) - Add preprint
- [#1159](https://github.com/nf-core/sarek/pull/1159) - ISMB Poster
- [#1173](https://github.com/nf-core/sarek/pull/1173) - CI tests for VQSR track with stub runs
- [#1122](https://github.com/nf-core/sarek/pull/1122) - Add `annotation cache` functionality

### Changed

Expand Down
40 changes: 8 additions & 32 deletions conf/igenomes.config
Original file line number Diff line number Diff line change
Expand Up @@ -37,11 +37,9 @@ params {
mappability = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Annotation/Control-FREEC/out100m2_hg19.gem"
snpeff_db = 87
snpeff_genome = 'GRCh37'
snpeff_version = '5.1'
vep_cache_version = 108
vep_cache_version = 110
vep_genome = 'GRCh37'
vep_species = 'homo_sapiens'
vep_version = '108.2'
}
'GATK.GRCh38' {
ascat_alleles = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/ASCAT/G1000_alleles_hg38.zip"
Expand Down Expand Up @@ -74,34 +72,28 @@ params {
pon_tbi = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/GATKBundle/1000g_pon.hg38.vcf.gz.tbi"
snpeff_db = 105
snpeff_genome = 'GRCh38'
snpeff_version = '5.1'
vep_cache_version = 108
vep_cache_version = 110
vep_genome = 'GRCh38'
vep_species = 'homo_sapiens'
vep_version = '108.2'
}
'Ensembl.GRCh37' {
bwa = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/BWAIndex/version0.6.0/"
fasta = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/WholeGenomeFasta/genome.fa"
readme = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Annotation/README.txt"
snpeff_db = 87
snpeff_genome = 'GRCh37'
snpeff_version = '5.1'
vep_cache_version = 108
vep_cache_version = 110
vep_genome = 'GRCh37'
vep_species = 'homo_sapiens'
vep_version = '108.2'
}
'NCBI.GRCh38' {
bwa = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/BWAIndex/version0.6.0/"
fasta = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/WholeGenomeFasta/genome.fa"
snpeff_db = 105
snpeff_genome = 'GRCh38'
snpeff_version = '5.1'
vep_cache_version = 108
vep_cache_version = 110
vep_genome = 'GRCh38'
vep_species = 'homo_sapiens'
vep_version = '108.2'
}
'CHM13' {
fasta = "${params.igenomes_base}/Homo_sapiens/UCSC/CHM13/Sequence/WholeGenomeFasta/genome.fa"
Expand All @@ -123,11 +115,9 @@ params {
readme = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Annotation/README.txt"
snpeff_db = 99
snpeff_genome = 'GRCm38'
snpeff_version = '5.1'
vep_cache_version = 102
vep_genome = 'GRCm38'
vep_species = 'mus_musculus'
vep_version = '108.2'
}
'TAIR10' {
bwa = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/BWAIndex/version0.6.0/"
Expand All @@ -145,34 +135,28 @@ params {
readme = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Annotation/README.txt"
snpeff_db = 75
snpeff_genome = 'UMD3.1'
snpeff_version = '5.1'
vep_cache_version = 94
vep_genome = 'UMD3.1'
vep_species = 'bos_taurus'
vep_version = '108.2'
}
'WBcel235' {
bwa = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/BWAIndex/version0.6.0/"
fasta = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/WholeGenomeFasta/genome.fa"
snpeff_db = 105
snpeff_genome = 'WBcel235'
snpeff_version = '5.1'
vep_cache_version = 108
vep_cache_version = 110
vep_genome = 'WBcel235'
vep_species = 'caenorhabditis_elegans'
vep_version = '108.2'
}
'CanFam3.1' {
bwa = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/BWAIndex/version0.6.0/"
fasta = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/WholeGenomeFasta/genome.fa"
readme = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Annotation/README.txt"
snpeff_db = 99
snpeff_genome = 'CanFam3.1'
snpeff_version = '5.1'
vep_cache_version = 104
vep_genome = 'CanFam3.1'
vep_species = 'canis_lupus_familiaris'
vep_version = '108.2'
}
'GRCz10' {
bwa = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/BWAIndex/version0.6.0/"
Expand Down Expand Up @@ -228,11 +212,9 @@ params {
fasta = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/WholeGenomeFasta/genome.fa"
snpeff_db = 105
snpeff_genome = 'R64-1-1'
snpeff_version = '5.1'
vep_cache_version = 108
vep_cache_version = 110
vep_genome = 'R64-1-1'
vep_species = 'saccharomyces_cerevisiae'
vep_version = '108.2'
}
'EF2' {
bwa = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/BWAIndex/version0.6.0/"
Expand All @@ -258,35 +240,29 @@ params {
fasta = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/WholeGenomeFasta/genome.fa"
snpeff_db = 105
snpeff_genome = 'GRCh38'
snpeff_version = '5.1'
vep_cache_version = 108
vep_cache_version = 110
vep_genome = 'GRCh38'
vep_species = 'homo_sapiens'
vep_version = '108.2'
}
'hg19' {
bwa = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/BWAIndex/version0.6.0/"
fasta = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/WholeGenomeFasta/genome.fa"
readme = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Annotation/README.txt"
snpeff_db = 87
snpeff_genome = 'GRCh37'
snpeff_version = '5.1'
vep_cache_version = 108
vep_cache_version = 110
vep_genome = 'GRCh37'
vep_species = 'homo_sapiens'
vep_version = '108.2'
}
'mm10' {
bwa = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/BWAIndex/version0.6.0/"
fasta = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/WholeGenomeFasta/genome.fa"
readme = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Annotation/README.txt"
snpeff_db = 99
snpeff_genome = 'GRCm38'
snpeff_version = '5.1'
vep_cache_version = 102
vep_genome = 'GRCm38'
vep_species = 'mus_musculus'
vep_version = '108.2'
}
'bosTau8' {
bwa = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/BWAIndex/version0.6.0/"
Expand Down
2 changes: 0 additions & 2 deletions conf/modules/annotate.config
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@ process {
withName: 'SNPEFF_SNPEFF' {
ext.prefix = { vcf.baseName - ".vcf" + "_snpEff" }
ext.args = '-nodownload -canon -v'
if (!params.snpeff_cache && !params.download_cache) container = { params.snpeff_genome ? "docker.io/nfcore/snpeff:${params.snpeff_version}.${params.snpeff_genome}" : "docker.io/nfcore/snpeff:${params.snpeff_version}.${params.genome}" }
publishDir = [
[
mode: params.publish_dir_mode,
Expand All @@ -46,7 +45,6 @@ process {
].join(' ').trim() }
// If just VEP: <vcf prefix>_VEP.ann.vcf
ext.prefix = { vcf.baseName - ".vcf" + "_VEP.ann" }
if (!params.vep_cache && !params.download_cache) container = { params.vep_genome ? "docker.io/nfcore/vep:${params.vep_version}.${params.vep_genome}" : "docker.io/nfcore/vep:${params.vep_version}.${params.genome}" }
publishDir = [
[
mode: params.publish_dir_mode,
Expand Down
8 changes: 4 additions & 4 deletions conf/test.config
Original file line number Diff line number Diff line change
Expand Up @@ -29,20 +29,20 @@ params {
germline_resource = "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/vcf/gnomAD.r2.1.1.vcf.gz"
intervals = "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.interval_list"
known_indels = "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/vcf/mills_and_1000G.indels.vcf.gz"
snpeff_cache = null
snpeff_db = 105
snpeff_genome = 'WBcel235'
snpeff_version = '5.1'
vep_cache_version = 106
vep_cache = null
vep_cache_version = 110
vep_genome = 'WBcel235'
vep_species = 'caenorhabditis_elegans'
vep_version = '106.1'

// default params
split_fastq = 0 // no FASTQ splitting
tools = 'strelka' // Variant calling with Strelka

// Ignore params that will throw warning through params validation
validationSchemaIgnoreParams = 'genomes,snpeff_version,vep_version'
validationSchemaIgnoreParams = 'genomes'
}

process {
Expand Down
8 changes: 4 additions & 4 deletions conf/test/cache.config
Original file line number Diff line number Diff line change
Expand Up @@ -41,20 +41,20 @@ params {
germline_resource = params.test_data['homo_sapiens']['genome']['gnomad_r2_1_1_vcf_gz']
intervals = params.test_data['homo_sapiens']['genome']['genome_interval_list']
known_indels = params.test_data['homo_sapiens']['genome']['mills_and_1000g_indels_vcf_gz']
snpeff_cache = null
snpeff_db = 105
snpeff_genome = 'WBcel235'
snpeff_version = '5.1'
vep_cache_version = 106
vep_cache = null
vep_cache_version = 110
vep_genome = 'WBcel235'
vep_species = 'caenorhabditis_elegans'
vep_version = '106.1'

// default params
split_fastq = 0 // no FASTQ splitting
tools = 'strelka' // Variant calling with Strelka

// Ignore params that will throw warning through params validation
validationSchemaIgnoreParams = 'genomes,test_data,snpeff_version,vep_version'
validationSchemaIgnoreParams = 'genomes,test_data'
}

process {
Expand Down
2 changes: 0 additions & 2 deletions main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -56,11 +56,9 @@ params.pon = WorkflowMain.getGenomeAttribute(params, 'pon')
params.pon_tbi = WorkflowMain.getGenomeAttribute(params, 'pon_tbi')
params.snpeff_db = WorkflowMain.getGenomeAttribute(params, 'snpeff_db')
params.snpeff_genome = WorkflowMain.getGenomeAttribute(params, 'snpeff_genome')
params.snpeff_version = WorkflowMain.getGenomeAttribute(params, 'snpeff_version')
params.vep_cache_version = WorkflowMain.getGenomeAttribute(params, 'vep_cache_version')
params.vep_genome = WorkflowMain.getGenomeAttribute(params, 'vep_genome')
params.vep_species = WorkflowMain.getGenomeAttribute(params, 'vep_species')
params.vep_version = WorkflowMain.getGenomeAttribute(params, 'vep_version')

/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Expand Down
39 changes: 20 additions & 19 deletions nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -73,24 +73,25 @@ params {
sentieon_haplotyper_emit_mode = "variant" // default value for Sentieon haplotyper

// Annotation
vep_out_format = "vcf"
vep_custom_args = "--everything --filter_common --per_gene --total_length --offline --format vcf" // Default arguments for VEP
vep_dbnsfp = null // dbnsfp plugin disabled within VEP
dbnsfp = null // No dbnsfp processed file
dbnsfp_tbi = null // No dbnsfp processed file index
dbnsfp_consequence = null // No default consequence for dbnsfp plugin
dbnsfp_fields = "rs_dbSNP,HGVSc_VEP,HGVSp_VEP,1000Gp3_EAS_AF,1000Gp3_AMR_AF,LRT_score,GERP++_RS,gnomAD_exomes_AF" // Default fields for dbnsfp plugin
vep_loftee = null // loftee plugin disabled within VEP
vep_spliceai = null // spliceai plugin disabled within VEP
spliceai_snv = null // No spliceai_snv file
spliceai_snv_tbi = null // No spliceai_snv file index
spliceai_indel = null // No spliceai_indel file
spliceai_indel_tbi = null // No spliceai_indel file index
vep_spliceregion = null // spliceregion plugin disabled within VEP
outdir_cache = null // No output directory for cache
snpeff_cache = null // No directory for snpEff cache
vep_cache = null // No directory for VEP cache
vep_include_fasta = false // Don't use fasta file for annotation with VEP
dbnsfp = null // No dbnsfp processed file
dbnsfp_consequence = null // No default consequence for dbnsfp plugin
dbnsfp_fields = "rs_dbSNP,HGVSc_VEP,HGVSp_VEP,1000Gp3_EAS_AF,1000Gp3_AMR_AF,LRT_score,GERP++_RS,gnomAD_exomes_AF" // Default fields for dbnsfp plugin
dbnsfp_tbi = null // No dbnsfp processed file index
outdir_cache = null // No default outdir cache
snpeff_cache = 's3://annotation-cache/snpeff_cache/'
spliceai_indel = null // No spliceai_indel file
spliceai_indel_tbi = null // No spliceai_indel file index
spliceai_snv = null // No spliceai_snv file
spliceai_snv_tbi = null // No spliceai_snv file index
use_annotation_cache_keys = true
vep_cache = 's3://annotation-cache/vep_cache/'
vep_custom_args = "--everything --filter_common --per_gene --total_length --offline --format vcf" // Default arguments for VEP
vep_dbnsfp = null // dbnsfp plugin disabled within VEP
vep_include_fasta = false // Don't use fasta file for annotation with VEP
vep_loftee = null // loftee plugin disabled within VEP
vep_out_format = "vcf"
vep_spliceai = null // spliceai plugin disabled within VEP
vep_spliceregion = null // spliceregion plugin disabled within VEP

// MultiQC options
multiqc_config = null
Expand Down Expand Up @@ -128,7 +129,7 @@ params {
// Schema validation default options
validationFailUnrecognisedParams = false
validationLenientMode = true
validationSchemaIgnoreParams = 'genomes,snpeff_version,vep_version,cf_ploidy'
validationSchemaIgnoreParams = 'genomes,cf_ploidy'
validationShowHiddenParams = false
validate_params = true
}
Expand Down
22 changes: 8 additions & 14 deletions nextflow_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -508,16 +508,24 @@
"hidden": true,
"help_text": "Using this params you can add custom args to VEP."
},
"use_annotation_cache_keys": {
"type": "boolean",
"fa_icon": "fas fa-toolbox",
"description": "Use annotation cache keys for snpeff_cache and vep_cache.",
"hidden": true
},
"snpeff_cache": {
"type": "string",
"fa_icon": "fas fa-file",
"default": "s3://annotation-cache/snpeff_cache/",
"description": "Path to snpEff cache.",
"help_text": "To be used with `--annotation_cache`.",
"hidden": true
},
"vep_cache": {
"type": "string",
"fa_icon": "fas fa-file",
"default": "s3://annotation-cache/vep_cache/",
"description": "Path to VEP cache.",
"help_text": "To be used with `--annotation_cache`.",
"hidden": true
Expand Down Expand Up @@ -729,13 +737,6 @@
"help_text": "If you use AWS iGenomes, this has already been set for you appropriately.\nThis is used to specify the genome when using the container with pre-downloaded cache.",
"hidden": true
},
"snpeff_version": {
"type": "string",
"fa_icon": "fas fa-tag",
"description": "snpEff version.",
"help_text": "If you use AWS iGenomes, this has already been set for you appropriately.\nThis is used to specify the snpeff version when using the container with pre-downloaded cache.",
"hidden": true
},
"vep_genome": {
"type": "string",
"fa_icon": "fas fa-microscope",
Expand All @@ -757,13 +758,6 @@
"help_text": "If you use AWS iGenomes, this has already been set for you appropriately.\nAlternatively, cache version can be used to specify the correct Ensembl Genomes version number, as these differ from the concurrent Ensembl/VEP version numbers",
"hidden": true
},
"vep_version": {
"type": "string",
"fa_icon": "fas fa-tag",
"description": "VEP version.",
"help_text": "If you use AWS iGenomes, this has already been set for you appropriately.\nThis is used to specify the VEP version when using the container with pre-downloaded cache.",
"hidden": true
},
"save_reference": {
"type": "boolean",
"fa_icon": "fas fa-download",
Expand Down
26 changes: 0 additions & 26 deletions tests/test_annotation_cache.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,29 +9,3 @@
- path: results/cache/vep_cache
- path: results/annotation
should_exist: false
- name: Download annotation cache and annotate using snpEff and VEP
command: nextflow run main.nf -profile test_cache,annotation --tools snpeff,vep --download_cache --outdir results
tags:
- annotation
- cache
files:
- path: results/multiqc
- path: results/cache/snpeff_cache
- path: results/cache/vep_cache
- path: results/annotation/test/test_VEP.ann.vcf.gz
# conda changes md5sums for test
- path: results/annotation/test/test_VEP.ann.vcf.gz.tbi
# conda changes md5sums for test
- path: results/annotation/test/test_snpEff.ann.vcf.gz
# conda changes md5sums for test
- path: results/annotation/test/test_snpEff.ann.vcf.gz.tbi
# conda changes md5sums for test
- path: results/multiqc
- path: results/reports/EnsemblVEP/test/test_VEP.ann.summary.html
# text-based file changes md5sums on reruns
- path: results/reports/snpeff/test/snpEff_summary.html
# text-based file changes md5sums on reruns
- path: results/reports/snpeff/test/test_snpEff.csv
# text-based file changes md5sums on reruns
- path: results/reports/snpeff/test/test_snpEff.genes.txt
md5sum: 130536bf0237d7f3f746d32aaa32840a
Loading

0 comments on commit 2934fcf

Please sign in to comment.