-
Notifications
You must be signed in to change notification settings - Fork 9
/
config.yaml
72 lines (59 loc) · 4.45 KB
/
config.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
# This is the Snakemake configuration file that specifies paths and
# options for the allele specific analysis pipeline. Anybody wishing to use
# the provided snakemake pipeline should first fill out this file with paths to
# their own data, as the Snakefile requires it.
# Path to a text file that tells the pipeline where to find the reads for
# each sample. Every row describes one sample and has 5 columns, each
# separated by a single tab:
# <unique_sample_name> <fastq1_dna_path> <fastq2_dna_path> <fastq1_rna_path> <fastq2_rna_path>
sample_file: "/iblm/netapp/home/amassarat/allele_specific_analysis/snakemake/samples.tsv"
# Reference genome used for DNA-seq alignment.
ref_genome: "/iblm/netapp/home/amassarat/allele_specific_analysis/ref_data/WholeGenomeFasta/genome.fa"

# Indexed reference genome for BWA.
ref_genome_bwa: "/iblm/netapp/home/amassarat/allele_specific_analysis/ref_data/BWAIndex/genome.fa"

# STAR index directory for ref_genome.
# When this option is omitted, the index is created on the fly.
ref_genome_star: "/iblm/netapp/home/amassarat/allele_specific_analysis/ref_data/StarIndex"
# Whether to run GATK's base and variant score recalibration.
# When this is false, the target_sensitivity, hapmap, omni, 1000G, and dbSNP
# options may be left out, but the filter_expr option should be provided
# instead.
# Background on this choice:
# https://software.broadinstitute.org/gatk/documentation/article.php?id=3225
score_recal: true
# Expression used to filter the SNPs.
# Documentation for the expression syntax:
# https://software.broadinstitute.org/gatk/documentation/tooldocs/4.0.4.0/org_broadinstitute_hellbender_tools_walkers_filters_VariantFiltration.php#--filter-expression
filter_expr: "QD < 2.0 || FS > 60.0 || MQ < 40.0 || SOR > 3.0 || MQRankSum < -12.5 || ReadPosRankSum < -8.0"
# "Target sensitivity" for GATK's VariantRecalibration: the percentage of
# sites in the truth sets that the GATK filtering model should be able to
# accept. Raising it lets more false positives through; lowering it makes
# you lose true positives.
target_sensitivity: 99
# The four GATK VariantRecalibrator resource files, as described in
# https://software.broadinstitute.org/gatk/documentation/article.php?id=1259
# They can usually be obtained from the GATK Resource Bundle:
# https://software.broadinstitute.org/gatk/download/bundle
hapmap: "/iblm/netapp/home/amassarat/allele_specific_analysis/other_data/hapmap.vcf"
omni: "/iblm/netapp/home/amassarat/allele_specific_analysis/other_data/omni.vcf"
# This key stays quoted because it begins with digits.
"1000G": "/iblm/netapp/home/amassarat/allele_specific_analysis/other_data/1000G.vcf"
dbSNP: "/iblm/netapp/home/amassarat/allele_specific_analysis/other_data/dbsnp.vcf"
# Text file listing the name and length of every chromosome in the assembly.
# chromInfo.txt files can be downloaded from the UCSC genome browser; for
# example, the hg19 one is available at
# http://hgdownload.soe.ucsc.edu/goldenPath/hg19/database/chromInfo.txt.gz
chrom_info: "/iblm/netapp/home/amassarat/bin/WASP/examples/example_data/chromInfo.hg19.txt"
# Set this to true to make the pipeline run the rna-only version of the
# analysis, in which DNA read counts are ignored. This performs a more
# conservative test.
rna_only: true
# Gene annotations in GTF format.
# These can usually be obtained from GENCODE
# (https://www.gencodegenes.org/human/) under the "PRI" regions label.
# If you supplied a STAR index directory in 'ref_genome_star', provide the
# same gtf that was used to create that index.
gene_info: "/iblm/netapp/home/amassarat/allele_specific_analysis/other_data/gencode.v19.genes.chr.gtf"
# Location of WASP (https://github.com/bmvdgeijn/WASP).
# If you have already downloaded it, give the path to your WASP directory;
# otherwise, give the directory in which you'd like it installed.
# When this option is not provided, the pipeline installs WASP in your
# .snakemake directory.
wasp_dir: "/iblm/netapp/home/amassarat/bin/WASP"
# Directory in which to write all of the output files.
# Defined relative to whatever directory you execute the snakemake command in.
output_dir: "/iblm/netapp/home/amassarat/allele_specific_analysis/snakemake/out"