-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathMultisample_Fastq_to_Gvcf_GATK4.wdl
127 lines (92 loc) · 3.07 KB
/
Multisample_Fastq_to_Gvcf_GATK4.wdl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
# WORKFLOW DEFINITION
import "ruddle_fastq_to_gvcf_single_sample_gatk4.wdl" as single_wf
# Runs the single-sample Fastq_to_Gvcf_GATK4 sub-workflow once per distinct
# sample listed in the first (tab-separated) column of inputSamplesFile and
# gathers the per-sample results into arrays.
workflow Multisample_Fastq_to_Gvcf_GATK4 {
  # Tab-separated sample sheet; column 1 is the sample name.
  File inputSamplesFile
  # NOTE(review): inputSamples is not referenced anywhere else in this
  # workflow. It is kept so read_tsv still runs against the sheet exactly as
  # before; confirm whether it can be dropped.
  Array[Array[String]] inputSamples = read_tsv(inputSamplesFile)

  # Values below are passed through unchanged to every per-sample call.
  String unmapped_bam_suffix

  String ref_name
  File ref_fasta
  File ref_fasta_index
  File ref_dict
  File ref_amb
  File ref_ann
  File ref_bwt
  File ref_pac
  File ref_sa
  File? ref_alt

  String bwa_commandline
  Int compression_level

  File dbSNP_vcf
  File dbSNP_vcf_index
  Array[File] known_indels_sites_VCFs
  Array[File] known_indels_sites_indices

  File scattered_calling_intervals_list

  # Collect the distinct sample names found in column 1 of the sample sheet.
  # (No trailing comma after the last input: the draft-2 grammar only allows
  # commas *between* mappings.)
  call make_uniq_samples_file {
    input:
      inputSamplesFile = inputSamplesFile
  }

  # Process every distinct sample in parallel.
  scatter (sample in make_uniq_samples_file.uniq_samples) {

    # Extract the rows of the sample sheet that belong to this sample.
    call make_fastq_file {
      input:
        inputSamplesFile = inputSamplesFile,
        sample_name = sample
    }

    # Run the imported single-sample workflow on those rows.
    call single_wf.Fastq_to_Gvcf_GATK4 {
      input:
        sample_fastq_file = make_fastq_file.sample_fastq_file,
        sample_name = sample,
        unmapped_bam_suffix = unmapped_bam_suffix,
        ref_name = ref_name,
        ref_fasta = ref_fasta,
        ref_fasta_index = ref_fasta_index,
        ref_dict = ref_dict,
        ref_amb = ref_amb,
        ref_ann = ref_ann,
        ref_bwt = ref_bwt,
        ref_pac = ref_pac,
        ref_sa = ref_sa,
        ref_alt = ref_alt,
        bwa_commandline = bwa_commandline,
        compression_level = compression_level,
        dbSNP_vcf = dbSNP_vcf,
        dbSNP_vcf_index = dbSNP_vcf_index,
        known_indels_sites_VCFs = known_indels_sites_VCFs,
        known_indels_sites_indices = known_indels_sites_indices,
        scattered_calling_intervals_list = scattered_calling_intervals_list
    }
  }

  # Outputs that will be retained when execution is complete.
  # Each array holds one element per scattered sample.
  output {
    Array[Array[File]] unmapped_bam = Fastq_to_Gvcf_GATK4.unmapped_bam
    Array[File] duplication_metrics = Fastq_to_Gvcf_GATK4.duplication_metrics
    Array[File] bqsr_report = Fastq_to_Gvcf_GATK4.bqsr_report
    Array[File] analysis_ready_bam = Fastq_to_Gvcf_GATK4.analysis_ready_bam
    Array[File] analysis_ready_bam_index = Fastq_to_Gvcf_GATK4.analysis_ready_bam_index
    Array[File] analysis_ready_bam_md5 = Fastq_to_Gvcf_GATK4.analysis_ready_bam_md5
    Array[File] output_vcf = Fastq_to_Gvcf_GATK4.output_vcf
    Array[File] output_vcf_index = Fastq_to_Gvcf_GATK4.output_vcf_index
    File uniq_samples = make_uniq_samples_file.uniq_samples_file
  }
}
# TASK DEFINITIONS
# Writes the sorted, de-duplicated values of the first tab-separated column
# of inputSamplesFile to uniq_samples.list.
#
# Inputs:
#   inputSamplesFile - tab-separated sample sheet; column 1 is the sample name.
# Outputs:
#   uniq_samples_file - the generated list, one sample name per line.
#   uniq_samples      - the same names as an Array[String].
task make_uniq_samples_file {
  File inputSamplesFile

  # grep -v '^$' drops empty names (blank lines or an empty first column) so
  # that no empty sample name is emitted.
  # The input path is quoted so localized paths containing spaces still work.
  command {
    cut -f1 "${inputSamplesFile}" | grep -v '^$' | sort | uniq > uniq_samples.list
  }

  output {
    File uniq_samples_file = "uniq_samples.list"
    Array[String] uniq_samples = read_lines("uniq_samples.list")
  }
}
# Extracts the rows of inputSamplesFile whose first tab-separated column is
# exactly sample_name and writes them to "<sample_name>.fastqs.txt".
#
# Inputs:
#   inputSamplesFile - tab-separated sample sheet; column 1 is the sample name.
#   sample_name      - sample whose rows should be selected.
# Outputs:
#   sample_fastq_file - the selected rows, unchanged, in original order.
task make_fastq_file {
  File inputSamplesFile
  String sample_name

  # Why awk instead of grep "^name\s":
  #  - the name is compared literally, so regex metacharacters in a sample
  #    name (e.g. ".") can no longer match rows of a different sample;
  #  - fields are split on tabs only, so column 1 is a tab-delimited field;
  #  - ($1 "") forces a string comparison, so numeric-looking names such as
  #    "01" and "1" are not treated as equal.
  # The name is passed through the environment rather than -v so awk does not
  # apply backslash-escape processing to it.
  # The final `test -s` keeps the original behaviour of failing the task when
  # no row matches.
  command {
    SAMPLE_NAME="${sample_name}" awk -F '\t' '($1 "") == ENVIRON["SAMPLE_NAME"]' "${inputSamplesFile}" > "${sample_name}.fastqs.txt"
    test -s "${sample_name}.fastqs.txt"
  }

  output {
    File sample_fastq_file = "${sample_name}.fastqs.txt"
  }
}