-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathVCF_combiner.py
74 lines (60 loc) · 1.83 KB
/
VCF_combiner.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
### Boas Pucker ###
### bpucker@cebitec.uni-bielefeld.de ###
# Usage message; the script passes it to sys.exit() when '--in' or '--out'
# is missing from the command line.
__usage__ = """
python VCF_combiner.py
--in <INPUT_DIRECTORY>
--out <OUTPUT_VCF>
"""
import glob
import os
import re
import sys
# --- end of imports --- #
def load_vcf_content( vcf ):
    """! @brief load the PASS-filtered, single-ALT variants of one VCF file

    Header/comment lines (starting with '#') and blank lines are skipped.
    Records with more than one ALT allele (a comma in column 5) or with a
    FILTER value (column 7) other than "PASS" are ignored.

    @param vcf (str) path to the VCF file
    @return (dict) maps "<CHROM>&<POS zero-padded to 8 digits>&.&<REF>&<ALT>"
            to the content of the last column of the record (presumably the
            sample/genotype column of a single-sample VCF - verify against
            the input files)
    @raise IndexError if a non-blank record has fewer than 7 columns
    """
    variants = {}
    with open( vcf, "r" ) as f:
        for line in f:
            record = line.strip()
            # Blank lines (e.g. an empty line at the end of the file) must be
            # skipped explicitly; they do not start with '#' and would raise
            # an IndexError when the columns are accessed below.
            if not record or record[0] == '#':
                continue
            parts = record.split('\t')
            if ',' in parts[4] or parts[6] != "PASS":
                continue
            # The position is zero-padded so that all keys of one chromosome
            # share a uniform layout; '&' is the field separator of the key.
            key = "&".join( [ parts[0], parts[1].zfill(8), ".", parts[3], parts[4] ] )
            variants[ key ] = parts[-1]
    return variants
def _variant_sort_key( key ):
    """! @brief sort key ordering variants by chromosome name and numeric position """
    fields = key.split('&')
    # Compare the position as a number: the zero-padded string only sorts
    # correctly as long as the position has at most 8 digits.
    return ( fields[0], int( fields[1] ), fields[2:] )


def main( arguments ):
    """! @brief run all parts

    Loads every "*.vcf" file of the input directory, collects the union of
    all variants and writes one combined, tab-separated VCF-style table with
    one column per input file. A variant is only written if more than
    'cutoff' input files contain it.

    @param arguments (list) command line arguments; must contain '--in'
           followed by the input directory and '--out' followed by the path
           of the output file
    @raise ValueError if '--in' or '--out' is missing in arguments
    """
    inputd_dir = arguments[ arguments.index('--in')+1 ]
    output_file = arguments[ arguments.index('--out')+1 ]
    # A variant must be present in MORE than this number of files to be kept.
    # NOTE(review): strict '>' is taken over from the original code - confirm
    # that '>=' was not intended.
    cutoff = 5

    # os.path.join makes the pattern work with and without a trailing path
    # separator on the input directory.
    filenames = glob.glob( os.path.join( inputd_dir, "*.vcf" ) )

    # --- loading data --- #
    all_keys = set()
    data = {}
    for filename in filenames:
        # sample ID = file name up to the first dot; files sharing this prefix
        # overwrite each other in 'data'
        ID = os.path.basename( filename ).split('.')[0]
        variants = load_vcf_content( filename )
        all_keys.update( variants )
        data[ ID ] = variants

    # --- generating output file --- #
    sorted_IDs = sorted( data )
    header_fields = [ "#CHROM", "POS", "ID", "REF", "ALT", "QUAL", "FILTER", "INFO", "FORMAT" ]
    with open( output_file, "w" ) as out:
        # the header must use the same tab separator as the data rows
        out.write( "\t".join( header_fields + sorted_IDs ) + '\n' )
        for key in sorted( all_keys, key=_variant_sort_key ):
            new_line = key.split('&') + [ ".", "PASS", ".", "GT:AD:DP:GQ:PL" ]
            # undo the zero padding that was only added while building the key
            new_line[1] = str( int( new_line[1] ) )
            counter = 0
            for ID in sorted_IDs:
                genotype = data[ ID ].get( key )
                if genotype is None:
                    # placeholder for files that do not contain this variant
                    new_line.append( "./.:0,0:0:.:0,0,0" )
                else:
                    new_line.append( genotype )
                    counter += 1
            if counter > cutoff:
                out.write( "\t".join( new_line ) + "\n" )
# Only run when executed as a script, so that importing the module has no
# side effects.
if __name__ == "__main__":
    if '--in' in sys.argv and '--out' in sys.argv:
        # main() requires the argument list; calling it without arguments
        # raises a TypeError.
        main( sys.argv )
    else:
        sys.exit( __usage__ )