-
Notifications
You must be signed in to change notification settings - Fork 0
/
polish_fasta.py
54 lines (41 loc) · 1.15 KB
/
polish_fasta.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
### Boas Pucker ###
### bpucker@cebitec.uni-bielefeld.de ###
### v0.1 ###
# Usage text; handed to sys.exit() when '--in' or '--out' is missing from the command line.
__usage__ = """
python polish_fasta.py
--in <INPUT_FASTA_FILE>
--out <OUTPUT_FASTA_FILE>
"""
import os, sys
# --- end of imports --- #
def load_sequences( multiple_fasta_file ):
    """! @brief load all sequences from a (multiple) FASTA file

    @param multiple_fasta_file path of the FASTA file to read

    @return tuple (sequences, names): sequences maps each header (without the
    leading '>' and without surrounding whitespace) to its sequence joined
    into a single string; names lists the headers in file order. An input
    without any header line yields an empty dictionary and an empty list.

    @note If a header occurs more than once, names contains it each time
    while sequences keeps only the sequence of its last occurrence.
    """
    sequences = {}
    names = []
    header = None  # stays None until the first '>' line has been seen
    seq = []
    with open( multiple_fasta_file ) as f:
        for line in f:
            if line.startswith( '>' ):
                # a new record starts: store the previous one (if any) first
                if header is not None:
                    sequences[ header ] = "".join( seq )
                    names.append( header )
                # same clean-up for every header, including the first one
                header = line[1:].strip()
                seq = []
            elif header is not None:
                seq.append( line.strip() )
            # lines in front of the first header belong to no record and are skipped
    # store the last record; nothing to store if the file contained no header
    if header is not None:
        sequences[ header ] = "".join( seq )
        names.append( header )
    return sequences, names
def main( arguments ):
    """! @brief read the FASTA file given via --in and write its records to the file given via --out

    @param arguments list of command line arguments; the entries directly
    following '--in' and '--out' are used as input and output path

    Each record is written as a '>' header line followed by one line holding
    the sequence returned by load_sequences for that header.
    """
    in_position = arguments.index('--in')
    input_file = arguments[ in_position + 1 ]
    out_position = arguments.index('--out')
    output_file = arguments[ out_position + 1 ]

    records, order = load_sequences( input_file )

    with open( output_file, "w" ) as out:
        # keep the record order of the input file
        out.writelines( '>' + header + '\n' + records[ header ] + "\n" for header in order )
# Both options are mandatory: show the usage text and stop if either is missing.
if '--in' not in sys.argv or '--out' not in sys.argv:
    sys.exit( __usage__ )
main( sys.argv )