-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathvisualize.py
186 lines (154 loc) · 7.06 KB
/
visualize.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
import argparse
import os
import os.path
import subprocess
import sys
def parse_args(argv=None):
    """Parse the command line options of the visualization driver.

    Args:
        argv: optional sequence of argument strings to parse. When None
            (the default) argparse reads sys.argv[1:], so existing
            zero-argument callers are unaffected. Passing a list allows the
            parser to be driven programmatically.

    Returns:
        argparse.Namespace with the attributes genome_fasta, updated_gtf,
        abundance_esp, target_gene, minimum_count (float),
        normalize_counts_to_cpm (bool), descriptive_name and output_dir.

    Raises:
        SystemExit: raised by argparse when a required option is missing or
            a value cannot be converted.
    """
    parser = argparse.ArgumentParser(
        description='Generate files for visualizing ESPRESSO output')
    parser.add_argument('--genome-fasta',
                        help='the fasta input to ESPRESSO',
                        required=True)
    parser.add_argument('--updated-gtf',
                        help='the *_updated.gtf file output by ESPRESSO',
                        required=True)
    parser.add_argument('--abundance-esp',
                        help='the *_abundance.esp file output by ESPRESSO',
                        required=True)
    parser.add_argument(
        '--target-gene',
        help=('the name of the gene to visualize. transcripts with name like'
              ' {target-gene}-{number} or gene_id like {target-gene}.* will'
              ' have output generated. Use the gene_id to match novel'
              ' isoforms output by ESPRESSO'),
        required=True)
    parser.add_argument(
        '--minimum-count',
        help=('only isoforms where the (normalized) count for a sample meets'
              ' the minimum count are considered'),
        type=float,
        required=True)
    parser.add_argument('--normalize-counts-to-cpm',
                        help='convert raw counts to counts per million',
                        action='store_true')
    parser.add_argument(
        '--descriptive-name',
        help='used as a label in the visualization and for filenames',
        required=True)
    parser.add_argument('--output-dir',
                        help='where to write visualization files',
                        required=True)

    return parser.parse_args(argv)
def get_python_executable():
    """Return the interpreter command used to launch the helper scripts.

    Returns:
        sys.executable when it is set. Otherwise a warning is written to
        stderr and the bare command name 'python' is returned.
    """
    interpreter = sys.executable
    if interpreter:
        return interpreter

    # sys.executable was empty or None: fall back to a bare command name.
    print(
        'Absolute path for current Python interpreter not found.'
        ' Using "python" without a full path to run scripts',
        file=sys.stderr)
    return 'python'
def get_script_dir():
    """Return the directory holding this file, with symlinks resolved."""
    return os.path.dirname(os.path.realpath(__file__))
def basename_without_extension(path):
    """Return the last component of path with its final extension removed.

    Only the last extension is stripped, so 'genome.fa.gz' yields
    'genome.fa'. A name without an extension is returned unchanged.

    Args:
        path: file path whose basename is wanted.

    Returns:
        The basename of path up to, but not including, its last extension.
    """
    # Index the (root, ext) pair directly instead of binding an unused name.
    return os.path.splitext(os.path.basename(path))[0]
def run_gtf_to_bed(python_executable, script_dir, updated_gtf, output_dir,
                   descriptive_name):
    """Run the gtf_to_bed.py helper script on the updated GTF.

    Args:
        python_executable: interpreter used to launch the script.
        script_dir: directory that contains gtf_to_bed.py.
        updated_gtf: path passed to the script as --updated-gtf.
        output_dir: directory in which the BED path is placed.
        descriptive_name: value passed to the script as --descriptive-name.

    Returns:
        The path passed as --output-bed: the GTF basename with its
        extension replaced by '.bed', inside output_dir.

    Raises:
        subprocess.CalledProcessError: if the script exits with a non-zero
            status.
    """
    bed_path = os.path.join(
        output_dir,
        '{}.bed'.format(basename_without_extension(updated_gtf)))
    command = [
        python_executable,
        os.path.join(script_dir, 'gtf_to_bed.py'),
        '--updated-gtf', updated_gtf,
        '--output-bed', bed_path,
        '--descriptive-name', descriptive_name,
    ]

    # Echo the command to stdout before running it.
    print(command)
    subprocess.run(command, check=True)
    return bed_path
def run_normalize_abundance(python_executable, script_dir, abundance_esp,
                            output_dir, descriptive_name):
    """Run the normalize_abundance.py helper script.

    Args:
        python_executable: interpreter used to launch the script.
        script_dir: directory that contains normalize_abundance.py.
        abundance_esp: path passed to the script as --abundance-esp.
        output_dir: directory in which the output path is placed.
        descriptive_name: prefix of the output file name.

    Returns:
        The path passed as --output-path:
        <output_dir>/<descriptive_name>_cpm.esp.

    Raises:
        subprocess.CalledProcessError: if the script exits with a non-zero
            status.
    """
    cpm_basename = '{}_cpm.esp'.format(descriptive_name)
    cpm_path = os.path.join(output_dir, cpm_basename)
    script = os.path.join(script_dir, 'normalize_abundance.py')

    command = [python_executable, script]
    command += ['--abundance-esp', abundance_esp]
    command += ['--output-path', cpm_path]

    # Echo the command to stdout before running it.
    print(command)
    subprocess.run(command, check=True)
    return cpm_path
def index_fasta(fasta):
    """Run 'samtools faidx' on fasta unless '<fasta>.fai' already exists.

    Args:
        fasta: path of the fasta file to index.

    Raises:
        subprocess.CalledProcessError: if samtools exits with a non-zero
            status.
    """
    index_path = '{}.fai'.format(fasta)
    if not os.path.exists(index_path):
        command = ['samtools', 'faidx', fasta]
        # Echo the command to stdout before running it.
        print(command)
        subprocess.run(command, check=True)
def run_fasta_to_chrom_sizes(fasta, output_dir):
    """Run faToTwoBit and then twoBitInfo to obtain a chrom.sizes path.

    Both output files are named after the fasta basename without its last
    extension and are placed in output_dir.

    Args:
        fasta: path of the genome fasta file.
        output_dir: directory that receives the .2bit and .chrom.sizes
            files.

    Returns:
        The path handed to twoBitInfo as its output:
        <output_dir>/<fasta basename>.chrom.sizes.

    Raises:
        subprocess.CalledProcessError: if either tool exits with a non-zero
            status.
    """
    stem = basename_without_extension(fasta)
    two_bit_path = os.path.join(output_dir, '{}.2bit'.format(stem))
    chrom_sizes_path = os.path.join(output_dir,
                                    '{}.chrom.sizes'.format(stem))

    # The 2bit file must be written before twoBitInfo can read it, so the
    # two commands run strictly in this order.
    commands = (
        ['faToTwoBit', '-long', fasta, two_bit_path],
        ['twoBitInfo', two_bit_path, chrom_sizes_path],
    )
    for command in commands:
        print(command)
        subprocess.run(command, check=True)

    return chrom_sizes_path
def run_make_sample_bigwigs(python_executable, script_dir, abundance_esp,
                            chrom_sizes, annotation_bed, output_dir):
    """Run the make_sample_bigwigs.py helper script.

    Args:
        python_executable: interpreter used to launch the script.
        script_dir: directory that contains make_sample_bigwigs.py.
        abundance_esp: path passed to the script as --abundance-esp.
        chrom_sizes: path passed to the script as --chrom-sizes.
        annotation_bed: path passed to the script as --annotation-bed.
        output_dir: path passed to the script as --output-dir.

    Raises:
        subprocess.CalledProcessError: if the script exits with a non-zero
            status.
    """
    script = os.path.join(script_dir, 'make_sample_bigwigs.py')
    options = [
        '--abundance-esp', abundance_esp,
        '--chrom-sizes', chrom_sizes,
        '--annotation-bed', annotation_bed,
        '--output-dir', output_dir,
    ]
    command = [python_executable, script] + options

    # Echo the command to stdout before running it.
    print(command)
    subprocess.run(command, check=True)
def run_make_isoform_bedgraphs(python_executable, script_dir, abundance_esp,
                               annotation_bed, target_gene, minimum_count,
                               output_dir):
    """Run the make_isoform_bedgraphs.py helper script.

    Args:
        python_executable: interpreter used to launch the script.
        script_dir: directory that contains make_isoform_bedgraphs.py.
        abundance_esp: path passed to the script as --abundance-esp.
        annotation_bed: path passed to the script as --annotation-bed.
        target_gene: value passed to the script as --target-gene.
        minimum_count: threshold passed as --minimum-count; converted with
            str() because command arguments must be strings.
        output_dir: path passed to the script as --output-dir.

    Raises:
        subprocess.CalledProcessError: if the script exits with a non-zero
            status.
    """
    script = os.path.join(script_dir, 'make_isoform_bedgraphs.py')
    options = [
        '--abundance-esp', abundance_esp,
        '--annotation-bed', annotation_bed,
        '--target-gene', target_gene,
        '--minimum-count', str(minimum_count),
        '--output-dir', output_dir,
    ]
    command = [python_executable, script] + options

    # Echo the command to stdout before running it.
    print(command)
    subprocess.run(command, check=True)
def main():
    """Run every visualization step in order for the parsed arguments.

    Steps: create the output directory, index the fasta, run gtf_to_bed,
    optionally run normalize_abundance, derive chrom sizes, then run
    make_sample_bigwigs and make_isoform_bedgraphs. Any failing external
    command aborts the run, because each step uses check=True.
    """
    args = parse_args()
    interpreter = get_python_executable()
    scripts = get_script_dir()
    out_dir = args.output_dir

    os.makedirs(out_dir, exist_ok=True)
    index_fasta(args.genome_fasta)
    annotation_bed = run_gtf_to_bed(interpreter, scripts, args.updated_gtf,
                                    out_dir, args.descriptive_name)

    # Downstream steps read the CPM file only when normalization was
    # requested; otherwise they use the abundance file as given.
    if args.normalize_counts_to_cpm:
        abundance = run_normalize_abundance(interpreter, scripts,
                                            args.abundance_esp, out_dir,
                                            args.descriptive_name)
    else:
        abundance = args.abundance_esp

    chrom_sizes = run_fasta_to_chrom_sizes(args.genome_fasta, out_dir)
    run_make_sample_bigwigs(interpreter, scripts, abundance, chrom_sizes,
                            annotation_bed, out_dir)
    run_make_isoform_bedgraphs(interpreter, scripts, abundance,
                               annotation_bed, args.target_gene,
                               args.minimum_count, out_dir)
# Run the pipeline only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == '__main__':
    main()