-
Notifications
You must be signed in to change notification settings - Fork 10
/
process_sra_file.py
executable file
·40 lines (34 loc) · 1.31 KB
/
process_sra_file.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
#!/usr/bin/env python3
"""
This file performs a single run of the following:
1. Convert reads from SRA to FASTQ
2. Align reads with HISAT2, in single- or paired-end as appropriate
3. Map reads to genes
4. Convert counts to RPKM
5. Save RPKM and summary data
"""
import argparse
from pathlib import Path
from pprint import pprint
from data_path_utils import append_to_filename
import pandas as pd
from alignment import process_sra_file
from utils import add_common_command_line_arguments
def main() -> None:
    """Process one SRA file and save its RPKM values and alignment metadata.

    Parses the command line, passes the SRA path and the common options to
    ``process_sra_file``, pretty-prints the returned alignment metadata, and
    stores both results in an HDF5 file whose name is derived from the SRA
    path.
    """
    # RawDescriptionHelpFormatter only changes how the description/epilog are
    # rendered, so the module docstring is supplied as the description;
    # without it ``--help`` shows no summary of the pipeline at all.
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument('sra_path', type=Path, help='Path to SRA file')
    # Presumably registers the subprocesses / hisat2_options / reference_path
    # options read below -- the helper lives in utils; confirm there.
    add_common_command_line_arguments(parser)
    args = parser.parse_args()

    rpkm, alignment_metadata = process_sra_file(
        sra_path=args.sra_path,
        subprocesses=args.subprocesses,
        hisat2_options=args.hisat2_options,
        reference_path=args.reference_path,
    )

    print('Alignment metadata:')
    pprint(alignment_metadata)

    # The output path starts from the SRA path with its suffix replaced by
    # '.hdf5'; append_to_filename then adds the '_rpkm' tag.
    rpkm_path = append_to_filename(args.sra_path.with_suffix('.hdf5'), '_rpkm')
    print('Saving RPKM to', rpkm_path)

    # Both tables are written as frames with a single column named after the
    # SRA file stem.
    sample_name = args.sra_path.stem
    with pd.HDFStore(rpkm_path) as store:
        store['rpkm'] = pd.DataFrame({sample_name: rpkm})
        store['alignment_metadata'] = pd.DataFrame({sample_name: alignment_metadata})


if __name__ == '__main__':
    main()