forked from cbg-ethz/pangolin
-
Notifications
You must be signed in to change notification settings - Fork 0
/
pubmut
executable file
·54 lines (45 loc) · 1.41 KB
/
pubmut
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
#!/usr/bin/env python3
import numpy as np
import pandas as pd
import sys
import os
#import re
import argparse
import csv
import json
import yaml
import gzip
import pysam
def _topmost(counts):
    """Return the last ``(key, value)`` pair of *counts*, or ``(-1, 0)`` if empty.

    "Last" means last in the mapping's iteration order, i.e. the order in
    which the entries appear in the parsed JSON document.
    """
    if not counts:
        return -1, 0
    return list(counts.items())[-1]


def _format_cell(amp):
    """Build the table cell text for one amplicon record.

    *amp* must provide the mappings ``amp['sites']`` and ``amp['muts']``
    (either may be empty). Their keys must be convertible with ``int()``.
    NOTE(review): the keys presumably are the levels/thresholds written by
    mutbamscan -- confirm against that tool's output format.

    Returns ``"<muts> / <sites>\\n<percent>"``; the percentage part is left
    empty when either count is zero, which also avoids dividing by zero.
    """
    sites_level, sites_cnt = _topmost(amp['sites'])
    muts_level, muts_cnt = _topmost(amp['muts'])
    # Only report mutations recorded at (at least) the same key as the last
    # sites entry; a count taken from a numerically lower key is shown as 0.
    if int(muts_level) < int(sites_level):
        muts_cnt = 0
    if muts_cnt and sites_cnt:
        percent = f"{100 * float(muts_cnt) / float(sites_cnt):.2f}%"
    else:
        percent = ''
    return f"{muts_cnt} / {sites_cnt}\n{percent}"


def main(argv=None):
    """Print a samples-by-amplicons table and save it as a tab-separated file.

    Parameters:
        argv: list of command-line arguments; ``None`` (the default) makes
            argparse read ``sys.argv[1:]``.

    Returns:
        The pandas DataFrame that was printed and written.

    Side effects:
        Prints every sample name followed by the full table to stdout and
        writes ``scanned_article.csv`` into the current working directory.

    Raises:
        SystemExit: on invalid arguments or when the JSON file does not exist
            (reported through argparse, exit status 2).
    """
    # parse command line
    argparser = argparse.ArgumentParser(description="make a pretty table")
    argparser.add_argument('-j', '--json', metavar='JSON', required=True,
                           type=str, dest='json', help="results generated by mutbamscan")
    args = argparser.parse_args(argv)

    # An explicit check instead of `assert`: assertions vanish under `python -O`
    # and a usage error is friendlier than a traceback for a CLI tool.
    if not os.path.isfile(args.json):
        argparser.error(f"cannot find result json file {args.json}")

    with open(args.json, 'rt') as jf:
        table = json.load(fp=jf)

    # {sample: {amplicon: cell text}} -- the nested layout that
    # DataFrame.from_dict(orient='index') turns into one row per sample.
    df_dict = {}
    for sam, amplicons in table.items():
        print(sam)
        df_dict[sam] = {
            ampname: _format_cell(amp) for ampname, amp in amplicons.items()
        }

    pretty_table_df = pd.DataFrame.from_dict(data=df_dict, orient='index')
    # lift the row limit so that the whole table is printed
    with pd.option_context('display.max_rows', None):  # , 'display.max_columns', None):
        print(pretty_table_df)
    pretty_table_df.to_csv('scanned_article.csv', sep="\t", compression={'method': 'infer'})
    return pretty_table_df


if __name__ == "__main__":
    main()