-
Notifications
You must be signed in to change notification settings - Fork 2
/
circos.py
158 lines (137 loc) · 7.27 KB
/
circos.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
import numpy as np
import pandas as pd
import re
import os
import shutil
from MetaGenome.pyutils.tools import generate_span
from MetaGenome.pipconfig import settings
# import pdb
import argparse
class Circos(object):
    """Build Circos input files from an abundance table and run Circos.

    The constructor loads the table, keeps the ``number`` most abundant rows,
    copies the bundled ``circos_config`` template directory into
    ``<out_dir>/circos_conf`` and assigns a colour to every row and column.
    ``visualize()`` then writes the karyotype, highlight and link files and
    invokes the Circos executable configured in ``settings.path``.

    Args:
        table: Path (or file-like object) of a tab-separated abundance table
            whose first column is the row index.
        number: How many of the most abundant rows to keep.
        mapping_file: Optional tab-separated sample metadata file.
        category: Column of ``mapping_file`` used to order/group samples.
        by_group_mean: If true, columns are averaged per ``category`` value.
        out_dir: Output directory (created when missing).
        prefix: Prefix of the produced ``circos.png`` file name.
    """

    # Characters rewritten in row names: the quote is dropped, the rest
    # become "_" (same mapping as the historical chain of str.replace calls).
    __NAME_TABLE = str.maketrans(
        {" ": "_", ";": "_", "(": "_", ")": "_", ":": "_", "'": None}
    )

    def __init__(self, table, number=10, mapping_file=None, category=None,
                 by_group_mean=False, out_dir="./", prefix=""):
        # Resolve the module directory robustly: a bare relative ``__file__``
        # used to produce "" and therefore the path "/circos_config".
        self.__base_path = os.path.dirname(os.path.abspath(__file__))
        self.__read_conf__()
        self.__number = number
        self.__prefix = prefix
        self.__read_colors__()
        # Always ends with a separator; an empty out_dir means the current
        # directory instead of the filesystem root.
        self.__out_dir = os.path.join(out_dir or ".", "")
        self.conf = self.__out_dir + "circos_conf"
        self.__outconf = self.conf + "/"
        self.__init_data__(table, mapping_file, category, by_group_mean)
        self.__number_of_otu, self.__number_of_sample = self.data.shape

        os.makedirs(self.__out_dir, exist_ok=True)
        # Start from a fresh copy of the configuration template.
        if os.path.exists(self.conf):
            shutil.rmtree(self.conf)
        shutil.copytree(os.path.join(self.__base_path, "circos_config"), self.conf)

        # NOTE(review): generate_span is a project helper; it is used here to
        # obtain two (start, stop) index pairs into the palette - confirm.
        otu_col_index, sam_col_index = generate_span(self.data.shape)
        self.__otu_col = self.__colors[otu_col_index[0]:otu_col_index[1]]
        self.__sam_col = self.__colors[sam_col_index[0]:sam_col_index[1]]
        # Reversed (n, 1) column vectors of names and colours.
        self.__rev_sam_name = self.data.columns.values.reshape((-1, 1))[::-1, :]
        self.__rev_otu_name = self.data.index.values.reshape((-1, 1))[::-1, :]
        self.__rev_sam_col = self.__sam_col[::-1, :]
        self.__rev_otu_col = self.__otu_col[::-1, :]

    def __read_conf__(self):
        """Read the Circos executable and ``etc`` directory from settings."""
        js = settings.path
        self._circos = js['circos_path']
        self._etc = js['circos_etc']

    def __init_data__(self, table, mapping_file, category, by_group_mean):
        """Load, clean and subset the abundance table into ``self.data``.

        Raises:
            ValueError: if ``by_group_mean`` is requested without both a
                mapping file and a category (previously a ``NameError``).
        """
        use_mapping = bool(mapping_file and category)
        if by_group_mean and not use_mapping:
            raise ValueError(
                "by_group_mean requires both mapping_file and category")

        otu = self.read_tsv(table)
        # Drop a trailing non-numeric column (e.g. a taxonomy annotation).
        # is_numeric_dtype also covers the dedicated string dtype of newer
        # pandas, which the old ``== np.dtype("O")`` comparison missed.
        if not pd.api.types.is_numeric_dtype(otu.dtypes.iloc[-1]):
            otu = otu.drop(otu.columns[-1], axis=1)
        # Tables whose maximum is <= 1 are scaled by 1e6 before rounding.
        if not otu.max().max() > 1:
            otu = otu * 1000000
        # str() guards against non-string (e.g. integer) row labels.
        otu.index = [str(name).translate(self.__NAME_TABLE) for name in otu.index]

        if use_mapping:
            mapf = self.read_tsv(mapping_file)[category].dropna().sort_values()
            otu = otu.filter(items=mapf.index, axis=1)
        else:
            otu = otu.sort_index(axis=1)

        # Keep the rows with the largest totals, most abundant first.
        top_rows = otu.sum(axis=1).values.argsort()[::-1][:self.__number]
        otu = otu.iloc[top_rows]
        if by_group_mean:
            # Transpose instead of the deprecated ``groupby(..., axis=1)``.
            otu = otu.T.groupby(mapf).mean().T
        # Vectorised replacement for the deprecated element-wise applymap.
        self.data = otu.round().astype(int)

    def __read_colors__(self):
        """Load colour names from ``<etc>/colors.brewer.conf`` into a sorted (n, 1) array."""
        colors = []
        with open(self._etc + "/colors.brewer.conf") as color_file:
            for line in color_file:
                line = line.strip()
                if line and not line.startswith("#"):
                    # Keep only the name left of the first "=".
                    colors.append(line.partition("=")[0].strip())
        colors = np.array(colors).reshape((-1, 1))
        self.__colors = np.sort(colors, axis=0)

    def rep_each(self, x, each) -> np.ndarray:
        """Return an (n, 1) array in which every item of ``x`` is repeated ``each`` times."""
        return np.array([[i] * each for i in x]).reshape((-1, 1))

    def read_tsv(self, path):
        """Read a tab-separated file, using its first column as the index."""
        return pd.read_csv(path, sep="\t", index_col=0)

    def write_conf(self, df, name):
        """Write ``df`` space-separated, without header or index, into the conf directory."""
        df.to_csv(self.__outconf + name, sep=" ", header=False, index=False)

    def write_karyotype(self):
        """Write ``karyotype.txt``: one ``chr`` line per sample and per row.

        Each line holds: "chr", "-", name, name, 0, total count, colour.
        """
        all_sum = pd.concat([self.data.sum(axis=0), self.data.sum(axis=1)], axis=0)
        kar_data = pd.DataFrame(np.zeros((all_sum.shape[0], 7)))
        kar_data[0] = "chr"
        kar_data[1] = "-"
        kar_data[2] = all_sum.index
        kar_data[3] = all_sum.index
        kar_data[4] = 0
        kar_data[5] = all_sum.values
        # Flatten the stacked (n, 1) colour vectors so the column assignment
        # does not depend on how pandas treats 2-D values.
        kar_data[6] = np.vstack((self.__sam_col, self.__otu_col)).ravel()
        self.write_conf(kar_data, "karyotype.txt")

    def __compute_spans(self):
        """Compute and cache ``self.otu`` / ``self.sample`` from the reversed table.

        NOTE(review): generate_span is a project helper; the results are later
        reshaped to two columns, so it presumably yields (start, end) pairs -
        confirm against its implementation.
        """
        # Positional reversal; unlike label-based .loc it is safe with
        # duplicated labels.
        data = self.data.iloc[::-1, ::-1]
        self.otu = np.array(data.apply(generate_span, axis=1).values.tolist())
        self.sample = np.array(data.apply(generate_span, axis=0).values.T.tolist())

    def write_highlight(self):
        """Write ``highlight_spec.txt``, ``highlight_site.txt`` and ``highlight_all.txt``."""
        self.__compute_spans()
        pre = "fill_color="
        otu = pd.DataFrame(np.hstack((
            self.__rev_otu_name.repeat(self.__number_of_sample, axis=0),
            self.otu.reshape((-1, 2)),
            np.char.add(pre, np.tile(self.__rev_sam_col, (self.__number_of_otu, 1))),
        )))
        self.write_conf(otu, "highlight_spec.txt")
        sample = pd.DataFrame(np.hstack((
            self.__rev_sam_name.repeat(self.__number_of_otu, axis=0),
            self.sample.reshape((-1, 2)),
            np.char.add(pre, np.tile(self.__rev_otu_col, (self.__number_of_sample, 1))),
        )))
        self.write_conf(sample, "highlight_site.txt")
        self.write_conf(pd.DataFrame(np.vstack((sample.values, otu.values))),
                        "highlight_all.txt")

    def write_links(self):
        """Write ``links.txt`` connecting every sample segment to its row segment."""
        # The spans used to exist only after write_highlight(); compute them
        # on demand so this method also works when called on its own.
        if not (hasattr(self, "otu") and hasattr(self, "sample")):
            self.__compute_spans()
        link = pd.DataFrame(np.hstack((
            np.tile(self.__rev_sam_name, (self.__number_of_otu, 1)),
            self.sample.reshape((-1, 2), order="F"),
            self.__rev_otu_name.repeat(self.__number_of_sample, axis=0),
            self.otu.reshape((-1, 2)),
            np.char.add("color=", self.__rev_otu_col.repeat(self.__number_of_sample, axis=0)),
        )))
        self.write_conf(link, "links.txt")

    def __init__path__(self):
        """Fill the two %-placeholders of ``image.generic.conf`` with the output dir and file name."""
        image_conf = self.__outconf + "image.generic.conf"
        with open(image_conf, 'r', encoding='utf-8') as ci:
            out = ci.read() % (self.__out_dir, self.__prefix + 'circos.png')
        with open(image_conf, 'w', encoding='utf-8') as co:
            co.write(out)

    def visualize(self):
        """Write all Circos input files and run Circos on the generated configuration.

        A failing or missing Circos executable is reported with a warning
        rather than an exception, so callers that relied on the previous
        non-raising ``os.system`` call keep working.
        """
        import shlex
        import subprocess
        import warnings

        self.write_karyotype()
        self.write_highlight()
        self.write_links()
        self.__init__path__()
        # Argument list instead of a shell string: the conf path is passed
        # verbatim (spaces and shell metacharacters are harmless).  The
        # configured command is split so values like "perl /path/circos"
        # continue to work.
        command = shlex.split(self._circos) + ["-conf", self.__outconf + "circos.conf"]
        try:
            result = subprocess.run(command, check=False)
        except OSError as err:
            warnings.warn("could not start circos (%s): %s" % (self._circos, err))
            return
        if result.returncode != 0:
            warnings.warn("circos exited with status %d" % result.returncode)
def parse_bool(value):
    """Convert a command-line token such as "True"/"false"/"1"/"no" to a bool.

    ``bool("False")`` is ``True``, so the textual value has to be interpreted
    explicitly.

    Raises:
        argparse.ArgumentTypeError: if the token is not a recognised boolean.
    """
    if isinstance(value, bool):
        return value
    text = str(value).strip().lower()
    if text in ("true", "t", "yes", "y", "1"):
        return True
    if text in ("false", "f", "no", "n", "0", ""):
        return False
    raise argparse.ArgumentTypeError("expected a boolean, got %r" % (value,))


def build_parser():
    """Create the command-line parser of the Circos plotting script."""
    p = argparse.ArgumentParser(
        description="Draw a Circos plot linking the most abundant taxa of a "
                    "count table to samples (or to sample groups).")
    p.add_argument('-i', '--input', dest='input', metavar='<path>', required=True,
                   help='Taxonomic count data file')
    p.add_argument('-o', '--output', dest='output', metavar='<directory>', default='./',
                   help='Given an output directory')
    p.add_argument('-m', '--map', dest='map', metavar='<path>',
                   help='Sample metadata file')
    p.add_argument('-g', '--group', dest='group', metavar='<str>',
                   help='Column name in sample-metadata file')
    p.add_argument('-n', '--number', dest='number', metavar='<int>', type=int, default=10,
                   help='Specify how many species to display, default is 10')
    # "-b True" / "-b False" keep working; a bare "-b" now means True.
    p.add_argument('-b', '--by-groupMean', dest='by', metavar='<bool>', nargs='?',
                   const=True, default=False, type=parse_bool,
                   help='Pass True to use group mean to plot circos')
    p.add_argument('-p', '--prefix', dest='prefix', metavar='<str>', default="",
                   help='The prefix of output files, default is empty')
    return p


def main(argv=None):
    """Parse ``argv`` (``sys.argv[1:]`` when None) and draw the Circos plot."""
    options = build_parser().parse_args(argv)
    c = Circos(table=options.input, number=options.number, mapping_file=options.map,
               category=options.group, by_group_mean=options.by,
               out_dir=options.output, prefix=options.prefix)
    c.visualize()


if __name__ == '__main__':
    main()