-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathphypartspiecharts.py
executable file
·181 lines (139 loc) · 6.28 KB
/
phypartspiecharts.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
#!/usr/bin/env python
# Description text for the command-line help; it is passed to
# argparse.ArgumentParser(description=...) further down and shown verbatim
# because the parser uses RawTextHelpFormatter.
helptext= '''
Generate the "Pie Chart" representation of gene tree conflict from Smith et al. 2015 from
the output of phyparts, the bipartition summary software described in the same paper.
The input files include three files produced by PhyParts, and a file containing a species
tree in Newick format (likely, the tree used for PhyParts). The output is an SVG containing
the phylogeny along with pie charts at each node.
Requirements:
Python 2.7
ete3
matplotlib
'''
import matplotlib,sys,argparse
from ete3 import Tree, TreeStyle, TextFace,NodeStyle,faces, COLOR_SCHEMES
#Read in species tree and convert to ultrametric
#Match phyparts nodes to ete3 nodes
def get_phyparts_nodes(sptree_fn,phyparts_root):
    """Load the species tree and label its nodes with PhyParts node IDs.

    Reads ``<phyparts_root>.node.key``. Each non-blank line is split on
    whitespace: the first token is used as the PhyParts node ID and the
    second as the Newick string (without the trailing ``;``) of the subtree
    at that node. Every node of the species tree whose ete3 topology ID
    equals the topology ID of one of those subtrees is renamed to the
    matching node ID.

    Args:
        sptree_fn: Species tree in any form ``ete3.Tree`` accepts
            (Newick string or file name).
        phyparts_root: File root name used for the PhyParts output files.

    Returns:
        A tuple ``(sptree, subtrees_dict, subtrees_topids)``:
        the relabelled species tree (after ``convert_to_ultrametric()``),
        a dict mapping node ID -> subtree ``Tree``, and a dict mapping
        node ID -> topology ID of that subtree.
    """
    sptree = Tree(sptree_fn)
    sptree.convert_to_ultrametric()

    subtrees_dict = {}
    # Context manager so the key file is closed as soon as it has been read.
    with open(phyparts_root + ".node.key") as key_file:
        for line in key_file:
            fields = line.split()
            if not fields:
                # Tolerate blank / trailing lines instead of raising IndexError.
                continue
            subtrees_dict[fields[0]] = Tree(fields[1] + ";")

    subtrees_topids = {}
    for node_id in subtrees_dict:
        subtrees_topids[node_id] = subtrees_dict[node_id].get_topology_id()

    # Reverse index (topology ID -> node ID) so each species-tree node needs a
    # single lookup instead of a scan over every subtree. It is filled in the
    # same iteration order as the original scan, so if two subtrees share a
    # topology ID the later one still wins.
    node_id_by_topid = {}
    for node_id in subtrees_dict:
        node_id_by_topid[subtrees_topids[node_id]] = node_id

    for node in sptree.traverse():
        matched_id = node_id_by_topid.get(node.get_topology_id())
        if matched_id is not None:
            node.name = matched_id

    return sptree,subtrees_dict,subtrees_topids
#Summarize concordance and conflict from Phyparts
def get_concord_and_conflict(phyparts_root,subtrees_dict,subtrees_topids):
    """Read node support values from the two trees in ``<phyparts_root>.concon.tre``.

    The first line of the file is parsed as one tree and the second line as
    another. For each tree, every node whose topology ID equals the topology
    ID recorded for a PhyParts node ID contributes its ``support`` value
    under that node ID.

    Args:
        phyparts_root: File root name used for the PhyParts output files.
        subtrees_dict: Dict keyed by PhyParts node ID (only its keys are used).
        subtrees_topids: Dict mapping PhyParts node ID -> topology ID.

    Returns:
        ``(concord_dict, conflict_dict)``: supports taken from the first and
        from the second tree respectively, each keyed by PhyParts node ID.
    """
    def support_by_node_id(tree):
        # Map each PhyParts node ID to the support of the tree node that
        # shares its topology ID (a later match overwrites an earlier one).
        supports = {}
        for tree_node in tree.traverse():
            topid = tree_node.get_topology_id()
            for node_id in subtrees_dict:
                if subtrees_topids[node_id] == topid:
                    supports[node_id] = tree_node.support
        return supports

    with open(phyparts_root + ".concon.tre") as concon_file:
        concon_tree = Tree(concon_file.readline())
        conflict_tree = Tree(concon_file.readline())

    concord_dict = support_by_node_id(concon_tree)
    conflict_dict = support_by_node_id(conflict_tree)
    return concord_dict, conflict_dict
#Generate Pie Chart data
def get_pie_chart_data(phyparts_root,total_genes,concord_dict,conflict_dict):
    """Build per-node pie-chart percentages from ``<phyparts_root>.hist``.

    Each non-blank line of the ``.hist`` file is split on commas and read as:
    first field = node label (its first four characters are dropped to get
    the node name), second field = a concordance count that is discarded in
    favour of ``concord_dict``, last field = the line's total gene count, and
    any fields in between = per-alternative conflict counts.

    Args:
        phyparts_root: File root name used for the PhyParts output files.
        total_genes: Total number of gene trees; every pie slice is expressed
            as a percentage of this number.
        concord_dict: Node name -> number of concordant genes.
        conflict_dict: Node name -> number of conflicting genes (all
            alternatives combined).

    Returns:
        ``(phyparts_dict, phyparts_pies)``. ``phyparts_pies[node]`` is the
        list ``[concordant, top conflict, other conflict, remainder]`` in
        percent. ``phyparts_dict[node]`` is ``[rounded concordant count,
        rounded (line total - concordant count)]``.

    Raises:
        KeyError: if a node in the file is missing from either dict.
        ZeroDivisionError: if ``total_genes`` is zero.
    """
    # Coerce once so the divisions below are true divisions even when the
    # counts are ints under Python 2.
    total = float(total_genes)

    phyparts_pies = {}
    phyparts_dict = {}

    # Context manager so the histogram file is closed once it has been read.
    with open(phyparts_root + ".hist") as hist_file:
        for line in hist_file:
            line = line.strip()
            if not line:
                # Tolerate blank / trailing lines instead of crashing in float().
                continue

            fields = line.split(",")
            tot_genes = float(fields.pop(-1))
            node_name = fields.pop(0)[4:]
            # The file's own concordance column is superseded by concord_dict;
            # pop it so only the conflict columns remain in `fields`.
            fields.pop(0)

            concord = concord_dict[node_name]
            all_conflict = conflict_dict[node_name]

            if fields:
                most_conflict = max(float(x) for x in fields)
            else:
                most_conflict = 0.0

            adj_concord = (concord / total) * 100
            adj_most_conflict = (most_conflict / total) * 100
            other_conflict = (all_conflict - most_conflict) / total * 100
            the_rest = (total - concord - all_conflict) / total * 100

            phyparts_pies[node_name] = [adj_concord,adj_most_conflict,other_conflict,the_rest]
            phyparts_dict[node_name] = [int(round(concord,0)),int(round(tot_genes-concord,0))]

    return phyparts_dict, phyparts_pies
def node_text_layout(mynode):
    """Layout function: draw the node's name as text to the right of its branch."""
    name_face = faces.TextFace(mynode.name, fsize=20)
    faces.add_face_to_node(name_face, mynode, column=0, position="branch-right")
# Command-line interface.
parser = argparse.ArgumentParser(description=helptext,formatter_class=argparse.RawTextHelpFormatter)
parser.add_argument('species_tree',help="Newick formatted species tree topology.")
parser.add_argument('phyparts_root',help="File root name used for Phyparts.")
parser.add_argument('num_genes',type=int,default=0,help="Number of total gene trees. Used to properly scale pie charts.")
parser.add_argument('--taxon_subst',help="Comma-delimted file to translate tip names.")
parser.add_argument("--svg_name",help="File name for SVG generated by script",default="pies.svg")
parser.add_argument("--show_nodes",help="Also show tree with nodes labeled same as PhyParts",action="store_true",default=False)
parser.add_argument("--colors",help="Four colors of the pie chart: concordance (blue) top conflict (green), other conflict (red), no signal (gray)",nargs="+",default=["blue","green","red","dark gray"])
args = parser.parse_args()

# Load the species tree, then gather the per-node numbers the pies are drawn from.
plot_tree,subtrees_dict,subtrees_topids = get_phyparts_nodes(args.species_tree, args.phyparts_root)
concord_dict, conflict_dict = get_concord_and_conflict(args.phyparts_root,subtrees_dict,subtrees_topids)
phyparts_dist, phyparts_pies = get_pie_chart_data(args.phyparts_root,args.num_genes,concord_dict,conflict_dict)

# Optional tip renaming from a two-column comma-delimited file (old name, new name).
if args.taxon_subst:
    taxon_subst = {}
    # Plain text mode: the 'U' flag is deprecated and rejected by Python 3.11+.
    # The context manager closes the file once it has been read.
    with open(args.taxon_subst) as subst_file:
        for line in subst_file:
            if not line.strip():
                # Tolerate blank / trailing lines.
                continue
            # Drop the line terminator first; otherwise the replacement name
            # (second column) would carry a trailing newline into the tree.
            fields = line.rstrip("\r\n").split(",")
            taxon_subst[fields[0]] = fields[1]

    for leaf in plot_tree.get_leaves():
        try:
            leaf.name = taxon_subst[leaf.name]
        except KeyError:
            # No translation available: report the tip and keep its name.
            print(leaf.name)
def phyparts_pie_layout(mynode):
    """Layout function that decorates one node of the plotted tree.

    Nodes whose name is a key of the module-level ``phyparts_pies`` get a
    50x50 pie chart to the right of the branch (slice colours taken from
    ``args.colors``), the integer concordance value above the branch and the
    integer conflict value below it. Every other node gets its name as an
    aligned text label.
    """
    node_id = mynode.name

    if node_id not in phyparts_pies:
        # No pie data for this node: just show its name in the aligned column.
        name_face = faces.TextFace(node_id, fsize=20)
        faces.add_face_to_node(name_face, mynode, 0, position="aligned")
        return

    pie_face = faces.PieChartFace(
        phyparts_pies[node_id],
        colors=args.colors,
        width=50,
        height=50,
    )
    pie_face.border.width = None
    pie_face.opacity = 1
    faces.add_face_to_node(pie_face, mynode, 0, position="branch-right")

    # Counts are followed by padding so they do not touch the pie chart.
    concord_face = faces.TextFace(str(int(concord_dict[node_id])) + ' ', fsize=20)
    conflict_face = faces.TextFace(str(int(conflict_dict[node_id])) + ' ', fsize=20)

    faces.add_face_to_node(concord_face, mynode, 0, position="branch-top")
    faces.add_face_to_node(conflict_face, mynode, 0, position="branch-bottom")
# Plot Pie Chart

# One shared node style with zero-size node markers, applied to every node;
# the vertical connector line width is also set to zero on each node.
nstyle = NodeStyle()
nstyle["size"] = 0
for n in plot_tree.traverse():
    n.set_style(nstyle)
    n.img_style["vt_line_width"] = 0

# Tree-wide style: names are drawn by the layout function itself, so the
# built-in leaf names are switched off.
ts = TreeStyle()
ts.layout_fn = phyparts_pie_layout
ts.show_leaf_name = False
ts.scale = 30
ts.branch_vertical_margin = 10
ts.draw_guiding_lines = True
ts.guiding_lines_type = 0
ts.guiding_lines_color = "black"

# Final tree shaping, then write the main figure.
plot_tree.convert_to_ultrametric()
plot_tree.ladderize(direction=1)
my_svg = plot_tree.render(args.svg_name, tree_style=ts, w=595, dpi=300)

# Optional companion figure whose nodes carry their names as text labels.
if args.show_nodes:
    node_style = TreeStyle()
    node_style.layout_fn = node_text_layout
    node_style.show_leaf_name = False
    plot_tree.render("tree_nodes.pdf", tree_style=node_style)