-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathKlebsiella_plot.py
executable file
·218 lines (160 loc) · 6.21 KB
/
Klebsiella_plot.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Fri Jan 24 09:23:13 2020
@author: jha
"""
#%%
import csv
import mysql.connector as mariadb
from tabulate import tabulate
import pandas as pd
amrdb= mariadb.connect(
host="localhost",
user="root",
passwd="Sukhoi@90",
database ="myamr"
)
cursor = amrdb.cursor(buffered=True) # else it fetches one row for everytime it is executed
print("We are at line 31 we have connection, lets begin")
#%%
#this is for multiple microbes and their location in the same dataframe
#%%
list_bugs=["Klebsiella_ast_us"]
list_gloc=[]
for x in list_bugs:
print(x)
sql=("select count(`#label`), collection_date, geo_loc_name from " +x+ " where collection_date like '%2015%' group by geo_loc_name")
print(sql)
numrows= cursor.execute("select count(`#label`), collection_date, geo_loc_name from " +x+ " where collection_date like '%2015%' group by geo_loc_name")
print("Selected %s rows" %numrows)
print("Selected %s rows " %cursor.rowcount)
rows =cursor.fetchall()#fetch all rows at once
print(tabulate(rows, headers=["count","collection_date","geo_loc_name"], tablefmt='psql'))
df_sample = pd.DataFrame(rows)
df_sample.columns=["count","collection_date","geo_loc_name"]
list_loc=[]
list_loc=df_sample["geo_loc_name"].tolist()
for index in list_loc:
list_gloc.append(index)
list_gloc=list(set(list_gloc))
gloc_df=pd.DataFrame()
gloc_df["Location"]=list_gloc
df_ec=pd.DataFrame()
df_ec["geo_loc"]= list_gloc
#%%
#%%
##when you want to make is sophesticated like subplots
##%%
#ist_bugs=["Klebsiella_ast_us"]
#list_gloc=[]
#for x in list_bugs:
# print(x)
# sql=("select collection_date from " +x+ " group by collection_date")
# print(sql)
# numrows= cursor.execute("select collection_date from " +x+ " group by collection_date")
# print("Selected %s rows" %numrows)
# print("Selected %s rows " %cursor.rowcount)
# rows =cursor.fetchall()#fetch all rows at once
# print(tabulate(rows, headers=["count","collection_date","geo_loc_name"], tablefmt='psql'))
# df_year = pd.DataFrame(rows)
# df_year.columns=["collection_date"]
#try:
# df_year=pd.to_datetime(df_year['collection_date'])
# df_year= df_year['collection_date'].dt.year
#
#except:
# pass
#
##%%
#%%
list_bugs=["Klebsiella_ast_us"]
list_gloc=[]
for x in list_bugs:
print(x)
sql=("select geo_loc_name , count(`#label`)from " +x+ " where collection_date like '%2015%' group by geo_loc_name")
print(sql)
numrows= cursor.execute("select geo_loc_name , count(`#label`) from " +x+ " where collection_date like '%2015%' group by geo_loc_name")
print("Selected %s rows" %numrows)
print("Selected %s rows " %cursor.rowcount)
lst=list(cursor.fetchall())#fetch all rows at once
lstd=dict(lst)
gloc_df[x] = gloc_df['Location'].map(lstd)
#%%
#%%
sql=("select state,City from latlon_states")
print(sql)
numrows= cursor.execute("select City,State from latlon_states")
print("Selected %s rows" %numrows)
print("Selected %s rows " %cursor.rowcount)
lst=list(cursor.fetchall())#fetch all rows at once
lstd=dict(lst)
#%%
#%%
list_gloc=gloc_df["Location"].tolist()
for row in list_gloc:
if row is not None:
loc=list_gloc.index(row)
row1=row.split(":")[-1]
list_gloc.remove(row)
list_gloc.insert(loc,row1)
print (row)
for row in list_gloc:
if row is not None:
loc=list_gloc.index(row)
row1=row.split(", ")[-1]
list_gloc.remove(row)
list_gloc.insert(loc,row1)
print (row)
gloc_df["Location"]=list_gloc
gloc_df["Location"]=gloc_df['Location'].str.strip()
gloc_df["Location"]=gloc_df['Location'].str.strip()#needed twice because some have double spaces
gloc_df=gloc_df.replace(to_replace ="USA", value ="Ohio")
gloc_df=gloc_df.replace(to_replace ="OH", value ="Ohio")
gloc_df=gloc_df.replace(to_replace ="NY", value ="New York")
gloc_df=gloc_df.replace(to_replace ="Bethesa", value ="Maryland")
gloc_df=gloc_df.replace(to_replace ="MA", value ="Maryland")
gloc_df=gloc_df.replace(to_replace ="CA", value ="California")
gloc_df=gloc_df.replace(to_replace ="MN", value ="Minnesota")
gloc_df=gloc_df.replace(to_replace ="Boston", value ="Massachusetts")
gloc_df=gloc_df.replace(to_replace ="Boston, MA", value ="Massachusetts")
#gloc_df = gloc_df.groupby(['Location']).sum()
#gloc_df = gloc_df.reset_index()
gloc_df['state']=gloc_df['Location'].map(lstd) #the location needs to be mapped to state codes for klebsiella or other new drugs too
#%%
#%%
import plotly
import plotly.graph_objs as go
#scl = [
# [0.0, 'rgb(242,240,247)'],
# [0.2, 'rgb(218,218,235)'],
# [0.4, 'rgb(188,189,220)'],
# [0.6, 'rgb(158,154,200)'],
# [0.8, 'rgb(117,107,177)'],
# [1.0, 'rgb(84,39,143)']
#]
#df = pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/2015_us_ag_exports.csv')
gloc_df['text'] = gloc_df['Location'].astype(str) #+ '<br>' + \
# + (gloc_df['bioproject_center']).astype(str) #+'<br>' + ' Listeria ' + (gloc_df['listeria_percent']/100).astype(str) + '<br>' + \
#'Campylobacter ' + (gloc_df['campylobacter_percent']/100).astype(str) +'<br>' + ' Ecoli ' + (gloc_df['ecoli_percent']/100).astype(str)
data = [go.Choropleth(colorscale ='reds',autocolorscale = False,locations = gloc_df['state'],z = gloc_df['Klebsiella_ast_us'].astype(float),locationmode = 'USA-states',
text = gloc_df['text'],
marker = go.choropleth.Marker(line = go.choropleth.marker.Line(color="black",width = 2
)),
colorbar = go.choropleth.ColorBar(
title = "number of samples")
)]
layout = go.Layout(
title = go.layout.Title(
text = 'Samples tested for Klebsiella to anitimicrobials in 2015 '),
geo = go.layout.Geo(
scope = 'usa',
projection = go.layout.geo.Projection(type = 'albers usa'),
showlakes = True,
lakecolor = 'rgb(255, 255, 255)'),
)
fig = go.Figure(data = data, layout = layout)
plotly.offline.plot(fig,filename='state_bioproject2015_plot')
#fig.save(filename='state_plot')
#colorscale = scl,
#%%