- pd.read_csv(path, sep = "\t", header = None, names = ['pdate', 'pv', 'uv'])
- pd.read_excel
- pd.read_sql
import pandas as pd
import numpy as np
name = pd.read_csv("../name_id.xls", sep = "\t", names = ['sample', 'subpopulation'])
name
- dataframe --> 二维数据、整个表格、 多行多列
- series --> 一维数据、一行一列(类似于字典, 比较好处理)
- series 由一组数据(不同的数据类型)以及与之相关索引构成
s1 = pd.Series([1, 'a', 5.2, 7])
s1
0 1
1 a
2 5.2
3 7
dtype: object
RangeIndex(start=0, stop=4, step=1)
array([1, 'a', 5.2, 7], dtype=object)
s2 = pd.Series([1, 'a', 5.2, 7], index = ['a', 'b', 'd', 'e'])
s2
a 1
b a
d 5.2
e 7
dtype: object
sdata = {'Ohio' : 35000, 'Texas' : 72000, 'Oregon' : 16000, 'Utah' : 5000}
s3 = pd.Series(sdata)
s3
Ohio 35000
Texas 72000
Oregon 16000
Utah 5000
dtype: int64
- 类似字典dict
- 查询一个值, 返回的是这个值本身
- 同时查询多个值, 返回的是一个Series
a 1
b a
d 5.2
e 7
dtype: object
pandas.core.series.Series
- 每列可以是不同的类型
- 既有行索引index, 也有列索引columns
- 也可以被看做是由Series组成的字典
# 根据多个字典序列创建dataframe
data = {
'state' : ['Ohio', 'Utah', 'Ohio', 'Texas', 'Ohio'],
'year' : [2000, 2001, 2002, 2003, 2004],
'pop' : [1.5, 1.7, 3.6, 2.4, 2.9]
}
df = pd.DataFrame(data)
df
</style>
|
state |
year |
pop |
0 |
Ohio |
2000 |
1.5 |
1 |
Utah |
2001 |
1.7 |
2 |
Ohio |
2002 |
3.6 |
3 |
Texas |
2003 |
2.4 |
4 |
Ohio |
2004 |
2.9 |
- 查询一行, 结果是一个Series
- 查询多行, 结果是一个Dataframe
state Utah
year 2001
pop 1.7
Name: 1, dtype: object
</style>
|
state |
year |
pop |
1 |
Utah |
2001 |
1.7 |
2 |
Ohio |
2002 |
3.6 |
3 |
Texas |
2003 |
2.4 |
pandas.core.frame.DataFrame
- df.loc , 根据行、列的标签值查询
- df.iloc , 根据行、列的数字位置查询
- df.where
- df.query
- 使用单个label值进行查询
- 使用值列表批量查询
- 使用数值区间进行范围查询
- 使用条件表达式查询
- 调用函数查询
na = pd.read_table("../name_id.xls", header = None, names = ['samples', 'subpopulation'])
na
</style>
|
samples |
subpopulation |
0 |
110 |
CA |
1 |
111 |
CA |
2 |
113 |
CA |
3 |
117 |
CA |
4 |
120 |
CA |
5 |
124 |
CA |
6 |
128 |
CA |
7 |
129 |
CA |
8 |
130 |
CA |
9 |
142 |
CA |
10 |
143 |
CA |
11 |
147 |
CA |
12 |
153 |
CA |
13 |
161 |
CA |
14 |
162 |
CA |
15 |
170 |
CA |
16 |
171 |
CA |
17 |
172 |
CA |
18 |
188 |
CA |
19 |
194 |
CA |
20 |
197 |
CA |
21 |
201 |
CA |
22 |
203 |
CA |
23 |
212 |
CA |
24 |
215 |
CA |
25 |
218 |
CA |
26 |
22 |
CA |
27 |
223 |
CA |
28 |
225 |
CA |
29 |
230 |
CA |
30 |
23 |
CA |
31 |
237 |
CA |
32 |
238 |
CA |
33 |
240 |
CA |
34 |
241 |
CA |
35 |
246 |
CA |
36 |
257 |
CA |
37 |
260 |
CA |
38 |
265 |
CA |
39 |
272 |
CA |
40 |
274 |
CA |
41 |
28 |
CA |
42 |
286 |
CA |
43 |
287 |
CA |
44 |
29 |
CA |
45 |
297 |
CA |
46 |
309 |
CA |
47 |
311 |
CA |
48 |
315 |
CA |
49 |
33 |
CA |
50 |
345 |
CA |
51 |
36 |
CA |
52 |
43 |
CA |
53 |
46 |
CA |
54 |
49 |
CA |
55 |
52 |
CA |
56 |
534 |
CA |
57 |
537 |
CA |
58 |
538 |
CA |
59 |
539 |
CA |
60 |
546 |
CA |
61 |
5 |
CA |
62 |
606 |
CA |
63 |
615 |
CA |
64 |
637 |
CA |
65 |
644 |
CA |
66 |
653 |
CA |
67 |
655 |
CA |
68 |
66 |
CA |
69 |
669 |
CA |
70 |
681 |
CA |
71 |
685 |
CA |
72 |
688 |
CA |
73 |
71 |
CA |
74 |
73 |
CA |
75 |
757 |
CA |
76 |
76 |
CA |
77 |
767 |
CA |
78 |
77 |
CA |
79 |
819 |
CA |
80 |
84 |
CA |
81 |
85 |
CA |
82 |
863 |
CA |
83 |
887 |
CA |
84 |
88 |
CA |
85 |
89 |
CA |
86 |
90 |
CA |
87 |
921 |
CA |
88 |
94 |
CA |
89 |
953 |
CA |
90 |
955 |
CA |
91 |
956 |
CA |
92 |
957 |
CA |
93 |
95 |
CA |
94 |
959 |
CA |
95 |
960 |
CA |
96 |
961 |
CA |
97 |
963 |
CA |
98 |
966 |
CA |
99 |
967 |
CA |
100 |
969 |
CA |
101 |
970 |
CA |
102 |
973 |
CA |
103 |
974 |
CA |
104 |
975 |
CA |
105 |
977 |
CA |
106 |
980 |
CA |
107 |
982 |
CA |
108 |
983 |
CA |
109 |
985 |
CA |
110 |
986 |
CA |
111 |
987 |
CA |
112 |
988 |
CA |
113 |
989 |
CA |
114 |
993 |
CA |
115 |
CA_1 |
CA |
116 |
CA_2 |
CA |
117 |
P55 |
CA |
118 |
P56 |
CA |
119 |
MPSM10 |
MP |
120 |
MPSM11 |
MP |
121 |
MPSM12 |
MP |
122 |
MPSM13 |
MP |
123 |
MPSM15 |
MP |
124 |
MPSM16 |
MP |
125 |
MPSM17 |
MP |
126 |
MPSM18 |
MP |
127 |
MPSM1 |
MP |
128 |
MPSM20 |
MP |
129 |
MPSM21 |
MP |
130 |
MPSM22 |
MP |
131 |
MPSM23 |
MP |
132 |
MPSM24 |
MP |
133 |
MPSM25 |
MP |
134 |
MPSM26 |
MP |
135 |
MPSM27 |
MP |
136 |
MPSM28 |
MP |
137 |
MPSM29 |
MP |
138 |
MPSM2 |
MP |
139 |
MPSM30 |
MP |
140 |
MPSM31 |
MP |
141 |
MPSM32 |
MP |
142 |
MPSM33 |
MP |
143 |
MPSM34 |
MP |
144 |
MPSM35 |
MP |
145 |
MPSM36 |
MP |
146 |
MPSM37 |
MP |
147 |
MPSM38 |
MP |
148 |
MPSM3 |
MP |
149 |
MPSM4 |
MP |
150 |
MPSM5 |
MP |
151 |
MPSM6 |
MP |
152 |
MPSM7 |
MP |
153 |
MPSM8 |
MP |
154 |
MPSM9 |
MP |
155 |
553 |
WA |
156 |
660 |
WA |
157 |
855 |
WA |
158 |
856 |
WA |
159 |
904 |
WA |
160 |
917 |
WA |
161 |
920 |
WA |
162 |
922 |
WA |
163 |
923 |
WA |
164 |
926 |
WA |
165 |
927 |
WA |
166 |
928 |
WA |
167 |
929 |
WA |
168 |
930 |
WA |
169 |
931 |
WA |
170 |
932 |
WA |
171 |
933 |
WA |
172 |
934 |
WA |
173 |
935 |
WA |
174 |
936 |
WA |
175 |
937 |
WA |
176 |
938 |
WA |
177 |
939 |
WA |
178 |
940 |
WA |
179 |
941 |
WA |
180 |
942 |
WA |
181 |
943 |
WA |
182 |
944 |
WA |
183 |
945 |
WA |
184 |
946 |
WA |
185 |
947 |
WA |
186 |
948 |
WA |
187 |
949 |
WA |
188 |
950 |
WA |
189 |
951 |
WA |
190 |
952 |
WA |
191 |
MS-1004 |
WA |
192 |
MS-1008 |
WA |
193 |
MS-1010 |
WA |
194 |
MS-1011 |
WA |
195 |
MS-1012 |
WA |
196 |
MS-1013 |
WA |
197 |
MS-1014 |
WA |
198 |
MS-1015 |
WA |
199 |
MS-1041 |
WA |
200 |
MS-1057 |
WA |
201 |
MS-1154 |
WA |
202 |
MS-1156 |
WA |
203 |
MS-1157 |
WA |
204 |
MS-1161 |
WA |
205 |
MS-1167 |
WA |
206 |
MS-1168 |
WA |
207 |
MS-1169 |
WA |
208 |
MS-1170 |
WA |
209 |
MS-1171 |
WA |
210 |
MS-1172 |
WA |
211 |
MS-1174 |
WA |
212 |
MS-1175 |
WA |
213 |
MS-1176 |
WA |
214 |
MS-1177 |
WA |
215 |
MS-1178 |
WA |
216 |
MS-1184 |
WA |
217 |
MS-986 |
WA |
218 |
MS-989 |
WA |
219 |
MS-994 |
WA |
220 |
P109 |
WA |
221 |
P125 |
WA |
222 |
P132 |
WA |
223 |
P13 |
WA |
224 |
P141 |
WA |
225 |
P144 |
WA |
226 |
P145 |
WA |
227 |
P158 |
WA |
228 |
P15 |
WA |
229 |
P166 |
WA |
230 |
P17 |
WA |
231 |
P18 |
WA |
232 |
P20 |
WA |
233 |
P21 |
WA |
234 |
P29 |
WA |
235 |
P2 |
WA |
236 |
P42 |
WA |
237 |
P43 |
WA |
238 |
P44 |
WA |
239 |
P45 |
WA |
240 |
P50 |
WA |
241 |
P63 |
WA |
242 |
P64 |
WA |
243 |
P65 |
WA |
244 |
P67 |
WA |
245 |
P70 |
WA |
246 |
P7 |
WA |
247 |
P81 |
WA |
248 |
P84 |
WA |
249 |
P8 |
WA |
250 |
P14 |
OG |
251 |
P28 |
OG |
```python
# 设定索引为samples
na.set_index('samples', inplace = True)
na.index
```
Index(['110', '111', '113', '117', '120', '124', '128', '129', '130', '142',
...
'P64', 'P65', 'P67', 'P70', 'P7', 'P81', 'P84', 'P8', 'P14', 'P28'],
dtype='object', name='samples', length=252)
</style>
|
subpopulation |
samples |
|
110 |
CA |
111 |
CA |
113 |
CA |
117 |
CA |
120 |
CA |
124 |
CA |
128 |
CA |
129 |
CA |
130 |
CA |
142 |
CA |
143 |
CA |
147 |
CA |
153 |
CA |
161 |
CA |
162 |
CA |
170 |
CA |
171 |
CA |
172 |
CA |
188 |
CA |
194 |
CA |
197 |
CA |
201 |
CA |
203 |
CA |
212 |
CA |
215 |
CA |
218 |
CA |
22 |
CA |
223 |
CA |
225 |
CA |
230 |
CA |
23 |
CA |
237 |
CA |
238 |
CA |
240 |
CA |
241 |
CA |
246 |
CA |
257 |
CA |
260 |
CA |
265 |
CA |
272 |
CA |
274 |
CA |
28 |
CA |
286 |
CA |
287 |
CA |
29 |
CA |
297 |
CA |
309 |
CA |
311 |
CA |
315 |
CA |
33 |
CA |
345 |
CA |
36 |
CA |
43 |
CA |
46 |
CA |
49 |
CA |
52 |
CA |
534 |
CA |
537 |
CA |
538 |
CA |
539 |
CA |
546 |
CA |
5 |
CA |
606 |
CA |
615 |
CA |
637 |
CA |
644 |
CA |
653 |
CA |
655 |
CA |
66 |
CA |
669 |
CA |
681 |
CA |
685 |
CA |
688 |
CA |
71 |
CA |
73 |
CA |
757 |
CA |
76 |
CA |
767 |
CA |
77 |
CA |
819 |
CA |
84 |
CA |
85 |
CA |
863 |
CA |
887 |
CA |
88 |
CA |
89 |
CA |
90 |
CA |
921 |
CA |
94 |
CA |
953 |
CA |
955 |
CA |
956 |
CA |
957 |
CA |
95 |
CA |
959 |
CA |
960 |
CA |
961 |
CA |
963 |
CA |
966 |
CA |
967 |
CA |
969 |
CA |
970 |
CA |
973 |
CA |
974 |
CA |
975 |
CA |
977 |
CA |
980 |
CA |
982 |
CA |
983 |
CA |
985 |
CA |
986 |
CA |
987 |
CA |
988 |
CA |
989 |
CA |
993 |
CA |
CA_1 |
CA |
CA_2 |
CA |
P55 |
CA |
P56 |
CA |
MPSM10 |
MP |
MPSM11 |
MP |
MPSM12 |
MP |
MPSM13 |
MP |
MPSM15 |
MP |
MPSM16 |
MP |
MPSM17 |
MP |
MPSM18 |
MP |
MPSM1 |
MP |
MPSM20 |
MP |
MPSM21 |
MP |
MPSM22 |
MP |
MPSM23 |
MP |
MPSM24 |
MP |
MPSM25 |
MP |
MPSM26 |
MP |
MPSM27 |
MP |
MPSM28 |
MP |
MPSM29 |
MP |
MPSM2 |
MP |
MPSM30 |
MP |
MPSM31 |
MP |
MPSM32 |
MP |
MPSM33 |
MP |
MPSM34 |
MP |
MPSM35 |
MP |
MPSM36 |
MP |
MPSM37 |
MP |
MPSM38 |
MP |
MPSM3 |
MP |
MPSM4 |
MP |
MPSM5 |
MP |
MPSM6 |
MP |
MPSM7 |
MP |
MPSM8 |
MP |
MPSM9 |
MP |
553 |
WA |
660 |
WA |
855 |
WA |
856 |
WA |
904 |
WA |
917 |
WA |
920 |
WA |
922 |
WA |
923 |
WA |
926 |
WA |
927 |
WA |
928 |
WA |
929 |
WA |
930 |
WA |
931 |
WA |
932 |
WA |
933 |
WA |
934 |
WA |
935 |
WA |
936 |
WA |
937 |
WA |
938 |
WA |
939 |
WA |
940 |
WA |
941 |
WA |
942 |
WA |
943 |
WA |
944 |
WA |
945 |
WA |
946 |
WA |
947 |
WA |
948 |
WA |
949 |
WA |
950 |
WA |
951 |
WA |
952 |
WA |
MS-1004 |
WA |
MS-1008 |
WA |
MS-1010 |
WA |
MS-1011 |
WA |
MS-1012 |
WA |
MS-1013 |
WA |
MS-1014 |
WA |
MS-1015 |
WA |
MS-1041 |
WA |
MS-1057 |
WA |
MS-1154 |
WA |
MS-1156 |
WA |
MS-1157 |
WA |
MS-1161 |
WA |
MS-1167 |
WA |
MS-1168 |
WA |
MS-1169 |
WA |
MS-1170 |
WA |
MS-1171 |
WA |
MS-1172 |
WA |
MS-1174 |
WA |
MS-1175 |
WA |
MS-1176 |
WA |
MS-1177 |
WA |
MS-1178 |
WA |
MS-1184 |
WA |
MS-986 |
WA |
MS-989 |
WA |
MS-994 |
WA |
P109 |
WA |
P125 |
WA |
P132 |
WA |
P13 |
WA |
P141 |
WA |
P144 |
WA |
P145 |
WA |
P158 |
WA |
P15 |
WA |
P166 |
WA |
P17 |
WA |
P18 |
WA |
P20 |
WA |
P21 |
WA |
P29 |
WA |
P2 |
WA |
P42 |
WA |
P43 |
WA |
P44 |
WA |
P45 |
WA |
P50 |
WA |
P63 |
WA |
P64 |
WA |
P65 |
WA |
P67 |
WA |
P70 |
WA |
P7 |
WA |
P81 |
WA |
P84 |
WA |
P8 |
WA |
P14 |
OG |
P28 |
OG |
# 将subpopulation列中的CA替换为test
na.loc[:, "subpopulation"] = na["subpopulation"].str.replace("CA", "test").astype('str')
<style scoped>
.dataframe tbody tr th:only-of-type {
vertical-align: middle;
}
.dataframe tbody tr th {
vertical-align: top;
}
.dataframe thead th {
text-align: right;
}
</style>
|
subpopulation |
samples |
|
110 |
test |
111 |
test |
113 |
test |
117 |
test |
120 |
test |
na.loc[['110', '117'], 'subpopulation']
samples
110 CA
117 CA
Name: subpopulation, dtype: object
na.loc['110':'117', 'subpopulation']
samples
110 CA
111 CA
113 CA
117 CA
Name: subpopulation, dtype: object
samples
111 CA
113 CA
117 CA
120 CA
124 CA
128 CA
129 CA
130 CA
142 CA
143 CA
147 CA
153 CA
161 CA
162 CA
170 CA
171 CA
172 CA
188 CA
194 CA
Name: subpopulation, dtype: object
na[na.loc[:,'subpopulation'] == 'CA']
na.loc[lambda na : (na['subpopulation'] == "CA") | (na['subpopulation'] == "OG"), :]
</style>
|
subpopulation |
samples |
|
110 |
CA |
111 |
CA |
113 |
CA |
117 |
CA |
120 |
CA |
124 |
CA |
128 |
CA |
129 |
CA |
130 |
CA |
142 |
CA |
143 |
CA |
147 |
CA |
153 |
CA |
161 |
CA |
162 |
CA |
170 |
CA |
171 |
CA |
172 |
CA |
188 |
CA |
194 |
CA |
197 |
CA |
201 |
CA |
203 |
CA |
212 |
CA |
215 |
CA |
218 |
CA |
22 |
CA |
223 |
CA |
225 |
CA |
230 |
CA |
23 |
CA |
237 |
CA |
238 |
CA |
240 |
CA |
241 |
CA |
246 |
CA |
257 |
CA |
260 |
CA |
265 |
CA |
272 |
CA |
274 |
CA |
28 |
CA |
286 |
CA |
287 |
CA |
29 |
CA |
297 |
CA |
309 |
CA |
311 |
CA |
315 |
CA |
33 |
CA |
345 |
CA |
36 |
CA |
43 |
CA |
46 |
CA |
49 |
CA |
52 |
CA |
534 |
CA |
537 |
CA |
538 |
CA |
539 |
CA |
546 |
CA |
5 |
CA |
606 |
CA |
615 |
CA |
637 |
CA |
644 |
CA |
653 |
CA |
655 |
CA |
66 |
CA |
669 |
CA |
681 |
CA |
685 |
CA |
688 |
CA |
71 |
CA |
73 |
CA |
757 |
CA |
76 |
CA |
767 |
CA |
77 |
CA |
819 |
CA |
84 |
CA |
85 |
CA |
863 |
CA |
887 |
CA |
88 |
CA |
89 |
CA |
90 |
CA |
921 |
CA |
94 |
CA |
953 |
CA |
955 |
CA |
956 |
CA |
957 |
CA |
95 |
CA |
959 |
CA |
960 |
CA |
961 |
CA |
963 |
CA |
966 |
CA |
967 |
CA |
969 |
CA |
970 |
CA |
973 |
CA |
974 |
CA |
975 |
CA |
977 |
CA |
980 |
CA |
982 |
CA |
983 |
CA |
985 |
CA |
986 |
CA |
987 |
CA |
988 |
CA |
989 |
CA |
993 |
CA |
CA_1 |
CA |
CA_2 |
CA |
P55 |
CA |
P56 |
CA |
P14 |
OG |
P28 |
OG |