-
Notifications
You must be signed in to change notification settings - Fork 0
/
categorical_converter2.py
61 lines (43 loc) · 1.28 KB
/
categorical_converter2.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
import numpy as np
import gc as gc
"""
USAGE: convert(data, cols)
data - numpy array of data
cols - tuple of columns to process. These columns should be categorical columns.
IMPORTANT: Column indexing in data starts at 0. You can't index the last column.
Ex: you want to index second col here, then
data
a b c
a b c
x y z
cols=(1,)
If you want to index the first and second columns, then
cols=(0,1)
All 3
cols=(0,1,2)
You can also skip a numeric column that you don't want to encode, e.g.
cols=(0,2) will skip column 1
"""
# Shared code table used by lookup(): maps every key seen so far to its
# integer code. It lives at module level, so it persists between calls
# until it is cleared.
lookup_data = {}


def lookup(key):
    """Return the integer code for ``key``, assigning a new one if needed.

    Codes start at 1 and are handed out in first-seen order. The mapping is
    stored in the module-level ``lookup_data`` dict.

    The next code is computed as ``len(lookup_data) + 1`` rather than by
    scanning all stored values with ``max()``. While the table is only ever
    filled through this function the stored codes are exactly 1..len, so the
    result is the same, but each insertion is constant time instead of
    linear in the number of known keys.

    Args:
        key: Any hashable value.

    Returns:
        int: The code associated with ``key``.
    """
    try:
        return lookup_data[key]
    except KeyError:
        # First time this key is seen: give it the next sequential code.
        code = len(lookup_data) + 1
        lookup_data[key] = code
        return code
def convert(data, cols):
    """Replace categorical values in the given columns with integer codes.

    Each listed column is encoded independently: the first distinct value
    met in a column gets code 1, the next new value gets 2, and so on.
    Repeated values reuse the code they were given earlier in that column.

    The codes are written back into ``data`` in place, so they are stored
    using the dtype of ``data``. Prefer an object array (or a string dtype
    wide enough for the largest code) to avoid lossy casts.

    The code table is local to each column, so the result does not depend
    on, and does not modify, the module-level ``lookup_data`` dict.

    NOTE(review): the table is reset for every column, so numbering restarts
    at 1 per column. The flattened source does not show whether the original
    reset happened per column or once per call -- confirm with callers.

    Args:
        data: Two-dimensional numpy array (rows x columns).
        cols: Iterable of zero-based indices of the columns to encode.

    Returns:
        The same ``data`` array, modified in place.

    Raises:
        ValueError: If ``data`` is not two-dimensional.
    """
    # Unpacking exactly two values keeps the requirement that data is 2-D.
    n_rows, _ = data.shape
    for col in cols:
        codes = {}  # value -> code, fresh for every column
        for row in range(n_rows):
            # setdefault evaluates len(codes) + 1 before any insertion, so a
            # new value receives the next sequential code; a known value
            # simply returns the code it already has.
            data[row, col] = codes.setdefault(data[row, col], len(codes) + 1)
    return data
# Script entry point: intentionally does nothing. The module is meant to be
# imported and used through convert().
if __name__=="__main__":
    pass