-
Notifications
You must be signed in to change notification settings - Fork 0
/
scaling.py
executable file
·93 lines (83 loc) · 2.91 KB
/
scaling.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
#!/usr/bin/python3
import sys
from typing import Union

import numpy as np
import pandas as pd
class Scaling():
    """
    A class which contains some of the popular scaling methods.

    Every public method accepts one-dimensional data or two-dimensional
    data (scaled column by column) and returns a new floating-point
    array; the caller's input is never modified.
    """

    def __checkna(self, data: Union[np.ndarray, list]) -> None:
        """
        Check whether data contains NA or nan values.

        If it does, two explanatory lines are printed and the process is
        terminated through ``sys.exit()`` (i.e. ``SystemExit`` is raised).
        """
        # Flatten first so the check works for any number of dimensions.
        if np.any(pd.isna(np.ravel(data))):
            print("Data contains NA or nan values")
            print("Give the data which is NA or nan free")
            sys.exit()

    def __prepare(self, data: Union[np.ndarray, list]) -> np.ndarray:
        """
        Return a NA-checked copy of data that is safe for float arithmetic.
        """
        # np.array always copies, so the caller's object is left untouched.
        arr = np.array(data)
        # Integer/bool input must be promoted: scaled values are fractional
        # and would be truncated if stored back in an integer array.
        if not np.issubdtype(arr.dtype, np.inexact):
            arr = arr.astype(float)
        self.__checkna(arr)
        return arr

    def __scale(self, data: Union[np.ndarray, list], offset, spread) -> np.ndarray:
        """
        Shared implementation: return ``(data - offset) / spread``.

        ``offset`` and ``spread`` are callables of the form ``f(arr, axis=0)``
        (e.g. ``np.mean``); reducing along axis 0 yields one value for 1-D
        data and one value per column for 2-D data.
        """
        arr = self.__prepare(data)
        if arr.ndim not in (1, 2):
            print("This is only upto two-dimensional data")
            return arr
        if arr.size == 0:
            # Nothing to scale; reductions over empty data are undefined.
            return arr
        shift = offset(arr, axis=0)
        width = spread(arr, axis=0)
        # A constant column (or a single sample) has zero spread.  Dividing
        # by 1 instead maps it to zeros rather than nan/inf.
        width = np.where(width == 0, 1.0, width)
        return (arr - shift) / width

    def standerdization(self, data: Union[np.ndarray, list]) -> np.ndarray:
        """
        Scale the data so that its mean is zero and its (population)
        standard deviation is one.

        Columns whose standard deviation is exactly zero are returned as
        zeros.
        """
        return self.__scale(data, np.mean, np.std)

    def normalization(self, data: Union[np.ndarray, list]) -> np.ndarray:
        """
        Mean normalization: ``(x - mean) / (max - min)``.

        The values of the data are scaled into the interval [-1, 1] with a
        mean of zero.  Columns whose maximum equals their minimum are
        returned as zeros.
        """
        return self.__scale(data, np.mean, np.ptp)

    def minmax_scaling(self, data: Union[np.ndarray, list]) -> np.ndarray:
        """
        Min-max scaling: ``(x - min) / (max - min)``.

        The values of the data are scaled to the interval [0, 1].  Columns
        whose maximum equals their minimum are returned as zeros.
        """
        return self.__scale(data, np.min, np.ptp)
if __name__ == '__main__':
    # This code is only for manual testing and validation of the methods.
    data = [3, 2, np.nan, 4]
    data1 = [3, 4, 12, 20]
    data2 = np.array([2, 3, 1, 24, 5, 2])
    # The class is spelled ``Scaling``; the lowercase name raised NameError.
    scaler = Scaling()
    print(scaler.standerdization(data1))
    print(scaler.standerdization([[3, 4], [12, 20]]))
    print(scaler.minmax_scaling(data2))
    # `data` contains nan, so this last call reports the problem and exits.
    print(scaler.normalization(data))