-
Notifications
You must be signed in to change notification settings - Fork 0
/
fileio.py
executable file
·76 lines (59 loc) · 2.34 KB
/
fileio.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
import numpy as np
import pandas as pd
from CustomLoader import CustomDataset
from torch.utils.data import Dataset, DataLoader
from src.augmentation import *
LABEL_DICT = {'PP': 0, 'PE':1}
def reshape_data(data_x, data_y):
data_x = data_x.reshape((data_x.shape[0], data_x.shape[1], 1))
data_y = data_y.reshape((data_y.shape[0]))
# Shuffle the array
idx = np.random.permutation(len(data_x))
data_x = data_x[idx]
data_y = data_y[idx]
# Make the labels not strings
data_y[data_y == -1] = 0
data_y = np.array([LABEL_DICT[zi] for zi in data_y])
return data_x, data_y
def load_data(path_x, path_y, csv=True):
if(csv):
data_x = pd.read_csv(path_x, header=0, dtype=float).to_numpy()
else:
data_x = pd.read_csv(path_x, header=0, dtype=float, sep='\t').to_numpy()
data_y = pd.read_csv(path_y, header=0).to_numpy()
return data_x, data_y
def reshape_test_data(test_x, test_y):
test_x = test_x.reshape((test_x.shape[0], test_x.shape[1], 1))
test_y = test_y.reshape((test_y.shape[0]))
test_y[test_y == -1] == 0
test_y = np.array([LABEL_DICT[zi] for zi in test_y])
test_x = np.asarray(test_x).astype('float32')
return test_x, test_y
def difference(dataset, interval=1):
diff = []
for i in range(interval, len(dataset)):
value = dataset[i] - dataset[i-interval]
diff.append(value)
return diff
def get_norder(dataset, order, diff_val):
differences = []
for i in range(dataset.shape[0]):
diff = dataset[i]
for i in range(order):
diff = difference(diff, interval=diff_val)
differences.append(diff)
differences = np.asarray(differences)
return differences
def get_dataloader(path_x, path_y, bs, csv=True):
data_x, data_y = load_data(path_x, path_y)
data_x = get_norder(data_x, 4, 1)
data_x, data_y = reshape_data(data_x, data_y)
shape = data_x.shape[1]
data = CustomDataset(data_x, data_y)
return DataLoader(dataset=data, batch_size=bs, shuffle=True), shape
def get_test_dataloader(path_x, path_y, bs, csv=True):
test_x, test_y = load_data(path_x, path_y, csv=csv)
test_x = get_norder(test_x, 4, 1)
test_x, test_y = reshape_test_data(test_x, test_y)
test = CustomDataset(test_x, test_y)
return DataLoader(dataset=test, batch_size=bs, shuffle=False)