forked from zhengchuanpan/GMAN
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathutils.py
90 lines (83 loc) · 3.22 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
import numpy as np
import pandas as pd
# log string
def log_string(log, string):
    """Write *string* as one line to *log*, flush it, then echo it to stdout.

    Args:
        log: Object exposing ``write`` and ``flush`` (e.g. an open text file).
        string: Message text; a newline is appended for the log entry.
    """
    entry = string + '\n'
    log.write(entry)
    # Flush right away so the entry reaches the underlying stream immediately.
    log.flush()
    print(string)
# metric
def metric(pred, label):
    """Compute masked MAE, RMSE and MAPE between *pred* and *label*.

    Entries where ``label`` equals 0 are excluded: they get weight 0, and the
    remaining weights are rescaled by the mean of the mask so that the plain
    mean over all entries equals the mean over the kept entries.

    Args:
        pred: Predicted values (array-like, broadcastable against ``label``).
        label: Ground-truth values.

    Returns:
        Tuple ``(mae, rmse, mape)``. NaN values produced by the masking
        (e.g. when every label is 0) are replaced by 0 before averaging.
    """
    # Divisions by zero are expected here and are neutralised by nan_to_num.
    with np.errstate(divide='ignore', invalid='ignore'):
        weights = np.not_equal(label, 0).astype(np.float32)
        weights /= np.mean(weights)

        abs_err = np.abs(np.subtract(pred, label)).astype(np.float32)
        sq_err = np.square(abs_err)
        rel_err = np.divide(abs_err, label)

        mae = np.mean(np.nan_to_num(abs_err * weights))
        rmse = np.sqrt(np.mean(np.nan_to_num(sq_err * weights)))
        mape = np.mean(np.nan_to_num(rel_err * weights))
    return mae, rmse, mape
def seq2instance(data, num_his, num_pred):
    """Cut a 2-D series into sliding (history, prediction) window pairs.

    Args:
        data: Array of shape ``(num_step, dims)``.
        num_his: Number of consecutive steps in each history window.
        num_pred: Number of consecutive steps in each prediction window,
            starting immediately after the history window.

    Returns:
        Tuple ``(x, y)`` with shapes ``(num_sample, num_his, dims)`` and
        ``(num_sample, num_pred, dims)``, where
        ``num_sample = num_step - num_his - num_pred + 1``. Both arrays are
        created with ``np.zeros`` and therefore use its default float dtype.
    """
    num_step, dims = data.shape
    num_sample = num_step - num_his - num_pred + 1
    x = np.zeros(shape=(num_sample, num_his, dims))
    y = np.zeros(shape=(num_sample, num_pred, dims))
    # Fill one window position at a time: position k of every sample is the
    # contiguous run of num_sample rows that starts k steps into the window.
    for k in range(num_his):
        x[:, k] = data[k:k + num_sample]
    for k in range(num_pred):
        first = num_his + k
        y[:, k] = data[first:first + num_sample]
    return x, y
def loadData(args):
    """Load the traffic series and build train/val/test model inputs.

    Args:
        args: Object providing the attributes read here: ``traffic_file``
            (HDF file read with ``pd.read_hdf``), ``SE_file`` (spatial
            embedding text file), ``train_ratio``, ``test_ratio``,
            ``num_his`` and ``num_pred``.

    Returns:
        Tuple ``(trainX, trainTE, trainY, valX, valTE, valY, testX, testTE,
        testY, SE, mean, std)``. The ``*X`` arrays are normalized with the
        mean/std of ``trainX``; the ``*Y`` arrays are left unnormalized. The
        ``*TE`` arrays are int32 with shape
        ``(num_sample, num_his + num_pred, 2)`` holding (day of week,
        time-of-day slot) per step.

    Raises:
        ValueError: If the sampling interval of the traffic index is neither
            set nor inferable.
    """
    # Traffic
    df = pd.read_hdf(args.traffic_file)
    Traffic = df.values
    # train/val/test
    num_step = df.shape[0]
    train_steps = round(args.train_ratio * num_step)
    test_steps = round(args.test_ratio * num_step)
    val_steps = num_step - train_steps - test_steps
    train, val, test = _split_steps(Traffic, train_steps, val_steps, test_steps)
    # X, Y
    trainX, trainY = seq2instance(train, args.num_his, args.num_pred)
    valX, valY = seq2instance(val, args.num_his, args.num_pred)
    testX, testY = seq2instance(test, args.num_his, args.num_pred)
    # normalization: statistics come from the training inputs only
    mean, std = np.mean(trainX), np.std(trainX)
    trainX = (trainX - mean) / std
    valX = (valX - mean) / std
    testX = (testX - mean) / std
    # spatial embedding
    SE = _read_spatial_embedding(args.SE_file)
    # temporal embedding
    Time = _time_features(df.index)
    # train/val/test
    train, val, test = _split_steps(Time, train_steps, val_steps, test_steps)
    # shape = (num_sample, num_his + num_pred, 2)
    trainTE = seq2instance(train, args.num_his, args.num_pred)
    trainTE = np.concatenate(trainTE, axis=1).astype(np.int32)
    valTE = seq2instance(val, args.num_his, args.num_pred)
    valTE = np.concatenate(valTE, axis=1).astype(np.int32)
    testTE = seq2instance(test, args.num_his, args.num_pred)
    testTE = np.concatenate(testTE, axis=1).astype(np.int32)
    return (trainX, trainTE, trainY, valX, valTE, valY, testX, testTE, testY,
            SE, mean, std)


def _split_steps(data, train_steps, val_steps, test_steps):
    """Slice *data* along axis 0 into leading train, middle val, trailing test."""
    train = data[:train_steps]
    val = data[train_steps:train_steps + val_steps]
    # NOTE(review): if test_steps is 0, `-test_steps:` selects the whole
    # array rather than nothing; kept unchanged to preserve existing results.
    test = data[-test_steps:]
    return train, val, test


def _read_spatial_embedding(path):
    """Parse the spatial-embedding text file at *path* into a float32 array.

    The first line holds ``num_vertex dims``; every following non-blank line
    holds a vertex index followed by that vertex's ``dims`` values.
    """
    # Context manager so the handle is closed even if parsing fails.
    with open(path, mode='r') as f:
        lines = f.readlines()
    header = lines[0].split()
    num_vertex, dims = int(header[0]), int(header[1])
    SE = np.zeros(shape=(num_vertex, dims), dtype=np.float32)
    for line in lines[1:]:
        # split() without arguments tolerates repeated blanks and the
        # trailing newline; blank lines are skipped instead of crashing.
        fields = line.split()
        if not fields:
            continue
        SE[int(fields[0])] = [float(value) for value in fields[1:]]
    return SE


def _step_seconds(index):
    """Return the sampling interval of the datetime *index* in seconds."""
    freq = index.freq
    if freq is None:
        # The index may carry no explicit frequency; try to infer it
        # from the timestamps before giving up.
        inferred = pd.infer_freq(index)
        if inferred is None:
            raise ValueError(
                'cannot determine the sampling interval of the traffic index')
        freq = pd.tseries.frequencies.to_offset(inferred)
    # pd.Timedelta(offset) replaces the deprecated ``offset.delta`` accessor.
    return pd.Timedelta(freq).total_seconds()


def _time_features(index):
    """Build an ``(num_step, 2)`` array of (day of week, time-of-day slot)."""
    dayofweek = np.asarray(index.weekday).reshape(-1, 1)
    seconds = index.hour * 3600 + index.minute * 60 + index.second
    timeofday = np.asarray(seconds // _step_seconds(index)).reshape(-1, 1)
    return np.concatenate((dayofweek, timeofday), axis=-1)