-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathworking.py
371 lines (302 loc) · 16.1 KB
/
working.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
import datetime
import numpy as np
import pandas as pd
import xarray as xr
import matplotlib.pyplot as plt
#from statsmodels.tsa.seasonal import STL as stl
def _build_harm_matrix(
        ts_rads: np.ndarray,  # observation days-of-year expressed in radians
        ns: int,              # how many sine harmonics to generate (e.g., 2)
        nc: int               # how many cosine harmonics to generate (e.g., 2)
) -> np.ndarray:
    """
    Assemble the harmonic regression design matrix (Brooks et al. 2014, eq. 8).

    One row is produced per observation. The columns are grouped, not
    interleaved: [1, sin(1t), ..., sin(ns*t), cos(1t), ..., cos(nc*t)].

    :param ts_rads: Numpy Array. 1-D array of DOYs as radians.
    :param ns: Int. Number of sin harmonics (e.g., 2).
    :param nc: Int. Number of cos harmonics (e.g., 2).
    :return: Numpy Array. Design matrix of shape (len(ts_rads), 1 + ns + nc).
    """
    n_rows = len(ts_rads)

    # harmonic orders 1..ns / 1..nc as column vectors, so that multiplying by
    # the time vector yields one row per harmonic order
    sin_orders = (np.arange(ns) + 1).reshape(-1, 1)
    cos_orders = (np.arange(nc) + 1).reshape(-1, 1)

    # intercept column followed by the sin block and the cos block
    intercept = np.ones(n_rows, dtype=int)
    sin_cols = np.sin(sin_orders * ts_rads).T
    cos_cols = np.cos(cos_orders * ts_rads).T

    return np.column_stack([intercept, sin_cols, cos_cols])
def _run_lengths(values):
    """Return, for each element, the length of the run of equal consecutive values it belongs to."""
    n_vals = len(values)
    if n_vals == 0:
        return np.zeros(0, dtype=int)
    # a run starts at position 0 and wherever a value differs from its predecessor
    starts = np.concatenate(([0], np.flatnonzero(values[1:] != values[:-1]) + 1))
    lengths = np.diff(np.concatenate((starts, [n_vals])))
    return np.repeat(lengths, lengths)


def _apply_persistence(signal, persistence):
    """Keep chart values whose sign is sustained for at least `persistence` consecutive dates."""
    run_len = _run_lengths(np.sign(signal))
    filtered = np.zeros(len(signal), dtype=int)
    for i in range(len(signal)):
        if run_len[i] >= persistence:
            filtered[i] = signal[i]
            continue
        sustained = np.where(run_len[:i + 1] >= persistence)[0]
        if len(sustained) == 0:
            continue  # nothing sustained yet: stays 0 (no disturbance)
        # NOTE(review): `pick` is a position within `sustained`, not an index
        # into `signal`; it is used to index `signal` directly here, which is
        # kept as-is. Confirm against the reference implementation whether it
        # should be mapped back through `sustained[pick]`.
        pick = np.argmax(run_len[sustained])
        # Previously written as np.max(value, 0), where the 0 is taken as the
        # `axis` argument and no clamping happens; the value is clamped to a
        # minimum of 0 here. NOTE(review): confirm this is the intended rule.
        filtered[i] = max(signal[pick], 0)
    return filtered


def ewmacd_per_pixel(
        pix,
        ns,
        nc,
        history_bound,
        doys,
        years,
        training_start,
        training_end,
        testing_end,
        xbar_limit_1,
        xbar_limit_2,
        low_thresh,
        lam,
        lam_sigs,
        rounding,
        persistence
) -> tuple:
    """
    Run the EWMACD change detection chart on the time series of one pixel.

    A harmonic regression is fitted to the non-missing training observations
    (index <= history_bound), refitted after dropping training residuals larger
    than xbar_limit_1 standard deviations, and the residuals of all non-missing
    observations are then tracked with an EWMA control chart.

    :param pix: Numpy Array. 1-D float series for one pixel; NaN marks missing dates.
    :param ns: Int. Number of sin harmonics.
    :param nc: Int. Number of cos harmonics.
    :param history_bound: Int. Index (inclusive) of the last training observation in pix.
    :param doys: Numpy Array. Day of year for every element of pix.
    :param years: Numpy Array. Calendar year for every element of pix.
    :param training_start: Unused by the computation; kept for interface compatibility.
    :param training_end: Unused by the computation; kept for interface compatibility.
    :param testing_end: Unused by the computation; kept for interface compatibility.
    :param xbar_limit_1: Float. Residual limit (in SDs) applied to training observations.
    :param xbar_limit_2: Float. Residual limit (in SDs) applied to testing observations.
    :param low_thresh: Float. Observations with a value <= low_thresh are left out of the chart.
    :param lam: Float. EWMA weight given to the newest residual.
    :param lam_sigs: Float. Multiplier of the historical SD defining the control limit.
    :param rounding: Bool. Truthy: chart values are whole multiples of the control
        limit (rounded towards 0). Falsy: chart values are the EWMA rounded to 0 decimals.
    :param persistence: Int. Minimum number of consecutive same-sign chart values
        for a signal to be kept (applied only if > 1 and more than 3 chart values exist).
    :return: Tuple (dates, values, harmonic, chart) where dates is a list of
        datetime for the non-missing observations, values are the non-missing
        pixel values, harmonic is the fitted harmonic value at those dates (NaN
        if no model could be fitted) and chart is an integer array with one
        element per input date. chart is all -2222 when there was not enough
        data to fit the model or to estimate the historical SD.
    """
    n_obs = len(pix)
    n_coef = ns + nc + 1

    chart = np.repeat(-2222, n_obs)    # coded 'no data' output
    beta = np.repeat(np.nan, n_coef)   # coded 'no data' coefficients

    # positions of non-missing observations, overall and within the training period
    present_idx = np.where(~np.isnan(pix))[0]
    train_idx = present_idx[present_idx <= history_bound]
    n_train = len(train_idx)

    pix_present = pix[present_idx]
    pix_train = pix[train_idx]

    # day of year mapped onto [0, 2pi]
    rad_train = doys[train_idx] * 2 * np.pi / 365
    rad_present = doys[present_idx] * 2 * np.pi / 365

    # built unconditionally so the returned harmonic series is always defined,
    # even when there is no training data at all
    X_all = _build_harm_matrix(rad_present, ns, nc)

    # harmonic regression: needs more observations than coefficients
    if n_train > n_coef:
        X = _build_harm_matrix(rad_train, ns, nc)
        gram = np.dot(X.T, X)
        # skip (near-)singular systems
        if np.abs(np.linalg.det(gram)) >= 0.001:
            first_fit = np.linalg.solve(gram, np.dot(X.T, pix_train))

            # x-bar screening: drop extreme training residuals (e.g. clouds)
            resid_fit = pix_train - np.dot(X, first_fit)
            sd_fit = np.std(resid_fit, ddof=1)  # ddof=1 to match r
            keeps = np.where(~(np.abs(resid_fit) > (xbar_limit_1 * sd_fit)))[0]

            # refit without the screened observations
            if len(keeps) > n_coef:
                X_k, pix_k = X[keeps], pix_train[keeps]
                beta = np.linalg.solve(np.dot(X_k.T, X_k), np.dot(X_k.T, pix_k))

    harmonic = np.dot(X_all, beta)
    obs_dates = [
        datetime.datetime(int(yr), 1, 1) + datetime.timedelta(days=int(doy) - 1)
        for yr, doy in zip(years[present_idx], doys[present_idx])
    ]

    # no coefficients -> nothing to chart
    if np.isnan(beta[0]):
        return obs_dates, pix_present, harmonic, chart

    # residuals of all present data against the training model; the training
    # residuals are the first n_train of them because present_idx is ascending
    resid = pix_present - harmonic
    resid_train = resid[:n_train]
    hist_sd = np.std(resid_train, ddof=1)  # first estimate of the historical SD

    # the testing limit (xbar_limit_2) is much wider so that real changes in
    # the testing period are not filtered out
    limits = np.concatenate([
        np.repeat(xbar_limit_1, n_train),
        np.repeat(xbar_limit_2, len(resid) - n_train),
    ]) * hist_sd

    # keep dates above the low threshold whose residual is not anomalous
    keep = (pix_present > low_thresh) & (np.abs(resid) < limits)
    kept_idx = np.where(keep)[0]
    y = resid[keep]

    # updated training SD: this drives the EWMA control limits
    train_keep = keep[:n_train]
    hist_sd = np.std(resid_train[train_keep], ddof=1) if train_keep.sum() > 1 else np.nan
    if np.isnan(hist_sd):
        # too few usable training residuals: return the 'no data' chart
        return obs_dates, pix_present, harmonic, chart

    # ewma of the kept residuals, seeded with the first one
    ewma = np.empty(len(y), dtype=float)
    ewma[0] = y[0]
    for i in range(1, len(y)):
        ewma[i] = ewma[i - 1] * (1 - lam) + lam * y[i]

    # ewma upper control limit, widening with the number of observations
    steps = np.arange(1, len(y) + 1)
    ucl = hist_sd * lam_sigs * np.sqrt(lam / (2 - lam) * (1 - (1 - lam) ** (2 * steps)))

    if rounding:
        # whole multiples of the control limit, rounded towards 0; +/-1 is the
        # weakest disturbance signal
        signal = np.sign(ewma) * np.floor(np.abs(ewma / ucl))
    else:
        # ewma in residual units
        signal = np.round(ewma, 0)

    if persistence > 1 and len(signal) > 3:
        signal = _apply_persistence(signal, persistence)

    # write chart values back to their position in the full series
    target = present_idx[kept_idx]
    has_value = np.zeros(n_obs, dtype=bool)
    has_value[target] = True
    chart[target] = signal

    # a missing/filtered first date counts as 'no disturbance'
    if not has_value[0]:
        chart[0] = 0
        has_value[0] = True

    # every other missing/filtered date takes the last known chart value
    last_valid = np.maximum.accumulate(np.where(has_value, np.arange(n_obs), 0))
    chart = chart[last_valid]

    return obs_dates, pix_present, harmonic, chart
def ewmacd(
        ds,
        training_start=None,
        training_end=None,
        testing_end=None,
        number_harmonics=2,
        xbar_limit_1=1.5,
        xbar_limit_2=20,
        low_thresh=100,
        lam=0.3,
        lam_sigs=3,
        rounding=True,
        persistence=3,
        number_cpu=4,
        write_file=False,
        file_name=None
):
    """
    Run EWMACD on the 'ndvi' variable of an xarray dataset.

    FIXME: only a single pixel time series is supported for now; adapt to a
    whole netcdf.

    :param ds: xarray Dataset with a 'time' coordinate and an 'ndvi' variable
        holding one value per time step.
    :param training_start: Int. First year (inclusive) of the training period. Required.
    :param training_end: Int. Year at which the training period ends (exclusive). Required.
    :param testing_end: Int. Year at which the testing period ends (exclusive). Required.
    :param number_harmonics: Int. Number of sin and of cos harmonics.
    :param xbar_limit_1: Float. Residual limit (in SDs) for training observations.
    :param xbar_limit_2: Float. Residual limit (in SDs) for testing observations.
    :param low_thresh: Float. Values at or below this are left out of the chart.
    :param lam: Float. EWMA weight; close to 1 = only recent values influence
        the ewma, close to 0 = historical values influence it more.
    :param lam_sigs: Float. Constant factor on the control limit; low is a
        tighter range (a lot will be outliers), high is a more lenient range.
    :param rounding: Bool. Turning this off gives a more nuanced ewma line.
    :param persistence: Int. Minimum run of same-sign chart values to keep a signal.
    :param number_cpu: Currently unused.
    :param write_file: Currently unused.
    :param file_name: Currently unused.
    :return: Tuple (dates, ndvi, harmonic, chart) as returned by ewmacd_per_pixel.
    :raises ValueError: If a period argument is missing, if no observation
        falls inside the training period, or if 'ndvi' holds more than one
        value per time step.
    """
    # the None defaults cannot be compared against years; fail with a clear message
    periods = (
        ('training_start', training_start),
        ('training_end', training_end),
        ('testing_end', testing_end),
    )
    missing = [name for name, value in periods if value is None]
    if missing:
        raise ValueError('Missing required argument(s): ' + ', '.join(missing))

    ns = nc = number_harmonics

    # subset to [training_start, testing_end) via xr
    ds = ds.sel(time=ds['time'].dt.year >= training_start)
    ds = ds.sel(time=ds['time'].dt.year < testing_end)

    # extract arrays of doys and years in order of xr
    doys = ds['time'].dt.dayofyear.values
    years = ds['time'].dt.year.values

    # index of the last observation inside the training period
    training_idx = np.flatnonzero(years < training_end)
    if training_idx.size == 0:
        raise ValueError('No observations fall inside the training period.')
    history_bound = training_idx.max()

    # the per-pixel routine needs exactly one value per time step; singleton
    # extra dimensions are flattened, anything larger is rejected
    pix = np.asarray(ds['ndvi'].values)
    if pix.size != len(doys):
        raise ValueError(
            "'ndvi' must hold a single pixel time series; "
            'reduce or select the spatial dimensions first.'
        )
    pix = pix.reshape(-1)

    # call per-pixel ewmacd func
    return ewmacd_per_pixel(
        pix,
        ns,
        nc,
        history_bound,
        doys,
        years,
        training_start,
        training_end,
        testing_end,
        xbar_limit_1,
        xbar_limit_2,
        low_thresh,
        lam,
        lam_sigs,
        rounding,
        persistence,
    )
if __name__ == '__main__':
    print('derp')

    # Alternative input (EWMACD v1.3.0 sample data), kept for reference:
    #
    # # load raw dates csv
    # date_info = pd.read_csv('./EWMACD v1.3.0/Temporal Distribution with DOY.csv')
    #
    # # unpack into xr friendly datetime64
    # dates = []
    # for i, row in date_info.iterrows():
    #     y, m, d = str(row['Year']), str(row['Month']), str(row['Day'])
    #     dt = np.datetime64(f"{y.zfill(4)}-{m.zfill(2)}-{d.zfill(2)}")
    #     dates.append(dt)
    #
    # # open raw tif
    # ds = xr.open_dataset('./EWMACD v1.3.0/Sample Data - Angle Index x1000 Stack.tif')
    #
    # # clean up xr dataset
    # ds = ds.rename({'band': 'time', 'band_data': 'ndvi'})
    # ds['time'] = dates
    #
    # # set all negative values to nan
    # # TODO: only need this for non-neg indices
    # ds = ds.where(ds >= 0)
    #
    # # call ewmacd main func
    # ewmacd(ds=ds,
    #        training_start=2005,
    #        training_end=2008,
    #        testing_end=2012,
    #        number_cpu=1)

    # testing: open the netcdf, take a 100 x 100 spatial window and reduce it
    # to one median composite per resampled ('M') time step
    ds = (
        xr.open_dataset(r"C:\Users\Lewis\Desktop\efreo ndvi all at once\ndvi.nc")
        .isel(x=slice(100, 200), y=slice(100, 200))
        .resample(time='M')
        .median()
    )

    # FIXME: decimal values not working when rounding=False, need to * 10000
    ds = ds * 10000

    # spatial reducers, currently disabled (the whole window is passed on):
    # ds = ds.median(['x', 'y'])
    # ds = ds.isel(x=2, y=2)

    vec = ewmacd(
        ds=ds,
        training_start=2018,
        training_end=2021,
        testing_end=2024,
        number_harmonics=2,
        lam=0.3,
        lam_sigs=3,
        low_thresh=100,
        rounding=True,
        number_cpu=1,
    )