Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Automatic threshold for peaks-over-threshold #268

Merged
merged 16 commits into from
Oct 30, 2023
Merged
Show file tree
Hide file tree
Changes from 13 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions examples/data/loads/data_loads_hs.csv

Large diffs are not rendered by default.

124 changes: 124 additions & 0 deletions mhkit/loads/extreme.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import pandas as pd
from scipy import stats
from scipy import optimize
from scipy import signal
from mhkit.wave.resource import frequency_moment


Expand Down Expand Up @@ -157,6 +158,129 @@ def peaks_distribution_weibull_tail_fit(x):
return peaks


def automatic_hs_threshold(
peaks,
sampling_rate,
initial_threshold_range = (0.990, 0.995, 0.001),
max_refinement=5
):
"""
Find the best significant wave height threshold for the
peaks-over-threshold method.

This method was developed by:

> Neary, V. S., S. Ahn, B. E. Seng, M. N. Allahdadi, T. Wang, Z. Yang and R. He (2020).
> "Characterization of Extreme Wave Conditions for Wave Energy Converter Design and Project Risk Assessment.”
> J. Mar. Sci. Eng. 2020, 8(4), 289; https://doi.org/10.3390/jmse8040289.

please cite this paper if using this method.

After all thresholds in the initial range are evaluated, the search
range is refined around the optimal point until either (i) there
is minimal change from the previous refinement results, (ii) the
number of data points become smaller than about 1 per year, or (iii)
the maximum number of iterations is reached.

Parameters
----------
peaks: np.array
Peak values of the response time-series
sampling_rate: float
Sampling rate in hours.
initial_threshold_range: tuple
Initial range of thresholds to search. Described as
(min, max, step).
max_refinement: int
Maximum number of times to refine the search range.

Returns
-------
best_threshold: float
Threshold that results in the best correlation.
"""
assert isinstance(sampling_rate, (float, int)), (
'sampling_rate must be of type float')
assert isinstance(peaks, np.ndarray), 'peaks must be of type np.ndarray'
assert len(initial_threshold_range) == 3, (
'initial_threshold_range must be length 3')
assert isinstance(max_refinement, int)
cmichelenstrofer marked this conversation as resolved.
Show resolved Hide resolved

range_min, range_max, range_step = initial_threshold_range
best_threshold = -1
years = len(peaks)/(365.25*24/sampling_rate)

def _peaks_over_threshold(peaks, threshold, sampling_rate):
threshold_unit = np.percentile(peaks, 100*threshold, method='hazen')
idx_peaks = np.arange(len(peaks))
idx_storm_peaks, storm_peaks = global_peaks(
idx_peaks, peaks-threshold_unit)
idx_storm_peaks = idx_storm_peaks.astype(int)

# Two storms that are close enough (within specified window) are
# considered the same storm, to ensure independence.
independent_storm_peaks = [storm_peaks[0],]
idx_independent_storm_peaks = [idx_storm_peaks[0],]
# check first 14 days to determine window size
nlags = int(14 * 24 / sampling_rate)
x = peaks - np.mean(peaks)
acf = signal.correlate(x, x, mode="full")
lag = signal.correlation_lags(len(x), len(x), mode="full")
idx_zero = np.argmax(lag==0)
positive_lag = lag[(idx_zero):(idx_zero+nlags+1)]
acf_positive = acf[(idx_zero):(idx_zero+nlags+1)] / acf[idx_zero]

window_size = sampling_rate * positive_lag[acf_positive<0.5][0]
# window size in "observations" instead of "hours" between peaks.
window = window_size / sampling_rate
# keep only independent storm peaks
for idx in idx_storm_peaks[1:]:
if (idx - idx_independent_storm_peaks[-1]) > window:
idx_independent_storm_peaks.append(idx)
independent_storm_peaks.append(peaks[idx]-threshold_unit)
elif peaks[idx] > independent_storm_peaks[-1]:
idx_independent_storm_peaks[-1] = idx
independent_storm_peaks[-1] = peaks[idx]-threshold_unit

return independent_storm_peaks

for i in range(max_refinement):
thresholds = np.arange(range_min, range_max, range_step)
correlations = []

for threshold in thresholds:
distribution = stats.genpareto
over_threshold = _peaks_over_threshold(
peaks, threshold, sampling_rate)
rate_per_year = len(over_threshold) / years
if rate_per_year < 2:
break
distributions_parameters = distribution.fit(
over_threshold, floc=0.)
_, (_, _, correlation) = stats.probplot(
peaks, distributions_parameters, distribution, fit=True)
correlations.append(correlation)

max_i = np.argmax(correlations)
minimal_change = np.abs(best_threshold - thresholds[max_i]) < 0.0005
best_threshold = thresholds[max_i]
if minimal_change and i<max_refinement-1:
break
range_step /= 10
if max_i == len(thresholds)-1:
range_min = thresholds[max_i - 1]
range_max = thresholds[max_i] + 5*range_step
elif max_i == 0:
range_min = thresholds[max_i] - 9*range_step
range_max = thresholds[max_i + 1]
else:
range_min = thresholds[max_i-1]
range_max = thresholds[max_i+1]

best_threshold_unit = np.percentile(peaks, 100*best_threshold, method='hazen')
return best_threshold, best_threshold_unit


def peaks_distribution_peaks_over_threshold(x, threshold=None):
"""
Estimate the peaks distribution using the peaks over threshold
Expand Down
7 changes: 7 additions & 0 deletions mhkit/tests/loads/test_loads.py
Original file line number Diff line number Diff line change
Expand Up @@ -218,6 +218,13 @@ def test_shortterm_extreme(self):
ste = loads.extreme.ste(t, data, t_st, method)
assert_allclose(ste.cdf(x), cdf_1)

def test_automatic_threshold(self):
filename = "data_loads_hs.csv"
data = np.loadtxt(os.path.join(datadir, filename), delimiter=",")
years = 2.97
pct, threshold = loads.extreme.automatic_hs_threshold(data, years)
assert np.isclose(pct, 0.9913)
assert np.isclose(threshold, 1.032)
akeeste marked this conversation as resolved.
Show resolved Hide resolved

if __name__ == '__main__':
unittest.main()
Loading