From 75f171c3bb4510969d65389832683f1513e35e37 Mon Sep 17 00:00:00 2001 From: CommonClimate Date: Thu, 10 Oct 2024 15:24:33 -0700 Subject: [PATCH 1/2] series.correlation() also, replaced plot_legend keyword in EnsembleSeries.plot_traces() doc --- pyleoclim/core/ensembleseries.py | 2 +- pyleoclim/core/series.py | 21 +++++++++++---------- 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/pyleoclim/core/ensembleseries.py b/pyleoclim/core/ensembleseries.py index 6617c16b..2bba6560 100644 --- a/pyleoclim/core/ensembleseries.py +++ b/pyleoclim/core/ensembleseries.py @@ -728,7 +728,7 @@ def plot_traces(self, figsize=[10, 4], xlabel=None, ylabel=None, title=None, num Matplotlib axis on which to return the plot. The default is None. - plot_legend : bool; {True,False}, optional + legend : bool; {True,False}, optional Whether to plot the legend. The default is True. diff --git a/pyleoclim/core/series.py b/pyleoclim/core/series.py index 96371c8d..4ff78f77 100644 --- a/pyleoclim/core/series.py +++ b/pyleoclim/core/series.py @@ -3534,11 +3534,11 @@ def correlation(self, target_series, alpha=0.05, statistic='pearsonr', method = The significance of the correlation is assessed using one of the following methods: - 1) 'ttest': T-test adjusted for effective sample size, see [1] - 2) 'ar1sim': AR(1) modeling of x and y (Monte Carlo method) - 3) 'CN': colored noise (power-law spectrum) modeling of x and y (Monte Carlo method) - 3) 'phaseran': phase randomization of original inputs. (Monte Carlo method, default), see [2] - 4) 'built-in': uses built-in method + 1. 'ttest': T-test adjusted for effective sample size, see [1] + 2. 'ar1sim': AR(1) modeling of x and y (Monte Carlo method) + 3. 'CN': colored noise (power-law spectrum) modeling of x and y (Monte Carlo method) + 4. 'phaseran': phase randomization of original inputs. (Monte Carlo method, default), see [2] + 5. 'built-in': uses built-in method from scipy (function of the statistic used) Note: Up to version v0.14.0. 
ar1sim was called "isopersistent", phaseran was called "isospectral" @@ -3561,12 +3561,13 @@ def correlation(self, target_series, alpha=0.05, statistic='pearsonr', method = Currently supported: ['pearsonr','spearmanr','pointbiserialr','kendalltau','weightedtau'] Default: 'pearsonr'. - method : str, {'ttest','built-in','ar1sim','phaseran'} + method : str, {'ttest','built-in','ar1sim','phaseran','CN'} method for significance testing. Default is 'phaseran' - * 'ttest' implements the T-test with degrees of freedom adjusted for autocorrelation, as done in [1] - * 'built-in' uses the p-value that ships with the SciPy function. - * 'ar1sim' (formerly 'isopersistent') tests against an ensemble of AR(1) seires fitted to the originals - * 'phaseran' (formerly 'isospectral') tests against phase-randomized surrogates (aka the method of Ebisuzaki [2]) + - 'ttest' implements the T-test with degrees of freedom adjusted for autocorrelation, as done in [1] + - 'built-in' uses the p-value that ships with the SciPy function. + - 'ar1sim' (formerly 'isopersistent') tests against an ensemble of AR(1) series fitted to the originals + - 'CN' tests against an ensemble of colored noise series (power-law spectra) fitted to the originals + - 'phaseran' (formerly 'isospectral') tests against phase-randomized surrogates (aka the method of Ebisuzaki [2]) The old options 'isopersistent' and 'isospectral' still work, but trigger a deprecation warning. Note that 'weightedtau' does not have a known distribution, so the 'built-in' method returns an error in that case. 
From 8c372d8d8190eb8b98ac9391afb1aeb11c322f6e Mon Sep 17 00:00:00 2001 From: CommonClimate Date: Thu, 10 Oct 2024 16:05:05 -0700 Subject: [PATCH 2/2] fixed ms.correlation() docstring and made unit tests --- pyleoclim/core/multipleseries.py | 45 ++++++++++++++--- pyleoclim/tests/test_core_MultipleSeries.py | 54 +++++++++++++-------- 2 files changed, 72 insertions(+), 27 deletions(-) diff --git a/pyleoclim/core/multipleseries.py b/pyleoclim/core/multipleseries.py index baf237f0..ef38b78f 100644 --- a/pyleoclim/core/multipleseries.py +++ b/pyleoclim/core/multipleseries.py @@ -730,10 +730,27 @@ def common_time(self, method='interp', step = None, start = None, stop = None, s return ms - def correlation(self, target=None, timespan=None, alpha=0.05, settings=None, method='phaseran', number=1000, + def correlation(self, target=None, timespan=None, alpha=0.05, statistic='pearsonr', + method='phaseran', number=1000, settings=None, fdr_kwargs=None, common_time_kwargs=None, mute_pbar=False, seed=None): ''' Calculate the correlation between a MultipleSeries and a target Series + The significance of the correlation is assessed using one of the following methods: + + 1. 'ttest': T-test adjusted for effective sample size, see [1] + 2. 'ar1sim': AR(1) modeling of x and y (Monte Carlo method) + 3. 'CN': colored noise (power-law spectrum) modeling of x and y (Monte Carlo method) + 4. 'phaseran': phase randomization of original inputs. (Monte Carlo method, default), see [2] + 5. 'built-in': uses built-in method from scipy (function of the statistic used) + + Note: Up to version v0.14.0. ar1sim was called "isopersistent", phaseran was called "isospectral" + + The T-test is a parametric test, hence computationally cheap, but can only be performed in ideal circumstances. + The others are non-parametric, but their computational requirements scale with the number of simulations. 
+ + The choice of significance test and the associated number of Monte-Carlo simulations are passed through the `method` and `number` parameters, respectively. + + Parameters ---------- target : pyleoclim.Series, optional @@ -748,17 +765,29 @@ def correlation(self, target=None, timespan=None, alpha=0.05, settings=None, met The significance level (0.05 by default) + statistic : str + statistic being evaluated. Can use any of the SciPy-supported ones: + https://docs.scipy.org/doc/scipy/reference/stats.html#association-correlation-tests + Currently supported: ['pearsonr','spearmanr','pointbiserialr','kendalltau','weightedtau'] + Default: 'pearsonr'. + + method : str, {'ttest','built-in','ar1sim','phaseran','CN'} + method for significance testing. Default is 'phaseran' + - 'ttest' implements the T-test with degrees of freedom adjusted for autocorrelation, as done in [1] + - 'built-in' uses the p-value that ships with the SciPy function. + - 'ar1sim' (formerly 'isopersistent') tests against an ensemble of AR(1) series fitted to the originals + - 'CN' tests against an ensemble of colored noise series (power-law spectra) fitted to the originals + - 'phaseran' (formerly 'isospectral') tests against phase-randomized surrogates (aka the method of Ebisuzaki [2]) + The old options 'isopersistent' and 'isospectral' still work, but trigger a deprecation warning. + Note that 'weightedtau' does not have a known distribution, so the 'built-in' method returns an error in that case. 
+ settings : dict - + Parameters for the correlation function (per scipy) number : int - the number of simulations (default: 1000) - - method : str, {'ttest', 'ar1sim', 'phaseran' (default)} - - method for significance testing + the number of simulations (default: 1000) fdr_kwargs : dict @@ -840,7 +869,7 @@ def correlation(self, target=None, timespan=None, alpha=0.05, settings=None, met print("Looping over "+ str(len(self.series_list)) +" Series in collection") for idx, ts in tqdm(enumerate(self.series_list), total=len(self.series_list), disable=mute_pbar): corr_res = ts.correlation(target, timespan=timespan, alpha=alpha, settings=settings, - method=method, number=number, + method=method, number=number, statistic=statistic, common_time_kwargs=common_time_kwargs, seed=seed) r_list.append(corr_res.r) signif_list.append(corr_res.signif) diff --git a/pyleoclim/tests/test_core_MultipleSeries.py b/pyleoclim/tests/test_core_MultipleSeries.py index 298c6f61..4731bd86 100644 --- a/pyleoclim/tests/test_core_MultipleSeries.py +++ b/pyleoclim/tests/test_core_MultipleSeries.py @@ -45,7 +45,7 @@ def gen_colored_noise(alpha=1, nt=100, std=1.0, f0=None, m=None, seed=None): v = colored_noise(alpha=alpha, t=t, std=std, f0=f0, m=m, seed=seed) return t, v -def load_data(): +def load_ms(): soi = pyleo.utils.load_dataset('SOI') nino = pyleo.utils.load_dataset('NINO3') ms = soi & nino @@ -450,34 +450,56 @@ class TestMultipleSeriesStackPlot(): @pytest.mark.parametrize('labels', [None, 'auto', ['soi','nino']]) def test_StackPlot_t0(self, labels): - ms = load_data() + ms = load_ms() fig, ax = ms.stackplot(labels=labels) pyleo.closefig(fig) @pytest.mark.parametrize('plot_kwargs', [{'marker':'o'},[{'marker':'o'},{'marker':'^'}]]) def test_StackPlot_t1(self, plot_kwargs): - ms = load_data() + ms = load_ms() fig, ax = ms.stackplot(plot_kwargs=plot_kwargs) pyleo.closefig(fig) @pytest.mark.parametrize('ylims', ['spacious', 'auto']) def test_StackPlot_t2(self, ylims): - ms = load_data() + ms = 
load_ms() fig, ax = ms.stackplot(ylims=ylims) pyleo.closefig(fig) @pytest.mark.parametrize('yticks_minor', [True, False]) def test_StackPlot_t3(self, yticks_minor): - ms = load_data() + ms = load_ms() fig, ax = ms.stackplot(yticks_minor=yticks_minor) pyleo.closefig(fig) @pytest.mark.parametrize('xticks_minor', [True, False]) def test_StackPlot_t4(self, xticks_minor): - ms = load_data() + ms = load_ms() fig, ax = ms.stackplot(xticks_minor=xticks_minor) pyleo.closefig(fig) + +class TestMultipleSeriesCorrelation(): + ''' Test for MultipleSeries.correlation + ''' + @pytest.mark.parametrize('sig_method', ['ttest','built-in','ar1sim','phaseran','CN']) + @pytest.mark.parametrize('number', [2,5]) + def test_correlation_t0(self, sig_method, number): + ''' Test the various significance methods + ''' + ms = load_ms() + corr = ms.correlation(method=sig_method,number=number) + + @pytest.mark.parametrize('stat', ['linregress','pearsonr','spearmanr','pointbiserialr','kendalltau','weightedtau']) + def test_correlation_t1(self, stat, eps=0.2): + ''' Test the various statistics + ''' + ms = load_ms() + if stat == 'weightedtau': + corr = ms.correlation(statistic=stat) + else: + corr = ms.correlation(statistic=stat,method='built-in') + class TestMultipleSeriesSpectral(): ''' Test for MultipleSeries.spectral ''' @@ -486,7 +508,7 @@ def test_spectral_t0(self,spec_method): '''Test the spectral function with pre-generated scalogram objects ''' - ms = load_data() + ms = load_ms() if spec_method == 'cwt': ms = ms.interp() scals = ms.wavelet(method=spec_method) @@ -494,25 +516,19 @@ def test_spectral_t0(self,spec_method): class TestToCSV: def test_to_csv_default(self): - soi = pyleo.utils.load_dataset('SOI') - nino = pyleo.utils.load_dataset('NINO3') - ms = soi & nino + ms = load_ms() ms.to_csv() - os.unlink('MultipleSeries.csv') + os.unlink('MultipleSeries.csv') # clean up after yourself! 
def test_to_csv_label(self): - soi = pyleo.utils.load_dataset('SOI') - nino = pyleo.utils.load_dataset('NINO3') - ms = soi & nino + ms = load_ms() ms.label='enso series' ms.to_csv() - os.unlink('enso_series.csv') # this check that the file does exist + os.unlink('enso_series.csv') # clean up after yourself! def test_to_csv_label_path(self): - soi = pyleo.utils.load_dataset('SOI') - nino = pyleo.utils.load_dataset('NINO3') - ms = soi & nino + ms = load_ms() ms.label='enso wah wah' ms.to_csv(path='./enso.csv') - os.unlink('enso.csv') # this check that the file does exist + os.unlink('enso.csv') # clean up after yourself! class TestRemove: def test_remove(self):