Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Doc fixes #617

Merged
merged 2 commits into from
Oct 10, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pyleoclim/core/ensembleseries.py
Original file line number Diff line number Diff line change
Expand Up @@ -728,7 +728,7 @@ def plot_traces(self, figsize=[10, 4], xlabel=None, ylabel=None, title=None, num

Matplotlib axis on which to return the plot. The default is None.

plot_legend : bool; {True,False}, optional
legend : bool; {True,False}, optional

Whether to plot the legend. The default is True.

Expand Down
45 changes: 37 additions & 8 deletions pyleoclim/core/multipleseries.py
Original file line number Diff line number Diff line change
Expand Up @@ -730,10 +730,27 @@ def common_time(self, method='interp', step = None, start = None, stop = None, s

return ms

def correlation(self, target=None, timespan=None, alpha=0.05, settings=None, method='phaseran', number=1000,
def correlation(self, target=None, timespan=None, alpha=0.05, statistic='pearsonr',
method='phaseran', number=1000, settings=None,
fdr_kwargs=None, common_time_kwargs=None, mute_pbar=False, seed=None):
''' Calculate the correlation between a MultipleSeries and a target Series

The significance of the correlation is assessed using one of the following methods:

1. 'ttest': T-test adjusted for effective sample size, see [1]
2. 'ar1sim': AR(1) modeling of x and y (Monte Carlo method)
3. 'CN': colored noise (power-law spectrum) modeling of x and y (Monte Carlo method)
4. 'phaseran': phase randomization of original inputs. (Monte Carlo method, default), see [2]
5. 'built-in': uses built-in method from scipy (function of the statistic used)

Note: Up to version v0.14.0. ar1sim was called "isopersistent", phaseran was called "isospectral"

The T-test is a parametric test, hence computationally cheap, but can only be performed in ideal circumstances.
The others are non-parametric, but their computational requirements scale with the number of simulations.

The choise of significance test and associated number of Monte-Carlo simulations are passed through the `settings` parameter.


Parameters
----------
target : pyleoclim.Series, optional
Expand All @@ -748,17 +765,29 @@ def correlation(self, target=None, timespan=None, alpha=0.05, settings=None, met

The significance level (0.05 by default)

statistic : str
statistic being evaluated. Can use any of the SciPy-supported ones:
https://docs.scipy.org/doc/scipy/reference/stats.html#association-correlation-tests
Currently supported: ['pearsonr','spearmanr','pointbiserialr','kendalltau','weightedtau']
Default: 'pearsonr'.

method : str, {'ttest','built-in','ar1sim','phaseran','CN'}
method for significance testing. Default is 'phaseran'
- 'ttest' implements the T-test with degrees of freedom adjusted for autocorrelation, as done in [1]
- 'built-in' uses the p-value that ships with the SciPy function.
- 'ar1sim' (formerly 'isopersistent') tests against an ensemble of AR(1) series fitted to the originals
- 'CN' tests against an ensemble of colored noise series (power-law spectra) fitted to the originals
- 'phaseran' (formerly 'isospectral') tests against phase-randomized surrogates (aka the method of Ebisuzaki [2])
The old options 'isopersistent' and 'isospectral' still work, but trigger a deprecation warning.
Note that 'weightedtau' does not have a known distribution, so the 'built-in' method returns an error in that case.

settings : dict

Parameters for the correlation function (per scipy)

number : int

the number of simulations (default: 1000)

method : str, {'ttest', 'ar1sim', 'phaseran' (default)}

method for significance testing
the number of simulations (default: 1000)

fdr_kwargs : dict

Expand Down Expand Up @@ -840,7 +869,7 @@ def correlation(self, target=None, timespan=None, alpha=0.05, settings=None, met
print("Looping over "+ str(len(self.series_list)) +" Series in collection")
for idx, ts in tqdm(enumerate(self.series_list), total=len(self.series_list), disable=mute_pbar):
corr_res = ts.correlation(target, timespan=timespan, alpha=alpha, settings=settings,
method=method, number=number,
method=method, number=number, statistic=statistic,
common_time_kwargs=common_time_kwargs, seed=seed)
r_list.append(corr_res.r)
signif_list.append(corr_res.signif)
Expand Down
21 changes: 11 additions & 10 deletions pyleoclim/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -3534,11 +3534,11 @@ def correlation(self, target_series, alpha=0.05, statistic='pearsonr', method =

The significance of the correlation is assessed using one of the following methods:

1) 'ttest': T-test adjusted for effective sample size, see [1]
2) 'ar1sim': AR(1) modeling of x and y (Monte Carlo method)
3) 'CN': colored noise (power-law spectrum) modeling of x and y (Monte Carlo method)
3) 'phaseran': phase randomization of original inputs. (Monte Carlo method, default), see [2]
4) 'built-in': uses built-in method
1. 'ttest': T-test adjusted for effective sample size, see [1]
2. 'ar1sim': AR(1) modeling of x and y (Monte Carlo method)
3. 'CN': colored noise (power-law spectrum) modeling of x and y (Monte Carlo method)
4. 'phaseran': phase randomization of original inputs. (Monte Carlo method, default), see [2]
5. 'built-in': uses built-in method from scipy (function of the statistic used)

Note: Up to version v0.14.0. ar1sim was called "isopersistent", phaseran was called "isospectral"

Expand All @@ -3561,12 +3561,13 @@ def correlation(self, target_series, alpha=0.05, statistic='pearsonr', method =
Currently supported: ['pearsonr','spearmanr','pointbiserialr','kendalltau','weightedtau']
Default: 'pearsonr'.

method : str, {'ttest','built-in','ar1sim','phaseran'}
method : str, {'ttest','built-in','ar1sim','phaseran','CN'}
method for significance testing. Default is 'phaseran'
* 'ttest' implements the T-test with degrees of freedom adjusted for autocorrelation, as done in [1]
* 'built-in' uses the p-value that ships with the SciPy function.
* 'ar1sim' (formerly 'isopersistent') tests against an ensemble of AR(1) seires fitted to the originals
* 'phaseran' (formerly 'isospectral') tests against phase-randomized surrogates (aka the method of Ebisuzaki [2])
- 'ttest' implements the T-test with degrees of freedom adjusted for autocorrelation, as done in [1]
- 'built-in' uses the p-value that ships with the SciPy function.
- 'ar1sim' (formerly 'isopersistent') tests against an ensemble of AR(1) series fitted to the originals
- 'CN' tests against an ensemble of colored noise series (power-law spectra) fitted to the originals
- 'phaseran' (formerly 'isospectral') tests against phase-randomized surrogates (aka the method of Ebisuzaki [2])
The old options 'isopersistent' and 'isospectral' still work, but trigger a deprecation warning.
Note that 'weightedtau' does not have a known distribution, so the 'built-in' method returns an error in that case.

Expand Down
54 changes: 35 additions & 19 deletions pyleoclim/tests/test_core_MultipleSeries.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ def gen_colored_noise(alpha=1, nt=100, std=1.0, f0=None, m=None, seed=None):
v = colored_noise(alpha=alpha, t=t, std=std, f0=f0, m=m, seed=seed)
return t, v

def load_data():
def load_ms():
soi = pyleo.utils.load_dataset('SOI')
nino = pyleo.utils.load_dataset('NINO3')
ms = soi & nino
Expand Down Expand Up @@ -450,34 +450,56 @@ class TestMultipleSeriesStackPlot():
@pytest.mark.parametrize('labels', [None, 'auto', ['soi','nino']])
def test_StackPlot_t0(self, labels):

ms = load_data()
ms = load_ms()
fig, ax = ms.stackplot(labels=labels)
pyleo.closefig(fig)

@pytest.mark.parametrize('plot_kwargs', [{'marker':'o'},[{'marker':'o'},{'marker':'^'}]])
def test_StackPlot_t1(self, plot_kwargs):
ms = load_data()
ms = load_ms()
fig, ax = ms.stackplot(plot_kwargs=plot_kwargs)
pyleo.closefig(fig)

@pytest.mark.parametrize('ylims', ['spacious', 'auto'])
def test_StackPlot_t2(self, ylims):
ms = load_data()
ms = load_ms()
fig, ax = ms.stackplot(ylims=ylims)
pyleo.closefig(fig)

@pytest.mark.parametrize('yticks_minor', [True, False])
def test_StackPlot_t3(self, yticks_minor):
ms = load_data()
ms = load_ms()
fig, ax = ms.stackplot(yticks_minor=yticks_minor)
pyleo.closefig(fig)

@pytest.mark.parametrize('xticks_minor', [True, False])
def test_StackPlot_t4(self, xticks_minor):
ms = load_data()
ms = load_ms()
fig, ax = ms.stackplot(xticks_minor=xticks_minor)
pyleo.closefig(fig)


class TestMultipleSeriesCorrelation():
''' Test for MultipleSeries.spectral
'''
@pytest.mark.parametrize('sig_method', ['ttest','built-in','ar1sim','phaseran','CN'])
@pytest.mark.parametrize('number', [2,5])
def test_correlation_t0(self, sig_method, number):
''' Test the various significance methods
'''
ms = load_ms()
corr = ms.correlation(method=sig_method,number=number)

@pytest.mark.parametrize('stat', ['linregress','pearsonr','spearmanr','pointbiserialr','kendalltau','weightedtau'])
def test_correlation_t1(self, stat, eps=0.2):
''' Test the various statistics
'''
ms = load_ms()
if stat == 'weightedtau':
corr = ms.correlation(statistic=stat)
else:
corr = ms.correlation(statistic=stat,method='built-in')

class TestMultipleSeriesSpectral():
''' Test for MultipleSeries.spectral
'''
Expand All @@ -486,33 +508,27 @@ def test_spectral_t0(self,spec_method):
'''Test the spectral function with pre-generated scalogram objects
'''

ms = load_data()
ms = load_ms()
if spec_method == 'cwt':
ms = ms.interp()
scals = ms.wavelet(method=spec_method)
ms.spectral(method=spec_method,scalogram_list=scals)

class TestToCSV:
def test_to_csv_default(self):
soi = pyleo.utils.load_dataset('SOI')
nino = pyleo.utils.load_dataset('NINO3')
ms = soi & nino
ms = load_ms()
ms.to_csv()
os.unlink('MultipleSeries.csv')
os.unlink('MultipleSeries.csv') # clean up after yourself!
def test_to_csv_label(self):
soi = pyleo.utils.load_dataset('SOI')
nino = pyleo.utils.load_dataset('NINO3')
ms = soi & nino
ms = load_ms()
ms.label='enso series'
ms.to_csv()
os.unlink('enso_series.csv') # this check that the file does exist
os.unlink('enso_series.csv') # clean up after yourself!
def test_to_csv_label_path(self):
soi = pyleo.utils.load_dataset('SOI')
nino = pyleo.utils.load_dataset('NINO3')
ms = soi & nino
ms = load_ms()
ms.label='enso wah wah'
ms.to_csv(path='./enso.csv')
os.unlink('enso.csv') # this check that the file does exist
os.unlink('enso.csv') # clean up after yourself!

class TestRemove:
def test_remove(self):
Expand Down
Loading